/*
    SPDX-FileCopyrightText: 2005 Ingo Kloecker
    SPDX-FileCopyrightText: 2007 Allen Winter

    SPDX-License-Identifier: LGPL-2.0-only
*/

#include "ktexttohtmltest.h"
#include "kcoreaddons_debug.h"

#include "../src/lib/text/ktexttohtml.h"
#include "../src/lib/text/ktexttohtml_p.h"

// NOTE(review): the next three directives carry no header name in the reviewed
// text; they are kept as found and must be restored before building.
#include
#include
#include

QTEST_MAIN(KTextToHTMLTest)

Q_DECLARE_METATYPE(KTextToHTML::Options)

#ifndef Q_OS_WIN
// Forces LC_ALL to en_US.utf-8; registered through Q_CONSTRUCTOR_FUNCTION below.
void initLocale()
{
    setenv("LC_ALL", "en_US.utf-8", 1);
}
Q_CONSTRUCTOR_FUNCTION(initLocale)
#endif

namespace
{
// Builds the failure text used by the extraction checks below, so that a failing
// run shows the scanned input, what the helper returned and what was expected.
QByteArray mismatchMessage(const QString &input, const QString &got, const QString &expected)
{
    const QString text = QLatin1String("input: \"") + input + QLatin1String("\", got: \"") + got + QLatin1String("\", expected: \"") + expected
        + QLatin1Char('"');
    return text.toLocal8Bit();
}

// True when the URL fragment contains a newline, a tab or a space.
bool containsUrlWhitespace(const QString &url)
{
    return url.contains(QLatin1Char('\n')) || url.contains(QLatin1Char('\t')) || url.contains(QLatin1Char(' '));
}

// Returns a copy of the URL fragment with every space, newline and tab removed.
QString withoutUrlWhitespace(QString url)
{
    url.remove(QLatin1Char(' '));
    url.remove(QLatin1Char('\n'));
    url.remove(QLatin1Char('\t'));
    return url;
}
} // namespace

// Checks KTextToHTMLHelper::getEmailAddress() against a table of inputs.
// Each entry gives the text, the scan position handed to the helper and the
// address expected back; an empty expectation means "no address".
void KTextToHTMLTest::testGetEmailAddress()
{
    struct EmailCase {
        QString text;
        int pos;
        QString expected;
    };

    const QString simpleAddress = QStringLiteral("foo@bar.baz");

    const EmailCase cases[] = {
        // empty input
        {QString(), 0, QString()},

        // no '@' at scan position
        {QStringLiteral("foo@bar.baz"), 0, QString()},

        // '@' in local part
        {QStringLiteral("foo@bar@bar.baz"), 7, QString()},

        // empty local part
        {QStringLiteral("@bar.baz"), 0, QString()},
        {QStringLiteral(".@bar.baz"), 1, QString()},
        {QStringLiteral(" @bar.baz"), 1, QString()},
        {QStringLiteral(".!#$%&'*+-/=?^_`{|}~@bar.baz"), static_cast<int>(qstrlen(".!#$%&'*+-/=?^_`{|}~")), QString()},

        // allowed special chars in local part of address
        {QStringLiteral("a.!#$%&'*+-/=?^_`{|}~@bar.baz"),
         static_cast<int>(qstrlen("a.!#$%&'*+-/=?^_`{|}~")),
         QStringLiteral("a.!#$%&'*+-/=?^_`{|}~@bar.baz")},

        // '@' in domain part
        {QStringLiteral("foo@bar@bar.baz"), 3, QString()},

        // domain part without dot
        {QStringLiteral("foo@bar"), 3, QString()},
        {QStringLiteral("foo@bar."), 3, QString()},
        {QStringLiteral(".foo@bar"), 4, QString()},
        {QStringLiteral("foo@bar "), 3, QString()},
        {QStringLiteral(" foo@bar"), 4, QString()},
        {QStringLiteral("foo@bar-bar"), 3, QString()},

        // empty domain part
        {QStringLiteral("foo@"), 3, QString()},
        {QStringLiteral("foo@."), 3, QString()},
        {QStringLiteral("foo@-"), 3, QString()},

        // simple address
        {QStringLiteral("foo@bar.baz"), 3, simpleAddress},
        {QStringLiteral("foo@bar.baz."), 3, simpleAddress},
        {QStringLiteral(".foo@bar.baz"), 4, simpleAddress},
        {QStringLiteral("foo@bar.baz-"), 3, simpleAddress},
        {QStringLiteral("-foo@bar.baz"), 4, simpleAddress},
        {QStringLiteral("foo@bar.baz "), 3, simpleAddress},
        {QStringLiteral(" foo@bar.baz"), 4, simpleAddress},
        {QStringLiteral("foo@bar-bar.baz"), 3, QStringLiteral("foo@bar-bar.baz")},
    };

    for (const EmailCase &testCase : cases) {
        KTextToHTMLHelper helper(testCase.text, testCase.pos);
        const QString gotAddress = helper.getEmailAddress();
        // An empty expectation compares equal to any empty result, which matches
        // the isEmpty() checks this table replaces.
        QVERIFY2(gotAddress == testCase.expected, mismatchMessage(testCase.text, gotAddress, testCase.expected).constData());
    }
}

// Runs testGetUrl2() once for every supported pair of enclosing brackets.
void KTextToHTMLTest::testGetUrl()
{
    struct BracketPair {
        QString left;
        QString right;
    };

    const BracketPair bracketPairs[] = {
        {QString(), QString()}, // no brackets
        {QStringLiteral("<"), QStringLiteral(">")},
        {QStringLiteral("["), QStringLiteral("]")},
        {QStringLiteral("\""), QStringLiteral("\"")},
        // NOTE(review): both strings are empty in the reviewed text, so this pair
        // exercises the same path as the "no brackets" entry - confirm intent.
        {QStringLiteral(""), QStringLiteral("")},
    };

    for (const BracketPair &pair : bracketPairs) {
        testGetUrl2(pair.left, pair.right);
    }
}

// Checks KTextToHTMLHelper::getUrl() for URLs wrapped in the given brackets.
//
// left/right are put around every candidate URL and the helper starts scanning
// right after 'left'. Covers URLs with an explicit schema, URLs recognised by
// their "www"/"ftp" style start, the maximum URL length and mailto: addresses.
// Every failure message names the input, the extracted URL and the expectation.
void KTextToHTMLTest::testGetUrl2(const QString &left, const QString &right)
{
    // Decides whether a candidate does not apply to the current bracket pair.
    const auto isNotApplicable = [&left, &right](const QString &candidate) {
        // by definition: if the URL is enclosed in brackets, the URL itself is not allowed
        // to contain the closing bracket, as this would be detected as the end of the URL
        if ((left.length() == 1) && candidate.contains(right[0])) {
            return true;
        }
        // if the url contains a whitespace, it must be enclosed with brackets
        return left.isEmpty() && containsUrlWhitespace(candidate);
    };

    const QStringList schemas = {
        QStringLiteral("http://"),
        QStringLiteral("https://"),
        QStringLiteral("vnc://"),
        QStringLiteral("fish://"),
        QStringLiteral("ftp://"),
        QStringLiteral("ftps://"),
        QStringLiteral("sftp://"),
        QStringLiteral("smb://"),
        QStringLiteral("file://"),
        QStringLiteral("irc://"),
        QStringLiteral("ircs://"),
    };

    const QStringList urls = {
        QStringLiteral("www.kde.org"),
        QStringLiteral("user@www.kde.org"),
        QStringLiteral("user:pass@www.kde.org"),
        QStringLiteral("user:pass@www.kde.org:1234"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path?a=1"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path?a=1#anchor"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/\npath \n /long/ path \t ?a=1#anchor"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path/special(123)?a=1#anchor"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla"),
        QStringLiteral("user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]"),
        QStringLiteral("user:pass@www.kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]"),
        QString(QStringLiteral("user:pass@www.kde.org:1234/ \n sub/path:with:colon/ \n\t \t special(123)?") + QStringLiteral("\n\t \n\t a=1#anchor[bla]")),
        QStringLiteral("en.wikipedia.org/wiki/%C3%98_(disambiguation)"),
    };

    for (const QString &schema : schemas) {
        for (const QString &url : urls) {
            if (isNotApplicable(url)) {
                continue;
            }

            const QString test = left + schema + url + right;
            KTextToHTMLHelper ll(test, left.length());
            const QString gotUrl = ll.getUrl();

            // we want to have the url without whitespace
            const QString expectedUrl = schema + withoutUrlWhitespace(url);
            QVERIFY2(gotUrl == expectedUrl, mismatchMessage(test, gotUrl, expectedUrl).constData());
        }
    }

    const QStringList urlsWithoutSchema = {
        QStringLiteral(".kde.org"),
        QStringLiteral(".kde.org:1234/sub/path"),
        QStringLiteral(".kde.org:1234/sub/path?a=1"),
        QStringLiteral(".kde.org:1234/sub/path?a=1#anchor"),
        QStringLiteral(".kde.org:1234/sub/path/special(123)?a=1#anchor"),
        QStringLiteral(".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor"),
        QStringLiteral(".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla"),
        QStringLiteral(".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]"),
        QStringLiteral(".kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]"),
        QString(QStringLiteral(".kde.org:1234/ \n sub/path:with:colon/ \n\t \t special(123)?") + QStringLiteral("\n\t \n\t a=1#anchor[bla]")),
    };

    const QStringList starts = {
        QStringLiteral("www"),
        QStringLiteral("ftp"),
        QStringLiteral("news:www"),
    };

    for (const QString &start : starts) {
        for (const QString &url : urlsWithoutSchema) {
            if (isNotApplicable(url)) {
                continue;
            }

            const QString test = left + start + url + right;
            KTextToHTMLHelper ll(test, left.length());
            const QString gotUrl = ll.getUrl();

            // we want to have the url without whitespace
            const QString expectedUrl = start + withoutUrlWhitespace(url);
            QVERIFY2(gotUrl == expectedUrl, mismatchMessage(test, gotUrl, expectedUrl).constData());
        }
    }

    // test max url length
    const QString url = QStringLiteral("https://www.kde.org/this/is/a_very_loooooong_url/test/test/test");
    {
        KTextToHTMLHelper ll(url, 0, 10);
        QVERIFY(ll.getUrl().isEmpty()); // url too long
    }
    {
        KTextToHTMLHelper ll(url, 0, url.length() - 1);
        QVERIFY(ll.getUrl().isEmpty()); // url too long
    }
    {
        KTextToHTMLHelper ll(url, 0, url.length());
        QCOMPARE(ll.getUrl(), url);
    }
    {
        KTextToHTMLHelper ll(url, 0, url.length() + 1);
        QCOMPARE(ll.getUrl(), url);
    }

    // mailto
    {
        const QString addr = QStringLiteral("mailto:test@kde.org");
        const QString test = left + addr + right;
        KTextToHTMLHelper ll(test, left.length());
        const QString gotUrl = ll.getUrl();
        QVERIFY2(gotUrl == addr, mismatchMessage(test, gotUrl, addr).constData());
    }
}

void KTextToHTMLTest::testHtmlConvert_data()
{
    QTest::addColumn("plainText");
    QTest::addColumn("flags");
    QTest::addColumn("htmlText");

    // Linker error when using PreserveSpaces, therefore the hardcoded 0x01 or 0x09

    // Test preserving whitespace correctly
    QTest::newRow("") << " foo" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << " foo";
    QTest::newRow("") << " foo" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "  foo";
    QTest::newRow("") << " foo " <<
KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "  foo  "; QTest::newRow("") << " foo " << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "  foo "; QTest::newRow("") << "bla bla bla bla bla" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "bla bla bla bla bla"; QTest::newRow("") << "bla bla bla \n bla bla bla " << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "bla bla bla 
\n  bla bla bla "; QTest::newRow("") << "bla bla bla" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "bla bla  bla"; QTest::newRow("") << " bla bla \n bla bla a\n bla bla " << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << " bla bla 
\n bla bla a
\n" "  bla bla "; // Test highlighting with *, / and _ QTest::newRow("") << "Ce paragraphe _contient_ des mots ou des _groupes de mots_ à mettre en" " forme…" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "Ce paragraphe _contient_ des mots ou des" " _groupes de mots_ à mettre en forme…"; QTest::newRow("punctation-bug") << "Ce texte *a l'air* de _fonctionner_, à condition" " d’utiliser le guillemet ASCII." << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "Ce texte *a l'air* de _fonctionner_, à" " condition d’utiliser le guillemet ASCII."; QTest::newRow("punctation-bug") << "Un répertoire /est/ un *dossier* où on peut mettre des" " *fichiers*." << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "Un répertoire /est/ un" " *dossier* où on peut mettre des *fichiers*."; QTest::newRow("punctation-bug") << "*BLA BLA BLA BLA*." << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "BLA BLA BLA BLA."; QTest::newRow("") << "Je vais tenter de repérer des faux positif*" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "Je vais tenter de repérer des faux positif*"; QTest::newRow("") << "*Ouais !* *Yes!*" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "*Ouais !* *Yes!*"; QTest::newRow("multispace") << "*Ouais foo*" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "*Ouais foo*"; QTest::newRow("multispace3") << "*Ouais: foo*" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "*Ouais: foo*"; QTest::newRow("multi-") << "** Ouais: foo **" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "** Ouais:  foo **"; QTest::newRow("multi-") << "*** Ouais: foo ***" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "*** Ouais:  foo ***"; 
QTest::newRow("nohtmlversion") << "* Ouais: foo *" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "* Ouais:     foo *"; QTest::newRow("nohtmlversion2") << "*Ouais: foo *" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "*Ouais:     foo *"; QTest::newRow("nohtmlversion3") << "* Ouais: foo*" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "* Ouais:     foo*"; QTest::newRow("nohtmlversion3") << "* Ouais: *ff sfsdf* foo *" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "* Ouais: *ff sfsdf* foo *"; QTest::newRow("") << "the /etc/{rsyslog.d,syslog-ng.d}/package.rpmnew file" << KTextToHTML::Options(KTextToHTML::PreserveSpaces | KTextToHTML::HighlightText) << "the /etc/{rsyslog.d,syslog-ng.d}/package.rpmnew file"; // This test has problems with the encoding, apparently. // QTest::newRow( "" ) << "*Ça fait plaisir de pouvoir utiliser des lettres accentuées dans du" // " texte mis en forme*." 
<< 0x09 << "Ça fait plaisir de pouvoir" // " utiliser des lettres accentuées dans du texte mis en forme."; // Bug reported by dfaure, the would get lost QTest::newRow("") << "QUrl url(\"http://strange/\");" << KTextToHTML::Options(KTextToHTML::ReplaceSmileys | KTextToHTML::HighlightText) << "QUrl url("/\">" "http://strange<hostname>/");"; // Bug: 211128 - plain text emails should not replace ampersand & with & QTest::newRow("bug211128") << "https://green-site/?Ticket=85&Page=next" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "" "https://green-site/?Ticket=85&Page=next"; QTest::newRow("dotBeforeEnd") << "Look at this file: www.example.com/example.h" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "Look at this file: " "www.example.com/example.h"; QTest::newRow("dotInMiddle") << "Look at this file: www.example.com/.bashrc" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "Look at this file: " "www.example.com/.bashrc"; // A dot at the end of an URL is explicitly ignored QTest::newRow("dotAtEnd") << "Look at this file: www.example.com/test.cpp." << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "Look at this file: " "www.example.com/test.cpp."; // Bug 313719 - URL in parenthesis QTest::newRow("url-in-parenthesis-1") << "KDE (website https://www.kde.org)" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "KDE (website https://www.kde.org)"; QTest::newRow("url-in-parenthesis-2") << "KDE website (https://www.kde.org)" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "KDE website (https://www.kde.org)"; QTest::newRow("url-in-parenthesis-3") << "bla (https://www.kde.org - section 5.2)" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "bla (https://www.kde.org - section 5.2)"; // Fix url as foo < > when we concatened them. 
QTest::newRow("url-with-url") << "foo >" << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "foo <https://www.kde.org/ <https://www.kde.org/>>"; // Fix url exploit QTest::newRow("url-exec-html") << "https://\">