/*
    This file is part of the Kate project.

    SPDX-FileCopyrightText: 2021 Jan Paul Batrina

    SPDX-License-Identifier: LGPL-2.0-or-later
*/

#include "encodingtest.h"
#include "katetextbuffer.h"

QTEST_MAIN(KateEncodingTest)

/**
 * Checks byte order mark (BOM) handling when loading UTF-8, UTF-16 and UTF-32 files.
 *
 * For every fixture the test verifies that loading succeeds without encoding errors,
 * that generateByteOrderMark() reports the expected state, and that the BOM itself
 * does not leak into the loaded text. A final case loads a file holding only a UTF-8
 * BOM while UTF-16 is enforced, to ensure a BOM of another encoding is treated as data.
 */
void KateEncodingTest::utfBomTest()
{
    // setup stuff
    Kate::TextBuffer buffer(nullptr);
    buffer.setFallbackTextCodec(QStringLiteral("UTF-8"));

    // out-parameters of load(); initialized so a failing load never leaves them indeterminate
    bool encodingErrors = false;
    bool tooLongLinesWrapped = false;
    int longestLineLoaded = 0;
    bool success = false;
    QString prefixText;

    // utf8 tests
    buffer.setTextCodec(QStringLiteral("UTF-8"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf8.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(!buffer.generateByteOrderMark()); // file has no bom
    // since the utf8 bom is 3 bytes, the first 3 chars should not be the bom
    prefixText = buffer.text().left(3);
    QCOMPARE(prefixText, QLatin1String("Tes"));

    buffer.setTextCodec(QStringLiteral("UTF-8"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf8-bom-only.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(buffer.generateByteOrderMark());
    // aside from bom, file is empty so there should be no text
    prefixText = buffer.text();
    QVERIFY(prefixText.isEmpty());

    // utf16 tests: the last load() argument is false here because we need to allow
    // encoding detection; if the byte order is not ok, QStringDecoder will not show errors
    buffer.setTextCodec(QStringLiteral("UTF-16"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf16.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, false);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(buffer.generateByteOrderMark());
    // since the utf16 bom is 2 bytes, the first 2 chars should not be the bom
    prefixText = buffer.text().left(2);
    QCOMPARE(prefixText, QLatin1String("Te"));

    buffer.setTextCodec(QStringLiteral("UTF-16"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf16be.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, false);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(buffer.generateByteOrderMark());
    // since the utf16 bom is 2 bytes, the first 2 chars should not be the bom
    prefixText = buffer.text().left(2);
    QCOMPARE(prefixText, QLatin1String("Te"));

    // utf32 tests
    buffer.setTextCodec(QStringLiteral("UTF-32"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf32.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(buffer.generateByteOrderMark());
    // since the utf32 bom is 4 bytes, the first 4 chars should not be the bom
    prefixText = buffer.text().left(4);
    QCOMPARE(prefixText, QLatin1String("Test"));

    buffer.setTextCodec(QStringLiteral("UTF-32"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf32be.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(buffer.generateByteOrderMark());
    // since the utf32 bom is 4 bytes, the first 4 chars should not be the bom
    prefixText = buffer.text().left(4);
    QCOMPARE(prefixText, QLatin1String("Test"));

    // Ensure that a mismatching bom is not processed (e.g. utf8 bom should not be used for utf16)
    buffer.setTextCodec(QStringLiteral("UTF-16"));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/utf8-bom-only.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    // even though the file does not have a utf16 bom, Kate::TextBuffer::setTextCodec always enables bom generation
    // for utf16 and utf32 since the byte order is useful and relevant for reading the file
    QVERIFY(buffer.generateByteOrderMark());
    prefixText = buffer.text();
    // the utf8 bom 0xEFBBBF is expected to show up as the single char 0xBBEF, which is a hangul character
    QVERIFY(!prefixText.isEmpty()); // front() requires a non-empty string
    QCOMPARE(prefixText.front(), QChar(0xBBEF));
}

/**
 * Checks that no BOM processing happens for a non-UTF codec (Latin1).
 *
 * A plain file must load without a BOM being reported, and a file that starts with
 * the UTF-8 BOM bytes must expose those bytes as ordinary text.
 */
void KateEncodingTest::nonUtfNoBomTest()
{
    // setup stuff
    Kate::TextBuffer buffer(nullptr);
    buffer.setFallbackTextCodec(QStringLiteral("UTF-8"));

    // out-parameters of load(); initialized so a failing load never leaves them indeterminate
    bool encodingErrors = false;
    bool tooLongLinesWrapped = false;
    int longestLineLoaded = 0;
    bool success = false;
    QString prefixText;

    // latin15, should not contain any bom
    buffer.setTextCodec(QString::fromUtf8(QStringConverter::nameForEncoding(QStringConverter::Latin1)));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/latin15.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(!buffer.generateByteOrderMark());
    prefixText = buffer.text().left(4);
    QCOMPARE(prefixText, QLatin1String("Test"));

    // Even if a bom is somehow found, it should be processed normally as text for non-UTF char sets
    buffer.setTextCodec(QString::fromUtf8(QStringConverter::nameForEncoding(QStringConverter::Latin1)));
    success = buffer.load(QLatin1String(TEST_DATA_DIR "encoding/latin15-with-utf8-bom.txt"), encodingErrors, tooLongLinesWrapped, longestLineLoaded, true);
    QVERIFY(success);
    QVERIFY(!encodingErrors);
    QVERIFY(!buffer.generateByteOrderMark()); // utf8 bom shouldn't be processed
    // the utf8 bom is 0xEFBBBF; read byte by byte as Latin1 that is U+00EF U+00BB U+00BF.
    // The expectation is spelled with escapes so it does not depend on this file's encoding.
    prefixText = buffer.text().left(3);
    QCOMPARE(prefixText, QStringLiteral("\u00EF\u00BB\u00BF"));
}

#include "moc_encodingtest.cpp"