diff --git a/src/core/toxstring.cpp b/src/core/toxstring.cpp index a0bf0a40a5..cb20d029d2 100644 --- a/src/core/toxstring.cpp +++ b/src/core/toxstring.cpp @@ -64,19 +64,30 @@ size_t ToxString::size() const } /** - * @brief Gets the string as QString. - * @return QString representation of the string. + * @brief Interpret the string as UTF-8 encoded QString. + * + * Removes any non-printable characters from the string. This is a defense-in-depth measure to + * prevent some potential security issues caused by bugs in client code or one of its dependencies. */ QString ToxString::getQString() const { - return QString::fromUtf8(string); + const auto tainted = QString::fromUtf8(string).toStdU32String(); + std::u32string cleaned; + std::copy_if(tainted.cbegin(), tainted.cend(), std::back_inserter(cleaned), [](char32_t c) { + // Cf (Other_Format) is to allow skin-color modifiers for emojis. + return QChar::isPrint(c) || QChar::category(c) == QChar::Category::Other_Format; + }); + return QString::fromStdU32String(cleaned); } /** - * @brief getBytes Gets the bytes of the string. - * @return Bytes of the string as QByteArray. + * @brief Returns the bytes verbatim as they were received from or will be sent to the network. + * + * No cleanup or interpretation is done here. The bytes are returned as they were received from the + * network. Callers should be careful when processing these bytes. If UTF-8 messages are expected, + * use getQString() instead. */ -QByteArray ToxString::getBytes() const +const QByteArray& ToxString::getBytes() const { - return QByteArray(string); + return string; } diff --git a/src/core/toxstring.h b/src/core/toxstring.h index b66e31d770..28dee47243 100644 --- a/src/core/toxstring.h +++ b/src/core/toxstring.h @@ -20,7 +20,7 @@ class ToxString const uint8_t* data() const; size_t size() const; QString getQString() const; - QByteArray getBytes() const; + const QByteArray& getBytes() const; private: QByteArray string; diff --git a/test/core/toxstring_test.cpp b/test/core/toxstring_test.cpp index 5c15be2f81..1c7b13b74e 100644 --- a/test/core/toxstring_test.cpp +++ b/test/core/toxstring_test.cpp @@ -9,6 +9,8 @@ #include #include +#include "src/widget/form/settings/generalform.h" // getLocales + class TestToxString : public QObject { Q_OBJECT @@ -20,6 +22,10 @@ private slots: void emptyQByteTest(); void emptyUINT8Test(); void nullptrUINT8Test(); + void localesTest(); + void filterTest(); + void emojiTest(); + void coloredEmojiTest(); private: /* Test Strings */ @@ -232,5 +238,91 @@ void TestToxString::nullptrUINT8Test() } } +/** + * @brief Check that we can encode and decode all native locale names. + * + * This is a smoke test as opposed to anything comprehensive, but it ensures that scripts used + * in the languages we have translations for can be encoded and decoded. + */ +void TestToxString::localesTest() +{ + const QStringList& locales = GeneralForm::getLocales(); + for (const QString& locale : locales) { + const QString lang = QLocale(locale).nativeLanguageName(); + ToxString test(lang); + const QString test_string = test.getQString(); + QCOMPARE(lang, test_string); + } +} + +/** + * @brief Check that we filter out non-printable characters. + */ +void TestToxString::filterTest() +{ + const struct TestCase + { + QString input; + QString expected; + } testCases[] = { + {QStringLiteral("Hello, World!"), QStringLiteral("Hello, World!")}, + {QStringLiteral("Hello, \x00World!"), QStringLiteral("Hello, World!")}, + {QStringLiteral("Hello, \x01World!"), QStringLiteral("Hello, World!")}, + {QStringLiteral("Hello, \x7FWorld!"), QStringLiteral("Hello, World!")}, + {QStringLiteral("Hello, \x80World!"), QStringLiteral("Hello, World!")}, + }; + for (const auto& testCase : testCases) { + QCOMPARE(ToxString(testCase.input).getQString(), testCase.expected); + } +} + +/** + * @brief Check that we can encode and decode emojis. + * + * These are high code point characters that are not single code units in UTF-16. + */ +void TestToxString::emojiTest() +{ + const struct TestCase + { + QString input; + QString expected; + } testCases[] = { + {QStringLiteral("🙂"), QStringLiteral("🙂")}, {QStringLiteral("🙁"), QStringLiteral("🙁")}, + {QStringLiteral("🤣"), QStringLiteral("🤣")}, {QStringLiteral("🤷"), QStringLiteral("🤷")}, + {QStringLiteral("🤼"), QStringLiteral("🤼")}, {QStringLiteral("🥃"), QStringLiteral("🥃")}, + {QStringLiteral("🥌"), QStringLiteral("🥌")}, {QStringLiteral("🥐"), QStringLiteral("🥐")}, + }; + + for (const auto& testCase : testCases) { + QCOMPARE(ToxString(testCase.input).getQString(), testCase.expected); + } +} + +/** + * @brief Check that we can encode and decode emojis with color modifiers. + * + * These use modifier characters to change the color of the emoji. + */ +void TestToxString::coloredEmojiTest() +{ + const struct TestCase + { + QString input; + QString expected; + } testCases[] = { + {QStringLiteral("🫱🏼‍🫲🏽"), QStringLiteral("🫱🏼‍🫲🏽")}, + {QStringLiteral("👨🏻‍👩🏻‍👧🏻‍👦🏻"), QStringLiteral("👨🏻‍👩🏻‍👧🏻‍👦🏻")}, + {QStringLiteral("👨🏼‍👩🏼‍👧🏼‍👦🏼"), QStringLiteral("👨🏼‍👩🏼‍👧🏼‍👦🏼")}, + {QStringLiteral("👨🏽‍👩🏽‍👧🏽‍👦🏽"), QStringLiteral("👨🏽‍👩🏽‍👧🏽‍👦🏽")}, + {QStringLiteral("👨🏾‍👩🏾‍👧🏾‍👦🏾"), QStringLiteral("👨🏾‍👩🏾‍👧🏾‍👦🏾")}, + {QStringLiteral("👨🏿‍👩🏿‍👧🏿‍👦🏿"), QStringLiteral("👨🏿‍👩🏿‍👧🏿‍👦🏿")}, + }; + + for (const auto& testCase : testCases) { + QCOMPARE(ToxString(testCase.input).getQString(), testCase.expected); + } +} + QTEST_GUILESS_MAIN(TestToxString) #include "toxstring_test.moc"