Skip to content

Commit

Permalink
fix(security): Only allow printable characters in incoming messages.
Browse files Browse the repository at this point in the history
  • Loading branch information
iphydf committed Nov 24, 2024
1 parent bb1324e commit a5a8d40
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 8 deletions.
25 changes: 18 additions & 7 deletions src/core/toxstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,30 @@ size_t ToxString::size() const
}

/**
* @brief Gets the string as QString.
* @return QString representation of the string.
* @brief Interpret the string as UTF-8 encoded QString.
*
* Removes any non-printable characters from the string. This is a defense-in-depth measure to
* prevent some potential security issues caused by bugs in client code or one of its dependencies.
*/
QString ToxString::getQString() const
{
return QString::fromUtf8(string);
const auto tainted = QString::fromUtf8(string).toStdU32String();
std::u32string cleaned;
std::copy_if(tainted.cbegin(), tainted.cend(), std::back_inserter(cleaned), [](char32_t c) {
// Cf (Other_Format) is to allow skin-color modifiers for emojis.
return QChar::isPrint(c) || QChar::category(c) == QChar::Category::Other_Format;
});
return QString::fromStdU32String(cleaned);
}

/**
* @brief getBytes Gets the bytes of the string.
* @return Bytes of the string as QByteArray.
* @brief Returns the bytes verbatim as they were received from or will be sent to the network.
*
* No cleanup or interpretation is done here. The bytes are returned as they were received from the
* network. Callers should be careful when processing these bytes. If UTF-8 messages are expected,
* use getQString() instead.
*/
QByteArray ToxString::getBytes() const
const QByteArray& ToxString::getBytes() const
{
return QByteArray(string);
return string;
}
2 changes: 1 addition & 1 deletion src/core/toxstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ToxString
const uint8_t* data() const;
size_t size() const;
QString getQString() const;
QByteArray getBytes() const;
const QByteArray& getBytes() const;

private:
QByteArray string;
Expand Down
92 changes: 92 additions & 0 deletions test/core/toxstring_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <QString>
#include <QtTest/QtTest>

#include "src/widget/form/settings/generalform.h" // getLocales

class TestToxString : public QObject
{
Q_OBJECT
Expand All @@ -20,6 +22,10 @@ private slots:
void emptyQByteTest();
void emptyUINT8Test();
void nullptrUINT8Test();
void localesTest();
void filterTest();
void emojiTest();
void coloredEmojiTest();

private:
/* Test Strings */
Expand Down Expand Up @@ -232,5 +238,91 @@ void TestToxString::nullptrUINT8Test()
}
}

/**
* @brief Check that we can encode and decode all native locale names.
*
* This is a smoke test as opposed to anything comprehensive, but it ensures that scripts used
* in the languages we have translations for can be encoded and decoded.
*/
void TestToxString::localesTest()
{
const QStringList& locales = GeneralForm::getLocales();
for (const QString& locale : locales) {
const QString lang = QLocale(locale).nativeLanguageName();
ToxString test(lang);
const QString test_string = test.getQString();
QCOMPARE(lang, test_string);
}
}

/**
* @brief Check that we filter out non-printable characters.
*/
void TestToxString::filterTest()
{
const struct TestCase
{
QString input;
QString expected;
} testCases[] = {
{QStringLiteral("Hello, World!"), QStringLiteral("Hello, World!")},
{QStringLiteral("Hello, \x00World!"), QStringLiteral("Hello, World!")},
{QStringLiteral("Hello, \x01World!"), QStringLiteral("Hello, World!")},
{QStringLiteral("Hello, \x7FWorld!"), QStringLiteral("Hello, World!")},
{QStringLiteral("Hello, \x80World!"), QStringLiteral("Hello, World!")},
};
for (const auto& testCase : testCases) {
QCOMPARE(ToxString(testCase.input).getQString(), testCase.expected);
}
}

/**
* @brief Check that we can encode and decode emojis.
*
* These are high code point characters that are not single code units in UTF-16.
*/
void TestToxString::emojiTest()
{
const struct TestCase
{
QString input;
QString expected;
} testCases[] = {
{QStringLiteral("🙂"), QStringLiteral("🙂")}, {QStringLiteral("🙁"), QStringLiteral("🙁")},
{QStringLiteral("🤣"), QStringLiteral("🤣")}, {QStringLiteral("🤷"), QStringLiteral("🤷")},
{QStringLiteral("🤼"), QStringLiteral("🤼")}, {QStringLiteral("🥃"), QStringLiteral("🥃")},
{QStringLiteral("🥌"), QStringLiteral("🥌")}, {QStringLiteral("🥐"), QStringLiteral("🥐")},
};

for (const auto& testCase : testCases) {
QCOMPARE(ToxString(testCase.input).getQString(), testCase.expected);
}
}

/**
* @brief Check that we can encode and decode emojis with color modifiers.
*
* These use modifier characters to change the color of the emoji.
*/
void TestToxString::coloredEmojiTest()
{
const struct TestCase
{
QString input;
QString expected;
} testCases[] = {
{QStringLiteral("🫱🏼‍🫲🏽"), QStringLiteral("🫱🏼‍🫲🏽")},
{QStringLiteral("👨🏻‍👩🏻‍👧🏻‍👦🏻"), QStringLiteral("👨🏻‍👩🏻‍👧🏻‍👦🏻")},
{QStringLiteral("👨🏼‍👩🏼‍👧🏼‍👦🏼"), QStringLiteral("👨🏼‍👩🏼‍👧🏼‍👦🏼")},
{QStringLiteral("👨🏽‍👩🏽‍👧🏽‍👦🏽"), QStringLiteral("👨🏽‍👩🏽‍👧🏽‍👦🏽")},
{QStringLiteral("👨🏾‍👩🏾‍👧🏾‍👦🏾"), QStringLiteral("👨🏾‍👩🏾‍👧🏾‍👦🏾")},
{QStringLiteral("👨🏿‍👩🏿‍👧🏿‍👦🏿"), QStringLiteral("👨🏿‍👩🏿‍👧🏿‍👦🏿")},
};

for (const auto& testCase : testCases) {
QCOMPARE(ToxString(testCase.input).getQString(), testCase.expected);
}
}

QTEST_GUILESS_MAIN(TestToxString)
#include "toxstring_test.moc"

0 comments on commit a5a8d40

Please sign in to comment.