Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 40 additions & 9 deletions src/global/Charset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ using namespace string_consts;
X(0x201Eu, C_DQUOTE, "bdquo") XSEP() \
X(0x201Fu, C_DQUOTE, "double high reversed quotation mark") XSEP() \
X(0x2022u, C_ASTERISK, "bull") XSEP() \
X(0x202Fu, C_NBSP, "nnbsp") XSEP() \
X(0x2039u, C_LESS_THAN, "lsaquo") XSEP() \
X(0x203Au, C_GREATER_THAN, "rsaquo")

Expand All @@ -76,12 +77,10 @@ NODISCARD constexpr bool is_latin_control(const T uc)
&& (uc >= 0x80u && uc < 0xA0u); // Latin1 control character
}
template<typename T>
NODISCARD constexpr bool is_noncontrol_ascii(const T ascii)
NODISCARD constexpr bool is_noncontrol_latin1(const T byte)
{
// non-control ASCII
return std::is_same_v<char, T> //
&& (static_cast<uint8_t>(ascii) & 0x7Fu) == static_cast<uint8_t>(ascii) //
&& static_cast<uint8_t>(ascii) >= 0x20u; //
return std::is_same_v<char, T> && !is_latin_control(static_cast<uint8_t>(byte));
Comment on lines +80 to +83
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): The new is_noncontrol_latin1 no longer enforces the non-control range invariant due to a type mismatch with is_latin_control.

is_noncontrol_ascii previously guaranteed _ascii was a non-control ASCII char (0x20–0x7F). In the new version, is_latin_control is constrained to T == char16_t, but here T is deduced as uint8_t, so is_latin_control(static_cast<uint8_t>(byte)) is never actually called and effectively always yields false. The condition then collapses to std::is_same_v<char, T>, so static_assert(is_noncontrol_latin1(_ascii)); no longer excludes Latin1 control bytes or out-of-range values. To preserve the invariant, either add an 8-bit overload of is_latin_control or restore an explicit range check on the uint8_t value.

}
template<typename T>
NODISCARD constexpr bool is_16bit_unicode(const T uc)
Expand All @@ -98,7 +97,7 @@ XFOREACH_WINDOWS_125x(XCASE, XSEMI);
#undef XCASE

#define XCASE(_unicode, _ascii, _name) \
static_assert(is_noncontrol_ascii(_ascii)); \
static_assert(is_noncontrol_latin1(_ascii)); \
static_assert(is_16bit_unicode(_unicode))
XFOREACH_UNICODE_TRANSLIT(XCASE, XSEMI);
#undef XCASE
Expand Down Expand Up @@ -194,8 +193,7 @@ NODISCARD static constexpr char16_t simple_unicode_translit(const char16_t input
case (_unicode): \
return conversion::to_char16(_ascii)

const auto maybe_bigger = windows125x_to_unicode(input);
switch (maybe_bigger) {
switch (MAYBE_UNUSED const auto maybe_bigger = windows125x_to_unicode(input)) {
XFOREACH_UNICODE_TRANSLIT(XCASE, XSEMI);
default:
return input;
Expand Down Expand Up @@ -297,6 +295,23 @@ NODISCARD QChar simple_unicode_translit(const QChar qc) noexcept
return QChar{charset::simple_unicode_translit(static_cast<char16_t>(qc.unicode()))};
}

NODISCARD static bool isValidLatin1(char latin1)
{
// Eventually we may want to exclude some other ASCII control codes,
// but be careful not to exclude things like "\t\r\n".
return latin1 != C_NUL && !is_latin_control(latin1);
Comment on lines +298 to +302
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): isValidLatin1 also relies on is_latin_control with a mismatched type, so Latin1 control characters will be treated as valid.

Here latin1 is a char, so this calls is_latin_control<char>, whose implementation always returns false because it only handles char16_t. As a result, isValidLatin1 only rejects C_NUL and treats all Latin1 control characters (0x80–0x9F) as valid. To actually filter those controls, cast to a 16-bit type (e.g. char16_t) before calling is_latin_control, or add a dedicated 8-bit overload and use it consistently (including in is_noncontrol_latin1).

}

NODISCARD static QLatin1Char toQLatin1Char(const QChar input)
{
const QChar qc = mmqt::simple_unicode_translit(input);
if (const char latin1 = qc.toLatin1(); isValidLatin1(latin1)) {
return QLatin1Char{latin1};
} else {
return QLatin1Char{'?'};
}
}

ALLOW_DISCARD QString &toLatin1InPlace(QString &str)
{
if (containsAnySurrogates(str)) {
Expand All @@ -308,8 +323,7 @@ ALLOW_DISCARD QString &toLatin1InPlace(QString &str)
}

for (QChar &qc : str) {
mmqt::simple_unicode_translit_in_place(qc);
qc = QLatin1Char{qc.toLatin1()};
qc = mmqt::toQLatin1Char(qc);
}
return str;
}
Expand Down Expand Up @@ -978,6 +992,23 @@ void testStringsExtreme()

void testMmqtLatin1()
{
{
const QString narrowNbspStr{"\U0000202F"}; // NNBSP (narrow NBSP)
{
const auto latin1 = mmqt::toLatin1(narrowNbspStr);
TEST_ASSERT(latin1 == mmqt::QS_NBSP);
}
{
// thumbs up uses a surrogate pair, which forces mmqt::toLatin1() to call
// a different code path.
const auto thumbsUpStr = QString{"\U0001F44D"};
const QString input = narrowNbspStr + mmqt::QS_COMMA + thumbsUpStr + mmqt::QS_NEWLINE;
const QString expect = mmqt::QS_NBSP + mmqt::QS_COMMA + mmqt::QS_QUESTION_MARK
+ mmqt::QS_NEWLINE;
const auto latin1 = mmqt::toLatin1(input);
TEST_ASSERT(latin1 == expect);
}
}
// verify that surrogates are handled as expected.
{
const QString thumbsUpQstr{"\U0001F44D"};
Expand Down
8 changes: 7 additions & 1 deletion src/global/SendToUser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ void registerSendToUser(Signal2Lifetime &lifetime, Signal2<QString>::Function ca

void sendToUser(const QString &str)
{
using namespace mmqt;
ABORT_IF_NOT_ON_MAIN_THREAD();
auto &fn = getSendToUser();
fn.invoke(str);
if (str.endsWith(QC_NEWLINE)) {
fn.invoke(str);
} else {
qWarning() << "sendToUser() missing a newline: " << str;
fn.invoke(str + QS_NEWLINE);
}
}
} // namespace global
2 changes: 1 addition & 1 deletion src/proxy/MudTelnet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ void MudTelnet::virt_receiveGmcpMessage(const GmcpMessage &msg)
const auto result = optString.value_or("missing text");
qDebug() << "Failure" << action << "remote message" << id << result;
// Mume doesn't send anything, so we have to make our own message.
global::sendToUser(QString("Failure %1 remote message: %1").arg(action, result));
global::sendToUser(QString("Failure %1 remote message: %2\n").arg(action, result));
}
return;
}
Expand Down
Loading