Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions qrexec-lib/pure.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,19 @@ QUBES_PURE_PUBLIC bool
qubes_pure_string_safe_for_display(const char *untrusted_str,
size_t line_length);

/**
* Copies a string into a caller-supplied buffer, replacing every character
* that is not safe for display (including invalid UTF-8) with a single `_`.
*
* Printable ASCII (0x20-0x7E) and UTF-8 characters that are safe for display
* are copied unchanged.  The output is truncated when the buffer is full; a
* multi-byte UTF-8 character is never split.  The result is always
* NUL-terminated unless max_line_length is 0.
*
* @param untrusted_str Input string, NUL-terminated (read-only)
* @param result Buffer to store the sanitized output (must hold at least
*               max_line_length bytes)
* @param max_line_length Size of the result buffer in bytes, including the
*                        terminating NUL
* @return The length of the sanitized string written to result, not counting
*         the terminating NUL; 0 if max_line_length is 0, in which case
*         result is not written at all
*/
QUBES_PURE_PUBLIC size_t
qubes_pure_sanitize_string_safe_for_display(const char *untrusted_str,
char *result,
size_t max_line_length);

/** Initialize a QubesSlice from a nul-terminated string. */
static inline struct QubesSlice
qubes_pure_buffer_init_from_nul_terminated_string(const char *str)
Expand Down
67 changes: 58 additions & 9 deletions qrexec-lib/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ qubes_pure_code_point_safe_for_display(uint32_t code_point) {
}

/* validate single UTF-8 character
* return bytes count of this character, or 0 if the character is invalid */
static int validate_utf8_char(const uint8_t *untrusted_c) {
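 * return bytes count of this character if it is valid and safe to display,
 * -1 if it is invalid UTF-8 (the caller should skip one byte and retry), or
 * minus bytes count if it is valid UTF-8 but not safe to display */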
static int validate_utf8_char_and_return_len(const uint8_t *untrusted_c) {
int tails_count = 0;
int total_size = 0;
uint32_t code_point;
Expand Down Expand Up @@ -66,7 +67,8 @@ static int validate_utf8_char(const uint8_t *untrusted_c) {
if (*untrusted_c >= 0xA0 && *untrusted_c <= 0xBF)
tails_count = 1;
else
return 0;
// invalid UTF-8, skip this byte and try to parse the next one
return -1;
code_point = *untrusted_c & 0x3F;
break;
case 0xE1 ... 0xEF:
Expand All @@ -80,7 +82,8 @@ static int validate_utf8_char(const uint8_t *untrusted_c) {
if (*untrusted_c >= 0x90 && *untrusted_c <= 0xBF)
tails_count = 2;
else
return 0;
// invalid UTF-8, skip this byte and try to parse the next one
return -1;
code_point = *untrusted_c & 0x3F;
break;
case 0xF1 ... 0xF4:
Expand All @@ -89,17 +92,24 @@ static int validate_utf8_char(const uint8_t *untrusted_c) {
code_point = *untrusted_c & 0x7;
break;
default:
return 0; // control ASCII or invalid UTF-8
return -1; // control ASCII or invalid UTF-8
}

while (tails_count-- > 0) {
untrusted_c++;
if (!(*untrusted_c >= 0x80 && *untrusted_c <= 0xBF))
return 0;
return -1;
code_point = code_point << 6 | (*untrusted_c & 0x3F);
}

return qubes_pure_code_point_safe_for_display(code_point) ? total_size : 0;
return qubes_pure_code_point_safe_for_display(code_point) ? total_size : -total_size;
}

/* Validate a single UTF-8 character for display.
 * Returns the number of bytes the character occupies when it is valid and
 * safe to display, or 0 otherwise. */
static int validate_utf8_char_safe_for_display(const uint8_t *untrusted_c) {
    const int len = validate_utf8_char_and_return_len(untrusted_c);

    if (len <= 0) {
        /* Collapse every non-positive result of the underlying validator
         * into the single "reject" value 0. */
        return 0;
    }
    return len;
}

// Statically assert that a statement is not reachable.
Expand Down Expand Up @@ -209,7 +219,7 @@ static ssize_t validate_path(const uint8_t *const untrusted_name,
(flags & QUBES_PURE_ALLOW_UNSAFE_CHARACTERS) != 0) {
/* loop will advance past this */
} else {
int utf8_ret = validate_utf8_char((const unsigned char *)(untrusted_name + i));
int utf8_ret = validate_utf8_char_safe_for_display((const unsigned char *)(untrusted_name + i));
if (utf8_ret > 0) {
i += (size_t)(utf8_ret - 1); /* loop will do one more increment */
} else {
Expand Down Expand Up @@ -306,7 +316,7 @@ qubes_pure_string_safe_for_display(const char *untrusted_str, size_t line_length
if (untrusted_str[i] >= 0x20 && untrusted_str[i] <= 0x7E) {
i++;
} else {
int utf8_ret = validate_utf8_char((const uint8_t *)(untrusted_str + i));
int utf8_ret = validate_utf8_char_safe_for_display((const uint8_t *)(untrusted_str + i));
if (utf8_ret > 0) {
i += utf8_ret;
} else {
Expand All @@ -316,3 +326,42 @@ qubes_pure_string_safe_for_display(const char *untrusted_str, size_t line_length
} while (untrusted_str[i]);
return true;
}

/*
 * Copy untrusted_str into result, replacing anything that is not safe for
 * display with '_'.
 *
 * - Printable ASCII (0x20-0x7E) is copied unchanged.
 * - A valid UTF-8 character that is safe for display is copied unchanged.
 * - A valid but unsafe UTF-8 character is replaced, as a whole, by one '_'.
 * - An invalid byte is replaced by one '_' and parsing resumes at the next
 *   byte.
 *
 * max_line_length is the size of result in bytes, including the terminating
 * NUL.  Output is truncated when the buffer is full, and a multi-byte
 * character is never split.  result is always NUL-terminated unless
 * max_line_length is 0, in which case nothing is written.
 *
 * Returns the number of bytes written to result, not counting the NUL.
 */
QUBES_PURE_PUBLIC size_t
qubes_pure_sanitize_string_safe_for_display(const char *untrusted_str,
                                            char *result,
                                            size_t max_line_length)
{
    if (max_line_length == 0) {
        /* No room even for the terminating NUL: leave result untouched. */
        return 0;
    }

    /* Payload bytes that fit once one byte is reserved for the NUL. */
    const size_t capacity = max_line_length - 1;
    size_t i = 0; /* read index into untrusted_str */
    size_t j = 0; /* write index into result */

    while (untrusted_str[i] && j < capacity) {
        if (untrusted_str[i] >= 0x20 && untrusted_str[i] <= 0x7E) {
            /* keep the printable ASCII character */
            result[j++] = untrusted_str[i++];
            continue;
        }

        int utf8_ret = validate_utf8_char_and_return_len(
            (const uint8_t *)(untrusted_str + i));
        if (utf8_ret <= 0) {
            /* Unsafe or invalid: emit one '_' for the whole sequence.  The
             * validator reports the number of bytes to skip as a negative
             * value.  A zero return is not expected, but it is treated as a
             * single bad byte so that the loop always makes progress. */
            result[j++] = '_';
            i += utf8_ret < 0 ? (size_t)(-utf8_ret) : 1;
            continue;
        }

        /* j < capacity holds here, so the subtraction cannot wrap.  Only
         * truncate when the character really does not fit; a character that
         * exactly fills the remaining space is kept. */
        if ((size_t)utf8_ret > capacity - j) {
            break;
        }

        /* copy the valid, safe UTF-8 character byte by byte */
        for (int k = 0; k < utf8_ret; k++) {
            result[j++] = untrusted_str[i++];
        }
    }

    /* Enforce NUL termination of the result string. */
    result[j] = '\0';
    return j;
}
63 changes: 63 additions & 0 deletions qrexec-lib/validator-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <inttypes.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

#include "pure.h"
#include <unicode/utf8.h>
Expand Down Expand Up @@ -89,10 +90,72 @@ static int symlink_test(const struct symlink_test symlink_checks[], size_t size)
return (int)failed;
}

/* Table-driven checks for qubes_pure_sanitize_string_safe_for_display():
 * each row gives an input, the buffer size passed to the function, and the
 * exact string expected in the output buffer. */
static void test_string_sanitization(void)
{
    static const struct {
        const char *input;
        size_t buf_size;
        const char *expected;
    } cases[] = {
        /* 1. Empty string */
        { "", 128, "" },
        /* 2. Normal ASCII */
        { "Hello", 128, "Hello" },
        /* 3. Safe UTF-8 (Greek Beta: \xCE\xB2) */
        { "\xCE\xB2", 128, "\xCE\xB2" },
        /* 4. Unsafe UTF-8: U+1F642 is \xF0\x9F\x99\x82 (4 bytes) */
        { "\xF0\x9F\x99\x82", 128, "_" },
        /* 5. Invalid UTF-8 (partial sequence) */
        { "\xE0", 128, "_" },
        /* 6. Invalid UTF-8 (bad continuation byte) */
        { "\xE0 ", 128, "_ " },
        /* 7. Output limited by the buffer size */
        { "ABCD", 4, "ABC" },
        /* 8. Truncation in the middle of UTF-8: 'A' plus the 2-byte
         *    character plus NUL needs 4 bytes, only 3 are available */
        { "A\xCE\xB2", 3, "A" },
        /* 9. Unsafe character replacement fits: the 4-byte emoji shrinks
         *    to a single '_' */
        { "A\xF0\x9F\x99\x82", 3, "A_" },
        /* 10. Mixed valid and invalid bytes */
        { "a\x80""b", 10, "a_b" },
    };
    char buf[128];

    for (size_t n = 0; n < sizeof(cases) / sizeof(cases[0]); n++) {
        size_t len = qubes_pure_sanitize_string_safe_for_display(
            cases[n].input, buf, cases[n].buf_size);
        assert(len == strlen(cases[n].expected));
        assert(strcmp(buf, cases[n].expected) == 0);
    }
}

int main(int argc, char **argv)
{
(void)argc;
(void)argv;

test_string_sanitization();

assert(qubes_pure_validate_file_name((const uint8_t *)u8"simple_safe_filename.txt"));

// Directory traversal checks
Expand Down