From b8578f4acd405ace9ee105bbc722cf7188ef4fba Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 25 Dec 2025 07:50:31 +0000 Subject: [PATCH] Fix unknown escape sequences in string literals to preserve backslash Previously, when the lexer encountered an unknown escape sequence like \S in a string, it would only write the character after the backslash, losing the backslash itself. This caused strings like 'Win\Sys' to be stored as 'WinSys' instead of 'Win\Sys'. This fix preserves both the backslash and the following character for unknown escape sequences, matching ClickHouse's behavior. Enables 5 tests that were previously marked todo and now pass. --- lexer/lexer.go | 3 ++- .../01497_extract_all_groups_empty_match/metadata.json | 2 +- parser/testdata/01847_bad_like/metadata.json | 2 +- parser/testdata/02374_regexp_replace/metadata.json | 2 +- parser/testdata/02580_like_substring_search_bug/metadata.json | 2 +- .../metadata.json | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 8f72a46ec..a8a5736b9 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -397,7 +397,8 @@ func (l *Lexer) readString(quote rune) Item { val := hexValue(hex1)*16 + hexValue(hex2) sb.WriteByte(byte(val)) default: - // Unknown escape, just write the character after backslash + // Unknown escape, preserve both the backslash and the character + sb.WriteRune('\\') sb.WriteRune(l.ch) } l.readChar() diff --git a/parser/testdata/01497_extract_all_groups_empty_match/metadata.json b/parser/testdata/01497_extract_all_groups_empty_match/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01497_extract_all_groups_empty_match/metadata.json +++ b/parser/testdata/01497_extract_all_groups_empty_match/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01847_bad_like/metadata.json b/parser/testdata/01847_bad_like/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01847_bad_like/metadata.json +++ 
b/parser/testdata/01847_bad_like/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/02374_regexp_replace/metadata.json b/parser/testdata/02374_regexp_replace/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02374_regexp_replace/metadata.json +++ b/parser/testdata/02374_regexp_replace/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/02580_like_substring_search_bug/metadata.json b/parser/testdata/02580_like_substring_search_bug/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02580_like_substring_search_bug/metadata.json +++ b/parser/testdata/02580_like_substring_search_bug/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/03031_input_format_allow_errors_num_bad_escape_sequence/metadata.json b/parser/testdata/03031_input_format_allow_errors_num_bad_escape_sequence/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/03031_input_format_allow_errors_num_bad_escape_sequence/metadata.json +++ b/parser/testdata/03031_input_format_allow_errors_num_bad_escape_sequence/metadata.json @@ -1 +1 @@ -{"todo": true} +{}