From 8a400e37cd82483fa6e727c41d9e9e323aa754b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3=A1n=20Kon=C4=8Dek?= <marian.koncek@mailbox.org>
Date: Tue, 9 Sep 2025 15:19:19 +0200
Subject: [PATCH 1/3] Implement Unicode literal handling

Generated-by: Claude Code
---
 src/java_symbols.hpp                  | 238 ++++++++++++++++++++++++--
 test.sh                               |   3 +
 test_resources/Unicode_escapes.1.java |  16 ++
 test_resources/Unicode_escapes.2.java |  15 ++
 test_resources/Unicode_escapes.java   |  17 ++
 5 files changed, 274 insertions(+), 15 deletions(-)
 create mode 100644 test_resources/Unicode_escapes.1.java
 create mode 100644 test_resources/Unicode_escapes.2.java
 create mode 100644 test_resources/Unicode_escapes.java
diff --git a/src/java_symbols.hpp b/src/java_symbols.hpp
index b344a44..97b7e77 100644
--- a/src/java_symbols.hpp
+++ b/src/java_symbols.hpp
@@ -134,6 +134,93 @@ inline static auto strict_mode = std::optional<Strict_mode>();
  */
 namespace java_symbols
 {
+/*!
+ * Decodes a Unicode escape sequence \uXXXX to its character representation.
+ * 
+ * @param content The string content containing potential Unicode escape
+ * @param position The starting position to check for Unicode escape
+ * @param decoded_char Output parameter for the decoded character
+ * @param sequence_length Output parameter for the length of the Unicode sequence (0 if not a valid escape)
+ * 
+ * @return True if a valid Unicode escape sequence was found and decoded
+ */
+inline bool decode_unicode_escape(std::string_view content, std::ptrdiff_t position, char32_t& decoded_char, std::ptrdiff_t& sequence_length) noexcept
+{
+	sequence_length = 0;
+	// Reject unless index position + 5 is in range and a backslash plus one 'u' start the text (a doubled 'u' fails the hex check)
+	if (position + 5 >= std::ssize(content) || content[position] != '\\' || content[position + 1] != 'u')
+	{
+		return false;
+	}
+	
+	// Check for valid hex digits
+	auto hex_start = position + 2;
+	for (std::ptrdiff_t i = 0; i < 4; ++i)
+	{
+		char c = content[hex_start + i];
+		if (!std::isxdigit(static_cast<unsigned char>(c)))
+		{
+			return false;
+		}
+	}
+	
+	// Parse hex digits
+	char32_t result = 0;
+	for (std::ptrdiff_t i = 0; i < 4; ++i)
+	{
+		char c = content[hex_start + i];
+		result = result * 16;
+		if (c >= '0' && c <= '9')
+		{
+			result += c - '0';
+		}
+		else if (c >= 'A' && c <= 'F')
+		{
+			result += c - 'A' + 10;
+		}
+		else if (c >= 'a' && c <= 'f')
+		{
+			result += c - 'a' + 10;
+		}
+	}
+	// Four hex digits were accumulated, so result lies in the range 0 .. 0xFFFF
+	decoded_char = result;
+	sequence_length = 6; // backslash + 'u' + four hex digits
+	return true;
+}
+
+/*!
+ * Checks if a character at the given position (potentially Unicode-escaped) matches the target character.
+ * 
+ * @param content The string content to check
+ * @param position The position to check
+ * @param target_char The character to match against
+ * @param actual_length Output parameter: length of a Unicode escape decoded at position, otherwise 1
+ * 
+ * @return True if the (decoded) character at position equals target_char; false otherwise or past the end
+ */
+inline bool char_matches_at_position(std::string_view content, std::ptrdiff_t position, char target_char, std::ptrdiff_t& actual_length) noexcept
+{
+	actual_length = 1; // Default for non-Unicode chars
+	
+	if (position >= std::ssize(content))
+	{
+		return false;
+	}
+	
+	// Escape first; compare whole code points so that e.g. U+0140 is not truncated to '@' (0x40)
+	char32_t decoded_char = 0;
+	std::ptrdiff_t unicode_length = 0;
+	if (decode_unicode_escape(content, position, decoded_char, unicode_length))
+	{
+		actual_length = unicode_length;
+		return decoded_char == static_cast<char32_t>(static_cast<unsigned char>(target_char));
+	}
+	
+	// Regular character check
+	return content[position] == target_char;
+}
+
 /*!
  * Iterates over @p content starting at @p position to find the first character
  * which is not part of a Java comment nor a whitespace character.
@@ -190,11 +277,49 @@ inline bool is_identifier_char(char c) noexcept
 	return c == '_' or (not std::ispunct(static_cast<unsigned char>(c)) and not std::isspace(static_cast<unsigned char>(c)));
 }
 
+/*!
+ * Checks if a character at the given position (potentially Unicode-escaped) is a valid identifier character.
+ * 
+ * @param content The string content to check
+ * @param position The position to check
+ * @param char_length Output parameter: length of a Unicode escape decoded at position, otherwise 1
+ * 
+ * @return True if the character at position is a valid identifier character; false past the end of content
+ */
+inline bool is_identifier_char_at_position(std::string_view content, std::ptrdiff_t position, std::ptrdiff_t& char_length) noexcept
+{
+	char_length = 1; // Default for non-Unicode chars
+	
+	if (position >= std::ssize(content))
+	{
+		return false;
+	}
+	
+	// Check for Unicode escape first
+	char32_t decoded_char = 0;
+	std::ptrdiff_t unicode_length = 0;
+	if (decode_unicode_escape(content, position, decoded_char, unicode_length))
+	{
+		char_length = unicode_length;
+		// An escaped ASCII character is classified exactly like its unescaped spelling
+		if (decoded_char <= 127)
+		{
+			return is_identifier_char(static_cast<char>(decoded_char));
+		}
+		// No further filtering is done above the ASCII range: every such code point
+		// is treated as an identifier character ('@', '/' and '*' are all ASCII)
+		return true;
+	}
+	
+	// Regular character check
+	return is_identifier_char(content[position]);
+}
+
 /*!
  * Iterates over @p content starting at @p position to find the next uncommented
  * symbol and returns it and an index pointing past it. The symbol is either a
  * sequence of alphanumeric characters or a single non-alphanumeric character or
- * an empty string if the end has been reached.
+ * an empty string if the end has been reached. Supports Unicode escape sequences.
  */
 inline std::tuple<std::string_view, std::ptrdiff_t> next_symbol(std::string_view content, std::ptrdiff_t position = 0) noexcept
 {
@@ -206,13 +331,39 @@ inline std::tuple<std::string_view, std::ptrdiff_t> next_symbol(std::string_view
 		
 		if (position < std::ssize(content))
 		{
-			symbol_length = 1;
+			std::ptrdiff_t char_length;
 			
-			if (is_identifier_char(content[position]))
+			// Check if the first character is an identifier character (possibly Unicode-escaped)
+			if (is_identifier_char_at_position(content, position, char_length))
+			{
+				symbol_length = char_length;
+				
+				// Continue reading identifier characters
+				while (position + symbol_length < std::ssize(content))
+				{
+					std::ptrdiff_t next_char_length;
+					if (is_identifier_char_at_position(content, position + symbol_length, next_char_length))
+					{
+						symbol_length += next_char_length;
+					}
+					else
+					{
+						break;
+					}
+				}
+			}
+			else
 			{
-				while (position + symbol_length != std::ssize(content) and is_identifier_char(content[position + symbol_length]))
+				// Single non-identifier character (could be Unicode-escaped)
+				char32_t decoded_char;
+				std::ptrdiff_t unicode_length;
+				if (decode_unicode_escape(content, position, decoded_char, unicode_length))
+				{
+					symbol_length = unicode_length;
+				}
+				else
 				{
-					++symbol_length;
+					symbol_length = 1;
 				}
 			}
 		}
@@ -224,7 +375,7 @@ inline std::tuple<std::string_view, std::ptrdiff_t> next_symbol(std::string_view
 /*!
  * Iterates over @p content starting at @p position to find a string @p token
  * which is present in the source code neither inside a comment nor inside a
- * string nor inside a character literal.
+ * string nor inside a character literal. Supports Unicode escape sequences.
  * 
  * Special case when @p token == ')', this function counts opening and closing
  * parentheses and returns the first parenthesis outside.
@@ -238,7 +389,7 @@ inline std::tuple<std::string_view, std::ptrdiff_t> next_symbol(std::string_view
 inline std::ptrdiff_t find_token(std::string_view content, std::string_view token,
 	std::ptrdiff_t position = 0, bool alphanumeric = false, std::ptrdiff_t stack = 0) noexcept
 {
-	while (position + std::ssize(token) <= std::ssize(content))
+	while (position < std::ssize(content))
 	{
 		position = ignore_whitespace_comments(content, position);
 		
@@ -247,15 +398,58 @@ inline std::ptrdiff_t find_token(std::string_view content, std::string_view toke
 			break;
 		}
 		
-		auto substr = content.substr(position, token.length());
-		
-		if ((token != ")" or stack == 0) and substr == token
-			and not (alphanumeric and ((position > 0 and is_identifier_char(content[position - 1]))
-				or (position + std::ssize(token) < std::ssize(content) and (is_identifier_char(content[position + token.length()]))))))
+		// Check for single character tokens (like '@', '/', '*') that might be Unicode-escaped
+		if (token.length() == 1)
+		{
+			std::ptrdiff_t char_length;
+			if (char_matches_at_position(content, position, token[0], char_length))
+			{
+				bool is_valid_match = true;
+				
+				// Apply alphanumeric constraints if needed
+				if (alphanumeric)
+				{
+					std::ptrdiff_t prev_char_length;
+					if (position > 0 && is_identifier_char_at_position(content, position - 1, prev_char_length))
+					{
+						is_valid_match = false;
+					}
+					else
+					{
+						std::ptrdiff_t next_char_length;
+						if (position + char_length < std::ssize(content) && is_identifier_char_at_position(content, position + char_length, next_char_length))
+						{
+							is_valid_match = false;
+						}
+					}
+				}
+				
+				// Special handling for parentheses stack counting
+				if (token == ")" && stack == 0 && is_valid_match)
+				{
+					return position;
+				}
+				else if (token != ")" && is_valid_match)
+				{
+					return position;
+				}
+			}
+		}
+		// Multi-character tokens - check regular substring match
+		else if (position + std::ssize(token) <= std::ssize(content))
 		{
-			return position;
+			auto substr = content.substr(position, token.length());
+			
+			if ((token != ")" or stack == 0) and substr == token
+				and not (alphanumeric and ((position > 0 and is_identifier_char(content[position - 1]))
+					or (position + std::ssize(token) < std::ssize(content) and (is_identifier_char(content[position + token.length()]))))))
+			{
+				return position;
+			}
 		}
-		else if (content[position] == '\'')
+		
+		// Handle string and character literals, and parentheses counting
+		if (content[position] == '\'')
 		{
 			if (content.substr(position, 4) == "'\\''")
 			{
@@ -295,6 +489,16 @@ inline std::ptrdiff_t find_token(std::string_view content, std::string_view toke
 		{
 			++stack;
 		}
+		else
+		{
+			// Check if we're at a Unicode escape and skip it appropriately
+			char32_t decoded_char;
+			std::ptrdiff_t unicode_length;
+			if (decode_unicode_escape(content, position, decoded_char, unicode_length))
+			{
+				position += unicode_length - 1; // -1 because we'll increment at the end of the loop
+			}
+		}
 		
 		++position;
 	}
@@ -320,8 +524,12 @@ inline std::tuple<std::string_view, std::string> next_annotation(std::string_vie
 	
 	if (position < std::ssize(content))
 	{
+		// Calculate the actual length of the @ symbol (could be Unicode-escaped)
+		std::ptrdiff_t at_symbol_length;
+		char_matches_at_position(content, position, '@', at_symbol_length);
+		
 		auto symbol = std::string_view();
-		std::tie(symbol, end_pos) = next_symbol(content, position + 1);
+		std::tie(symbol, end_pos) = next_symbol(content, position + at_symbol_length);
 		auto new_end_pos = end_pos;
 		
 		while (not symbol.empty())
diff --git a/test.sh b/test.sh
index 93557cd..3b74830 100755
--- a/test.sh
+++ b/test.sh
@@ -152,6 +152,9 @@ test_file "Array.java" "Array.5.java" -a -n C -n D -n E -n F
 
 test_file "Package_info.java" "Package_info.1.java" -a -n MyAnn
 
+test_file "Unicode_escapes.java" "Unicode_escapes.1.java" -n "TestAnnotation"
+test_file "Unicode_escapes.java" "Unicode_escapes.2.java" -a -n "TestAnnotation"
+
 ################################################################################
 # Tests for tool termination on invalid sources, result is irrelevant
 
diff --git a/test_resources/Unicode_escapes.1.java b/test_resources/Unicode_escapes.1.java
new file mode 100644
index 0000000..25f8a6e
--- /dev/null
+++ b/test_resources/Unicode_escapes.1.java
@@ -0,0 +1,16 @@
+import com.example\u002FSubPackage\u002ESecondAnnotation;
+
+\u0040TestAnnotation
+\u0040com.example\u002FSubPackage\u002ESecondAnnotation(value = "test")
+class Unicode_escapes {
+    // This file tests Unicode escape sequences:
+    // \u0040 = @ (at symbol)
+    // \u002F = / (forward slash)
+    // \u002E = . (period/dot)
+    // \u002A = * (asterisk)
+    
+    public void testMethod() {
+        // Some comment with unicode escape \u0040
+        String test = "Unicode \u0040 in string";
+    }
+}
diff --git a/test_resources/Unicode_escapes.2.java b/test_resources/Unicode_escapes.2.java
new file mode 100644
index 0000000..67c7677
--- /dev/null
+++ b/test_resources/Unicode_escapes.2.java
@@ -0,0 +1,15 @@
+import com.example\u002FSubPackage\u002ESecondAnnotation;
+
+\u0040com.example\u002FSubPackage\u002ESecondAnnotation(value = "test")
+class Unicode_escapes {
+    // This file tests Unicode escape sequences:
+    // \u0040 = @ (at symbol)
+    // \u002F = / (forward slash)
+    // \u002E = . (period/dot)
+    // \u002A = * (asterisk)
+    
+    public void testMethod() {
+        // Some comment with unicode escape \u0040
+        String test = "Unicode \u0040 in string";
+    }
+}
diff --git a/test_resources/Unicode_escapes.java b/test_resources/Unicode_escapes.java
new file mode 100644
index 0000000..ccdf7e3
--- /dev/null
+++ b/test_resources/Unicode_escapes.java
@@ -0,0 +1,17 @@
+import com.example.TestAnnotation;
+import com.example\u002FSubPackage\u002ESecondAnnotation;
+
+\u0040TestAnnotation
+\u0040com.example\u002FSubPackage\u002ESecondAnnotation(value = "test")
+class Unicode_escapes {
+    // This file tests Unicode escape sequences:
+    // \u0040 = @ (at symbol)
+    // \u002F = / (forward slash)
+    // \u002E = . (period/dot)
+    // \u002A = * (asterisk)
+    
+    public void testMethod() {
+        // Some comment with unicode escape \u0040
+        String test = "Unicode \u0040 in string";
+    }
+}

From e9b8385b7f0dbeb7f75ebebb35811714bf3babdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3=A1n=20Kon=C4=8Dek?= <marian.koncek@mailbox.org>
Date: Tue, 9 Sep 2025 15:32:13 +0200
Subject: [PATCH 2/3] Simplify Unicode literal handling

Generated-by: Claude Code
---
 src/java_symbols.hpp | 55 +++++++++++---------------------------------
 1 file changed, 13 insertions(+), 42 deletions(-)

diff --git a/src/java_symbols.hpp b/src/java_symbols.hpp
index 97b7e77..9bbe426 100644
--- a/src/java_symbols.hpp
+++ b/src/java_symbols.hpp
@@ -136,11 +136,12 @@ namespace java_symbols
 {
 /*!
  * Decodes a Unicode escape sequence \uXXXX to its character representation.
+ * Simplified version assuming valid Java source files.
  * 
  * @param content The string content containing potential Unicode escape
  * @param position The starting position to check for Unicode escape
  * @param decoded_char Output parameter for the decoded character
- * @param sequence_length Output parameter for the length of the Unicode sequence (0 if not a valid escape)
+ * @param sequence_length Output parameter for the length of the Unicode sequence
  * 
  * @return True if a valid Unicode escape sequence was found and decoded
  */
@@ -153,7 +154,7 @@ inline bool decode_unicode_escape(std::string_view content, std::ptrdiff_t posit
 		return false;
 	}
 	
-	// Check for valid hex digits
+	// Check for valid hex digits - simplified validation
 	auto hex_start = position + 2;
 	for (std::ptrdiff_t i = 0; i < 4; ++i)
 	{
@@ -164,7 +165,7 @@ inline bool decode_unicode_escape(std::string_view content, std::ptrdiff_t posit
 		}
 	}
 	
-	// Parse hex digits
+	// Parse hex digits - simplified logic
 	char32_t result = 0;
 	for (std::ptrdiff_t i = 0; i < 4; ++i)
 	{
@@ -191,6 +192,7 @@ inline bool decode_unicode_escape(std::string_view content, std::ptrdiff_t posit
 
 /*!
  * Checks if a character at the given position (potentially Unicode-escaped) matches the target character.
+ * Simplified version assuming valid Java source files.
  * 
  * @param content The string content to check
  * @param position The position to check
@@ -331,39 +333,13 @@ inline std::tuple<std::string_view, std::ptrdiff_t> next_symbol(std::string_view
 		
 		if (position < std::ssize(content))
 		{
-			std::ptrdiff_t char_length;
+			symbol_length = 1;
 			
-			// Check if the first character is an identifier character (possibly Unicode-escaped)
-			if (is_identifier_char_at_position(content, position, char_length))
-			{
-				symbol_length = char_length;
-				
-				// Continue reading identifier characters
-				while (position + symbol_length < std::ssize(content))
-				{
-					std::ptrdiff_t next_char_length;
-					if (is_identifier_char_at_position(content, position + symbol_length, next_char_length))
-					{
-						symbol_length += next_char_length;
-					}
-					else
-					{
-						break;
-					}
-				}
-			}
-			else
+			if (is_identifier_char(content[position]))
 			{
-				// Single non-identifier character (could be Unicode-escaped)
-				char32_t decoded_char;
-				std::ptrdiff_t unicode_length;
-				if (decode_unicode_escape(content, position, decoded_char, unicode_length))
+				while (position + symbol_length != std::ssize(content) and is_identifier_char(content[position + symbol_length]))
 				{
-					symbol_length = unicode_length;
-				}
-				else
-				{
-					symbol_length = 1;
+					++symbol_length;
 				}
 			}
 		}
@@ -409,18 +385,13 @@ inline std::ptrdiff_t find_token(std::string_view content, std::string_view toke
 				// Apply alphanumeric constraints if needed
 				if (alphanumeric)
 				{
-					std::ptrdiff_t prev_char_length;
-					if (position > 0 && is_identifier_char_at_position(content, position - 1, prev_char_length))
+					if (position > 0 && is_identifier_char(content[position - 1]))
 					{
 						is_valid_match = false;
 					}
-					else
+					else if (position + char_length < std::ssize(content) && is_identifier_char(content[position + char_length]))
 					{
-						std::ptrdiff_t next_char_length;
-						if (position + char_length < std::ssize(content) && is_identifier_char_at_position(content, position + char_length, next_char_length))
-						{
-							is_valid_match = false;
-						}
+						is_valid_match = false;
 					}
 				}
 				
@@ -496,7 +467,7 @@ inline std::ptrdiff_t find_token(std::string_view content, std::string_view toke
 			std::ptrdiff_t unicode_length;
 			if (decode_unicode_escape(content, position, decoded_char, unicode_length))
 			{
-				position += unicode_length - 1; // -1 because we'll increment at the end of the loop
+				position += unicode_length - 1; // -1 because we'll increment at the end
 			}
 		}
 		

From eb6f8b8193b429e59dadfe341db8613bf0cdd4cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mari=C3=A1n=20Kon=C4=8Dek?= <marian.koncek@mailbox.org>
Date: Tue, 23 Sep 2025 12:14:04 +0200
Subject: [PATCH 3/3] Add unicode handling for comments

---
 src/java_symbols.hpp                   | 61 ++++++++++++++++++++++++++
 test.sh                                |  2 +
 test_resources/Unicode_comments.1.java | 12 +++++
 test_resources/Unicode_comments.java   | 14 ++++++
 4 files changed, 89 insertions(+)
 create mode 100644 test_resources/Unicode_comments.1.java
 create mode 100644 test_resources/Unicode_comments.java

diff --git a/src/java_symbols.hpp b/src/java_symbols.hpp
index 9bbe426..369cf77 100644
--- a/src/java_symbols.hpp
+++ b/src/java_symbols.hpp
@@ -223,9 +223,60 @@ inline bool char_matches_at_position(std::string_view content, std::ptrdiff_t po
 	return content[position] == target_char;
 }
 
+/*!
+ * Skips one comment starting at @p position whose delimiters may be written as
+ * Unicode escapes, e.g. \u002F\u002F (//) or \u002F\u002A (slash-star).
+ * 
+ * @return The position past the comment, the length of @p content if the comment
+ * runs to the end, or @p position unchanged if no comment starts there.
+ */
+inline std::ptrdiff_t check_unicode_comment_start(std::string_view content, std::ptrdiff_t position) noexcept
+{
+	const auto content_length = std::ssize(content);
+	std::ptrdiff_t slash_length = 1;
+	std::ptrdiff_t second_length = 1;
+	
+	// Both comment kinds open with '/' followed by at least one more character
+	if (not char_matches_at_position(content, position, '/', slash_length)
+		or position + slash_length >= content_length)
+	{
+		return position;
+	}
+	
+	const auto second = position + slash_length;
+	
+	if (char_matches_at_position(content, second, '/', second_length))
+	{
+		// Single-line comment: ends after the next line feed
+		const auto line_feed = content.find('\n', second + second_length);
+		return line_feed == content.npos ? content_length : std::ptrdiff_t(line_feed) + 1;
+	}
+	
+	if (not char_matches_at_position(content, second, '*', second_length))
+	{
+		return position;
+	}
+	
+	// Multi-line comment: the terminator may itself be Unicode-escaped
+	for (auto cursor = second + second_length; cursor < content_length; ++cursor)
+	{
+		std::ptrdiff_t star_length = 1;
+		std::ptrdiff_t closing_length = 1;
+		if (char_matches_at_position(content, cursor, '*', star_length)
+			and cursor + star_length < content_length
+			and char_matches_at_position(content, cursor + star_length, '/', closing_length))
+		{
+			return cursor + star_length + closing_length;
+		}
+	}
+	
+	return content_length;
+}
+
 /*!
  * Iterates over @p content starting at @p position to find the first character
  * which is not part of a Java comment nor a whitespace character.
+ * Supports Unicode-escaped comment delimiters.
  * 
  * @return The position of the first non-whitespace non-comment character or the
  * length of the string if none is found.
@@ -242,6 +293,7 @@ inline std::ptrdiff_t ignore_whitespace_comments(std::string_view content, std::
 		
 		auto result = position;
 		
+		// Check for regular comment delimiters first
 		if (auto subst = content.substr(position, 2); subst == "//")
 		{
 			position = content.find('\n', position + 2);
@@ -264,6 +316,15 @@ inline std::ptrdiff_t ignore_whitespace_comments(std::string_view content, std::
 			
 			position += 2;
 		}
+		else
+		{
+			// Check for Unicode-escaped comment delimiters
+			auto unicode_comment_pos = check_unicode_comment_start(content, position);
+			if (unicode_comment_pos != position)
+			{
+				position = unicode_comment_pos;
+			}
+		}
 		
 		if (result == position)
 		{
diff --git a/test.sh b/test.sh
index 3b74830..a5f38d1 100755
--- a/test.sh
+++ b/test.sh
@@ -155,6 +155,8 @@ test_file "Package_info.java" "Package_info.1.java" -a -n MyAnn
 test_file "Unicode_escapes.java" "Unicode_escapes.1.java" -n "TestAnnotation"
 test_file "Unicode_escapes.java" "Unicode_escapes.2.java" -a -n "TestAnnotation"
 
+test_file "Unicode_comments.java" "Unicode_comments.1.java" -a -n "TestAnnotation"
+
 ################################################################################
 # Tests for tool termination on invalid sources, result is irrelevant
 
diff --git a/test_resources/Unicode_comments.1.java b/test_resources/Unicode_comments.1.java
new file mode 100644
index 0000000..f823129
--- /dev/null
+++ b/test_resources/Unicode_comments.1.java
@@ -0,0 +1,12 @@
+
+\u002F\u002F This is a Unicode-escaped single-line comment
+\u002F\u002A This is a Unicode-escaped 
+   multi-line comment \u002A\u002F
+class Unicode_comments {
+    // Regular comment
+    /* Regular multi-line comment */
+    
+    public void testMethod() {
+        String test = "Unicode comments test";
+    }
+}
diff --git a/test_resources/Unicode_comments.java b/test_resources/Unicode_comments.java
new file mode 100644
index 0000000..e5eff06
--- /dev/null
+++ b/test_resources/Unicode_comments.java
@@ -0,0 +1,14 @@
+import com.example.TestAnnotation;
+
+\u002F\u002F This is a Unicode-escaped single-line comment
+@TestAnnotation
+\u002F\u002A This is a Unicode-escaped 
+   multi-line comment \u002A\u002F
+class Unicode_comments {
+    // Regular comment
+    /* Regular multi-line comment */
+    
+    public void testMethod() {
+        String test = "Unicode comments test";
+    }
+}