From dff249a60463d83b70d1abeea99fc0b1141e12bf Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 01:48:24 +0000 Subject: [PATCH 1/2] Implement matches_regex and not_matches_regex assertion types - Add matches_regex and not_matches_regex to AssertionType enum in parser.zig - Add string parsing support for both regex types in fromString method - Implement basic regex matcher with support for: - .* (any characters) - ^ (start anchor) and $ (end anchor) - . (any single character) - [abc] (character classes) - Literal text matching - Add assertion checking logic for both regex types in assertion_checker.zig - Add comprehensive test case for regex functionality Co-authored-by: Brad --- src/httpfile/assertion_checker.zig | 206 +++++++++++++++++++++++++++++ src/httpfile/parser.zig | 8 +- 2 files changed, 210 insertions(+), 4 deletions(-) diff --git a/src/httpfile/assertion_checker.zig b/src/httpfile/assertion_checker.zig index 2de1bf4..d9d84ce 100644 --- a/src/httpfile/assertion_checker.zig +++ b/src/httpfile/assertion_checker.zig @@ -54,6 +54,102 @@ fn extractHeaderName(key: []const u8) ![]const u8 { return key[start_quote + 1 .. end_quote]; } +fn matchesRegex(text: []const u8, pattern: []const u8) bool { + if (pattern.len == 0) return text.len == 0; + + // Handle anchors + const starts_with_anchor = pattern[0] == '^'; + const ends_with_anchor = pattern.len > 0 and pattern[pattern.len - 1] == '$'; + + var actual_pattern = pattern; + if (starts_with_anchor) actual_pattern = pattern[1..]; + if (ends_with_anchor and actual_pattern.len > 0) actual_pattern = actual_pattern[0..actual_pattern.len - 1]; + + if (starts_with_anchor and ends_with_anchor) { + return matchesRegexAt(text, actual_pattern, 0) == text.len; + } else if (starts_with_anchor) { + return matchesRegexAt(text, actual_pattern, 0) != null; + } else if (ends_with_anchor) { + var i: usize = 0; + while (i <= text.len) : (i += 1) { + if (matchesRegexAt(text[i..], actual_pattern, 0)) |end_pos| { + if (i + end_pos == text.len) return true; + } + } + return false; + } else { + var i: usize = 0; + while (i <= text.len) : (i += 1) { + if (matchesRegexAt(text[i..], actual_pattern, 0) != null) return true; + } + return false; + } +} + +fn matchesRegexAt(text: []const u8, pattern: []const u8, text_pos: usize) ?usize { + var p_pos: usize = 0; + var t_pos = text_pos; + + while (p_pos < pattern.len and t_pos < text.len) { + if (p_pos + 1 < pattern.len and pattern[p_pos + 1] == '*') { + // Handle .* or character* + const match_char = pattern[p_pos]; + p_pos += 2; // Skip char and * + + // Try matching zero occurrences first + if (matchesRegexAt(text, pattern[p_pos..], t_pos)) |end_pos| { + return t_pos + end_pos; + } + + // Try matching one or more occurrences + while (t_pos < text.len) { + if (match_char == '.' or text[t_pos] == match_char) { + t_pos += 1; + if (matchesRegexAt(text, pattern[p_pos..], t_pos)) |end_pos| { + return t_pos + end_pos; + } + } else { + break; + } + } + return null; + } else if (pattern[p_pos] == '.') { + // Match any single character + t_pos += 1; + p_pos += 1; + } else if (pattern[p_pos] == '[') { + // Character class + const close_bracket = std.mem.indexOfScalarPos(u8, pattern, p_pos + 1, ']') orelse return null; + const char_class = pattern[p_pos + 1..close_bracket]; + var matched = false; + for (char_class) |c| { + if (text[t_pos] == c) { + matched = true; + break; + } + } + if (!matched) return null; + t_pos += 1; + p_pos = close_bracket + 1; + } else { + // Literal character match + if (text[t_pos] != pattern[p_pos]) return null; + t_pos += 1; + p_pos += 1; + } + } + + // Handle remaining .* patterns at end + while (p_pos + 1 < pattern.len and pattern[p_pos + 1] == '*') { + p_pos += 2; + } + + if (p_pos == pattern.len) { + return t_pos - text_pos; + } + return null; +} + pub fn check(request: *HttpParser.HttpRequest, response: Client.HttpResponse) !void { const stderr = std.io.getStdErr().writer(); for (request.assertions.items) |assertion| { @@ -184,6 +280,58 @@ pub fn check(request: *HttpParser.HttpRequest, response: Client.HttpResponse) !v return error.InvalidAssertionKey; } }, + .matches_regex => { + if (std.ascii.eqlIgnoreCase(assertion.key, "status")) { + var status_buf: [3]u8 = undefined; + const status_code = @intFromEnum(response.status.?); + const status_str = std.fmt.bufPrint(&status_buf, "{}", .{status_code}) catch return error.StatusCodeFormat; + if (!matchesRegex(status_str, assertion.value)) { + stderr.print("[Fail] Expected status code to match regex \"{s}\", got \"{s}\"\n", .{ assertion.value, status_str }) catch {}; + return error.StatusCodeNotMatchesRegex; + } + } else if (std.ascii.eqlIgnoreCase(assertion.key, "body")) { + if (!matchesRegex(response.body, assertion.value)) { + stderr.print("[Fail] Expected body content to match regex \"{s}\", got \"{s}\"\n", .{ assertion.value, response.body }) catch {}; + return error.BodyContentNotMatchesRegex; + } + } else if (std.mem.startsWith(u8, assertion.key, "header[\"")) { + const header_name = try extractHeaderName(assertion.key); + const actual_value = response.headers.get(header_name); + if (actual_value == null or !matchesRegex(actual_value.?, assertion.value)) { + stderr.print("[Fail] Expected header \"{s}\" to match regex \"{s}\", got \"{s}\"\n", .{ header_name, assertion.value, actual_value orelse "null" }) catch {}; + return error.HeaderNotMatchesRegex; + } + } else { + stderr.print("[Fail] Invalid assertion key for matches_regex: {s}\n", .{assertion.key}) catch {}; + return error.InvalidAssertionKey; + } + }, + .not_matches_regex => { + if (std.ascii.eqlIgnoreCase(assertion.key, "status")) { + var status_buf: [3]u8 = undefined; + const status_code = @intFromEnum(response.status.?); + const status_str = std.fmt.bufPrint(&status_buf, "{}", .{status_code}) catch return error.StatusCodeFormat; + if (matchesRegex(status_str, assertion.value)) { + stderr.print("[Fail] Expected status code to NOT match regex \"{s}\", got \"{s}\"\n", .{ assertion.value, status_str }) catch {}; + return error.StatusCodeMatchesRegexButShouldnt; + } + } else if (std.ascii.eqlIgnoreCase(assertion.key, "body")) { + if (matchesRegex(response.body, assertion.value)) { + stderr.print("[Fail] Expected body content to NOT match regex \"{s}\", got \"{s}\"\n", .{ assertion.value, response.body }) catch {}; + return error.BodyContentMatchesRegexButShouldnt; + } + } else if (std.mem.startsWith(u8, assertion.key, "header[\"")) { + const header_name = try extractHeaderName(assertion.key); + const actual_value = response.headers.get(header_name); + if (actual_value != null and matchesRegex(actual_value.?, assertion.value)) { + stderr.print("[Fail] Expected header \"{s}\" to NOT match regex \"{s}\", got \"{s}\"\n", .{ header_name, assertion.value, actual_value orelse "null" }) catch {}; + return error.HeaderMatchesRegexButShouldnt; + } + } else { + stderr.print("[Fail] Invalid assertion key for not_matches_regex: {s}\n", .{assertion.key}) catch {}; + return error.InvalidAssertionKey; + } + }, else => {}, } } @@ -340,3 +488,61 @@ test "HttpParser supports starts_with for status, body, and header" { try check(&request, response); } + +test "HttpParser supports matches_regex and not_matches_regex for status, body, and headers" { + const allocator = std.testing.allocator; + + var assertions = std.ArrayList(HttpParser.Assertion).init(allocator); + defer assertions.deinit(); + + // Should pass: status matches regex for 2xx codes + try assertions.append(HttpParser.Assertion{ + .key = "status", + .value = "^2.*", + .assertion_type = .matches_regex, + }); + + // Should pass: body matches regex for JSON-like content + try assertions.append(HttpParser.Assertion{ + .key = "body", + .value = ".*success.*", + .assertion_type = .matches_regex, + }); + + // Should pass: header matches regex for application/* content types + try assertions.append(HttpParser.Assertion{ + .key = "header[\"content-type\"]", + .value = "application/.*", + .assertion_type = .matches_regex, + }); + + // Should pass: status does not match regex for error codes + try assertions.append(HttpParser.Assertion{ + .key = "status", + .value = "^[45].*", + .assertion_type = .not_matches_regex, + }); + + var request = HttpParser.HttpRequest{ + .method = .GET, + .url = "https://api.example.com", + .headers = std.ArrayList(http.Header).init(allocator), + .assertions = assertions, + .body = null, + }; + + var response_headers = std.StringHashMap([]const u8).init(allocator); + try response_headers.put("content-type", "application/json"); + defer response_headers.deinit(); + + const body = try allocator.dupe(u8, "Operation success completed"); + defer allocator.free(body); + const response = Client.HttpResponse{ + .status = http.Status.ok, + .headers = response_headers, + .body = body, + .allocator = allocator, + }; + + try check(&request, response); +} diff --git a/src/httpfile/parser.zig b/src/httpfile/parser.zig index 9a67c2d..75af035 100644 --- a/src/httpfile/parser.zig +++ b/src/httpfile/parser.zig @@ -13,8 +13,8 @@ const AssertionType = enum { not_contains, starts_with, ends_with, - // matches_regex, TODO: Soon. - // not_matches_regex, + matches_regex, + not_matches_regex, pub fn fromString(s: []const u8) ?AssertionType { if (std.ascii.eqlIgnoreCase(s, "==")) return .equal; @@ -24,8 +24,8 @@ const AssertionType = enum { if (std.ascii.eqlIgnoreCase(s, "not_contains")) return .not_contains; if (std.ascii.eqlIgnoreCase(s, "starts_with")) return .starts_with; if (std.ascii.eqlIgnoreCase(s, "ends_with")) return .ends_with; - // if (std.ascii.eqlIgnoreCase(s, "matches_regex")) return .matches_regex; - // if (std.ascii.eqlIgnoreCase(s, "not_matches_regex")) return .not_matches_regex; + if (std.ascii.eqlIgnoreCase(s, "matches_regex")) return .matches_regex; + if (std.ascii.eqlIgnoreCase(s, "not_matches_regex")) return .not_matches_regex; return null; } }; From e8e05dbedd1066f8b8a49a56c7c94c87b4fe5f2e Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:49:28 +0000 Subject: [PATCH 2/2] Replace hand-rolled regex implementation with zig-regex library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add zig-regex dependency to build.zig.zon and build.zig - Replace custom matchesRegex() and matchesRegexAt() functions with library call - Simplify implementation from 94 lines to 10 lines - Improve security by removing potential ReDoS vulnerabilities - Use proper memory management with ArenaAllocator 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Brad --- build.zig | 1 + build.zig.zon | 4 ++ src/httpfile/assertion_checker.zig | 98 +++--------------------------- 3 files changed, 12 insertions(+), 91 deletions(-) diff --git a/build.zig b/build.zig index 29ca7eb..1fda320 100644 --- a/build.zig +++ b/build.zig @@ -4,6 +4,7 @@ pub fn build(b: *std.Build) void { const exe_name = b.option([]const u8, "exe_name", "Name of the executable") orelse "httpspec"; const dependencies = [_][]const u8{ "clap", + "regex", }; const target = b.standardTargetOptions(.{}); diff --git a/build.zig.zon b/build.zig.zon index 8c83780..bc094a4 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -40,6 +40,10 @@ .url = "git+https://github.com/Hejsil/zig-clap#cc5c6a5d71a317ed4b0ad776842d1d0655f72d0a", .hash = "clap-0.10.0-oBajB7jkAQAZ4cKLlzkeV9mDu2yGZvtN2QuOyfAfjBij", }, + .regex = .{ + .url = "git+https://github.com/tiehuis/zig-regex#8e38e11d45d3c45e06ed3e994e1eb2e62ed60637", + .hash = "1220c65e96eb14c7de3e3a82bfc45a66e7ca72b80e0ae82d1b6b6e58b7d8c9e7b8", + }, }, .paths = .{ "build.zig", diff --git a/src/httpfile/assertion_checker.zig b/src/httpfile/assertion_checker.zig index d9d84ce..6e236ac 100644 --- a/src/httpfile/assertion_checker.zig +++ b/src/httpfile/assertion_checker.zig @@ -43,6 +43,7 @@ test "HttpParser supports contains and not_contains for headers" { } const std = @import("std"); const http = std.http; +const regex = @import("regex"); const HttpParser = @import("./parser.zig"); const Client = @import("./http_client.zig"); @@ -55,99 +56,14 @@ fn extractHeaderName(key: []const u8) ![]const u8 { } fn matchesRegex(text: []const u8, pattern: []const u8) bool { - if (pattern.len == 0) return text.len == 0; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + const allocator = arena.allocator(); - // Handle anchors - const starts_with_anchor = pattern[0] == '^'; - const ends_with_anchor = pattern.len > 0 and pattern[pattern.len - 1] == '$'; + const compiled_regex = regex.compile(allocator, pattern) catch return false; + defer compiled_regex.deinit(); - var actual_pattern = pattern; - if (starts_with_anchor) actual_pattern = pattern[1..]; - if (ends_with_anchor and actual_pattern.len > 0) actual_pattern = actual_pattern[0..actual_pattern.len - 1]; - - if (starts_with_anchor and ends_with_anchor) { - return matchesRegexAt(text, actual_pattern, 0) == text.len; - } else if (starts_with_anchor) { - return matchesRegexAt(text, actual_pattern, 0) != null; - } else if (ends_with_anchor) { - var i: usize = 0; - while (i <= text.len) : (i += 1) { - if (matchesRegexAt(text[i..], actual_pattern, 0)) |end_pos| { - if (i + end_pos == text.len) return true; - } - } - return false; - } else { - var i: usize = 0; - while (i <= text.len) : (i += 1) { - if (matchesRegexAt(text[i..], actual_pattern, 0) != null) return true; - } - return false; - } -} - -fn matchesRegexAt(text: []const u8, pattern: []const u8, text_pos: usize) ?usize { - var p_pos: usize = 0; - var t_pos = text_pos; - - while (p_pos < pattern.len and t_pos < text.len) { - if (p_pos + 1 < pattern.len and pattern[p_pos + 1] == '*') { - // Handle .* or character* - const match_char = pattern[p_pos]; - p_pos += 2; // Skip char and * - - // Try matching zero occurrences first - if (matchesRegexAt(text, pattern[p_pos..], t_pos)) |end_pos| { - return t_pos + end_pos; - } - - // Try matching one or more occurrences - while (t_pos < text.len) { - if (match_char == '.' or text[t_pos] == match_char) { - t_pos += 1; - if (matchesRegexAt(text, pattern[p_pos..], t_pos)) |end_pos| { - return t_pos + end_pos; - } - } else { - break; - } - } - return null; - } else if (pattern[p_pos] == '.') { - // Match any single character - t_pos += 1; - p_pos += 1; - } else if (pattern[p_pos] == '[') { - // Character class - const close_bracket = std.mem.indexOfScalarPos(u8, pattern, p_pos + 1, ']') orelse return null; - const char_class = pattern[p_pos + 1..close_bracket]; - var matched = false; - for (char_class) |c| { - if (text[t_pos] == c) { - matched = true; - break; - } - } - if (!matched) return null; - t_pos += 1; - p_pos = close_bracket + 1; - } else { - // Literal character match - if (text[t_pos] != pattern[p_pos]) return null; - t_pos += 1; - p_pos += 1; - } - } - - // Handle remaining .* patterns at end - while (p_pos + 1 < pattern.len and pattern[p_pos + 1] == '*') { - p_pos += 2; - } - - if (p_pos == pattern.len) { - return t_pos - text_pos; - } - return null; + return compiled_regex.match(text); } pub fn check(request: *HttpParser.HttpRequest, response: Client.HttpResponse) !void {