diff --git a/README.md b/README.md index 2719dbe..9a97f7a 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,480 @@ [![parcom ci](https://github.com/dokwork/parcom/actions/workflows/ci.yml/badge.svg)](https://github.com/dokwork/parcom/actions/workflows/ci.yml) ![zig version](https://img.shields.io/badge/zig%20version-0.14.0-fcca77) +_Consume input, not memory._ + > [!WARNING] > This library is underdeveloped. API is not stable. -Parser combinators for Zig. +This library provides an implementation of the parser combinators. + +Three different types of parser implementations exist: + + - The base parser implementations contain the logic for parsing input and serve + as the fundamental building blocks; + - The `ParserCombinator`provides methods to combine parsers and create new ones; + - The `TaggedParser` erases the type of the underlying parser and simplifies + the parser's type declaration. + +Every parser provides the type of the parsing result as a constant `ResultType: +type`. + +`Parcom` offers two options for consuming data: + - parse the entire input string at once, + - or consume and parse byte by byte from `AnyReader`. + +When the input is a reader, `Parcom` works as a buffered reader. It reads few +bytes to the buffer and then parse them. + +The result of parsing by any parser can be a value of type `ResultType` in successful +case, or `null` if parsing was failed. In successful case not whole input can be +consumed. If you have to be sure, that every byte was consumed and parsed, use the +[`end()`](https://dokwork.github.io/parcom/index.html#parcom.end) parser explicitly. + +## Installation + +Fetch `Parcom` from github: +```sh +zig fetch --save git+https://github.com/dokwork/parcom +``` +Check that it was added to the list of dependencies in your `build.zig.zon` file: +```zig +... 
+ .dependencies = .{ + .parcom = .{ + .url = "git+https://github.com/dokwork/parcom#b93b8fb14f489007f27d42f8254f12b7d57d07da", + .hash = "parcom-0.3.0-Hs8wfHFUAQBhhH-swYl1wrMLSh76uApvVzYBl56t90Ua", + }, + }, +``` +Add `Parcom` module to your `build.zig`: +```zig + const parcom = b.dependency("parcom", .{ + .target = target, + .optimize = optimize, + }); + ... + exe.root_module.addImport("parcom", parcom.module("parcom")); +``` + +## Quick start + +Let's create a parser, which will parse and execute a simple math expression with follow +grammar: +``` +# The `number` is a sequence of unsigned integer numbers +Number := [0-9]+ +# The `value` is a `number` or an `expression` in brackets +Value := Number / '(' Expr ')' +# The `sum` is an operation of adding or substraction of two or more values +Sum := Value (('+' / '-') Value)* +# The `expression` is result of evaluation the combination of values and operations +Expr := evaluate(Sum) +``` +Our parser will be capable of parsing and evaluating mathematical expressions +that include addition and subtraction operations, unsigned integers, and nested +expressions within brackets. + +### Base parser + +The `number` from the grammar above is a sequence of symbols from the range ['0', '9']. +Parcom has a constructor of the parser of bytes in a range, but we will create +our own parser starting from the base parser `AnyChar`. `AnyChar` is a simplest +parser consumed the input. It returns the next byte from the input, or +`null` if the input is empty. + +To parse only numeric symbols we should provide a classifier - function that +receives the result of a parser and returns true only if it is an expected value: +```zig +const parcom = @import("parcom"); + +// ResultType: u8 +const num_char = parcom.anyChar().suchThat({}, struct { + fn condition(_: void, ch: u8) bool { + return switch (ch) { + '0' ... 
'9' => true, + else => false, + }; + } +}.condition); +``` +Every function required by combinators in `Parcom` library has a `context` parameter. +That gives more flexibility for possible implementations of those functions. + +### Repeat parsers + +Next, we should continue applying our parser until we encounter the first +non-numeric symbol or reach the end of the input. To achieve this, we need to +store the parsed results. The simplest solution is to use a sentinel array: +```zig +// ResultType: [10:0]u8 +const number = num_char.repeatToSentinelArray(.{ .max_count = 10 }); +``` +But that option is available only for parsers with scalar result types. For more +general cases a regular array can be used. If you know the exact count of elements +in the parsed sequence, you can specify it to have an array with exact length +as result: +```zig +// ResultType: [3]u8 +const number = num_char.repeatToArray(3); +``` +However, this is a rare case. More often, the exact number of elements is +unknown, but the maximum number can be estimated: +```zig +// ResultType: struct { [10]u8, usize } +const number = num_char.repeatToArray(.{ .max_count = 10 }); +``` +In such cases, the result is a tuple consisting of the array and a count of the +parsed items within it. + +For cases when it is impossible to predict the maximum count, we can allocate a slice +to store the parsed results: +```zig +// ResultType: []u8 +const number = num_char.repeat(allocator, .{}); + +// Don't forget to free the memory, allocated for the slice! +``` +or use an arbitrary storage and a function to add an item to it: +```zig +var list = std.ArrayList(u8).init(allocator); +defer list.deinit(); +// ResultType: *std.ArrayList(u8) +const p = anyChar().repeatTo(&list, .{}, std.ArrayList(u8).append); +``` + +Notice, that no matter which combinator you use to collect repeated numbers, +you have to set the `.min_count` to 1, because an empty collection of chars is +not a number! 
+```zig +// ResultType: []u8 +const number = num_char.repeat(allocator, .{ .min_count = 1 }); +``` + +**RepeatOptions** + +All repeated combinators except the `repeatToArray(usize)` receive the `RepeatOptions`, +a structure with minimum and maximum counts of the elements in the sequence. All +parsers stop when they reach the maximum count and fail if they don't reach the minimum. + +### Try one or try another + +We'll postpone the `value` parser for now, and instead of that will focus on +creating parsers for the '+' and '-' symbols. +```zig +// ResultType: i32 +const value: ParserCombinator(???) = ???; +``` + +First of all, we should be able to parse every symbol separately. The `char` +parser is the best candidate for it: +```zig +const plus = parcom.char('+'); +const minus = parcom.char('-'); +``` +Next, we have to choose one of them. To accomplish this, let's combine parsers +to a new one, that first attempts one, and if it fails, it will try the other: +```zig +// ResultType: parcom.Either(u8, u8) +const plus_or_minus = plus.orElse(minus); +``` +The result type of the new parser is `parcom.Either(L, R)`, an alias for +`union(enum) { left: L, right: R }` type. + +### Combine results + +We have a parser for operations and we assume that we have a parser for +values as well. This is sufficient to build the `Sum` parser, which, as you +may recall, follows this structure: +``` +Sum := Value (('+' / '-') Value)* +``` +Let's start from the part in brackets. We have to combine the `plus_or_minus` parser +with the `value` parser and repeat the result: +```zig +// ResultType: []struct{ parcom.Either(u8, u8), i32 } +plus_or_minus.andThen(value).repeat(allocator, .{}); +``` +The `andThen` combinator runs the left parser and then the right. If both +parsers were successful, it returns a tuple of results. 
Finally, we can combine +the value with the new parser to have the version of the `expression` +parser that follows the grammar: +```zig +// ResultType: struct{ i32, []struct{ parcom.Either(u8, u8), i32 } } +const sum = value.andThen(plus_or_minus.andThen(value).repeat(allocator, .{})); +``` + +### Transform the result + +So far so good. We are ready to create a parser that will not only parse the input, but +also sum the parsed values: +```zig +const expr = sum.transform(i32, {}, struct { + fn evaluate(_: void, value: struct{ i32, []struct{ parcom.Either(u8, u8), i32 } }) !i32 { + var result: i32 = value[0]; + for (value[1]) |op_and_arg| { + switch(op_and_arg[0]) { + .left => result += op_and_arg[1], + .right => result -= op_and_arg[1], + } + } + return result; + } +}.evaluate); +``` +The combinator `transform` requires a context and a function for transformation. It +runs the left parser and applies the function to the parsed result. + +### Tagged parser + +Now it is time to build the `value` parser: +``` +Value := Number / '(' Expr ')' +``` +This is a recursive parser that not only forms part of the `expression` parser but +also depends on it. How can we implement this? First of all, let's wrap the +`expression` parser in a function: +```zig +const std = @import("std"); +const parcom = @import("parcom"); + +fn expression(allocator: std.mem.Allocator) ??? { + + // ResultType: u8 + const num_char = parcom.anyChar().suchThat({}, struct { + fn condition(_: void, ch: u8) bool { + return switch (ch) { + '0' ... 
'9' => true, + else => false, + }; + } + }.condition); + + // ResultType: i32 + const number = num_char.repeat(allocator, .{ .min_count = 1 }).transform(i32, {}, struct { + fn parseInt(_: void, value: []u8) !i32 { + return try std.fmt.parseInt(i32, value, 10); + } + }.parseInt); + + // ResultType: i32 + const value = ???; + + // ResultType: parcom.Either(u8, u8) + const plus_or_minus = parcom.char('+').orElse(parcom.char('-')); + + // ResultType: struct{ i32, []struct{ parcom.Either(u8, u8), i32 } } + const sum = value.andThen(plus_or_minus.andThen(value).repeat(allocator, .{})); + + const expr = sum.transform(i32, {}, struct { + fn evaluate(_: void, v: struct{ i32, []struct{ parcom.Either(u8, u8), i32 } }) !i32 { + var result: i32 = v[0]; + for (v[1]) |op_and_arg| { + switch(op_and_arg[0]) { + .left => result += op_and_arg[1], + .right => result -= op_and_arg[1], + } + } + return result; + } + }.evaluate); + + return expr; +} +``` +The type of `ParserCombinator` in `Parcom` can be very cumbersome, and it is +often impractical to manually declare it as a function's type. However, Zig +requires this type to allocate enough memory for the parser instance. +While most parsers in `Parcom` are simply namespaces, this is not true for all +of them. What can we do is moving our parser to heap and replace particular type +by the pointer to it. This is exactly how the `TaggedParser` works. It has a +pointer to the original parser, and a pointer to a function responsible for +parsing the input. More over, the `TaggedParser` has explicit `ResultType`: +```zig +const std = @import("std"); +const parcom = @import("parcom"); + +fn expression(allocator: std.mem.Allocator) parcom.TaggedParser(i32) { + ... 
+ return expr.taggedAllocated(allocator); +} +``` + +### Deferred parser + +Let's go ahead and finally build the `value` parser: +```zig +const value = number.orElse( + parcom.char('(').rightThen(expression(allocator)).leftThen(parcom.char(')')) +); +``` +Pay attention to the `rightThen` and `leftThen` combinators. Unlike the `andThen` +combinator, these two do not produce a tuple. Instead, they ignore one value and +return another. The `rightThen` uses only the result of the right parser, and +`leftThen` of the left parser respectively. It means, that both brackets will be +parsed, but ignored in the example above. + +But this is not all. Unfortunately, such an implementation of the `value` +parser will lead to an infinite loop of invocations of the `expression` function. We +can solve this by invoking the function only when we need to parse an expression +within brackets. The `Parcom` has the `deferred` parser for such purposes. +It receives the `ResultType` of `TaggedParser` which should be returned by the function, +a context that should be passed to the function and a pointer to the function: + +```zig +const value = number.orElse( + parcom.char('(').rightThen(parcom.deferred(i32, allocator, expression)).leftThen(parcom.char(')')) +); +``` +When the tagged parser completes its deferred work, the `deinit` method will be +invoked, and memory will be freed. But, do not forget to invoke `deinit` +manually, when you create the `TaggedParser` outside the `deferred` parser! + +
+ Complete solution + +```zig +const std = @import("std"); +const parcom = @import("parcom"); + +fn expression(allocator: std.mem.Allocator) !parcom.TaggedParser(i32) { + + // ResultType: u8 + const num_char = parcom.anyChar().suchThat({}, struct { + fn condition(_: void, ch: u8) bool { + return switch (ch) { + '0' ... '9' => true, + else => false, + }; + } + }.condition); + + // ResultType: i32 + const number = num_char.repeat(allocator, .{ .min_count = 1 }).transform(i32, {}, struct { + fn parseInt(_: void, value: []u8) !i32 { + return try std.fmt.parseInt(i32, value, 10); + } + }.parseInt); + + // ResultType: i32 + const value = number.orElse( + parcom.char('(').rightThen(parcom.deferred(i32, allocator, expression)).leftThen(parcom.char(')')) + ) + .transform(i32, {}, struct { + fn getFromEither(_: void, v: parcom.Either(i32, i32)) !i32 { + return switch (v) { + .left => v.left, + .right => v.right, + }; + } + }.getFromEither); + + // ResultType: parcom.Either(u8, u8) + const plus_or_minus = parcom.char('+').orElse(parcom.char('-')); + + // ResultType: struct{ i32, []struct{ parcom.Either(u8, u8), i32 } } + const sum = value.andThen(plus_or_minus.andThen(value).repeat(allocator, .{})); + + // ResultType: i32 + const expr = sum.transform(i32, {}, struct { + fn evaluate(_: void, v: struct{ i32, []struct{ parcom.Either(u8, u8), i32 } }) !i32 { + var result: i32 = v[0]; + for (v[1]) |op_and_arg| { + switch(op_and_arg[0]) { + .left => result += op_and_arg[1], + .right => result -= op_and_arg[1], + } + } + return result; + } + }.evaluate); + + return expr.taggedAllocated(allocator); +} + +test "9-(5+2) == 2" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const parser = try expression(arena.allocator()); + try std.testing.expectEqual(2, try parser.parseString("9-(5+2)")); +} +``` + +
+ +### Cutting the input + +In some cases it is reasonable not to consume the entire input to the string, and +instead parse it on-the-fly. For such cases, the `Parcom` library provides the +`parseFromReader` method, which takes a `std.io.AnyReader` as the input. During the +parsing, all consumed bytes are stored in an internal buffer to make it possible +to rollback the input and try another parser (such as with the `orElse` combinator). +While this approach may lead to the same result as reading the whole input to the string, +rollback may not make sense for some parsers. For example, when parsing JSON, +encountering the '{' symbol means the entire JObject must be parsed. If parsing +cannot proceed, it indicates that the input is malformed, and all parsers will +fail. It means, that the input can be cropped right before the '{' symbol. + +In the example above it can be reasonable to cut the input when the left bracket is +parsed: +```zig +... +const value = number.orElse( + parcom.char('(').cut().rightThen(parcom.deferred(i32, allocator, expression)).leftThen(parcom.char(')')) +// added this ^ +) +... +``` + +Cropping the input, when possible, can significantly reduce required memory and +may improve the speed of parsing. See [this example](examples/json.zig) for more details. + +### Debug + +When something is going wrong during the parsing, and a correct at first glance +parser returns null, it can be difficult to understand the root cause without +additional insights. In `Parcom` you can turn on logging for any particular +parser to see how it works during the parsing. For example, let's turn on +logging for the expression parser from the example above (with the added `cut` +combinator): +```zig +... 
+ return expr.logged(.{ .label = "EXPR", .scope = .example }).taggedAllocated(allocator); +} +``` +and run it on a string with unexpected symbol '!': +```zig +test "parse unexpected symbol" { + // don't forget to turn on debug level for the test + std.testing.log_level = .debug; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const parser = try expression(arena.allocator()); + try std.testing.expectEqual(2, try parser.parseString("9-(!5+2)")); +} +``` +Now, we have enough insights to understand what happened and where it occurred: +``` +error: 'expression.test.parse unexpected symbol' failed: [example] (debug): +The parsing by the has been started from position 0: +[9]-(!5+2) +[example] (debug): +The parsing by the has been started from position 3: +…[!]5+2) +[example] (debug): The parsing is failed at position 3: +…[!]5+2) +[example] (debug): End parsing by the . Cut 3 items during the parsing process. +[parcom] (warn): Imposible to reset the input from 3 to 2 at position 3: +…[!]5+2). +[example] (debug): An error error.ResetImposible occured on parsing by at position 3: +…[!]5+2) +[example] (debug): End parsing by the . Cut 3 items during the parsing process. +``` + +## Documentation +[https://dokwork.github.io/parcom/index.html](https://dokwork.github.io/parcom/index.html) -Documentation: [https://dokwork.github.io/parcom/index.html](https://dokwork.github.io/parcom/index.html) +## Examples -Examples: - [The parser of a math expression](examples/expression.zig) - [The json parser](examples/json.zig) diff --git a/examples/json.zig b/examples/json.zig index 3ea9531..f897500 100644 --- a/examples/json.zig +++ b/examples/json.zig @@ -1,15 +1,15 @@ //! This is a simple implementation of a JSON parser. While it isn't fully correct, it demonstrates -//! the flexibility of `parcom` and serves as a proof of concept for optimization through input -//! truncation. +//! 
the flexibility of the `parcom` library and serves as a proof of concept for optimization through +//! the input truncation. //! //! This example can be run to parse json from the stdin: //! ```sh //! echo '{ "hello" : "world" }' | zig build json //! ``` //! By default, the result of parsing is ignored. -//! To print the parsed json to stdout use `-o` argument. In that case this example will -//! parse the stdin, build AST for parsed input, serialize that AST back to string format, -//! and print it to the stdout: +//! To print the parsed json to stdout use `-o` argument. In that case the stdin will be parsed, +//! and AST for parsed input built. Then this AST will be serialized back to the string, and printed +//! to the stdout: //! ```sh //! echo '{ "hello" : "world" }' | zig build json -- -o //! > { "hello": "world" } diff --git a/src/parcom.zig b/src/parcom.zig index 81e9030..c11ecf2 100644 --- a/src/parcom.zig +++ b/src/parcom.zig @@ -23,10 +23,27 @@ //! This library provides an implementation of the parser combinators. //! //! Three different types of parser implementations exist: -//! 1. The inner parser implementations, which contain the logic for parsing the input. -//! 2. The public wrapper `ParserCombinator`, which provides methods to combine parsers and create new ones. -//! 3. The public wrapper `TaggedParser`, which erase the type of the underlying parser in `ParserCombinator`, -//! allowing for explicit type declaration in the code. +//! +//! - The base parser implementations contain the logic for parsing input and serve +//! as the fundamental building blocks; +//! - The `ParserCombinator`provides methods to combine parsers and create new ones; +//! - The `TaggedParser` erases the type of the underlying parser and simplifies +//! the parser's type declaration. +//! +//! Every parser provides the type of the parsing result as a constant `ResultType: +//! type`. +//! +//! `Parcom` offers two options for consuming data: +//! 
- parse the entire input string at once, +//! - or consume and parse byte by byte from `AnyReader`. +//! +//! When the input is a reader, `parcom` works as a buffered reader. It reads few +//! bytes to the buffer and then parse them. +//! +//! The result of parsing by any parser can be a value of type `ResultType` in successful +//! case, or `null` if parsing was failed. In successful case not whole input can be +//! consumed. If you have to be sure, that every byte was consumed and parsed, use the +//! `end()` parser explicitly. //! //! Github page: [https://github.com/dokwork/parcom](https://github.com/dokwork/parcom) const std = @import("std"); @@ -42,6 +59,9 @@ pub fn successfull(result: anytype) ParserCombinator(Successfull(@TypeOf(result) /// Creates a parser that fails if the input buffer contains not handled items, or otherwise /// tries to consume one byte from the input, and completes successfully if `EndOfStream` /// happened. It similar to '$' in regexp. +/// +/// `ResultType: void` +/// /// Example: /// ```zig /// test { @@ -59,6 +79,9 @@ test end { } /// Creates a parser that reads one byte from the input, and returns it as the result. +/// +/// `ResultType: u8` +/// /// Example: /// ```zig /// test { @@ -77,6 +100,9 @@ test anyChar { /// Creates a parser that reads one byte from the input, and returns `C` as the /// result if the same byte was read. +/// +/// `ResultType: u8` +/// /// Example: /// ```zig /// test { @@ -97,6 +123,18 @@ test char { /// Creates a parser that reads one byte from the input and returns it as the result /// if it is present in the chars set. 
+/// +/// `ResultType: u8` +/// +/// Example: +/// ```zig +/// test { +/// const p = oneCharOf("ab"); +/// try std.testing.expectEqual('a', try p.parseString("a")); +/// try std.testing.expectEqual('b', try p.parseString("b")); +/// try std.testing.expectEqual(null, try p.parseString("c")); +/// } +/// ``` pub inline fn oneCharOf(comptime chars: []const u8) ParserCombinator(OneCharOf(chars)) { return .{ .parser = .{} }; } @@ -112,10 +150,14 @@ test oneCharOf { /// Creates a parser that reads bytes from the input into the buffer as long as they are in the /// chars set "+-0123456789_boXABCDF". Then it attempts to parse the buffer as an integer using /// `std.fmt.parseInt`. +/// +/// `ResultType: T` +/// /// Example: /// ```zig /// test { -/// const p = int(i8, 5); +/// const T = i8; +/// const p = int(T, 5); /// const alloc = std.testing.allocator; /// try std.testing.expectEqual(2, try p.parseString(alloc, "2")); /// try std.testing.expectEqual(2, try p.parseString(alloc, "+2")); @@ -130,7 +172,8 @@ pub inline fn int(comptime T: type, max_length: usize) ParserCombinator(Int(T, m } test int { - const p = int(i8, 5); + const T = i8; + const p = int(T, 5); try std.testing.expectEqual(2, try p.parseString("2")); try std.testing.expectEqual(2, try p.parseString("+2")); try std.testing.expectEqual(-2, try p.parseString("-2")); @@ -142,17 +185,21 @@ test int { /// Creates a parser that reads bytes from the input into the buffer as long as they are in the /// chars set "+-0123456789_e.", or the case insensitive words "nan" or "inf". /// Then it attempts to parse the buffer as a float using `std.fmt.parseFloat`. 
+/// +/// `ResultType: T` +/// /// Example: /// ```zig /// test { -/// const p = float(f16, 10); -/// try std.testing.expectEqual(0.0, try p.parseString("0")); -/// try std.testing.expectEqual(0.0, try p.parseString("+0")); -/// try std.testing.expectEqual(0.0, try p.parseString("-0")); -/// try std.testing.expectEqual(1234, try p.parseString("1.234e3")); -/// try std.testing.expectEqual(std.math.inf(f16), try p.parseString("Inf")); -/// try std.testing.expectEqual(-std.math.inf(f16), try p.parseString("-inf")); -/// try std.testing.expect(try p.parseString("NaN") != null); +/// const T = f16; +/// const p = float(T, 10); +/// try std.testing.expectEqual(0.0, try p.parseString("0")); +/// try std.testing.expectEqual(0.0, try p.parseString("+0")); +/// try std.testing.expectEqual(0.0, try p.parseString("-0")); +/// try std.testing.expectEqual(1234, try p.parseString("1.234e3")); +/// try std.testing.expectEqual(std.math.inf(T), try p.parseString("Inf")); +/// try std.testing.expectEqual(-std.math.inf(T), try p.parseString("-inf")); +/// try std.testing.expect(try p.parseString("NaN") != null); /// } /// ``` pub inline fn float(comptime T: type, max_length: usize) ParserCombinator(Float(T, max_length)) { @@ -160,17 +207,21 @@ pub inline fn float(comptime T: type, max_length: usize) ParserCombinator(Float( } test float { - const p = float(f16, 10); + const T = f16; + const p = float(T, 10); try std.testing.expectEqual(0.0, try p.parseString("0")); try std.testing.expectEqual(0.0, try p.parseString("+0")); try std.testing.expectEqual(0.0, try p.parseString("-0")); try std.testing.expectEqual(1234, try p.parseString("1.234e3")); - try std.testing.expectEqual(std.math.inf(f16), try p.parseString("Inf")); - try std.testing.expectEqual(-std.math.inf(f16), try p.parseString("-inf")); + try std.testing.expectEqual(std.math.inf(T), try p.parseString("Inf")); + try std.testing.expectEqual(-std.math.inf(T), try p.parseString("-inf")); try std.testing.expect(try 
p.parseString("NaN") != null); } /// Creates a parser that processes a char from the chars set ['a'..'z', 'A'..'Z', '0'..'9']. +/// +/// `ResultType: u8` +/// /// Example: /// ```zig /// test { @@ -205,6 +256,9 @@ test letterOrNumber { } /// Creates a parser that processes only passed sequence of chars in the same order. +/// +/// `ResultType: []const u8` +/// /// Example: /// ```zig /// test { @@ -237,6 +291,9 @@ test word { /// Creates a parser that processes only passed sequence of chars in the same order, but ignores /// case. +/// +/// `ResultType: []const u8` +/// /// Example: /// ```zig /// test { @@ -269,6 +326,9 @@ test wORD { /// Creates a parser that processes characters within the ASCII range, where From is the lowest /// character in the ASCII table and To is the highest, inclusive. +/// +/// `ResultType: u8` +/// /// Example: /// ```zig /// test { @@ -310,6 +370,9 @@ test range { /// Creates a parser that sequentially applies all passed parsers, and returns a tuple of /// all results. +/// +/// `ResultType: struct { ... }` +/// /// Example: /// ```zig /// test { @@ -328,7 +391,11 @@ test tuple { /// Creates a parser that invokes the function `f` to create a tagged parser, which will be used /// to parse the input. That tagged parser will be deinited at the end of parsing if the destructor is provided -/// (parser was create by the `taggedAllocated` method. +/// (parser was create by the `taggedAllocated` method). 
+/// +/// `ResultType: T` +/// +/// Example: /// ```zig /// test { /// var result = std.ArrayList(u8).init(std.testing.allocator); @@ -362,12 +429,12 @@ test tuple { ///} /// ``` pub inline fn deferred( - comptime ResultType: type, + comptime T: type, context: anytype, - f: *const fn (context: @TypeOf(context)) anyerror!TaggedParser(ResultType), -) ParserCombinator(Deffered(@TypeOf(context), ResultType)) { + f: *const fn (context: @TypeOf(context)) anyerror!TaggedParser(T), +) ParserCombinator(Deffered(@TypeOf(context), T)) { return .{ - .parser = Deffered(@TypeOf(context), ResultType){ .context = context, .buildParserFn = f }, + .parser = Deffered(@TypeOf(context), T){ .context = context, .buildParserFn = f }, }; } @@ -549,6 +616,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Combines self parser with other to create a new parser that applies both underlying parsers /// to the input, producing a tuple of results from each. + /// + /// `ResultType: struct{ Self.ResultType, @TypeOf(other.parser).ResultType }` + /// /// Example: /// ```zig /// test { @@ -574,6 +644,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Combines self parser with other to create a new parser that /// applies both underlying parsers to the input, producing a result from the self parser. + /// + /// `ResultType: Self.ResultType` + /// /// Example: /// ```zig /// test { @@ -603,6 +676,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Combines self parser with other to create a new parser that /// applies both underlying parsers to the input, producing a result from the other parser. + /// + /// `ResultType: @TypeOf(other.parser).ResultType` + /// /// Example: /// ```zig /// test { @@ -634,6 +710,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// parser, and if it was unsuccessful, applies the other. It returns tagged union with /// `.left` value for the result from the self parser, or the `.right` value for the result /// from the other parser. 
+ /// + /// `ResultType: parcom.Either(Self.ResultType, @TypeOf(other.parser).ResultType)` + /// /// Example: /// ```zig /// test { @@ -659,7 +738,11 @@ pub fn ParserCombinator(comptime Parser: type) type { } /// Drops all items from the input buffer if the self parser was successful. It makes resetting to - /// items before the current position impossible. Example: + /// items before the current position impossible. + /// + /// `ResultType: Self.ResultType` + /// + /// Example: /// ```zig /// test { /// const p = char('a').andThen(char('?')); @@ -686,7 +769,11 @@ pub fn ParserCombinator(comptime Parser: type) type { } /// Explicitly sets the expected result type for parser. It can help solve type inference - /// in some cases. Example: + /// in some cases. + /// + /// `ResultType: ExpectedResultType` + /// + /// Example: /// ```zig /// const T = struct { u8, u8 }; /// var p = char('a').andThen(char('b')).coerce(T); @@ -718,7 +805,11 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Wraps the self parser in a new one that returns `Optional(self.ResultType).some` when /// the underlying parser successful, or `Optional(self.ResultType).none` when the - /// underlying fails. Example: + /// underlying fails. + /// + /// `ResultType: parcom.Optional(Self.ResultType)` + /// + /// Example: /// ```zig /// test { /// const p = char('a').optional(); @@ -740,6 +831,22 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Wraps the self parser in a new one that applies the `condition` function to the result of /// the underlying parser and fails if the function returns `false`. + /// + /// `ResultType: Self.ResultType` + /// + /// Example: + /// test { + /// const p = anyChar().suchThat({}, struct { + /// fn condition(_: void, ch: u8) bool { + /// return switch (ch) { + /// '0' ... 
'9' => true, + /// else => false, + /// }; + /// } + /// }.condition); + /// try std.testing.expectEqual('0', try p.parseString("0")); + /// try std.testing.expectEqual(null, try p.parseString("a")); + /// } pub fn suchThat( self: Self, context: anytype, @@ -754,6 +861,19 @@ pub fn ParserCombinator(comptime Parser: type) type { }; } + test suchThat { + const p = anyChar().suchThat({}, struct { + fn condition(_: void, ch: u8) bool { + return switch (ch) { + '0'...'9' => true, + else => false, + }; + } + }.condition); + try std.testing.expectEqual('0', try p.parseString("0")); + try std.testing.expectEqual(null, try p.parseString("a")); + } + // FIXME: It triggers infinite Semantic Analysis // pub fn not(self: Self) ParserCombinator(Not(Implementation)) { // return .{ .parser = .{ .underlying = self.parser } }; @@ -765,6 +885,18 @@ pub fn ParserCombinator(comptime Parser: type) type { // try std.testing.expectEqual(null, try p.parseString("ab")); // } + /// Wraps the self parser in a new one that repeat the self parser and ignores the result. + /// Returns the count of skipped items. + /// + /// `ResultType: u64` + /// + /// Example: + /// ```zig + /// test { + /// const p = char(' ').skip(.{}).andThen(char('!')); + /// try std.testing.expectEqual(.{ 6, '!' }, try p.parseString(" !")); + /// } + /// ``` pub fn skip(self: Self, comptime options: RepeatOptions) ParserCombinator(Skip(Parser, options)) { return .{ .parser = .{ .underlying = self.parser } }; } @@ -777,6 +909,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Wraps the self parser in a new one that repeat it until the underlying parser fails. /// All parsed results are stored in a slice allocated by the provided allocator. /// The returned slice must be freed using `free` method of the same allocator. 
+ /// + /// `ResultType: []Self.ResultType` + /// /// Example: /// ```zig /// test { @@ -818,10 +953,16 @@ pub fn ParserCombinator(comptime Parser: type) type { /// repeated until the passed number of items will be parsed, or the underlying parser /// fails. If the underlying parser fails before parsing enough items, the new parser /// fails. Otherwise, an array containing the count items is returned. + /// + /// `ResultType: [options]Self.ResultType` + /// /// 2. `RepeatOptions` - the new parser will be repeated until the `max_count` items will be /// parsed, or the underlying parser fails. If the underlying parser fails before producing /// `min_count` results, the new parser fails. Otherwise, a tuple with an array with size /// `max_count` and the count of parsed items will be returned. + /// + /// `ResultType: struct{ [options.max_count]Self.ResultType, usize }` + /// /// Example: /// ```zig /// test { @@ -879,6 +1020,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// produce `max_count` results. Instead, it set the sentinel element after the last /// parsed result to the final array. If count of parsed results is less than `min_count`, the /// returned parser fails. + /// + /// `ResultType: [options.max_count:0]Self.ResultType` + /// /// Example: /// ```zig /// test { @@ -930,6 +1074,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Wraps the self parser in a new one that repeat the underlying parser until it fails, /// or consumed `max_count` items, if that limit is specified in the provided `RepeatOptions`. /// It applies the function `add_to_collection` to the every parsed item. + /// + /// `ResultType: @TypeOf(collector)` + /// /// Example: /// ```zig /// test { @@ -980,6 +1127,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Wraps the self parser in a new one that applies the function `f` to the parsing result and /// returns the value produced by the function. 
+ /// + /// `ResultType: Result` + /// /// Example: /// ```zig /// test { @@ -1011,6 +1161,17 @@ pub fn ParserCombinator(comptime Parser: type) type { try std.testing.expectEqual(42, try p.parseString("42")); } + /// Wraps the self parser in a new one that returns the `new_value` if the self parser is successful. + /// + /// `ResultType: @TypeOf(new_value)` + /// + /// Example: + /// ```zig + /// test { + /// const p = word("true").as(true); + /// try std.testing.expectEqual(true, try p.parseString("true")); + /// } + /// ``` pub fn as( self: Self, new_value: anytype, @@ -1025,6 +1186,9 @@ pub fn ParserCombinator(comptime Parser: type) type { /// Create a parser that writes the result of running the underlying /// parser to the log with passed options. + /// + /// `ResultType: Self.ResultType` + /// pub fn logged(self: Self, comptime options: LogOptions) ParserCombinator(Logged(Self, options)) { return .{ .parser = Logged(Self, options){ .underlying = self } }; } @@ -1088,14 +1252,12 @@ pub const RepeatOptions = struct { } }; -/// Describes how the parsing process should be logged. -/// As minimum, the `scope` of the logger must be provided. -/// It also possible to change the `log_level` from the default -/// `.debug` to any other values supported by the `std.log.Level`. -/// The name of the parser that used in logged messages can be very verbose. -/// To override it by some custom value set the `label` property. +/// Describes how the parsing process should be logged. You may provide the `scope` of the logger, +/// which is `.parcom` by default, or change the `log_level` from the default `.debug` to any other +/// value supported by `std.log.Level`. The name of the parser that is used in logged messages can +/// be very verbose. To override it with a custom value, set the `label` property. 
pub const LogOptions = struct { - scope: @Type(.enum_literal), + scope: @Type(.enum_literal) = .parcom, log_level: std.log.Level = .debug, label: ?[]const u8 = null, }; @@ -1170,8 +1332,8 @@ const Input = struct { fn reset(self: *Input, to_position: usize) Error!void { if (self.committed_count > 0 and to_position < self.committed_count) { log.warn( - "Imposible to reset the input from {d} to {d} at {any}.\nItems already commited: {d}", - .{ self.position, to_position, self, self.committed_count }, + "Imposible to reset the input from {d} to {d} at {any}.", + .{ self.position, to_position, self }, ); return Error.ResetImposible; } @@ -2009,7 +2171,7 @@ fn Logged(comptime Underlying: type, comptime options: LogOptions) type { fn parse(self: Self, input: *Input) anyerror!?ResultType { writeToLog("\nThe parsing by the {any} has been started from {any}", .{ self, input }); defer writeToLog( - "End parsing by the {any}. Cut {d} items during the parsing process.\n", + "End parsing by the {any}. Cut {d} items during the parsing process.", .{ self, input.committed_count }, );