-
Notifications
You must be signed in to change notification settings - Fork 0
08 Zig Example
This chapter walks through the complete Zig grammar implementation (zig.zyn), explaining each section and the design decisions behind it.
The Zig grammar supports:
- Functions with typed parameters
- Structs and enums
- Control flow (if, while, for)
- Expressions with proper operator precedence
- Type expressions (pointers, optionals, arrays)
// 1. Language metadata
@language { ... }
// 2. Program structure
program = { ... }
declarations = { ... }
declaration = { ... }
// 3. Type declarations
struct_decl = { ... }
enum_decl = { ... }
// 4. Function declarations
fn_decl = { ... }
// 5. Statements
statement = { ... }
if_stmt = { ... }
while_stmt = { ... }
// ...
// 6. Expressions (by precedence)
expr = { ... }
logical_or = { ... }
// ... down to atoms
// 7. Literals and identifiers
integer_literal = { ... }
identifier = { ... }
// 8. Operators
add_op = { ... }
// ...
// 9. Whitespace/comments
WHITESPACE = { ... }
COMMENT = { ... }
@language {
name: "Zig",
version: "0.11",
file_extensions: [".zig"],
entry_point: "main",
}
This metadata tells the compiler:
- The language name for error messages
- Which file extensions to recognize
- Which function to execute for `--run`
program = { SOI ~ declarations ~ EOI }
-> TypedProgram {
"get_child": { "index": 0 }
}
This matches the entire file (SOI to EOI) and extracts the declarations.
declarations = { declaration* }
-> TypedProgram {
"get_all_children": true,
"define": "program",
"args": { "declarations": "$result" }
}
Key pattern: get_all_children collects all declaration matches into a list, then define: "program" creates the root node.
declaration = { struct_decl | enum_decl | fn_decl | const_decl | var_decl }
-> TypedDeclaration {
"get_child": { "index": 0 }
}
Order matters! More specific rules should come first if there's ambiguity.
struct_decl = { "const" ~ identifier ~ "=" ~ "struct" ~ "{" ~ struct_fields? ~ "}" ~ ";" }
-> TypedDeclaration {
"commands": [
{ "define": "struct", "args": {
"name": "$1",
"fields": "$2"
}}
]
}
Example input:
const Point = struct {
x: i32,
y: i32,
};
Child mapping:
- `$1` = `identifier` → "Point"
- `$2` = `struct_fields?` → list of fields (or null)
struct_fields = { struct_field ~ ("," ~ struct_field)* ~ ","? }
-> List {
"get_all_children": true
}
struct_field = { identifier ~ ":" ~ type_expr }
-> TypedField {
"commands": [
{ "define": "field", "args": { "name": "$1", "type": "$2" } }
]
}
Pattern: Optional trailing comma (","?) is common in modern languages.
enum_decl = { "const" ~ identifier ~ "=" ~ "enum" ~ "{" ~ enum_variants? ~ "}" ~ ";" }
-> TypedDeclaration {
"commands": [
{ "define": "enum", "args": {
"name": "$1",
"variants": "$2"
}}
]
}
enum_variants = { enum_variant ~ ("," ~ enum_variant)* ~ ","? }
-> List {
"get_all_children": true
}
enum_variant = { identifier }
-> TypedVariant {
"get_text": true,
"define": "variant",
"args": { "name": "$result" }
}
Example:
const Color = enum {
Red,
Green,
Blue,
};
The runtime assigns discriminant values (0, 1, 2) automatically.
fn_decl = { fn_decl_with_params | fn_decl_no_params }
-> TypedDeclaration {
"get_child": { "index": 0 }
}
Why split? PEG doesn't produce placeholder children for missing optionals. With a single rule like:
// PROBLEMATIC
fn_decl = { "fn" ~ identifier ~ "(" ~ fn_params? ~ ")" ~ type_expr ~ block }
If params are missing, every later child shifts down by one: $2 would be type_expr (not fn_params) and $3 would be block (not type_expr). By splitting, each variant has predictable child indices.
fn_decl_with_params = { "fn" ~ identifier ~ "(" ~ fn_params ~ ")" ~ type_expr ~ block }
-> TypedDeclaration {
"commands": [
{ "define": "function", "args": {
"name": "$1",
"params": "$2",
"return_type": "$3",
"body": "$4"
}}
]
}
fn_decl_no_params = { "fn" ~ identifier ~ "(" ~ ")" ~ type_expr ~ block }
-> TypedDeclaration {
"commands": [
{ "define": "function", "args": {
"name": "$1",
"params": [],
"return_type": "$2",
"body": "$3"
}}
]
}
Note: "params": [] provides an empty list literal.
// Parameter list: one or more fn_param separated by commas.
// get_all_children collects every fn_param match into the list;
// "get_child": { "index": 0 } would keep only the first parameter.
fn_params = { fn_param ~ ("," ~ fn_param)* }
  -> List {
      "get_all_children": true
  }
fn_param = { identifier ~ ":" ~ type_expr }
-> TypedParameter {
"commands": [
{ "define": "param", "args": { "name": "$1", "type": "$2" } }
]
}
statement = { if_stmt | while_stmt | for_stmt | return_stmt | break_stmt |
continue_stmt | local_const | local_var | assign_stmt | expr_stmt }
-> TypedStatement {
"get_child": { "index": 0 }
}
Order consideration: if_stmt before expr_stmt because an identifier if_something could otherwise match.
if_stmt = { if_else | if_only }
-> TypedStatement { "get_child": { "index": 0 } }
if_only = { "if" ~ "(" ~ expr ~ ")" ~ block }
-> TypedStatement {
"commands": [
{ "define": "if", "args": {
"condition": "$1",
"then_branch": "$2"
}}
]
}
if_else = { "if" ~ "(" ~ expr ~ ")" ~ block ~ "else" ~ block }
-> TypedStatement {
"commands": [
{ "define": "if", "args": {
"condition": "$1",
"then_branch": "$2",
"else_branch": "$3"
}}
]
}
Important: if_else must come before if_only in the choice, otherwise if_only would always match first!
while_stmt = { "while" ~ "(" ~ expr ~ ")" ~ block }
-> TypedStatement {
"commands": [
{ "define": "while", "args": {
"condition": "$1",
"body": "$2"
}}
]
}
for_stmt = { "for" ~ "(" ~ expr ~ ")" ~ "|" ~ identifier ~ "|" ~ block }
-> TypedStatement {
"commands": [
{ "define": "for", "args": {
"iterable": "$1",
"binding": "$2",
"body": "$3"
}}
]
}
Zig's for loop: for (slice) |item| { ... }
return_stmt = { "return" ~ expr? ~ ";" }
-> TypedStatement {
"commands": [
{ "define": "return_stmt", "args": { "value": "$1" } }
]
}
$1 will be null if expr? doesn't match.
block = { "{" ~ statement* ~ "}" }
-> TypedBlock {
"get_all_children": true,
"define": "block",
"args": { "statements": "$result" }
}
Zig supports switch expressions for pattern matching against values. The grammar handles multiple pattern types including literals, ranges, struct patterns, tagged union patterns, and error patterns.
switch_expr = { "switch" ~ "(" ~ expr ~ ")" ~ "{" ~ switch_cases? ~ "}" }
-> TypedExpression {
"commands": [
{ "define": "switch_expr", "args": {
"scrutinee": "$1",
"cases": "$2"
}}
]
}
switch_cases = { switch_case ~ ("," ~ switch_case)* ~ ","? }
-> List {
"get_all_children": true
}
The switch_expr command creates a switch expression node with:
- scrutinee: The expression being matched against
- cases: List of case arms
Each case has a pattern and a body:
// Value case: pattern => expr
switch_case_value = { switch_pattern ~ "=>" ~ expr }
-> TypedExpression {
"commands": [
{ "define": "switch_case", "args": {
"pattern": { "get_child": { "index": 0 } },
"body": { "get_child": { "index": 1 } }
}}
]
}
// Else case: else => expr
switch_case_else = { "else" ~ "=>" ~ expr }
-> TypedExpression {
"commands": [
{ "define": "switch_case", "args": {
"pattern": { "define": "wildcard_pattern" },
"body": "$1"
}}
]
}
Note the else case uses an inline { "define": "wildcard_pattern" } to create the pattern directly in the args.
Match exact values:
switch_literal_pattern = { integer_literal | string_literal }
-> TypedExpression {
"commands": [
{ "define": "literal_pattern", "args": { "value": "$1" } }
]
}
Example:
const result = switch (x) {
1 => 10,
2 => 20,
else => 0,
};
Match anything (used for _ or else):
switch_wildcard_pattern = { "_" }
-> TypedExpression {
"commands": [
{ "define": "wildcard_pattern" }
]
}
Match values within a range. The rule sets "inclusive": false, so the end bound is not part of the range:
switch_range_pattern = { integer_literal ~ ".." ~ integer_literal }
-> TypedExpression {
"commands": [
{ "define": "range_pattern", "args": {
"start": { "define": "literal_pattern", "args": { "value": "$1" } },
"end": { "define": "literal_pattern", "args": { "value": "$2" } },
"inclusive": false
}}
]
}
Example:
const result = switch (x) {
0..10 => "single digit",
10..100 => "double digit",
else => "other",
};
Match enum or tagged union variants (Zig uses .variant syntax):
switch_tagged_union_pattern = { "." ~ identifier }
-> TypedExpression {
"commands": [
{ "define": "enum_pattern", "args": {
"name": "",
"variant": { "text": "$1" },
"fields": []
}}
]
}
Example:
const result = switch (optional_value) {
.some => 100,
.none => 0,
};
Note: Tagged union patterns against non-enum types (like integers) gracefully return false in the backend, allowing the else case to match.
Match struct values by field:
switch_struct_pattern = { identifier ~ "{" ~ struct_field_patterns? ~ "}" }
-> TypedExpression {
"commands": [
{ "define": "struct_pattern", "args": {
"name": { "text": "$1" },
"fields": "$2"
}}
]
}
switch_struct_field_pattern = { "." ~ identifier ~ ("=" ~ switch_pattern)? }
-> TypedExpression {
"commands": [
{ "define": "field_pattern", "args": {
"name": { "text": "$1" },
"pattern": "$2"
}}
]
}
Example:
const result = switch (point) {
Point{ .x = 0, .y = 0 } => "origin",
Point{ .x = 0 } => "on y-axis",
else => "elsewhere",
};
Match error values from error unions:
switch_error_pattern = { "error" ~ "." ~ identifier }
-> TypedExpression {
"commands": [
{ "define": "error_pattern", "args": {
"name": { "text": "$1" }
}}
]
}
Example:
const result = switch (error_union) {
error.OutOfMemory => "memory error",
error.NotFound => "not found",
else => "success or other",
};
Match through pointer dereference:
switch_pointer_pattern = { "*" ~ switch_pattern }
-> TypedExpression {
"commands": [
{ "define": "pointer_pattern", "args": {
"inner": "$1",
"mutable": false
}}
]
}
// Literal pattern match
fn main() i32 {
const x = 2;
const result = switch (x) {
1 => 10,
2 => 20,
else => 0,
};
return result;
}
# Returns: 20
// Else case (no match)
fn main() i32 {
const x = 99;
const result = switch (x) {
1 => 10,
2 => 20,
else => 0,
};
return result;
}
# Returns: 0
Operators are handled by a chain from lowest to highest precedence:
expr = { logical_or }
-> TypedExpression { "get_child": { "index": 0 } }
// Lowest: OR
logical_or = { logical_and ~ (or_op ~ logical_and)* }
-> TypedExpression {
"fold_binary": { "operand": "logical_and", "operator": "or_op" }
}
// AND
logical_and = { comparison ~ (and_op ~ comparison)* }
-> TypedExpression {
"fold_binary": { "operand": "comparison", "operator": "and_op" }
}
// Comparison
comparison = { addition ~ ((eq_op | neq_op | lte_op | gte_op | lt_op | gt_op) ~ addition)* }
-> TypedExpression {
"fold_binary": { "operand": "addition", "operator": "eq_op|neq_op|lte_op|gte_op|lt_op|gt_op" }
}
// Addition/Subtraction
addition = { multiplication ~ ((add_op | sub_op) ~ multiplication)* }
-> TypedExpression {
"fold_binary": { "operand": "multiplication", "operator": "add_op|sub_op" }
}
// Multiplication/Division
multiplication = { unary ~ ((mul_op | div_op) ~ unary)* }
-> TypedExpression {
"fold_binary": { "operand": "unary", "operator": "mul_op|div_op" }
}
// Unary (highest before atoms)
unary = { unary_with_op | primary }
-> TypedExpression { "get_child": { "index": 0 } }
unary_with_op = { unary_op ~ primary }
-> TypedExpression {
"commands": [
{ "define": "unary", "args": { "op": "$1", "operand": "$2" } }
]
}
For 1 + 2 + 3:
- Parse: [term(1), +, term(2), +, term(3)]
- Start: result = 1
- Fold: result = binary(+, 1, 2) → (1+2)
- Fold: result = binary(+, (1+2), 3) → ((1+2)+3)
This creates left-associative trees automatically.
postfix_expr = { call_expr | field_expr | index_expr | atom }
-> TypedExpression { "get_child": { "index": 0 } }
// Function call
call_expr = { atom ~ "(" ~ call_args? ~ ")" }
-> TypedExpression {
"commands": [
{ "define": "call", "args": { "callee": "$1", "args": "$2" } }
]
}
// Field access
field_expr = { atom ~ "." ~ identifier }
-> TypedExpression {
"commands": [
{ "define": "field_access", "args": { "object": "$1", "field": "$2" } }
]
}
// Index access
index_expr = { atom ~ "[" ~ expr ~ "]" }
-> TypedExpression {
"commands": [
{ "define": "index", "args": { "object": "$1", "index": "$2" } }
]
}
atom = { try_expr | struct_init | array_literal | bool_literal |
string_literal | integer_literal | identifier_expr | paren_expr }
-> TypedExpression { "get_child": { "index": 0 } }
Order matters: struct_init (starts with identifier) before identifier_expr.
struct_init = { identifier ~ "{" ~ struct_init_fields? ~ "}" }
-> TypedExpression {
"commands": [
{ "define": "struct_init", "args": { "type_name": "$1", "fields": "$2" } }
]
}
struct_init_fields = { struct_init_field ~ ("," ~ struct_init_field)* ~ ","? }
-> List { "get_all_children": true }
struct_init_field = { "." ~ identifier ~ "=" ~ expr }
-> TypedExpression {
"commands": [
{ "define": "struct_field_init", "args": { "name": "$1", "value": "$2" } }
]
}
Example: Point{ .x = 10, .y = 20 }
paren_expr = _{ "(" ~ expr ~ ")" }
Silent rule (_{ }) - matches but doesn't create a node. The inner expr passes through directly.
type_expr = { pointer_type | optional_type | error_union_type | array_type |
primitive_type | identifier }
-> Type { "get_child": { "index": 0 } }
pointer_type = { "*" ~ "const"? ~ type_expr }
-> Type {
"commands": [
{ "define": "pointer_type", "args": { "pointee": "$1" } }
]
}
optional_type = { "?" ~ type_expr }
-> Type {
"commands": [
{ "define": "optional_type", "args": { "inner": "$1" } }
]
}
// Array types are split into two variants for the same reason as fn_decl:
// a missing optional produces no placeholder child, so with
// "[" ~ integer_literal? ~ "]" ~ type_expr the input `[]T` would bind
// $1 to the element type and leave $2 empty.
array_type = { sized_array_type | slice_type }
  -> Type { "get_child": { "index": 0 } }

// [N]T - $1 is the size literal, $2 is the element type.
sized_array_type = { "[" ~ integer_literal ~ "]" ~ type_expr }
  -> Type {
      "commands": [
          { "define": "array_type", "args": { "size": "$1", "element": "$2" } }
      ]
  }

// []T - no size child, so $1 is the element type.
// NOTE(review): assumes "array_type" accepts a missing "size" argument - confirm against the runtime.
slice_type = { "[" ~ "]" ~ type_expr }
  -> Type {
      "commands": [
          { "define": "array_type", "args": { "element": "$1" } }
      ]
  }
primitive_type = { "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" |
"f32" | "f64" | "bool" | "void" }
-> Type {
"get_text": true,
"define": "primitive_type",
"args": { "name": "$result" }
}
keyword = @{
("struct" | "enum" | "fn" | "const" | "var" | "if" | "else" | "while" | "for" |
"return" | "break" | "continue" | "try" | "and" | "or" | "true" | "false" |
"i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" |
"bool" | "void")
~ !(ASCII_ALPHANUMERIC | "_")
}
identifier = @{ !keyword ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
-> String { "get_text": true }
Key patterns:
- `~ !(ASCII_ALPHANUMERIC | "_")` ensures "iffy" doesn't match as "if" + "fy"
- `!keyword` prevents identifiers from being keywords
- Both are atomic (`@{ }`) for proper token handling
Each operator is a separate rule for use with fold_binary:
// Must check longer operators first
lte_op = { "<=" } -> String { "get_text": true }
gte_op = { ">=" } -> String { "get_text": true }
eq_op = { "==" } -> String { "get_text": true }
neq_op = { "!=" } -> String { "get_text": true }
lt_op = { "<" } -> String { "get_text": true }
gt_op = { ">" } -> String { "get_text": true }
add_op = { "+" } -> String { "get_text": true }
sub_op = { "-" } -> String { "get_text": true }
mul_op = { "*" } -> String { "get_text": true }
div_op = { "/" } -> String { "get_text": true }
and_op = { "and" } -> String { "get_text": true }
or_op = { "or" } -> String { "get_text": true }
unary_op = { "-" | "!" } -> String { "get_text": true }
WHITESPACE = _{ " " | "\t" | "\n" | "\r" }
COMMENT = _{ "//" ~ (!"\n" ~ ANY)* ~ "\n"? }
Both are silent (_{ }) - they match but don't appear in the parse tree.
fn main() i32 {
return 42;
}
zyntax compile --grammar zig.zyn --source test.zig --run
# Output: result: main() returned: 42
const Point = struct {
x: i32,
y: i32,
};
fn main() i32 {
const p = Point{ .x = 10, .y = 20 };
return p.x;
}
# Returns: 10
const Color = enum {
Red,
Green,
Blue,
};
fn main() i32 {
return Color.Green;
}
# Returns: 1 (Green's discriminant)
fn main() i32 {
return 2 + 3 * 4;
}
# Returns: 14 (multiplication before addition)

| Pattern | Use Case |
|---|---|
| Split rules | Handle optional children with predictable indices |
| `fold_binary` | Left-associative binary operators |
| `get_all_children` | Collect repetitions into lists |
| Keyword protection | Prevent identifiers matching keywords |
| Silent rules | Grouping without AST nodes |
| Atomic rules | Token-level matching |
- Chapter 9: Complete command and API reference
- Try modifying the grammar to add new features!