From b7f53610e44104a77487c0e1adef7f8eb2b2acfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 10:40:36 +0100 Subject: [PATCH 001/116] Works towards HyperDoc 2.0 --- .github/workflows/validate.yml | 2 +- README.md | 87 ++---- build.zig | 33 +-- docs/specification.md | 373 ++++++++++++++++++++++++++ examples/featureset.hdoc | 80 ------ examples/html-excerciser.hdoc | 2 - examples/hyperdoc.hdoc | 43 --- flake.lock | 147 ---------- flake.nix | 49 ---- src/data/default.css | 40 --- src/hyperdoc.zig | 472 ++++----------------------------- src/main.zig | 133 ++-------- src/renderer/Html.zig | 167 ------------ src/renderer/HyperDoc.zig | 158 ----------- src/renderer/Markdown.zig | 131 --------- src/testsuite.zig | 212 +-------------- 16 files changed, 488 insertions(+), 1641 deletions(-) create mode 100644 docs/specification.md delete mode 100644 examples/featureset.hdoc delete mode 100644 examples/html-excerciser.hdoc delete mode 100644 examples/hyperdoc.hdoc delete mode 100644 flake.lock delete mode 100644 flake.nix delete mode 100644 src/data/default.css delete mode 100644 src/renderer/Html.zig delete mode 100644 src/renderer/HyperDoc.zig delete mode 100644 src/renderer/Markdown.zig diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index f8d28d6..0b8538c 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -16,7 +16,7 @@ jobs: - name: Setup Zig uses: mlugg/setup-zig@v2 with: - version: 0.15.1 + version: 0.15.2 - name: Build run: | diff --git a/README.md b/README.md index 1755e29..6806429 100644 --- a/README.md +++ b/README.md @@ -1,62 +1,31 @@ # Ashet HyperDocument Format -This format is used for both the _Hyper Wiki_ as well as the _Gateway_ application to store and display -hyperlinked documents. 
- -The format is a rich-text format that can encode/store/display the following document blocks: - -- paragraphs (consisting of a sequence of spans) - - regular text - - links - - bold/emphasised text - - monospaced text - - line break -- 3 levels of headings -- ordered and unordered lists - - each list item is a paragraph or another list -- quotes (paragraph with special styling) -- preformatted text (code blocks, also uses the paragraph formatting) -- images - -Regular text is assumed to use a proportional font, while preformatted text is required to be rendered as monospace. - -## Storage - -HyperDocument is stored as a trivial-to-parse plain text format, not necessarily meant to be edited by humans, -but still human readable. - -**Example:** - -```lua -hdoc "1.0" -p { - span "Hello, World!\n" - link "http://google.com" "Visit Google!" - span "\n" - emph "This is fat!" - span "\n" - mono "int main()" - span "\n" -} -enumerate { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} -itemize { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} -quote { - span "Life is what happens when you're busy making other plans.\n - John Lennon" -} -pre { - span "const std = @import(\"std\");\n" - span "\n" - span "pub fn main() !void {\n" - span " std.debug.print(\"Hello, World!\\n\", .{});\n" - span "}\n" -} -image "dog.png" +## Motivation + +> TODO: Write motivation + +## Specification + +[Read the specification](docs/specification.md). + +## Building + +Requires [Zig 0.15.2](https://ziglang.org/) installed. 
+ +### Build debug application + +```sh-session +[user@host] hyperdoc$ zig build +``` + +### Build release application + +```sh-session +[user@host] hyperdoc$ zig build -Drelease +``` + +### Run test suite + +```sh-session +[user@host] hyperdoc$ zig build test ``` diff --git a/build.zig b/build.zig index a6f8daa..0c845f2 100644 --- a/build.zig +++ b/build.zig @@ -2,27 +2,17 @@ const std = @import("std"); pub fn build(b: *std.Build) void { // Options: - const target = b.standardTargetOptions(.{}); - const optimize = b.standardOptimizeOption(.{}); + const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseSafe }); // Targets: - const run_step = b.step("run", "Run the app"); const test_step = b.step("test", "Run unit tests"); // Build: - - const pt_dep = b.dependency("parser_toolkit", .{}); - const args = b.dependency("args", .{}); - - const hyperdoc = b.addModule( - "hyperdoc", - .{ - .root_source_file = b.path("src/hyperdoc.zig"), - }, - ); - hyperdoc.addImport("parser-toolkit", pt_dep.module("parser-toolkit")); + const hyperdoc = b.addModule("hyperdoc", .{ + .root_source_file = b.path("src/hyperdoc.zig"), + }); const exe = b.addExecutable(.{ .name = "hyperdoc", @@ -30,13 +20,11 @@ pub fn build(b: *std.Build) void { .root_source_file = b.path("src/main.zig"), .target = target, .optimize = optimize, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, }), - .use_llvm = true, }); - - exe.root_module.addImport("hyperdoc", hyperdoc); - exe.root_module.addImport("args", args.module("args")); - b.installArtifact(exe); const run_cmd = b.addRunArtifact(exe); @@ -52,11 +40,10 @@ pub fn build(b: *std.Build) void { .root_source_file = b.path("src/testsuite.zig"), .target = target, .optimize = optimize, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, }), - .use_llvm = true, }); - - exe_tests.root_module.addImport("hyperdoc", hyperdoc); - test_step.dependOn(&b.addRunArtifact(exe_tests).step); } diff --git 
a/docs/specification.md b/docs/specification.md new file mode 100644 index 0000000..3e08e00 --- /dev/null +++ b/docs/specification.md @@ -0,0 +1,373 @@ +# HyperDoc 2.0 + +This specification describes the document markup language "HyperDoc 2.0", that tries to be a simple to parse, easy to write markup language for hypertext documents. + +It sits somewhat between LaTeX and Markdown and tries to be way simpler to parse than Markdown, but keep useful semantics around. + +## Syntax Overview + +```hdoc +hdoc "2.0" + +h1{HyperDoc 2.0} + +toc{} + +h2{Paragraphs} + +p { This is a simple paragraph containing text. } + +p(id="foo") { + This is a paragraph with an attribute "id" with the value "foo". +} + +p { + This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. + Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript (x\sub{2}). + We can also \link(ref="foo"){link to other parts of a document} or \link(url="https://ashet.computer"){to websites}. + With \mono(lang="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. +} + +h2{Special Paragraphs} + +note { HyperDoc 2.0 also supports different types of paragraphs. } +warning { These should affect rendering, and have well-defined semantics attached to them. } +danger { You shall not assume any specific formatting of these elements though. } +tip { They typically have a standardized style though. } +quote { You shall not pass! } +spoiler { Nobody expects the Spanish Inquisition! } + +h2{Literals and Preformatted Text} + +p: +| we can also use literal lines. +| these are introduced by a trailing colon (':') at the end of a line. +| each following line that starts with whitespace followed by a pipe character ('|') +| is then part of the contents. +| Literal lines don't perform any parsing, so they don't require any escaping of characters. 
+| This is really useful for code blocks: + +pre(lang="c"): +| #include +| int main(int argc, char const * argv[]) { +| printf("Hello, World!\n"); +| return 0; +| } + +h2{String Literals} + +p "It's also possible to use a string literal for bodies if desired." + +p { \em "Magic" is a simple way to highlight single words or text with escaping in inlines. } + +h2{Images & Figures} + +p { We can also add images to our documents: } + +img(id="fig1", path="./preview.jpeg") { If this is non-empty, it's a figure caption. } + +h2{Lists} + +p { Also lists are possible: } + +h3{Unordered Lists} + +ul { + li { p { Apples } } + li { p { Bananas } } + li { p { Cucumbers } } +} + +h3{Ordered Lists} + +ol { + li { p { Collect underpants } } + li { p { ? } } + li { p { Profit } } +} + +h2{Tables} + +p { And last, but not least, we can have tables: } + +table { + columns { + td "Key" + td "Value" + } + row { + td "Author" + td { Felix "xq" Queißner } + } + row { + td "Date of Invention" + td { \date{2025-12-17} } + } +} +``` + +## Grammar + +This grammar describes the text format + +Short notes on grammar notation: + +- `{ ... }` is a repetition +- `[ ... ]` is an option +- `a | b | c` is alternatives +- `( ... )` is a group +- `"foo"` is a literal token sequence +- `/.../` is a regex +- Whitespace is assumed to be ignored unless matched by a literal, so tokens are typically separated by whitespace +- Upper case elements are roughly tokens, while lowercase elements are rules. 
+ +``` +document := HEADER { block } + +block := IDENTIFIER [ attribute_list ] body + +body := list | literal | STRING +literal := ":" "\n" { LITERAL_LINE } + +list := "{" { escape | inline | block | WORD } "}" +escape := "\\" | "\{" | "\}" +inline := "\" IDENTIFIER [ attribute_list ] body + +attribute_list := "(" [ attribute { "," attribute } ] ")" +attribute := IDENTIFIER "=" STRING + +IDENTIFIER := /\b\w+\b/ +HEADER := /^hdoc\s+"2.0"\s*$/ +STRING := /"(\\.|[^"\r\n])*"/ +LITERAL_LINE := /^\s*\|(.*)$/ +WORD := /[^\s\{\}\\]+/ +``` + +## Semantic Structure + +All elements have these attributes: + +| Attribute | Function | +| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `lang` | Marks the (human) language of the contents of that element. This must be an [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag). | + + +## Top-Level / Block Elements + +All top-level elements have these attributes: + +| Attribute | Function | +| --------- | -------------------------------------------------------------------------------- | +| `id` | Marks a target for a `\link(ref="...")`. Must be unique throughout the document. | + +### Headings: `h1`, `h2`, `h3` + +**Allowed Items:** Inline Text + +These elements are all rendered as headings of different levels. + +- `h1` is the top-level heading. +- `h2` is the level below `h1`. +- `h3` is the level below `h2`. + +### Paragraphs: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` + +**Allowed Items:** Inline Text + +These elements are all rendered as paragraphs. + +The type of the paragraph includes a semantic hint: + +- `p`: A normal paragraph. +- `note`: A paragraph that informs the reader. This is typically rendered with a blue/white color hint. The associated icon is a white i in a blue box/circle. +- `warning`: A paragraph that warns the reader. 
This is typically rendered with a yellow/black color hint. The associated icon is a yellow triangle with a black exclamation mark. +- `danger`: A paragraph that warns the reader of danger. This is typically rendered with a red/white color hint. The associated icon is a red octagon with a white exclamation mark. +- `tip`: A paragraph that gives the reader a tip. The associated icon is a lightbulb. +- `quote`: A paragraph that quotes a foreign source. This is typically rendered with a small indentation and a distinct font. +- `spoiler`: A paragraph that contains information about things the reader might not want to know. This is typically visually hidden/blurred so it's unreadable until a reader action is performed. + +### Lists: `ul`, `ol` + +**Allowed Items:** `li` + +- `ul` is an unordered list rendered with typically either dashes or dots as list enumerators. +- `ol` is an ordered list rendered with typically either roman or arabic numerals as list enumerators. + +#### Ordered List `ol` + +| Attribute | Function | +| --------- | -------------------------------------------------------------------------------------------------------------------- | +| `first` | An integer that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | + +### Figures: `img` + +**Allowed Items:** Inline Text + +| Attribute | Function | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `alt` | A textual description of the image contents for vision-impaired users. Similar to the [HTML alt tag](https://en.wikipedia.org/wiki/Alt_attribute). | +| `path` | A path relative to the current file that points to an image file that should be shown. | + +This element shows a full-width image or figure. Its contents are the figure description. + +If the contents are empty, the figure may be rendered in a simpler form. 
+ +### Preformatted: `pre` + +**Allowed Items:** Inline Text + +| Attribute | Function | +| --------- | ------------------------------------------------------------------------------------------------------- | +| `syntax` | If present, hints a syntax highlighter that this preformatted block contains programming language code. | + +In contrast to all other block types, a `pre` block retains whitespace and line-break information and lays out the text as-is. + +It does not allow automatic line break insertion or word-wrapping. + +If a pre contains inline elements, these will still be parsed and apply their styles to the text spans. + +### Table Of Contents: `toc` + +**Allowed Items:** *none* + +| Attribute | Function | +| --------- | ----------------------------------------------------------------------- | +| `depth` | `1`, `2` or `3`. Defines how many levels of headings shall be included. | + +Renders a table of contents for the current document. + +This element allows no child items. + +## Lists + +### List Items `li` + +**Allowed Items:** Block Elements *or* String Content. + +These elements wrap a sequence of blocks that will be rendered for this list item. + +It also allows a string to be used as it's content directly, this will be equivalent to having a nested paragraph with that strings content: + +``` +ul { + li { p { This is a normal item. } } + li "This is a normal item." +} +``` + +will have two identical list items. + +### Tables: `table` + +Allowed Items: `columns`, `row`, `group` + +> TODO: Spec out tables proper. 
+> `columns` is basically a `row` with only column headings +> `row` is just a row with cells +> all rows must contain the same amount of cell span +> `group` is a heading for subsequent rows +> `row.title` attribute is displayed in a column left of the first column, the top-left element is always empty + +## Table Elements + +### Column Headers: `columns` + +**Allowed Items:** `td` + +This element contains cells + +### Rows: `row` + +**Allowed Items:** `td` + +| Attribute | Function | +| --------- | ---------------------------------------------------------------------------- | +| `title` | A title caption for this row. If present, will be shown left of all columns. | + +### Row Groups: `group` + +**Allowed Items:** Inline Text + +A *row group* is a row that contains a single heading-style cell that labels the rows below. + +### Cells: `td` + +**Allowed Items:** Block Elements *or* String Content. + +| Attribute | Function | +| --------- | -------------------------------------------------- | +| `colspan` | Integer defining how many columns this cell spans. | + +This element contains the contents of a table cell. + +> TODO: Similar to `li`, it can be string or block-sequence. + +## Inline Text + +These elements are all allowed inside a paragraph-like content and can typically be nested. + +### Emphasis: `em` + +**Nesting:** Yes + +Formats the text as emphasised. This is typically bold or italic rendering. + +### Monospaced: `mono` + +**Nesting:** Yes + +| Attribute | Function | +| --------- | ----------------------------------------------------------------------------------------- | +| `syntax` | If present, hints a syntax highlighter that this span contains programming language code. | + +Formats the text in a monospaced font. This is useful for code-like structures. + +### Strike-through: `strike` + +**Nesting:** Yes + +Renders the text with a horizontal line through the text, striking it out. 
+ +### Sub/Superscript: `sub`, `sup` + +**Nesting:** Yes + +Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to allow sub- or superscript rendering. + +### Linking: `link` + +**Nesting:** Yes + +| Attribute | Function | +| --------- | ---------------------------------------------------------------------------------------------------------- | +| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `url`. | +| `url` | Points the link to the resource inside the `url`. Mutually exclusive to `ref`. | + +Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. + +### Localized Date/Time: `date`, `time`, `datetime` + +**Nesting:** No + +Renders an [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. + +> TODO: Add `fmt` attribute: +> `\date` takes an attribute fmt which can be +> - "year" (2025) +> - "month" (December), +> - "day" (22nd) +> - "weekday" (monday) +> - "short" (22.12.2025) +> - "long" (22nd of December 2025) +> - "relative" (two days ago, two months ago, ...) +> +> `\time` takes an attribute fmt which can be +> - "short" (09:41) +> - "long" (09:41:25) +> - "rough" (early morning, morning, noon, afternoon, evening, late in the night, ...) +> - "relative" (two minutes ago, two days ago, ...) +> +> `\datetime` takes an attribute fmt which can be +> - *To be done* +> - ... +> \ No newline at end of file diff --git a/examples/featureset.hdoc b/examples/featureset.hdoc deleted file mode 100644 index 9d3a3af..0000000 --- a/examples/featureset.hdoc +++ /dev/null @@ -1,80 +0,0 @@ -hdoc "1.0" -h1 "intro" "Introduction" -toc { } -h2 "" "Basic Features" - -h3 "" "Spans" -p { - span "Hello, World!\n" - link "http://google.com" "Visit Google!" - span "\n" - emph "This is fat!" 
- span "\n" - mono "int main()" -} - -h3 "" "Lists" -enumerate { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} -itemize { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} - -h3 "" "Block Quote" -quote { - span "Life is what happens when you're busy making other plans.\n - John Lennon" -} - -h3 "" "Code Example" -pre "zig" { - span "const std = @import(\"std\");\n" - span "\n" - span "pub fn main() !void {\n" - span " std.debug.print(\"Hello, World!\\n\", .{});\n" - span "}" -} -image "dog.png" - -h2 "" "Nested lists" -itemize { - item { p { span "first" } } - item { p { span "second" } } - item { itemize { - item { p { span "third.first" } } - item { p { span "third.second" } } - } } - item {enumerate { - item { p { span "fourth.first" } } - item { p { span "fourth.second" } } - } } -} - -enumerate { - item { p { span "Item 1" } } - item { p { span "Item 2" } } - item { p { span "Item 3" } } - item { p { span "Item 4" } } - item { p { span "Item 5" } } - item { p { span "Item 6" } } - item { p { span "Item 7" } } - item { p { span "Item 8" } } - item { p { span "Item 9" } } -} - -enumerate { - item { p { span "Item 1" } } - item { p { span "Item 2" } } - item { p { span "Item 3" } } - item { p { span "Item 4" } } - item { p { span "Item 5" } } - item { p { span "Item 6" } } - item { p { span "Item 7" } } - item { p { span "Item 8" } } - item { p { span "Item 9" } } - item { p { span "Item 10" } } -} \ No newline at end of file diff --git a/examples/html-excerciser.hdoc b/examples/html-excerciser.hdoc deleted file mode 100644 index d620a88..0000000 --- a/examples/html-excerciser.hdoc +++ /dev/null @@ -1,2 +0,0 @@ -hdoc "1.0" -p { span "" } diff --git a/examples/hyperdoc.hdoc b/examples/hyperdoc.hdoc deleted file mode 100644 index ae3a25e..0000000 --- a/examples/hyperdoc.hdoc +++ /dev/null @@ -1,43 +0,0 @@ -hdoc "1.0" -h1 "" "HyperDocument File Format" -toc {} -h2 "intro" 
"Introduction" -p { - span "The HyperDocument file format is meant to descibe rich text files that link between each other. " -} -h2 "structure" "Structure" -p { - span "The structure of HyperDocument files is pretty simple. Each file starts with a " - mono "hdoc \"1.0\"" - span " sequence that will mark both \"magic number\" and format version. " -} -p { - span "After the header, an arbitrary number of block elements follows." - span "Each block can be considered similar to a paragraph in text documents, but isn't necessarily only a pure text element. " - span "The following blocks types are available:" -} -itemize { - item { p { mono "h1" span ", " mono "h2" span ", " mono "h3" span " - First, second and third level headings" } } - item { p { mono "toc" span " - A table of contents" } } - item { p { mono "p" span " - A regular, plain text paragraph" } } - item { p { mono "quote" span " - A block quote" } } - item { p { mono "enumerate" span " - An ordered list, using numbering" } } - item { p { mono "itemize" span " - An unordered list, using bullet points" } } - item { p { mono "pre" span " - A preformatted block of text, optionally with a language tag" } } - item { p { mono "image" span " - A picture that is inserted into the document." } } -} -p { - span "The " mono "p" span ", " mono "quote" span " and " mono "pre" span " blocks contain a sequence of non-nested spans. " - span "The following span types are available:" -} -itemize { - item { p { mono "span" span " - Regular, unformatted plain text" } } - item { p { mono "emph" span " - Emphasises text" } } - item { p { mono "mono" span " - Monospaced font" } } - item { p { mono "link" span " - Plain text with a hyperlink." } } -} - -p { - span "The blocks " mono "enumerate" span " and " mono "itemize" span " contain elements of type " mono "item" span ".\n" - span "Each of those items contain a list of blocks." 
-} diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 1ba4434..0000000 --- a/flake.lock +++ /dev/null @@ -1,147 +0,0 @@ -{ - "nodes": { - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_2": { - "inputs": { - "systems": "systems_2" - }, - "locked": { - "lastModified": 1705309234, - "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1718229064, - "narHash": "sha256-ZFav8A9zPNfjZg/wrxh1uZeMJHELRfRgFP+meq01XYk=", - "owner": "nixos", - "repo": "nixpkgs", - "rev": "5c2ec3a5c2ee9909904f860dadc19bc12cd9cc44", - "type": "github" - }, - "original": { - "owner": "nixos", - "ref": "nixos-23.11", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_2": { - "locked": { - "lastModified": 1708161998, - "narHash": "sha256-6KnemmUorCvlcAvGziFosAVkrlWZGIc6UNT9GUYr0jQ=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "84d981bae8b5e783b3b548de505b22880559515f", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-23.11", - "repo": "nixpkgs", - "type": 
"github" - } - }, - "root": { - "inputs": { - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs", - "zig": "zig" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_2": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "zig": { - "inputs": { - "flake-compat": "flake-compat", - "flake-utils": "flake-utils_2", - "nixpkgs": "nixpkgs_2" - }, - "locked": { - "lastModified": 1718324667, - "narHash": "sha256-AZGskEGjvUmeb+fgBv4lxtCUtXmYBI+ABOlV+og9X14=", - "owner": "mitchellh", - "repo": "zig-overlay", - "rev": "b2c14e5f842af6b2bf03e634f73fd84f6956d4ba", - "type": "github" - }, - "original": { - "owner": "mitchellh", - "repo": "zig-overlay", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix deleted file mode 100644 index e1f2933..0000000 --- a/flake.nix +++ /dev/null @@ -1,49 +0,0 @@ -{ - description = "HyperDoc, a simple hyper document format"; - - inputs = { - nixpkgs.url = "github:nixos/nixpkgs/nixos-23.11"; - flake-utils.url = "github:numtide/flake-utils"; - zig.url = "github:mitchellh/zig-overlay"; - }; - - outputs = { - self, - nixpkgs, - flake-utils, - ... 
- } @ inputs: let - overlays = [ - # Other overlays - (final: prev: { - zigpkgs = inputs.zig.packages.${prev.system}; - }) - ]; - - # Our supported systems are the same supported systems as the Zig binaries - systems = builtins.attrNames inputs.zig.packages; - in - flake-utils.lib.eachSystem systems ( - system: let - pkgs = import nixpkgs {inherit overlays system;}; - in let - zig = pkgs.zigpkgs."0.13.0"; - in rec { - packages.default = pkgs.stdenv.mkDerivation { - name = "hyperdoc"; - src = ./.; - nativeBuildInputs = [zig]; - - configurePhase = ""; - - buildPhase = '' - zig build - ''; - - installPhase = '' - mv zig-out $out - ''; - }; - } - ); -} diff --git a/src/data/default.css b/src/data/default.css deleted file mode 100644 index 6040b76..0000000 --- a/src/data/default.css +++ /dev/null @@ -1,40 +0,0 @@ -* { - box-sizing: border-box; -} - - -body { - max-width: 60em; - margin-left: auto; - margin-right: auto; - font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; -} - -blockquote { - margin: 0; - padding: 0.5em; - border-left: 4px solid green; - background: rgba(0.7, 0.7, 0.7, 17%); -} - -pre { - padding: 0.5em; - border: 3px solid black; - font-family: 'Courier New', Courier, monospace -} - -ol, -ul { - margin: 0; - padding: 0; - padding-left: 1em; -} - -em { - font-style: normal; - font-weight: bold; -} - -code { - font-family: 'Courier New', Courier, monospace -} \ No newline at end of file diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 7900095..c04b0f2 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -19,450 +19,90 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. pub const Block = union(enum) { - paragraph: Paragraph, - ordered_list: []Item, - unordered_list: []Item, - quote: Paragraph, - preformatted: CodeBlock, - image: Image, - heading: Heading, - table_of_contents, + // TODO }; -/// A paragraph is a sequence of spans. 
-pub const Paragraph = struct { - contents: []Span, -}; - -/// A list item is a sequence of blocks -pub const Item = struct { - contents: []Block, -}; - -/// A code block is a paragraph with a programming language attachment -pub const CodeBlock = struct { - contents: []Span, - language: []const u8, // empty=none -}; - -/// An image is a block that will display non-text content. -pub const Image = struct { - path: []const u8, -}; - -/// A heading is a block that will be rendered in a bigger/different font -/// and introduces a new section of the document. -/// It has an anchor that can be referenced. -pub const Heading = struct { - level: Level, - title: []const u8, - anchor: []const u8, - - pub const Level = enum(u2) { - document = 0, - chapter = 1, - section = 2, - }; -}; - -/// Spans are the building blocks of paragraphs. Each span is -/// defining a sequence of text with a certain formatting. -pub const Span = union(enum) { - text: []const u8, - emphasis: []const u8, - monospace: []const u8, - link: Link, -}; - -/// Links are spans that can refer to other documents or elements. -pub const Link = struct { - href: []const u8, - text: []const u8, -}; - -pub const ErrorLocation = parser_toolkit.Location; - /// Parses a HyperDoc document. pub fn parse( allocator: std.mem.Allocator, + /// The source code to be parsed plain_text: []const u8, - error_location: ?*ErrorLocation, + /// An optional diagnostics element that receives diagnostic messages like errors and warnings. + /// If present, will be filled out by the parser. 
+ diagnostics: ?*Diagnostics, ) !Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); - var tokenizer: Tokenizer = .init(plain_text, null); - - var parser: Parser = .{ - .allocator = arena.allocator(), - .core = .init(&tokenizer), - }; - - defer if (error_location) |err| { - err.* = tokenizer.current_location; - }; - - const root_id = parser.acceptIdentifier() catch return error.InvalidFormat; - if (root_id != .hdoc) - return error.InvalidFormat; - const version_number = parser.accept(.text) catch return error.InvalidFormat; - if (!std.mem.eql(u8, version_number.text, "\"1.0\"")) - return error.InvalidVersion; + _ = plain_text; + _ = diagnostics; - const root_elements = try parser.acceptBlockSequence(.eof); - - return Document{ - .arena = arena, - .contents = root_elements, - }; + @panic("TODO: Implement this"); } -const Parser = struct { - allocator: std.mem.Allocator, - core: ParserCore, - - fn save(parser: *Parser) Tokenizer.State { - return parser.core.saveState(); - } - - fn restore(parser: *Parser, state: Tokenizer.State) void { - return parser.core.restoreState(state); - } - - fn accept(parser: *Parser, token_type: TokenType) !Token { - const state = parser.save(); - errdefer parser.restore(state); - - const token = (try parser.core.nextToken()) orelse return error.EndOfFile; - if (token.type != token_type) - return error.UnexpectedToken; - return token; - } - - fn consume(parser: *Parser, token_type: TokenType) !void { - _ = try parser.accept(token_type); - } +/// A diagnostic message. 
+pub const Diagnostic = struct { + pub const Severity = enum { warning, @"error" }; - const Identifier = enum { - // management - hdoc, - - // blocks - h1, - h2, - h3, - toc, - p, - enumerate, - itemize, - quote, - pre, - image, - - // spans - span, - link, - emph, - mono, - - // list of blocks - item, + pub const Location = struct { + line: u32, + column: u32, }; - fn acceptIdentifier(parser: *Parser) !Identifier { - const tok = try parser.accept(.identifier); - return std.meta.stringToEnum(Identifier, tok.text) orelse return error.InvalidIdentifier; - } - - fn acceptText(parser: *Parser) ![]const u8 { - const text_tok = try parser.accept(.text); - - const text = text_tok.text; - - std.debug.assert(text.len >= 2); - std.debug.assert(text[0] == text[text.len - 1]); - - const string_body = text[1 .. text.len - 1]; - - const allocator = parser.allocator; - var temp_string: std.ArrayList(u8) = .empty; - defer temp_string.deinit(allocator); - - try temp_string.ensureTotalCapacity(allocator, string_body.len); - - { - var i: usize = 0; - while (i < string_body.len) { - const c = string_body[i]; - if (c != '\\') { - try temp_string.append(allocator, c); - i += 1; - continue; - } - i += 1; - if (i >= string_body.len) - return error.InvalidEscapeSequence; - const selector = string_body[i]; - i += 1; - switch (selector) { - 'n' => try temp_string.append(allocator, '\n'), - 'r' => try temp_string.append(allocator, '\r'), - 'e' => try temp_string.append(allocator, '\x1B'), - // TODO: Implement the following cases: - // '\xFF' - // '\u{ABCD}' + /// An diagnostic code encoded as a 16 bit integer. + /// The upper 4 bit encode the severity of the code, the lower 12 bit the number. 
+ pub const Code = enum(u16) { + // bitmasks: + const ERROR = 0x1000; + const WARNING = 0x2000; - else => { - try temp_string.append(allocator, selector); - }, - } - } - } - - return try temp_string.toOwnedSlice(allocator); - } + // TODO: Add other diagnostic codes - const BlockSequenceTerminator = enum { @"}", eof }; + // errors: + invalid_character = ERROR | 1, - fn acceptBlockSequence(parser: *Parser, terminator: BlockSequenceTerminator) ![]Block { - const allocator = parser.allocator; - var seq: std.ArrayList(Block) = .empty; - defer seq.deinit(allocator); + // warnings: + missing_space_in_literal = WARNING | 1, - accept_loop: while (true) { - const id = switch (terminator) { - .@"}" => if (parser.acceptIdentifier()) |id| - id - else |_| if (parser.accept(.@"}")) |_| - break :accept_loop - else |_| - return error.UnexpectedToken, - .eof => if (parser.acceptIdentifier()) |id| - id - else |err| switch (err) { - error.EndOfFile => break :accept_loop, - else => |e| return e, - }, + pub fn get_severity(code: Code) Severity { + const num = @intFromEnum(code); + return switch (num & 0xF000) { + ERROR => .@"error", + WARNING => .warning, + else => @panic("invalid error code!"), }; - - switch (id) { - .toc => { - try parser.consume(.@"{"); - try parser.consume(.@"}"); - try seq.append(allocator, .table_of_contents); - }, - - .h1, .h2, .h3 => { - const anchor = try parser.acceptText(); - const title = try parser.acceptText(); - - try seq.append(allocator, .{ - .heading = .{ - .level = switch (id) { - .h1 => .document, - .h2 => .chapter, - .h3 => .section, - else => unreachable, - }, - .title = title, - .anchor = anchor, - }, - }); - }, - - .p, .quote => { - try parser.consume(.@"{"); - const items = try parser.acceptSpanSequence(); - - try seq.append(allocator, if (id == .p) - .{ .paragraph = .{ .contents = items } } - else - .{ .quote = .{ .contents = items } }); - }, - - .pre => { - const language = try parser.acceptText(); - try parser.consume(.@"{"); - const items = 
try parser.acceptSpanSequence(); - - try seq.append(allocator, .{ - .preformatted = .{ - .language = language, - .contents = items, - }, - }); - }, - - .enumerate, .itemize => { - try parser.consume(.@"{"); - - var list: std.ArrayList(Item) = .empty; - defer list.deinit(allocator); - - while (true) { - if (parser.consume(.@"}")) |_| { - break; - } else |_| {} - - const ident = try parser.acceptIdentifier(); - if (ident != .item) { - return error.UnexpectedToken; - } - - try parser.consume(.@"{"); - - const sequence = try parser.acceptBlockSequence(.@"}"); - - try list.append(allocator, .{ - .contents = sequence, - }); - } - - const list_slice = try list.toOwnedSlice(allocator); - - try seq.append(allocator, if (id == .enumerate) - .{ .ordered_list = list_slice } - else - .{ .unordered_list = list_slice }); - }, - - .image => { - const file_path = try parser.acceptText(); - try seq.append(allocator, .{ - .image = .{ - .path = file_path, - }, - }); - }, - - .item, - .hdoc, - .link, - .emph, - .mono, - .span, - => return error.InvalidTopLevelItem, - } - } - - return try seq.toOwnedSlice(allocator); - } - - fn acceptSpanSequence(parser: *Parser) ![]Span { - const allocator = parser.allocator; - var seq: std.ArrayList(Span) = .empty; - defer seq.deinit(allocator); - - accept_loop: while (true) { - const id = if (parser.acceptIdentifier()) |id| - id - else |_| if (parser.accept(.@"}")) |_| - break :accept_loop - else |_| - return error.UnexpectedToken; - - switch (id) { - .item, - .toc, - .h1, - .h2, - .h3, - .p, - .quote, - .pre, - .enumerate, - .itemize, - .image, - .hdoc, - => return error.InvalidSpan, - - .span => { - const text = try parser.acceptText(); - try seq.append(allocator, .{ .text = text }); - }, - .emph => { - const text = try parser.acceptText(); - try seq.append(allocator, .{ .emphasis = text }); - }, - .mono => { - const text = try parser.acceptText(); - try seq.append(allocator, .{ .monospace = text }); - }, - - .link => { - const href = try 
parser.acceptText(); - const text = try parser.acceptText(); - try seq.append(allocator, .{ - .link = .{ - .href = href, - .text = text, - }, - }); - }, - } } + }; - return try seq.toOwnedSlice(allocator); - } + code: Code, + location: Location, + message: []const u8, }; -const ParserCore = parser_toolkit.ParserCore(Tokenizer, .{ .whitespace, .comment }); - -const Pattern = parser_toolkit.Pattern(TokenType); - -const Token = Tokenizer.Token; - -const Tokenizer = parser_toolkit.Tokenizer(TokenType, &[_]Pattern{ - Pattern.create( - .comment, - parser_toolkit.matchers.withPrefix( - "#", - parser_toolkit.matchers.takeNoneOf("\n"), - ), - ), - - Pattern.create(.@"{", parser_toolkit.matchers.literal("{")), - Pattern.create(.@"}", parser_toolkit.matchers.literal("}")), - Pattern.create(.text, matchStringLiteral('\"')), - - Pattern.create(.identifier, parser_toolkit.matchers.identifier), - - Pattern.create(.whitespace, parser_toolkit.matchers.whitespace), -}); - -fn matchStringLiteral(comptime boundary: u8) parser_toolkit.Matcher { - const T = struct { - fn match(str: []const u8) ?usize { - if (str.len < 2) - return null; - - if (str[0] != boundary) - return null; +/// A collection of diagnostic messages. 
+pub const Diagnostics = struct { + arena: std.heap.ArenaAllocator, + items: std.ArrayList(Diagnostic) = .empty, - var i: usize = 1; - while (i < str.len) { - if (str[i] == boundary) - return i + 1; + pub fn init(allocator: std.mem.Allocator) Diagnostic { + return .{ .arena = .init(allocator) }; + } - if (str[i] == '\\') { - i += 2; // skip over the escape and the escaped char - } else { - i += 1; // just go to the next char - } - } + pub fn deinit(diag: *Diagnostics) void { + diag.arena.deinit(); + diag.* = undefined; + } - return null; - } - }; + pub fn add(diag: *Diagnostics, code: Diagnostic.Code, location: Diagnostic.Location, comptime fmt: []const u8, args: anytype) !void { + const allocator = diag.arena.allocator(); - return T.match; -} + const msg = try std.fmt.allocPrint(allocator, fmt, args); + errdefer allocator.free(msg); -const TokenType = enum { - comment, - whitespace, - identifier, - text, - @"{", - @"}", + try diag.items.append(allocator, .{ + .location = location, + .code = code, + .message = msg, + }); + } }; diff --git a/src/main.zig b/src/main.zig index 462bfe7..3cdb76c 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,127 +1,28 @@ const std = @import("std"); +const builtin = @import("builtin"); const hdoc = @import("hyperdoc"); -const args_parser = @import("args"); -pub fn main() !u8 { - var stdout_buf: [1024]u8 = undefined; - const stdout_file: std.fs.File = .stdout(); - var stdout_writer = stdout_file.writer(&stdout_buf); - const stdout = &stdout_writer.interface; - var stderr_buf: [1024]u8 = undefined; - const stderr_file: std.fs.File = .stderr(); - var stderr_writer = stderr_file.writer(&stderr_buf); - const stderr = &stderr_writer.interface; - - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - - const allocator = gpa.allocator(); - - var cli = args_parser.parseForCurrentProcess(CliOptions, allocator, .print) catch return 1; - defer cli.deinit(); - - if (cli.options.help) { - try 
printUsage(cli.executable_name.?, stdout); - return 0; - } - - if (cli.positionals.len != 1) { - try printUsage(cli.executable_name.?, stderr); - return 1; - } +var debug_allocator: std.heap.DebugAllocator(.{}) = .init; - var error_location: hdoc.ErrorLocation = undefined; - - var document: hdoc.Document = blk: { - const source_text = try std.fs.cwd().readFileAlloc( - allocator, - cli.positionals[0], - 512 << 20, - ); // 512MB - defer allocator.free(source_text); - - break :blk hdoc.parse(allocator, source_text, &error_location) catch |err| { - error_location.source = cli.positionals[0]; - std.log.err("{f}: Failed to parse document: {s}", .{ - error_location, - switch (err) { - error.UnexpectedToken, - error.InvalidIdentifier, - error.UnexpectedCharacter, - error.InvalidTopLevelItem, - error.InvalidSpan, - => "syntax error", - error.InvalidFormat => "not a HyperDocument file", - error.InvalidVersion => "unsupported file version", - error.OutOfMemory => "out of memory", - error.EndOfFile => "unexpected end of file", - error.InvalidEscapeSequence => "illegal escape sequence", - // else => |e| @errorName(e), - }, - }); - return 1; - }; +pub fn main() !u8 { + defer if (builtin.mode == .Debug) { + std.debug.assert(debug_allocator.deinit() == .ok); }; - defer document.deinit(); - - const output_file: ?std.fs.File = if (cli.options.output != null and !std.mem.eql(u8, cli.options.output.?, "-")) - try std.fs.cwd().createFile(cli.options.output.?, .{}) + const allocator = if (builtin.mode == .Debug) + debug_allocator.allocator() else - null; - defer if (output_file) |f| f.close(); + std.heap.smp_allocator; - const renderDocument = switch (cli.options.format) { - .hdoc => &@import("renderer/HyperDoc.zig").render, - .html => &@import("renderer/Html.zig").render, - .markdown => &@import("renderer/Markdown.zig").render, - }; - - if (output_file) |f| { - var out_buf: [1024]u8 = undefined; - var out_writer = f.writer(&out_buf); - const output_stream = &out_writer.interface; - try 
renderDocument(output_stream, document); - try output_stream.flush(); - } else { - try renderDocument(stdout, document); - try stdout.flush(); - } - - return 0; -} - -const TargetFormat = enum { - hdoc, - html, - markdown, -}; + // TODO: Parse arguments and load file. + const document = + \\hdoc "2.0" + \\ + ; -const CliOptions = struct { - help: bool = false, - format: TargetFormat = .hdoc, - output: ?[]const u8 = null, + var doc = try hdoc.parse(allocator, document, null); + defer doc.deinit(); - pub const shorthands = .{ - .h = "help", - .f = "format", - .o = "output", - }; -}; + // TODO: Dump AST -fn printUsage(exe_name: []const u8, stream: *std.Io.Writer) !void { - try stream.print("{s} [-h] [-f ] \n", .{ - std.fs.path.basename(exe_name), - }); - try stream.writeAll( - \\ - \\Options: - \\ -h, --help Prints this text - \\ -f, --format Converts the given into . Legal values are: - \\ - hdoc - Formats the input file into canonical format. - \\ - html - Renders the HyperDocument as HTML. - \\ - markdown - Renders the HyperDocument as CommonMark. - \\ -o, --output Instead of printing to stdout, will put the output into . 
- \\ - ); - try stream.flush(); + return 0; } diff --git a/src/renderer/Html.zig b/src/renderer/Html.zig deleted file mode 100644 index 94ad957..0000000 --- a/src/renderer/Html.zig +++ /dev/null @@ -1,167 +0,0 @@ -const std = @import("std"); -const hdoc = @import("hyperdoc"); - -pub const WriteError = std.Io.Writer.Error; - -pub fn render(writer: *std.Io.Writer, document: hdoc.Document) WriteError!void { - try writer.writeAll( - \\ - \\ - \\ - \\ - \\ - \\ - ); - - try renderBlocks(writer, document, document.contents); - - try writer.writeAll( - \\ - \\ - ); -} - -fn renderBlocks( - writer: *std.Io.Writer, - document: hdoc.Document, - blocks: []const hdoc.Block, -) WriteError!void { - for (blocks) |block| { - try renderBlock(writer, document, block); - } -} - -fn renderBlock( - writer: *std.Io.Writer, - document: hdoc.Document, - block: hdoc.Block, -) WriteError!void { - switch (block) { - .paragraph => |content| { - try writer.writeAll("

"); - try renderSpans(writer, content.contents); - try writer.writeAll("

\n"); - }, - - .ordered_list => |content| { - try writer.writeAll("
    \n"); - for (content) |item| { - try writer.writeAll("
  1. "); - try renderBlocks(writer, document, item.contents); - try writer.writeAll("
  2. \n"); - } - try writer.writeAll("
\n"); - }, - - .unordered_list => |content| { - try writer.writeAll("
    \n"); - for (content) |item| { - try writer.writeAll("
  • "); - try renderBlocks(writer, document, item.contents); - try writer.writeAll("
  • \n"); - } - try writer.writeAll("
\n"); - }, - - .quote => |content| { - try writer.writeAll("
"); - try renderSpans(writer, content.contents); - try writer.writeAll("
\n"); - }, - - .preformatted => |content| { - if (!std.mem.eql(u8, content.language, "")) { - try writer.print("
", .{content.language});
-            } else {
-                try writer.writeAll("
");
-            }
-            try renderSpans(writer, content.contents);
-            try writer.writeAll("
\n"); - }, - .image => |content| { - try writer.print("\n", .{content.path}); - }, - .heading => |content| { - try writer.writeAll(switch (content.level) { - .document => " " " 0) { - try writer.print(" id=\"{s}\"", .{content.anchor}); - } - try writer.writeAll(">"); - - try writer.print("{f}", .{escapeHtml(content.title)}); - - try writer.writeAll(switch (content.level) { - .document => "\n", - .chapter => "\n", - .section => "\n", - }); - }, - .table_of_contents => |content| { - // TODO: Render TOC - _ = content; - }, - } -} - -fn renderSpans( - writer: *std.Io.Writer, - spans: []const hdoc.Span, -) WriteError!void { - for (spans) |span| { - try renderSpan(writer, span); - } -} - -fn renderSpan(writer: *std.Io.Writer, span: hdoc.Span) WriteError!void { - switch (span) { - .text => |val| { - try writer.print("{f}", .{escapeHtml(val)}); - }, - .emphasis => |val| { - try writer.writeAll(""); - try writer.print("{f}", .{escapeHtml(val)}); - try writer.writeAll(""); - }, - .monospace => |val| { - try writer.writeAll(""); - try writer.print("{f}", .{escapeHtml(val)}); - try writer.writeAll(""); - }, - .link => |val| { - try writer.print("{f}", .{ - val.href, - escapeHtml(val.text), - }); - }, - } -} - -fn escapeHtml(string: []const u8) HtmlEscaper { - return .{ .string = string }; -} - -const HtmlEscaper = struct { - string: []const u8, - - pub fn format(html: HtmlEscaper, writer: *std.Io.Writer) !void { - for (html.string) |char| { - switch (char) { - '&' => try writer.writeAll("&"), - '<' => try writer.writeAll("<"), - '>' => try writer.writeAll(">"), - '\"' => try writer.writeAll("""), - '\'' => try writer.writeAll("'"), - '\n' => try writer.writeAll("
"), - else => try writer.writeByte(char), - } - } - } -}; diff --git a/src/renderer/HyperDoc.zig b/src/renderer/HyperDoc.zig deleted file mode 100644 index 5aa508f..0000000 --- a/src/renderer/HyperDoc.zig +++ /dev/null @@ -1,158 +0,0 @@ -const std = @import("std"); -const hdoc = @import("hyperdoc"); - -pub const WriteError = std.Io.Writer.Error; - -pub fn render(writer: *std.Io.Writer, document: hdoc.Document) WriteError!void { - try writer.writeAll("hdoc \"1.0\"\n"); - try renderBlocks(writer, document, document.contents, 0); -} - -fn renderBlocks( - writer: *std.Io.Writer, - document: hdoc.Document, - blocks: []const hdoc.Block, - indent: usize, -) WriteError!void { - for (blocks) |block| { - try renderBlock(writer, document, block, indent); - } -} - -fn renderBlock( - writer: *std.Io.Writer, - document: hdoc.Document, - block: hdoc.Block, - indent: usize, -) WriteError!void { - try writer.splatByteAll(' ', 2 * indent); - switch (block) { - .paragraph => |content| { - try writer.writeAll("p {\n"); - try renderSpans(writer, content.contents, indent + 1); - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .ordered_list => |content| { - try writer.writeAll("enumerate {\n"); - for (content) |item| { - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("item {\n"); - - try renderBlocks(writer, document, item.contents, indent + 2); - - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("}\n"); - } - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .unordered_list => |content| { - try writer.writeAll("itemize {\n"); - for (content) |item| { - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("item {\n"); - - try renderBlocks(writer, document, item.contents, indent + 2); - - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("}\n"); - } - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .quote => |content| { 
- try writer.writeAll("quote {\n"); - try renderSpans(writer, content.contents, indent + 1); - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .preformatted => |content| { - try writer.print("pre \"{f}\" {{\n", .{ - escape(content.language), - }); - try renderSpans(writer, content.contents, indent + 1); - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - .image => |content| { - try writer.print("image \"{f}\"\n", .{ - escape(content.path), - }); - }, - .heading => |content| { - try writer.writeAll(switch (content.level) { - .document => "h1", - .chapter => "h2", - .section => "h3", - }); - try writer.print(" \"{f}\" \"{f}\"\n", .{ - escape(content.anchor), - escape(content.title), - }); - }, - .table_of_contents => { - try writer.writeAll("toc {}\n"); - }, - } -} - -fn renderSpans( - writer: *std.Io.Writer, - spans: []const hdoc.Span, - indent: usize, -) WriteError!void { - for (spans) |span| { - try renderSpan(writer, span, indent); - } -} - -fn renderSpan( - writer: *std.Io.Writer, - span: hdoc.Span, - indent: usize, -) WriteError!void { - try writer.splatByteAll(' ', 2 * indent); - switch (span) { - .text => |val| { - try writer.print("span \"{f}\"\n", .{escape(val)}); - }, - .emphasis => |val| { - try writer.print("emph \"{f}\"\n", .{escape(val)}); - }, - .monospace => |val| { - try writer.print("mono \"{f}\"\n", .{escape(val)}); - }, - .link => |val| { - try writer.print("link \"{f}\" \"{f}\"\n", .{ - escape(val.href), - escape(val.text), - }); - }, - } -} - -fn escape(string: []const u8) HDocEscaper { - return .{ .string = string }; -} - -const HDocEscaper = struct { - string: []const u8, - - pub fn format(html: HDocEscaper, writer: *std.Io.Writer) !void { - for (html.string) |char| { - switch (char) { - '\n' => try writer.writeAll("\\n"), - '\r' => try writer.writeAll("\\r"), - '\x1B' => try writer.writeAll("\\e"), - '\'' => try writer.writeAll("\\\'"), - '\"' => try writer.writeAll("\\\""), - 
else => try writer.writeByte(char), - } - } - } -}; diff --git a/src/renderer/Markdown.zig b/src/renderer/Markdown.zig deleted file mode 100644 index e8ba9ab..0000000 --- a/src/renderer/Markdown.zig +++ /dev/null @@ -1,131 +0,0 @@ -const std = @import("std"); -const hdoc = @import("hyperdoc"); - -const WriteError = std.Io.Writer.Error; - -pub fn render(writer: *std.Io.Writer, document: hdoc.Document) WriteError!void { - try renderBlocks(writer, document, document.contents); -} - -fn renderBlocks( - writer: *std.Io.Writer, - document: hdoc.Document, - blocks: []const hdoc.Block, -) WriteError!void { - for (blocks) |block| { - try renderBlock(writer, document, block); - } -} - -fn renderBlock( - writer: *std.Io.Writer, - document: hdoc.Document, - block: hdoc.Block, -) WriteError!void { - switch (block) { - .paragraph => |content| { - try renderSpans(writer, content.contents); - try writer.writeAll("\n\n"); - }, - - .ordered_list => |content| { - for (content) |item| { - try writer.writeAll("- "); - try renderBlocks(writer, document, item.contents); - } - }, - - .unordered_list => |content| { - for (content, 1..) |item, index| { - try writer.print("{}. 
", .{index}); - try renderBlocks(writer, document, item.contents); - } - }, - - .quote => |content| { - try writer.writeAll("> "); - try renderSpans(writer, content.contents); - try writer.writeAll("\n\n"); - }, - - .preformatted => |content| { - try writer.print("```{s}\n", .{content.language}); - try renderSpans(writer, content.contents); - try writer.writeAll("```\n\n"); - }, - .image => |content| { - try writer.print("![]({s})\n\n", .{content.path}); - }, - .heading => |content| { - try writer.writeAll(switch (content.level) { - .document => "# ", - .chapter => "## ", - .section => "### ", - }); - if (content.anchor.len > 0) { - std.log.warn("anchor not supported in markdown!", .{}); - } - - try writer.print("{f}\n\n", .{escapeMd(content.title)}); - }, - .table_of_contents => |content| { - // TODO: Render TOC - _ = content; - }, - } -} - -fn renderSpans( - writer: *std.Io.Writer, - spans: []const hdoc.Span, -) WriteError!void { - for (spans) |span| { - try renderSpan(writer, span); - } -} - -fn renderSpan(writer: *std.Io.Writer, span: hdoc.Span) WriteError!void { - switch (span) { - .text => |val| { - try writer.print("{f}", .{escapeMd(val)}); - }, - .emphasis => |val| { - try writer.writeAll("**"); - try writer.print("{f}", .{escapeMd(val)}); - try writer.writeAll("**"); - }, - .monospace => |val| { - try writer.writeAll("`"); - try writer.print("{f}", .{escapeMd(val)}); - try writer.writeAll("`"); - }, - .link => |val| { - try writer.print("[{f}]({s})", .{ - escapeMd(val.text), - val.href, - }); - }, - } -} - -fn escapeMd(string: []const u8) MarkdownEscaper { - return .{ .string = string }; -} - -const MarkdownEscaper = struct { - string: []const u8, - - pub fn format(html: MarkdownEscaper, writer: *std.Io.Writer) !void { - for (html.string) |char| { - switch (char) { - '&' => try writer.writeAll("&"), - '<' => try writer.writeAll("<"), - '>' => try writer.writeAll(">"), - '\"' => try writer.writeAll("""), - '\'' => try writer.writeAll("'"), - '\n' => try 
writer.writeAll(" \n"), - else => try writer.writeByte(char), - } - } - } -}; diff --git a/src/testsuite.zig b/src/testsuite.zig index 4c0d4ac..58450b5 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -8,7 +8,7 @@ fn testAcceptDocument(document: []const u8) !void { test "empty document" { try testAcceptDocument( - \\hdoc "1.0" + \\hdoc "2.0" ); } @@ -22,17 +22,11 @@ test "invalid document" { try std.testing.expectError(error.InvalidFormat, testAcceptDocument( \\hdoc { )); - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\span - )); - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\blob - )); } test "invalid version" { try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc 1.0 + \\hdoc 2.0 )); try std.testing.expectError(error.InvalidVersion, testAcceptDocument( \\hdoc "" @@ -40,207 +34,7 @@ test "invalid version" { try std.testing.expectError(error.InvalidVersion, testAcceptDocument( \\hdoc "1.2" )); -} - -test "accept toc" { - try testAcceptDocument( - \\hdoc "1.0" - \\toc {} - ); -} - -test "accept multiple blocks" { - try testAcceptDocument( - \\hdoc "1.0" - \\toc {} - \\toc {} - \\toc {} - \\toc {} - ); -} - -test "accept image" { - try testAcceptDocument( - \\hdoc "1.0" - \\image "dog.png" - ); -} - -test "accept headers" { - try testAcceptDocument( - \\hdoc "1.0" - \\h1 "" "Empty anchor" - \\h2 "chapter" "Chapter anchor" - \\h3 "section" "Section anchor" - ); -} - -test "invalid top level items" { - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( - \\hdoc "1.0" - \\span - )); - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( - \\hdoc "1.0" - \\link - )); - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( - \\hdoc "1.0" - \\emph - )); - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( + try std.testing.expectError(error.InvalidVersion, testAcceptDocument( \\hdoc "1.0" - 
\\mono )); } - -test "empty ordered lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\enumerate {} - ); -} - -test "ordered lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\enumerate { - \\ item { toc {} } - \\ item { toc {} } - \\ item { toc {} } - \\} - ); -} - -test "unordered lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\itemize { - \\ item { toc {} } - \\ item { toc {} } - \\ item { toc {} } - \\} - ); -} - -test "nested lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\enumerate { - \\ item { itemize { } } - \\ item { enumerate { } } - \\ item { toc { } } - \\ item { itemize { item { toc { } } } } - \\ item { enumerate { item { toc { } } } } - \\} - ); -} - -test "empty paragraph" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{} - \\p{} - \\p{} - ); -} - -test "empty quote" { - try testAcceptDocument( - \\hdoc "1.0" - \\quote{} - \\quote{} - \\quote{} - ); -} - -test "spans" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ span "hello" } - \\p{ span "\n" } - \\p{ span "" } - ); -} - -test "mono" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ mono "hello" } - \\p{ mono "\n" } - \\p{ mono "" } - ); -} - -test "emph" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ emph "hello" } - \\p{ emph "\n" } - \\p{ emph "" } - ); -} - -test "links" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ link "" "hello" } - \\p{ link "" "\n" } - \\p{ link "" "" } - \\p{ link "https://www.example.com/deep/path.txt" "hello" } - \\p{ link "https://www.example.com/deep/path.txt" "\n" } - \\p{ link "https://www.example.com/deep/path.txt" "" } - \\p{ link "#anchor" "hello" } - \\p{ link "#anchor" "\n" } - \\p{ link "#anchor" "" } - ); -} - -test "code block" { - try testAcceptDocument( - \\hdoc "1.0" - \\pre "" { } - \\pre "c++" { } - \\pre "zig" { } - \\pre "c++" { span "#include " } - \\pre "zig" { span "const std = @import(\"std\");" } - ); -} - -test "example document" { - try testAcceptDocument( - \\hdoc "1.0" - \\h1 "intro" "Introduction" - \\toc { } 
- \\p { - \\ span "Hello, World!\n" - \\ link "http://google.com" "Visit Google!" - \\ span "\n" - \\ emph "This is fat!" - \\ span "\n" - \\ mono "int main()" - \\ span "\n" - \\} - \\enumerate { - \\ item { p { span "first" } } - \\ item { p { span "second" } } - \\ item { p { span "third" } } - \\} - \\itemize { - \\ item { p { span "first" } } - \\ item { p { span "second" } } - \\ item { p { span "third" } } - \\} - \\quote { - \\ span "Life is what happens when you're busy making other plans.\n - John Lennon" - \\} - \\pre "zig" { - \\ span "const std = @import(\"std\");\n" - \\ span "\n" - \\ span "pub fn main() !void {\n" - \\ span " std.debug.print(\"Hello, World!\\n\", .{});\n" - \\ span "}\n" - \\} - \\image "dog.png" - ); -} From f2d3b5624b795ce80a9e939aac6f99174b49957e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 11:07:01 +0100 Subject: [PATCH 002/116] Fixes some spec parts --- docs/specification.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 3e08e00..f3790b0 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -24,8 +24,8 @@ p(id="foo") { p { This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). - We can also \link(ref="foo"){link to other parts of a document) or \link(url="https://ashet.computer"){to websites}. - With \mono(lang="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. + We can also \link(ref="foo"){link to other parts of a document} or \link(url="https://ashet.computer"){to websites}. + With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. 
} h2{Special Paragraphs} @@ -47,7 +47,7 @@ p: | Literal lines don't perform any parsing, so they don't require any escaping of characters. | This is really useful for code blocks: -pre(lang="c"): +pre(syntax="c"): | #include | int main(int argc, char const * argv[]) { | printf("Hello, World!\n"); @@ -116,9 +116,9 @@ Short notes on grammar notation: - `[ ... ]` is an option - `a | b | c` is alternatives - `( ... )` is a group -- `"foo"` is a literal token sequence +- `"foo"` is a literal token sequence, no escape sequences (So `"\"` is a single backslash) - `/.../` is a regex -- Whitespace is assumed to be ignored unless matched by a literal, so tokens are typically separated by whitespace +- Whitespace is assumed to be ignored between tokens unless matched by a literal or regex, so tokens are typically separated by whitespace - Upper case elements are roughly tokens, while lowercase elements are rules. ``` @@ -143,6 +143,8 @@ LITERAL_LINE := /^\s*\|(.*)$/ WORD := /[^\s\{\}\\]+/ ``` +**NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. + ## Semantic Structure All elements have these attributes: @@ -306,6 +308,10 @@ This element contains the contents of a table cell. These elements are all allowed inside a paragraph-like content and can typically be nested. +### Plain Text + +This is normal plain text and has no special meaning. + ### Emphasis: `em` **Nesting:** Yes @@ -332,7 +338,7 @@ Renders the text with a horizontal line through the text, striking it out. **Nesting:** Yes -Renders the text a bit smaller and moved upwards (`sub`) or downwards (`sub`) to allow sub- or superscript rendering. +Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to allow sub- or superscript rendering. 
### Linking: `link` @@ -345,7 +351,7 @@ Renders the text a bit smaller and moved upwards (`sub`) or downwards (`sub`) to Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. -### Localized Date/Time: `date`, `time`, `datedate` +### Localized Date/Time: `date`, `time`, `datetime` **Nesting:** No From 13b8b6b13a28a9f6a3666bf425c13806a84f22b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 11:45:51 +0100 Subject: [PATCH 003/116] More spec improvements --- docs/specification.md | 60 +++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index f3790b0..6757a25 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -145,6 +145,8 @@ WORD := /[^\s\{\}\\]+/ **NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. +**NOTE:** All attribute values are strings, so numeric-looking values are still expressed as strings (e.g. `depth="1"`). + ## Semantic Structure All elements have these attributes: @@ -199,7 +201,7 @@ The type of the paragraph includes a semantic hint: | Attribute | Function | | --------- | -------------------------------------------------------------------------------------------------------------------- | -| `first` | An integer that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | +| `first` | An integer string that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. 
| ### Figures: `img` @@ -234,7 +236,7 @@ If a pre contains inline elements, these will still be parsed and apply their st | Attribute | Function | | --------- | ----------------------------------------------------------------------- | -| `depth` | `1`, `2` or `3`. Defines how many levels of headings shall be included. | +| `depth` | String `1`, `2` or `3`. Defines how many levels of headings shall be included. | Renders a table of contents for the current document. @@ -261,14 +263,17 @@ will have two identical list items. ### Tables: `table` -Allowed Items: `columns`, `row`, `group` +**Allowed Items:** `columns`, `row`, `group` + +Tables are made up of an optional header row (`columns`) followed by a sequence of `row` and `group` elements. -> TODO: Spec out tables proper. -> `columns` is basically a `row` with only column headings -> `row` is just a row with cells -> all rows must contain the same amount of cell span -> `group` is a heading for subsequent rows -> `row.title` attribute is displayed in a column left of the first column, the top-left element is always empty +- `columns` defines the header labels and the column count. +- `row` defines a data row. +- `group` provides a section heading that applies to subsequent rows until the next group or the end of the table. + +All `row` and `columns` elements must resolve to the same number of columns after applying `colspan`. +If a `row` uses the `title` attribute or a `group` is present, renderers must reserve a leading title column. +In that case, the header row should have an empty leading cell before the column headers. ## Table Elements @@ -276,7 +281,7 @@ Allowed Items: `columns`, `row`, `group` **Allowed Items:** `td` -This element contains cells +This element contains the header cells for each column. 
### Rows: `row` @@ -298,16 +303,20 @@ A *row group* is a row that contains a single heading-style cell that labels the | Attribute | Function | | --------- | -------------------------------------------------- | -| `colspan` | Integer defining how many columns this cell spans. | +| `colspan` | Integer string defining how many columns this cell spans. | This element contains the contents of a table cell. -> TODO: Similar to `li`, it can be string or block-sequence. +Like `li`, a `td` can either contain a single string or a nested block sequence. ## Inline Text These elements are all allowed inside a paragraph-like content and can typically be nested. +*Inline Text* can either be a string literal, a literal block or a list. + +If the text is a list, it allows the use of inline elements like `\em` or `\mono`. + ### Plain Text This is normal plain text and has no special meaning. @@ -355,25 +364,10 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically **Nesting:** No -Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. +| Element | Attribute | Function | +| ----------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | +| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | +| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | -> TODO: Add `fmt` attribute: -> `\date` takes an attribute fmt which can be -> - "year" (2025) -> - "month" (December), -> - "day" (22th) -> - "weekday" (monday) -> - "short" (22.12.2025) -> - "long" (22th of December 2025) -> - "relative" (two days ago, two months ago, ...) 
-> -> `\time` takes an attribute fmt which can be -> - "short" (09:41) -> - "long" (09:41:25) -> - "rough" (early morning, morning, noon, afternoon, evening, late in the night, ...) -> - "relative" (two minutes ago, two days ago, ...) -> -> `\datetime` takes an attribute fmt which can be -> - *To be done* -> - ... -> \ No newline at end of file +Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. From 71f0bf86da836dabfdb623176012607e6827f388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 12:05:05 +0100 Subject: [PATCH 004/116] Adds example files. --- examples/assets/diagram.svg | 5 ++ examples/featurematrix.hdoc | 101 ++++++++++++++++++++++++++++++++++++ examples/guide.hdoc | 83 +++++++++++++++++++++++++++++ examples/tables.hdoc | 36 +++++++++++++ 4 files changed, 225 insertions(+) create mode 100644 examples/assets/diagram.svg create mode 100644 examples/featurematrix.hdoc create mode 100644 examples/guide.hdoc create mode 100644 examples/tables.hdoc diff --git a/examples/assets/diagram.svg b/examples/assets/diagram.svg new file mode 100644 index 0000000..a9d4754 --- /dev/null +++ b/examples/assets/diagram.svg @@ -0,0 +1,5 @@ + + + HyperDoc + Example Asset + \ No newline at end of file diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc new file mode 100644 index 0000000..c900af3 --- /dev/null +++ b/examples/featurematrix.hdoc @@ -0,0 +1,101 @@ +hdoc "2.0" + +h1 { Small Computer Feature Matrix } + +table { + columns { + td "Ashet Home Computer" + td { \link(url="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } + td { \link(url="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } + td { \link(url="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } + td { \link(url="https://www.codycomputer.org/") "Cody Computer" } + } + 
row(title="CPU Bus Width") { + td "32 bit" + td "64 bit" + td "8 bit" + td "32 bit" + td "8 bit" + } + row(title="CPU Architecture") { + td "Arm Cortex-M33" + td "Arm Cortex-A72" + td "AVRe+" + td "Arm Cortex-M0+" + td "6502" + } + row(title="CPU Model") { + td { \link(url="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } + td { \link(url="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } + td { \link(url="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } + td { \link(url="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } + td { \link(url="https://wdc65xx.com/integrated-circuit") "W65C02S" } + } + row(title="CPU Cores") { + td "2" + td "4" + td "1" + td "2" + td "1" + } + row(title="CPU Clock") { + td "150 MHz" + td "1.8 GHz" + td "16 MHz" + td "133 MHz" + td "1 MHz" + } + row(title="System Memory") { + td "8 MB" + td "1, 2, 4 or 8 GB" + td "2 KB" + td "264 kB" + td "64 kB" + } + row(title="Comprehensible") { + td "✅" + td "❌" + td "✅" + td "✅" + td "✅" + } + row(title="Modern I/O") { + td "✅" + td "✅" + td "❌" + td { ❌\sup{1} } + td "❌" + } + row(title="Modular Design") { + td "✅" + td "❌" + td "❌" + td "✅" + td { ✅\sup{2} } + } + row(title="Full Documentation") { + td "✅" + td "❌" + td "✅" + td "✅" + td "✅" + } + row(title="Ethernet") { + td "✅" + td "✅" + td "❌" + td "❌" + td "❌" + } + row(title="Parallax Propeller") { + td { ✅ (\link(url="https://www.parallax.com/propeller-2"){Propeller 2}) } + td "❌" + td "❌" + td "❌" + td { ✅ (\link(url="https://www.parallax.com/propeller-1"){Propeller 1}) } + } +} + +p { \sup{1}: Neotron Pico uses PS/2 for mouse/keyboard and VGA for video. } + +p { \sup{2}: Cody Computer has a single cartridge that can be added. 
} diff --git a/examples/guide.hdoc b/examples/guide.hdoc new file mode 100644 index 0000000..82ed458 --- /dev/null +++ b/examples/guide.hdoc @@ -0,0 +1,83 @@ +hdoc "2.0" + +h1(id="intro", lang="en") { HyperDoc 2.0 Examples } + +toc(depth="2") {} + +h2(id="paragraphs") { Paragraphs and Inline Text } + +p(id="p-basic") { + This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. +} + +p(lang="de") { + Dieser Absatz zeigt das Attribut \mono{lang} auf Blockebene. +} + +p "This paragraph uses a string literal body instead of a list." + +p { + Inline scripts support \mono(syntax="zig"){const version = "2.0";} as well as sub/superscripts like H\sub{2}O and x\sup{2}. +} + +p { + Links can target \link(ref="fig-diagram"){other blocks} or external \link(url="https://ashet.computer"){resources}. +} + +note { Notes highlight supportive information. } +warning { Warnings call out risky behavior. } +danger { Danger paragraphs emphasize critical hazards. } +tip { Tips provide actionable hints. } +quote { Quotes include sourced or emphasized wording. } +spoiler { Spoilers hide key story information until revealed. } + +h2(id="literals") { Literal and Preformatted Blocks } + +p: +| Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. +| They are introduced by a trailing colon. +| You can capture snippets without escaping anything. + +pre(id="code-sample", syntax="zig") { +test { + const message = "HyperDoc"; + const answer = \mono(syntax="zig"){42}; +} +} + +h2(id="lists") { Lists } + +ul { + li { p { Apples } } + li "Bananas" + li { p { \em{Cucumbers} with inline markup. } } +} + +ol(first="3") { + li { p { Start counting at three. } } + li "Continue with a string item." + li { p { Finish the sequence. } } +} + +h2(id="media") { Figures } + +p { + The image below has a caption, alt text, and a relative asset path. 
+} + +img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/diagram.svg") { + HyperDoc is centered inside a rounded rectangle. +} + +h2(id="dates") { Dates and Times } + +p { + The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00}. + A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. +} + +h2(id="table-ref") { Tables } + +p { + See the dedicated tables example file for row groups and colspan usage. +} diff --git a/examples/tables.hdoc b/examples/tables.hdoc new file mode 100644 index 0000000..5adb144 --- /dev/null +++ b/examples/tables.hdoc @@ -0,0 +1,36 @@ +hdoc "2.0" + +h1(id="tables") { HyperDoc 2.0 Table Examples } + +toc(depth="1") {} + +h2(id="table-basic") { Table Structure } + +table(id="inventory") { + columns { + td "Item" + td "Quantity" + td "Notes" + } + group { Fresh Produce } + row(title="Fruit") { + td "Apples" + td "12" + td { p { Delivered on \date(fmt="short"){2025-02-08}. } } + } + row(title="Vegetables") { + td "Carrots" + td "7" + td { p { Store at \time(fmt="rough"){08:00:00}. } } + } + group { Pantry } + row(title="Dry Goods") { + td "Rice" + td "3" + td { p { Packed on \datetime(fmt="relative"){2025-02-08T08:00:00Z}. } } + } + row(title="Bulk") { + td(colspan="2") { p { This cell spans two columns. } } + td "Requires label" + } +} From 6a8f864011efaaf24cbccf918a81531ce0e66c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 13:22:22 +0100 Subject: [PATCH 005/116] Removes the dependencies for now. 
--- build.zig.zon | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 9b78c87..00a368a 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -4,14 +4,14 @@ .fingerprint = 0xfd1a4802abc4739e, .dependencies = .{ - .parser_toolkit = .{ - .url = "git+https://github.com/ikskuh/parser-toolkit.git#62e0a3dca3632bb361df59407b2d7805280ab1b9", - .hash = "parser_toolkit-0.1.0-baYGPUVCEwBaVmu09ORh0lLlVjRaJ489TdSIdTa_8VWg", - }, - .args = .{ - .url = "git+https://github.com/ikskuh/zig-args.git#8ae26b44a884ff20dca98ee84c098e8f8e94902f", - .hash = "args-0.0.0-CiLiqojRAACGzDRO7A9dw7kWSchNk29caJZkXuMCb0Cn", - }, + // .parser_toolkit = .{ + // .url = "git+https://github.com/ikskuh/parser-toolkit.git#62e0a3dca3632bb361df59407b2d7805280ab1b9", + // .hash = "parser_toolkit-0.1.0-baYGPUVCEwBaVmu09ORh0lLlVjRaJ489TdSIdTa_8VWg", + // }, + // .args = .{ + // .url = "git+https://github.com/ikskuh/zig-args.git#8ae26b44a884ff20dca98ee84c098e8f8e94902f", + // .hash = "args-0.0.0-CiLiqojRAACGzDRO7A9dw7kWSchNk29caJZkXuMCb0Cn", + // }, }, .paths = .{""}, From 83bf570b18e241d21d77c6567f9051bc1de49e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 15:24:10 +0100 Subject: [PATCH 006/116] Adds AGENTS.md --- AGENTS.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..05ef7d4 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,12 @@ +# AGENTS + +## General guidelines + +- Keep changes focused and incremental; prefer small, reviewable commits. +- Follow existing code style and formatting conventions. +- Use `zig fmt` on Zig source files after edits. +- Ensure new tests are added or updated when behavior changes. +- Run relevant tests (`zig build test`) when making code changes. +- Run `zig build` to validate the main application still compiles +- Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/`. 
+- Avoid editing documentation unless the request explicitly asks for it. From 460bd942f42b0119967ccccf232d264e180878e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 15:36:08 +0100 Subject: [PATCH 007/116] Vibecoded: Adds tokenizer for the new HyperDoc format --- docs/specification.md | 9 +- src/hyperdoc.zig | 198 +++++++++++++++++++++++++++++++++++++++++- src/main.zig | 27 ++++-- src/testsuite.zig | 123 ++++++++++++++++++++------ 4 files changed, 314 insertions(+), 43 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 6757a25..9802700 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -124,23 +124,22 @@ Short notes on grammar notation: ``` document := HEADER { block } -block := IDENTIFIER [ attribute_list ] body +block := WORD [ attribute_list ] body body := list | literal | STRING literal := ":" "\n" { LITERAL_LINE } list := "{" { escape | inline | block | WORD } "}" escape := "\\" | "\{" | "\}" -inline := "\" IDENTIFIER [ attribute_list ] body +inline := "\" WORD [ attribute_list ] body attribute_list := "(" [ attribute { "," attribute } ] ")" -attribute := IDENTIFIER "=" STRING +attribute := WORD "=" STRING -IDENTIFIER := /\b\w+\b/ HEADER := /^hdoc\s+"2.0"\s*$/ STRING := /"(\\.|[^"\r\n])*"/ LITERAL_LINE := /^\s*\|(.*)$/ -WORD := /[^\s\{\}\\]+/ +WORD := /[^\s\{\}\\\"(),=:]+/ ``` **NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c04b0f2..2a49308 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -19,7 +19,198 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. 
pub const Block = union(enum) { - // TODO + placeholder: void, +}; + +/// A token emitted by the HyperDoc tokenizer. +pub const Token = struct { + pub const Tag = enum { + eof, + word, + string_literal, + unterminated_string_literal, + literal_line, + newline, + @"{", + @"}", + @"(", + @")", + @",", + @"=", + @":", + @"\\", + invalid_character, + }; + + tag: Tag, + offset: usize, + len: usize, + + /// Returns the slice of the original input covered by this token. + pub fn slice(token: Token, input: []const u8) []const u8 { + return input[token.offset .. token.offset + token.len]; + } +}; + +/// Tokenizes HyperDoc source text incrementally. +pub const Tokenizer = struct { + input: []const u8, + index: usize = 0, + line_start: bool = true, + finished: bool = false, + + /// Creates a tokenizer for the provided input. + pub fn init(input: []const u8) Tokenizer { + return .{ .input = input }; + } + + /// Returns the next token, or null after emitting EOF once. + pub fn next(tok: *Tokenizer) ?Token { + if (tok.finished) { + return null; + } + + while (tok.index < tok.input.len) { + const start = tok.index; + const ch = tok.input[tok.index]; + + if (tok.line_start) { + const literal = tok.scanLiteralLine(); + if (literal) |token| { + return token; + } + } + + if (tok.isNewline(ch)) { + const consumed = tok.consumeNewline(); + tok.line_start = true; + return .{ .tag = .newline, .offset = start, .len = consumed }; + } + + if (tok.isHorizontalWhitespace(ch)) { + tok.index += 1; + tok.line_start = false; + continue; + } + + tok.line_start = false; + + switch (ch) { + '{' => return tok.simpleToken(.@"{"), + '}' => return tok.simpleToken(.@"}"), + '(' => return tok.simpleToken(.@"("), + ')' => return tok.simpleToken(.@")"), + ',' => return tok.simpleToken(.@","), + '=' => return tok.simpleToken(.@"="), + ':' => return tok.simpleToken(.@":"), + '\\' => return tok.simpleToken(.@"\\"), + '"' => return tok.scanStringLiteral(), + else => {}, + } + + if (tok.isWordChar(ch)) { + 
return tok.scanWord(); + } + + // Non-obvious fallback: we still emit a token for unknown bytes + // so callers can recover and keep walking the stream. + tok.index += 1; + return .{ .tag = .invalid_character, .offset = start, .len = 1 }; + } + + tok.finished = true; + return .{ .tag = .eof, .offset = tok.input.len, .len = 0 }; + } + + /// Emits a single-character token at the current offset. + fn simpleToken(tok: *Tokenizer, tag: Token.Tag) Token { + const start = tok.index; + tok.index += 1; + return .{ .tag = tag, .offset = start, .len = 1 }; + } + + /// Scans a quoted string or an unterminated string literal. + fn scanStringLiteral(tok: *Tokenizer) Token { + const start = tok.index; + tok.index += 1; + while (tok.index < tok.input.len) { + const ch = tok.input[tok.index]; + if (ch == '"') { + tok.index += 1; + return .{ .tag = .string_literal, .offset = start, .len = tok.index - start }; + } + if (tok.isNewline(ch)) { + // We stop before the newline so the next call can emit it. + return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; + } + if (ch == '\\') { + // Escape sequences consume the next byte, even if it is a quote. + if (tok.index + 1 >= tok.input.len) { + tok.index = tok.input.len; + break; + } + tok.index += 2; + continue; + } + tok.index += 1; + } + + return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; + } + + /// Scans a WORD token as defined by the grammar. + fn scanWord(tok: *Tokenizer) Token { + const start = tok.index; + tok.index += 1; + while (tok.index < tok.input.len and tok.isWordChar(tok.input[tok.index])) { + tok.index += 1; + } + return .{ .tag = .word, .offset = start, .len = tok.index - start }; + } + + /// Scans a literal line token if the current position is at a line start. 
+ fn scanLiteralLine(tok: *Tokenizer) ?Token { + const start = tok.index; + var cursor = tok.index; + while (cursor < tok.input.len and tok.isHorizontalWhitespace(tok.input[cursor])) { + cursor += 1; + } + if (cursor >= tok.input.len or tok.input[cursor] != '|') { + return null; + } + cursor += 1; + while (cursor < tok.input.len and !tok.isNewline(tok.input[cursor])) { + cursor += 1; + } + tok.index = cursor; + tok.line_start = false; + return .{ .tag = .literal_line, .offset = start, .len = cursor - start }; + } + + /// Consumes a newline, including CRLF sequences. + fn consumeNewline(tok: *Tokenizer) usize { + if (tok.input[tok.index] == '\r') { + if (tok.index + 1 < tok.input.len and tok.input[tok.index + 1] == '\n') { + tok.index += 2; + return 2; + } + tok.index += 1; + return 1; + } + tok.index += 1; + return 1; + } + fn isWordChar(_: *Tokenizer, ch: u8) bool { + return !std.ascii.isControl(ch) and !std.ascii.isWhitespace(ch) and ch != '{' and ch != '}' and ch != '\\' and ch != '"' and ch != '(' and ch != ')' and ch != ',' and ch != '=' and ch != ':'; + } + + fn isHorizontalWhitespace(_: *Tokenizer, ch: u8) bool { + return ch == ' ' or ch == '\t'; + } + + fn isNewline(_: *Tokenizer, ch: u8) bool { + return ch == '\n' or ch == '\r'; + } }; /// Parses a HyperDoc document. @@ -37,7 +228,10 @@ pub fn parse( _ = plain_text; _ = diagnostics; - @panic("TODO: Implement this"); + return .{ + .arena = arena, + .contents = &[_]Block{}, + }; } /// A diagnostic message. diff --git a/src/main.zig b/src/main.zig index 3cdb76c..6810065 100644 --- a/src/main.zig +++ b/src/main.zig @@ -13,16 +13,27 @@ pub fn main() !u8 { else std.heap.smp_allocator; - // TODO: Parse arguments and load file. 
- const document = - \\hdoc "2.0" - \\ - ; + const args = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, args); - var doc = try hdoc.parse(allocator, document, null); - defer doc.deinit(); + if (args.len < 2) { + const stderr = std.fs.File.stderr().deprecatedWriter(); + try stderr.print("usage: {s} \n", .{args[0]}); + return 1; + } - // TODO: Dump AST + const path = args[1]; + const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); + defer allocator.free(document); + + var tokenizer = hdoc.Tokenizer.init(document); + var stdout = std.fs.File.stdout().deprecatedWriter(); + while (tokenizer.next()) |token| { + try stdout.print("{s} \"{f}\"\n", .{ @tagName(token.tag), std.zig.fmtString(token.slice(document)) }); + if (token.tag == .eof) { + break; + } + } return 0; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 58450b5..ee42e38 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -6,35 +6,102 @@ fn testAcceptDocument(document: []const u8) !void { defer doc.deinit(); } -test "empty document" { - try testAcceptDocument( - \\hdoc "2.0" +const TokenExpect = struct { + tag: hdoc.Token.Tag, + lexeme: []const u8, +}; + +fn expectTokens(input: []const u8, expected: []const TokenExpect) !void { + var tokenizer = hdoc.Tokenizer.init(input); + var index: usize = 0; + while (true) { + const token_opt = tokenizer.next(); + if (token_opt == null) { + break; + } + const token = token_opt.?; + try std.testing.expect(index < expected.len); + try std.testing.expectEqual(expected[index].tag, token.tag); + try std.testing.expectEqualStrings(expected[index].lexeme, token.slice(input)); + index += 1; + if (token.tag == .eof) { + break; + } + } + try std.testing.expectEqual(expected.len, index); + try std.testing.expect(tokenizer.next() == null); +} + +test "tokenizes header line" { + try expectTokens("hdoc \"2.0\"\n", &.{ + .{ .tag = .word, .lexeme = "hdoc" }, + .{ .tag = .string_literal, .lexeme = "\"2.0\"" }, + 
.{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes literal lines" { + try expectTokens("p:\n| code\n |more\n", &.{ + .{ .tag = .word, .lexeme = "p" }, + .{ .tag = .@":", .lexeme = ":" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .literal_line, .lexeme = "| code" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .literal_line, .lexeme = " |more" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes unterminated string" { + try expectTokens("\"oops\n", &.{ + .{ .tag = .unterminated_string_literal, .lexeme = "\"oops" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes word and escapes" { + try expectTokens("{alpha \\{ -dash}", &.{ + .{ .tag = .@"{", .lexeme = "{" }, + .{ .tag = .word, .lexeme = "alpha" }, + .{ .tag = .@"\\", .lexeme = "\\" }, + .{ .tag = .@"{", .lexeme = "{" }, + .{ .tag = .word, .lexeme = "-dash" }, + .{ .tag = .@"}", .lexeme = "}" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes mixed sequences" { + try expectTokens( + "note(id=\"x\"){\n\\em \"hi\", -dash\n}\n", + &.{ + .{ .tag = .word, .lexeme = "note" }, + .{ .tag = .@"(", .lexeme = "(" }, + .{ .tag = .word, .lexeme = "id" }, + .{ .tag = .@"=", .lexeme = "=" }, + .{ .tag = .string_literal, .lexeme = "\"x\"" }, + .{ .tag = .@")", .lexeme = ")" }, + .{ .tag = .@"{", .lexeme = "{" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .@"\\", .lexeme = "\\" }, + .{ .tag = .word, .lexeme = "em" }, + .{ .tag = .string_literal, .lexeme = "\"hi\"" }, + .{ .tag = .@",", .lexeme = "," }, + .{ .tag = .word, .lexeme = "-dash" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .@"}", .lexeme = "}" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }, ); } -test "invalid document" { - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\ - )); - try 
std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc - )); - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc { - )); -} - -test "invalid version" { - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc 2.0 - )); - try std.testing.expectError(error.InvalidVersion, testAcceptDocument( - \\hdoc "" - )); - try std.testing.expectError(error.InvalidVersion, testAcceptDocument( - \\hdoc "1.2" - )); - try std.testing.expectError(error.InvalidVersion, testAcceptDocument( - \\hdoc "1.0" - )); +test "tokenizes invalid characters" { + try expectTokens("\x00", &.{ + .{ .tag = .invalid_character, .lexeme = "\x00" }, + .{ .tag = .eof, .lexeme = "" }, + }); } From f6e67573ebb79a3760835d84a551d37d48d8343b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 23 Dec 2025 09:29:06 +0100 Subject: [PATCH 008/116] Starts to rework specification to have improved definition and setup. --- AGENTS.md | 2 +- docs/specification.md | 186 +++++++++++++++-------------------- examples/demo.hdoc | 97 ++++++++++++++++++ examples/featurematrix.hdoc | 24 ++--- examples/guide.hdoc | 8 +- src/hyperdoc.zig | 191 ------------------------------------ src/main.zig | 9 +- src/testsuite.zig | 100 ------------------- test/parser/stress.hdoc | 74 ++++++++++++++ 9 files changed, 267 insertions(+), 424 deletions(-) create mode 100644 examples/demo.hdoc create mode 100644 test/parser/stress.hdoc diff --git a/AGENTS.md b/AGENTS.md index 05ef7d4..0bb6695 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,5 +8,5 @@ - Ensure new tests are added or updated when behavior changes. - Run relevant tests (`zig build test`) when making code changes. - Run `zig build` to validate the main application still compiles -- Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/`. +- Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. 
- Avoid editing documentation unless the request explicitly asks for it. diff --git a/docs/specification.md b/docs/specification.md index 9802700..996c00a 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -7,108 +7,25 @@ It sits somewhat between LaTeX and Markdown and tries to be way simpler to parse ## Syntax Overview ```hdoc -hdoc "2.0" +hdoc(version="2.0"); -h1{HyperDoc 2.0} - -toc{} - -h2{Paragraphs} - -p { This is a simple paragraph containing text. } - -p(id="foo") { - This is a paragraph with an attribute "id" with the value "foo". -} +h1 "Introduction" p { - This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. - Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). - We can also \link(ref="foo"){link to other parts of a document} or \link(url="https://ashet.computer"){to websites}. - With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. + This is my first HyperDoc 2.0 document! } -h2{Special Paragraphs} - -note { HyperDoc 2.0 also supports different types of paragraphs. } -warning { These should affect rendering, and have well-defined semantics attached to them. } -danger { You shall not assume any specific formatting of these elements though. } -tip { They typically have a standardized style though. } -quote { You shall not pass! } -spoiler { Nobody expects the Spanish Inquisition! } - -h2{Literals and Preformatted Text} - -p: -| we can also use literal lines. -| these are introduced by a trailing colon (':') at the end of a line. -| each following line that starts with whitespace followed by a pipe character ('|') -| is then part of the contents. -| Literal lines don't perform any parsing, so they don't require any escaping of characters. 
-| This is really useful for code blocks: - pre(syntax="c"): | #include -| int main(int argc, char const * argv[]) { -| printf("Hello, World!\n"); +| int main(int argc, char *argv[]) { +| printf("Hello, World!"); | return 0; | } - -h2{String Literals} - -p "It's also possible to use a string literal for bodies if desired." - -p { \em "Magic" is a simple way to highlight single words or text with escaping in inlines. } - -h2{Images & Figures} - -p { We can also add images to our documents: } - -img(id="fig1", path="./preview.jpeg") { If this is non-empty, it's a figure caption. } - -h2{Lists} - -p { Also lists are possible: } - -h3{Unordered Lists} - -ul { - li { p { Apples } } - li { p { Bananas } } - li { p { Cucumbers } } -} - -h3{Ordered Lists} - -ol { - li { p { Collect underpants } } - li { p { ? } } - li { p { Profit } } -} - -h2{Tables} - -p { And last, but not least, we can have tables: } - -table { - columns { - td "Key" - td "Value" - } - row { - td "Author" - td { Felix "xq" Queißner } - } - row { - td "Date of Invention" - td { \date{2025-12-17} } - } -} ``` ## Grammar -This grammar describes the text format +This grammar describes the hypertext format. Short notes on grammar notation: @@ -121,13 +38,13 @@ Short notes on grammar notation: - Whitespace is assumed to be ignored between tokens unless matched by a literal or regex, so tokens are typically separated by whitespace - Upper case elements are roughly tokens, while lowercase elements are rules. 
-``` -document := HEADER { block } +```ebnf +document := { block } block := WORD [ attribute_list ] body -body := list | literal | STRING -literal := ":" "\n" { LITERAL_LINE } +body := ";" | list | verbatim | STRING +verbatim := ":" "\n" { VERBATIM_LINE } list := "{" { escape | inline | block | WORD } "}" escape := "\\" | "\{" | "\}" @@ -136,9 +53,8 @@ inline := "\" WORD [ attribute_list ] body attribute_list := "(" [ attribute { "," attribute } ] ")" attribute := WORD "=" STRING -HEADER := /^hdoc\s+"2.0"\s*$/ STRING := /"(\\.|[^"\r\n])*"/ -LITERAL_LINE := /^\s*\|(.*)$/ +VERBATIM_LINE := /^\s*\|(.*)$/ WORD := /[^\s\{\}\\\"(),=:]+/ ``` @@ -146,6 +62,60 @@ WORD := /[^\s\{\}\\\"(),=:]+/ **NOTE:** All attribute values are strings, so numeric-looking values are still expressed as strings (e.g. `depth="1"`). +## Element Overview + +| Element | Element Type | Allowed Children | Attributes | +| ----------------------------------------------------------- | ------------ | ---------------------------- | ------------------------------------ | +| *Document* | Document | `hdoc`, Blocks | | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author` | +| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | +| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | +| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | +| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | +| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | +| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | +| `toc` | Block | - | `lang`, \[`id`\], `depth` | +| `table` | Block | Table Rows | `lang`, \[`id`\] | +| `columns` | Table Row | `td` ≥ 1 | `lang` | +| `group` | Table Row | Text Body | `lang`, | +| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `\em` | Text Body | Text Body | `lang` | +| `\mono` | Text Body | 
Text Body | `lang`, `syntax` |
+| `\strike` | Text Body | Text Body | `lang` |
+| `\sub`, `\sup` | Text Body | Text Body | `lang` |
+| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) |
+| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` |
+| *Plain Text* | Text Body | - | |
+| *String* | Text Body | - | |
+| *Verbatim* | Text Body | - | |
+
+Notes:
+
+- The attribute `id` is only allowed when the element is a top-level element (direct child of the document)
+- The attributes `ref` and `uri` on a `\link` are mutually exclusive
+- `\date`, `\time` and `\datetime` cannot contain other text body items except for plain text, string or verbatim content.
+
+## Attribute Overview
+
+| Attribute | Required | Allowed Values | Description |
+| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- |
+| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. |
+| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the element's contents. |
+| `title` | No | *Any* | Sets the title of the document or the table row. |
+| `author` | No | *Any* | Sets the author of the document. |
+| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. |
+| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. |
+| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. |
+| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. |
+| `syntax` | No | *See element documentation* | Hints the syntax highlighter how the element's contents shall be highlighted. |
+| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included.
| +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | | + ## Semantic Structure All elements have these attributes: @@ -198,8 +168,8 @@ The type of the paragraph includes a semantic hint: #### Ordered List `ol` -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------------------------------------------- | +| Attribute | Function | +| --------- | --------------------------------------------------------------------------------------------------------------------------- | | `first` | An integer string that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | ### Figures: `img` @@ -233,8 +203,8 @@ If a pre contains inline elements, these will still be parsed and apply their st **Allowed Items:** *none* -| Attribute | Function | -| --------- | ----------------------------------------------------------------------- | +| Attribute | Function | +| --------- | ------------------------------------------------------------------------------ | | `depth` | String `1`, `2` or `3`. Defines how many levels of headings shall be included. | Renders a table of contents for the current document. @@ -300,8 +270,8 @@ A *row group* is a row that contains a single heading-style cell that labels the **Allowed Items:** Block Elements *or* String Content. -| Attribute | Function | -| --------- | -------------------------------------------------- | +| Attribute | Function | +| --------- | --------------------------------------------------------- | | `colspan` | Integer string defining how many columns this cell spans. 
| This element contains the contents of a table cell. @@ -354,8 +324,8 @@ Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to | Attribute | Function | | --------- | -------------------------------------------------------------------------------------------------------- | -| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `url`. | -| `url` | Points the link to the resource inside the `url`. Mutually exclusive to `ref`. | +| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `uri`. | +| `uri` | Points the link to the resource inside the `uri`. Mutually exclusive to `ref`. | Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. @@ -363,10 +333,10 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically **Nesting:** No -| Element | Attribute | Function | -| ----------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | -| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | -| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | +| Element | Attribute | Function | +| ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | +| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | +| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | +| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). 
| Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. diff --git a/examples/demo.hdoc b/examples/demo.hdoc new file mode 100644 index 0000000..a092e91 --- /dev/null +++ b/examples/demo.hdoc @@ -0,0 +1,97 @@ +hdoc(version="2.0", lang="en", title="HyperDoc \"2.0\" Demonstration"); + +h1{HyperDoc 2.0} + +toc; + +h2{Paragraphs} + +p { This is a simple paragraph containing text. } + +p(id="foo") { + This is a paragraph with an attribute "id" with the value "foo". +} + +p { + This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. + Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). + We can also \link(ref="foo"){link to other parts of a document} or \link(uri="https://ashet.computer"){to websites}. + With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. +} + +h2{Special Paragraphs} + +note { HyperDoc 2.0 also supports different types of paragraphs. } +warning { These should affect rendering, and have well-defined semantics attached to them. } +danger { You shall not assume any specific formatting of these elements though. } +tip { They typically have a standardized style though. } +quote { You shall not pass! } +spoiler { Nobody expects the Spanish Inquisition! } + +h2{Verbatim and Preformatted Text} + +p: +| We can also use verbatim text mode. +| This is introduced by a trailing colon (':') at the end of a line. +| Each following line that starts with whitespace followed by a pipe character ('|') +| is then part of the contents. +| Verbatim text doesn't perform any interpretation of its contents, so no escaping is required. 
+| This is really useful for code blocks: + +pre(syntax="c"): +| #include +| int main(int argc, char const * argv[]) { +| printf("Hello, World!\n"); +| return 0; +| } + +h2{String Literals} + +p "It's also possible to use a string literal for bodies if desired." + +p { \em "Magic" is a simple way to highlight single words or text with escaping in inlines. } + +h2{Images & Figures} + +p { We can also add images to our documents: } + +img(id="fig1", path="./preview.jpeg") { If this is non-empty, it's a figure caption. } + +h2{Lists} + +p { Also lists are possible: } + +h3{Unordered Lists} + +ul { + li { p { Apples } } + li { p { Bananas } } + li { p { Cucumbers } } +} + +h3{Ordered Lists} + +ol { + li { p { Collect underpants } } + li { p { ? } } + li { p { Profit } } +} + +h2{Tables} + +p { And last, but not least, we can have tables: } + +table { + columns { + td "Key" + td "Value" + } + row { + td "Author" + td { p { Felix "xq" Queißner } } + } + row { + td "Date of Invention" + td { p { \date{2025-12-17} } } + } +} diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index c900af3..d72f095 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -1,14 +1,14 @@ -hdoc "2.0" +hdoc(version="2.0") h1 { Small Computer Feature Matrix } table { columns { td "Ashet Home Computer" - td { \link(url="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } - td { \link(url="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } - td { \link(url="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } - td { \link(url="https://www.codycomputer.org/") "Cody Computer" } + td { \link(uri="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } + td { \link(uri="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } + td { \link(uri="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } + td { 
\link(uri="https://www.codycomputer.org/") "Cody Computer" } } row(title="CPU Bus Width") { td "32 bit" @@ -25,11 +25,11 @@ table { td "6502" } row(title="CPU Model") { - td { \link(url="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } - td { \link(url="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } - td { \link(url="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } - td { \link(url="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } - td { \link(url="https://wdc65xx.com/integrated-circuit") "W65C02S" } + td { \link(uri="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } + td { \link(uri="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } + td { \link(uri="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } + td { \link(uri="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } + td { \link(uri="https://wdc65xx.com/integrated-circuit") "W65C02S" } } row(title="CPU Cores") { td "2" @@ -88,11 +88,11 @@ table { td "❌" } row(title="Parallax Propeller") { - td { ✅ (\link(url="https://www.parallax.com/propeller-2"){Propeller 2}) } + td { ✅ (\link(uri="https://www.parallax.com/propeller-2"){Propeller 2}) } td "❌" td "❌" td "❌" - td { ✅ (\link(url="https://www.parallax.com/propeller-1"){Propeller 1}) } + td { ✅ (\link(uri="https://www.parallax.com/propeller-1"){Propeller 1}) } } } diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 82ed458..94c70c4 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -1,4 +1,4 @@ -hdoc "2.0" +hdoc(version="2.0") h1(id="intro", lang="en") { HyperDoc 2.0 Examples } @@ -7,11 +7,11 @@ toc(depth="2") {} h2(id="paragraphs") { Paragraphs and Inline Text } p(id="p-basic") { - This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. 
+ This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. } p(lang="de") { - Dieser Absatz zeigt das Attribut \mono{lang} auf Blockebene. + Dieser Absatz zeigt das Attribut \mono{lang} auf Blockebene. } p "This paragraph uses a string literal body instead of a list." @@ -21,7 +21,7 @@ p { } p { - Links can target \link(ref="fig-diagram"){other blocks} or external \link(url="https://ashet.computer"){resources}. + Links can target \link(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } note { Notes highlight supportive information. } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2a49308..c91ef12 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -22,197 +22,6 @@ pub const Block = union(enum) { placeholder: void, }; -/// A token emitted by the HyperDoc tokenizer. -pub const Token = struct { - pub const Tag = enum { - eof, - word, - string_literal, - unterminated_string_literal, - literal_line, - newline, - @"{", - @"}", - @"(", - @")", - @",", - @"=", - @":", - @"\\", - invalid_character, - }; - - tag: Tag, - offset: usize, - len: usize, - - /// Returns the slice of the original input covered by this token. - pub fn slice(token: Token, input: []const u8) []const u8 { - return input[token.offset .. token.offset + token.len]; - } -}; - -/// Tokenizes HyperDoc source text incrementally. -pub const Tokenizer = struct { - input: []const u8, - index: usize = 0, - line_start: bool = true, - finished: bool = false, - - /// Creates a tokenizer for the provided input. - pub fn init(input: []const u8) Tokenizer { - return .{ .input = input }; - } - - /// Returns the next token, or null after emitting EOF once. 
- pub fn next(tok: *Tokenizer) ?Token { - if (tok.finished) { - return null; - } - - while (tok.index < tok.input.len) { - const start = tok.index; - const ch = tok.input[tok.index]; - - if (tok.line_start) { - const literal = tok.scanLiteralLine(); - if (literal) |token| { - return token; - } - } - - if (tok.isNewline(ch)) { - const consumed = tok.consumeNewline(); - tok.line_start = true; - return .{ .tag = .newline, .offset = start, .len = consumed }; - } - - if (tok.isHorizontalWhitespace(ch)) { - tok.index += 1; - tok.line_start = false; - continue; - } - - tok.line_start = false; - - switch (ch) { - '{' => return tok.simpleToken(.@"{"), - '}' => return tok.simpleToken(.@"}"), - '(' => return tok.simpleToken(.@"("), - ')' => return tok.simpleToken(.@")"), - ',' => return tok.simpleToken(.@","), - '=' => return tok.simpleToken(.@"="), - ':' => return tok.simpleToken(.@":"), - '\\' => return tok.simpleToken(.@"\\"), - '"' => return tok.scanStringLiteral(), - else => {}, - } - - if (tok.isWordChar(ch)) { - return tok.scanWord(); - } - - // Non-obvious fallback: we still emit a token for unknown bytes - // so callers can recover and keep walking the stream. - tok.index += 1; - return .{ .tag = .invalid_character, .offset = start, .len = 1 }; - } - - tok.finished = true; - return .{ .tag = .eof, .offset = tok.input.len, .len = 0 }; - } - - /// Emits a single-character token at the current offset. - fn simpleToken(tok: *Tokenizer, tag: Token.Tag) Token { - const start = tok.index; - tok.index += 1; - return .{ .tag = tag, .offset = start, .len = 1 }; - } - - /// Scans a quoted string or an unterminated string literal. 
- fn scanStringLiteral(tok: *Tokenizer) Token { - const start = tok.index; - tok.index += 1; - while (tok.index < tok.input.len) { - const ch = tok.input[tok.index]; - if (ch == '"') { - tok.index += 1; - return .{ .tag = .string_literal, .offset = start, .len = tok.index - start }; - } - if (tok.isNewline(ch)) { - // We stop before the newline so the next call can emit it. - return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; - } - if (ch == '\\') { - // Escape sequences consume the next byte, even if it is a quote. - if (tok.index + 1 >= tok.input.len) { - tok.index = tok.input.len; - break; - } - tok.index += 2; - continue; - } - tok.index += 1; - } - - return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; - } - - /// Scans a WORD token as defined by the grammar. - fn scanWord(tok: *Tokenizer) Token { - const start = tok.index; - tok.index += 1; - while (tok.index < tok.input.len and tok.isWordChar(tok.input[tok.index])) { - tok.index += 1; - } - return .{ .tag = .word, .offset = start, .len = tok.index - start }; - } - - /// Scans a literal line token if the current position is at a line start. - fn scanLiteralLine(tok: *Tokenizer) ?Token { - const start = tok.index; - var cursor = tok.index; - while (cursor < tok.input.len and tok.isHorizontalWhitespace(tok.input[cursor])) { - cursor += 1; - } - if (cursor >= tok.input.len or tok.input[cursor] != '|') { - return null; - } - cursor += 1; - while (cursor < tok.input.len and !tok.isNewline(tok.input[cursor])) { - cursor += 1; - } - tok.index = cursor; - tok.line_start = false; - return .{ .tag = .literal_line, .offset = start, .len = cursor - start }; - } - - /// Consumes a newline, including CRLF sequences. 
- fn consumeNewline(tok: *Tokenizer) usize { - if (tok.input[tok.index] == '\r') { - if (tok.index + 1 < tok.input.len and tok.input[tok.index + 1] == '\n') { - tok.index += 2; - return 2; - } - tok.index += 1; - return 1; - } - tok.index += 1; - return 1; - } - fn isWordChar(_: *Tokenizer, ch: u8) bool { - return !std.ascii.isControl(ch) and !std.ascii.isWhitespace(ch) and ch != '{' and ch != '}' and ch != '\\' and ch != '"' and ch != '(' and ch != ')' and ch != ',' and ch != '=' and ch != ':'; - } - - fn isHorizontalWhitespace(_: *Tokenizer, ch: u8) bool { - return ch == ' ' or ch == '\t'; - } - - fn isNewline(_: *Tokenizer, ch: u8) bool { - return ch == '\n' or ch == '\r'; - } -}; - /// Parses a HyperDoc document. pub fn parse( allocator: std.mem.Allocator, diff --git a/src/main.zig b/src/main.zig index 6810065..44013de 100644 --- a/src/main.zig +++ b/src/main.zig @@ -26,14 +26,7 @@ pub fn main() !u8 { const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); defer allocator.free(document); - var tokenizer = hdoc.Tokenizer.init(document); - var stdout = std.fs.File.stdout().deprecatedWriter(); - while (tokenizer.next()) |token| { - try stdout.print("{s} \"{f}\"\n", .{ @tagName(token.tag), std.zig.fmtString(token.slice(document)) }); - if (token.tag == .eof) { - break; - } - } + // TODO: Parse document return 0; } diff --git a/src/testsuite.zig b/src/testsuite.zig index ee42e38..961cef5 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -5,103 +5,3 @@ fn testAcceptDocument(document: []const u8) !void { var doc = try hdoc.parse(std.testing.allocator, document, null); defer doc.deinit(); } - -const TokenExpect = struct { - tag: hdoc.Token.Tag, - lexeme: []const u8, -}; - -fn expectTokens(input: []const u8, expected: []const TokenExpect) !void { - var tokenizer = hdoc.Tokenizer.init(input); - var index: usize = 0; - while (true) { - const token_opt = tokenizer.next(); - if (token_opt == null) { - break; - } - const token = 
token_opt.?; - try std.testing.expect(index < expected.len); - try std.testing.expectEqual(expected[index].tag, token.tag); - try std.testing.expectEqualStrings(expected[index].lexeme, token.slice(input)); - index += 1; - if (token.tag == .eof) { - break; - } - } - try std.testing.expectEqual(expected.len, index); - try std.testing.expect(tokenizer.next() == null); -} - -test "tokenizes header line" { - try expectTokens("hdoc \"2.0\"\n", &.{ - .{ .tag = .word, .lexeme = "hdoc" }, - .{ .tag = .string_literal, .lexeme = "\"2.0\"" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes literal lines" { - try expectTokens("p:\n| code\n |more\n", &.{ - .{ .tag = .word, .lexeme = "p" }, - .{ .tag = .@":", .lexeme = ":" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .literal_line, .lexeme = "| code" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .literal_line, .lexeme = " |more" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes unterminated string" { - try expectTokens("\"oops\n", &.{ - .{ .tag = .unterminated_string_literal, .lexeme = "\"oops" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes word and escapes" { - try expectTokens("{alpha \\{ -dash}", &.{ - .{ .tag = .@"{", .lexeme = "{" }, - .{ .tag = .word, .lexeme = "alpha" }, - .{ .tag = .@"\\", .lexeme = "\\" }, - .{ .tag = .@"{", .lexeme = "{" }, - .{ .tag = .word, .lexeme = "-dash" }, - .{ .tag = .@"}", .lexeme = "}" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes mixed sequences" { - try expectTokens( - "note(id=\"x\"){\n\\em \"hi\", -dash\n}\n", - &.{ - .{ .tag = .word, .lexeme = "note" }, - .{ .tag = .@"(", .lexeme = "(" }, - .{ .tag = .word, .lexeme = "id" }, - .{ .tag = .@"=", .lexeme = "=" }, - .{ .tag = .string_literal, .lexeme = "\"x\"" }, - .{ .tag = .@")", .lexeme = ")" }, - .{ .tag = .@"{", .lexeme = "{" }, - .{ 
.tag = .newline, .lexeme = "\n" }, - .{ .tag = .@"\\", .lexeme = "\\" }, - .{ .tag = .word, .lexeme = "em" }, - .{ .tag = .string_literal, .lexeme = "\"hi\"" }, - .{ .tag = .@",", .lexeme = "," }, - .{ .tag = .word, .lexeme = "-dash" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .@"}", .lexeme = "}" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }, - ); -} - -test "tokenizes invalid characters" { - try expectTokens("\x00", &.{ - .{ .tag = .invalid_character, .lexeme = "\x00" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} diff --git a/test/parser/stress.hdoc b/test/parser/stress.hdoc new file mode 100644 index 0000000..aca067a --- /dev/null +++ b/test/parser/stress.hdoc @@ -0,0 +1,74 @@ +hdoc(version="2.0") + +p { + On Monday at 09:07, the support desk logged a ticket titled "Login loop (again)". The user wrote, "I click + 'Sign in', the page flashes, and I'm back where I started." Someone replied, "That sounds like a cookie + issue—clear your cache," but the customer insisted they’d already tried: "Chrome, Safari, even a private window." + The message ended with an oddly specific note: "It only happens when the Wi-Fi name is 'Office-Guest'." Nobody + knew whether to laugh or worry. +} + +p { + The product manager’s summary was short but loaded: "We shipped the hotfix; we didn't fix the root + cause." In the same breath, she added, "Don't roll back unless you absolutely have to—it's worse." Later, + in a longer thread, she used quotes inside quotes: "When QA says 'it’s fine', what they mean is 'it hasn’t + exploded yet'." The tone wasn’t cruel, just tired, and the timestamps (11:58, 12:01, 12:03) made it feel like + a miniature drama. +} + +p { + In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. 
They listed + steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained + parentheses): "Use the staging key (not the production key (seriously))". A different person pasted a pseudo-path, + /var/tmp/builds/2025-12-23/, and then warned, "If you see `NULL` in the output, don't 'fix' it by replacing it with + '0'—that's how we broke reporting last time." +} + +p { + When the vendor called, they insisted everything was "within spec"; our engineer disagreed. "Within spec" can mean + two opposite things, she said: either the spec is strict, or the spec is meaningless. She pulled up a screenshot and + quoted the line: "Error: expected ']' but found '\}'". Then she joked, "At least it’s honest," and forwarded the log + snippet with a subject line that read, "Re: Re: RE: Please confirm ASAP!!!" (three exclamation marks included, unfortunately). +} + +p { + The draft contract read like a puzzle: "Client shall provide 'reasonable access' to systems," while another clause said, + "Provider may request access at any time." Someone circled the phrase "reasonable access" and wrote, "Reasonable for whom?" + A lawyer suggested adding: "…as mutually agreed in writing," but the team worried that "in writing" would exclude Slack, email, + and tickets—so they proposed: "…in writing (including electronic messages)". Even that sparked debate: does "electronic messages" + include chat reactions, like 👍 or ✅? +} + +p { + A teammate tried to reproduce the bug and wrote a narrative that sounded like a short story: "I opened the dashboard, clicked + 'Reports', then typed 'Q4' into the search field." The UI responded with "No results found" even though the sidebar clearly showed "Q4 + Forecast". The odd part: if you type "Q4 " (with a trailing space), results appear. He ended the note with, "Yes, I know that sounds + fake," and added, "But watch: 'Q4' ≠ 'Q4 '." It’s the sort of thing parsers and humans both hate. 
+} + +p { + The incident timeline included times in different formats—09:15 CET, 08:15 UTC, and "around 8-ish"—which made the postmortem messy. + One line said, "Database CPU hit 92% (again)," another said, "CPU was fine; it was I/O." Someone pasted a link: + \link(uri="https://example.com/status?from=2025-12-23T08:00:00Z&to=2025-12-23T10:00:00Z"){https://example.com/status?from=2025-12-23T08:00:00Z&to=2025-12-23T10:00:00Z} and then asked, "Why does + the graph show a dip at 08:37?" The answer was "maintenance," but the maintenance note was filed under "misc"—not "maintenance." +} + +p { + In the customer’s feedback form, the message was polite but pointed: "Your app is great—until it isn't." They described a checkout flow where the + total briefly showed €19.99, then flipped to €1,999.00, then back again. "I know it's probably formatting," they wrote, "but seeing that number + made me think I'd been scammed." Another sentence used both quote styles: "The label says 'Total', but the tooltip says "Estimated total"." The + difference matters when people are anxious. +} + +p { + A developer left a comment in the code review: "This is readable, but it's not maintainable." When asked what that meant, he replied, "Readable + means I can understand it today; maintainable means I can change it tomorrow without breaking it." He pointed at a line that looked harmless — \mono(syntax="c"){if (a < b && c > d) return;} — and said, "It encodes policy with no explanation." Then, in the same comment, he used + markdown-like fragments that shouldn’t be parsed as such: *not emphasis*, *not italics*, and [not a link](just text). +} + +p { + Before the release, the checklist included items that were half instruction, half superstition: "Update changelog; tag release; don't forget the + 'README' typo." Someone wrote "DONE" in all caps, then later edited it to "done" because the automation treats "DONE" as a keyword. 
Another item + read: "Confirm the 'rollback plan' exists (even if we never use it)." The final note—"If anything feels off, stop"—was simple, but it carried the + weight of every prior incident, every "it’s fine," every quiet "…maybe not." +} From f5c13eaadb43cd6750b9505c0f93e5999170ed39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 23 Dec 2025 11:31:44 +0100 Subject: [PATCH 009/116] Implements new parser for HyperDoc 2.0 --- examples/demo.hdoc | 2 +- examples/featurematrix.hdoc | 10 +- examples/guide.hdoc | 2 +- examples/tables.hdoc | 2 +- src/hyperdoc.zig | 666 +++++++++++++++++++++++++++++++++++- src/main.zig | 3 +- test/parser/stress.hdoc | 2 +- 7 files changed, 672 insertions(+), 15 deletions(-) diff --git a/examples/demo.hdoc b/examples/demo.hdoc index a092e91..68ef189 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0", lang="en", title="HyperDoc \"2.0\" Demonstration"); +hdoc(version="2.0" , lang="en", title="HyperDoc \"2.0\" Demonstration"); h1{HyperDoc 2.0} diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index d72f095..3600e02 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0") +hdoc(version="2.0"); h1 { Small Computer Feature Matrix } @@ -63,7 +63,7 @@ table { td "✅" td "✅" td "❌" - td { ❌\sup{1} } + td { p { ❌\sup{1} } } td "❌" } row(title="Modular Design") { @@ -71,7 +71,7 @@ table { td "❌" td "❌" td "✅" - td { ✅\sup{2} } + td { p { ✅\sup{2} } } } row(title="Full Documentation") { td "✅" @@ -88,11 +88,11 @@ table { td "❌" } row(title="Parallax Propeller") { - td { ✅ (\link(uri="https://www.parallax.com/propeller-2"){Propeller 2}) } + td { p { ✅ (\link(uri="https://www.parallax.com/propeller-2"){Propeller 2}) } } td "❌" td "❌" td "❌" - td { ✅ (\link(uri="https://www.parallax.com/propeller-1"){Propeller 1}) } + td { p { ✅ (\link(uri="https://www.parallax.com/propeller-1"){Propeller 1}) } } } 
} diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 94c70c4..50f7b64 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0") +hdoc(version="2.0"); h1(id="intro", lang="en") { HyperDoc 2.0 Examples } diff --git a/examples/tables.hdoc b/examples/tables.hdoc index 5adb144..28f73f9 100644 --- a/examples/tables.hdoc +++ b/examples/tables.hdoc @@ -1,4 +1,4 @@ -hdoc "2.0" +hdoc(version="2.0"); h1(id="tables") { HyperDoc 2.0 Table Examples } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c91ef12..5bf47e0 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -19,7 +19,13 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. pub const Block = union(enum) { - placeholder: void, + header: Header, + + pub const Header = struct { + title: ?[]const u8, + author: ?[]const u8, + date: ?[]const u8, + }; }; /// Parses a HyperDoc document. @@ -30,19 +36,665 @@ pub fn parse( /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) !Document { +) error{ OutOfMemory, SyntaxError }!Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); - _ = plain_text; - _ = diagnostics; + var parser: Parser = .{ + .code = plain_text, + .arena = arena.allocator(), + .diagnostics = diagnostics, + }; + + var sema: SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = diagnostics, + }; + + var blocks: std.ArrayList(Block) = .empty; + + while (true) { + errdefer |err| { + std.log.debug("error at examples/demo.hdoc:{f}: {t}", .{ + parser.make_diagnostic_location(parser.offset), + err, + }); + } + + const node = parser.accept_node(.top_level) catch |err| switch (err) { + error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), // TODO: What the fuck? Bug report! 
+ + error.EndOfFile => break, + + error.UnexpectedEndOfFile, + error.InvalidCharacter, + error.UnexpectedCharacter, + error.UnterminatedStringLiteral, + error.UnterminatedList, + => return error.SyntaxError, + }; + + const block = sema.translate_toplevel_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), + + error.InvalidNodeType => continue, + }; + + try blocks.append(arena.allocator(), block); + } return .{ .arena = arena, - .contents = &[_]Block{}, + .contents = try blocks.toOwnedSlice(arena.allocator()), }; } +pub const SemanticAnalyzer = struct { + arena: std.mem.Allocator, + diagnostics: ?*Diagnostics, + + fn translate_toplevel_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType }!Block { + _ = sema; + switch (node.type) { + else => { + return error.InvalidNodeType; + }, + } + } +}; + +pub const Parser = struct { + code: []const u8, + offset: usize = 0, + + arena: std.mem.Allocator, + diagnostics: ?*Diagnostics, + + pub const ScopeType = enum { top_level, nested }; + + pub fn accept_node(parser: *Parser, comptime scope_type: ScopeType) !Node { + const type_ident = parser.accept_identifier() catch |err| switch (err) { + error.UnexpectedEndOfFile => |e| switch (scope_type) { + .nested => return e, + .top_level => return error.EndOfFile, + }, + else => |e| return e, + }; + const node_type: NodeType = if (std.meta.stringToEnum(NodeType, type_ident.text)) |node_type| + node_type + else if (std.mem.startsWith(u8, type_ident.text, "\\")) + .unknown_inline + else + .unknown_block; + + var attributes: std.StringArrayHashMapUnmanaged(Attribute) = .empty; + errdefer attributes.deinit(parser.arena); + + if (parser.try_accept_char('(')) { + if (!parser.try_accept_char(')')) { + // We 're not at the end of the attribute list, + // so we know that the next token must be the attribute name. 
+ + while (true) { + const start = parser.offset; + const attr_name = try parser.accept_identifier(); + _ = try parser.accept_char('='); + const attr_value = try parser.accept_string(); + const attr_location = parser.location(start, parser.offset); + + const gop_entry = try attributes.getOrPut(parser.arena, attr_name.text); + if (gop_entry.found_existing) { + // TODO: Emit diagnostic + } + gop_entry.value_ptr.* = .{ + .location = attr_location, + .value = try parser.unescape_string(attr_value), + }; + + if (!parser.try_accept_char(',')) { + break; + } + } + try parser.accept_char(')'); + } + } + + if (parser.try_accept_char(';')) { + // block has empty content + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .empty, + }; + } + + if (parser.try_accept_char(':')) { + // block has verbatim content + + var lines: std.ArrayList(Token) = .empty; + + while (try parser.try_accept_verbatim_line()) |line| { + try lines.append(parser.arena, line); + } + + if (lines.items.len == 0) { + // TODO: Emit diagnostic about verbatim block with no lines + } + + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .{ .verbatim = try lines.toOwnedSlice(parser.arena) }, + }; + } + + if (try parser.try_accept_string()) |string_body| { + // block has string content + + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .{ .string = string_body }, + }; + } + + var children = if (node_type.has_inline_body()) + try parser.accept_inline_node_list() + else + try parser.accept_block_node_list(); + + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .{ .list = try children.toOwnedSlice(parser.arena) }, + }; + } + + pub fn accept_block_node_list(parser: *Parser) error{ 
+ OutOfMemory, + InvalidCharacter, + UnterminatedStringLiteral, + UnexpectedEndOfFile, + UnterminatedList, + UnexpectedCharacter, + }!std.ArrayList(Node) { + var children: std.ArrayList(Node) = .empty; + errdefer children.deinit(parser.arena); + + try parser.accept_char('{'); + + while (true) { + parser.skip_whitespace(); + + if (parser.try_accept_char('}')) + break; + + const child = try parser.accept_node(.nested); + try children.append(parser.arena, child); + } + + return children; + } + + pub fn accept_inline_node_list(parser: *Parser) error{ + OutOfMemory, + InvalidCharacter, + UnterminatedStringLiteral, + UnexpectedEndOfFile, + UnterminatedList, + UnexpectedCharacter, + }!std.ArrayList(Node) { + var children: std.ArrayList(Node) = .empty; + errdefer children.deinit(parser.arena); + + try parser.accept_char('{'); + + var nesting: usize = 0; + + while (true) { + parser.skip_whitespace(); + + const head = parser.peek_char() orelse { + // TODO: Emit diagnostic + return error.UnterminatedList; + }; + + switch (head) { + '{' => { + nesting += 1; + parser.offset += 1; + }, + + '}' => { + parser.offset += 1; + + if (nesting == 0) + break; + + nesting -= 1; + }, + + '\\' => backslash: { + if (parser.offset < parser.code.len - 1) { + const next_char = parser.code[parser.offset + 1]; + switch (next_char) { + '{', '}', '\\' => { + // Escaped brace + parser.offset += 2; + break :backslash; + }, + else => {}, + } + } + + const child = try parser.accept_node(.nested); + + // This will only be a non-inline node if we have a bug. 
+ std.debug.assert(child.type.is_inline()); + + try children.append(parser.arena, child); + }, + + else => { + const word = try parser.accept_word(); + try children.append(parser.arena, .{ + .location = word.position, + .type = .text, + .attributes = .empty, + .body = .empty, + }); + }, + } + } + + return children; + } + + pub fn try_accept_verbatim_line(parser: *Parser) !?Token { + parser.skip_whitespace(); + + const head = parser.offset; + if (!parser.try_accept_char('|')) { + return null; + } + + while (!parser.at_end()) { + const c = parser.code[parser.offset]; + if (c == '\n') { + break; + } + + // we don't consume the LF character, as each verbatim line should be prefixed with exactly a single LF character + parser.offset += 1; + } + if (parser.at_end()) { + // TODO: Emit diagnostic about verbatim lines should have an empty line feed at the end of the file. + } + + const token = parser.slice(head, parser.offset); + std.debug.assert(std.mem.startsWith(u8, token.text, "|")); + return token; + } + + pub fn peek_char(parser: *Parser) ?u8 { + if (parser.at_end()) + return null; + return parser.code[parser.offset]; + } + + pub fn accept_char(parser: *Parser, expected: u8) error{ UnexpectedEndOfFile, UnexpectedCharacter }!void { + if (parser.try_accept_char(expected)) + return; + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + return error.UnexpectedCharacter; + } + + pub fn try_accept_char(parser: *Parser, expected: u8) bool { + std.debug.assert(!is_space(expected)); + parser.skip_whitespace(); + + if (parser.at_end()) + return false; + + if (parser.code[parser.offset] != expected) + return false; + + parser.offset += 1; + return true; + } + + pub fn try_accept_string(parser: *Parser) !?Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return null; + + if (parser.code[parser.offset] != '"') + return null; + + return try parser.accept_string(); + } + + pub fn accept_string(parser: *Parser) error{ OutOfMemory, UnexpectedEndOfFile, 
UnexpectedCharacter, UnterminatedStringLiteral }!Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + const start = parser.offset; + if (parser.code[start] != '"') + return error.UnexpectedCharacter; + + parser.offset += 1; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + parser.offset += 1; + + switch (c) { + '"' => return parser.slice(start, parser.offset), + + '\\' => { + // Escape sequence + if (parser.at_end()) + return error.UnterminatedStringLiteral; + + const escaped = parser.code[parser.offset]; + parser.offset += 1; + + switch (escaped) { + '\n', '\r' => return error.UnterminatedStringLiteral, + else => {}, + } + }, + + else => {}, + } + } + + return error.UnterminatedStringLiteral; + } + + pub fn accept_identifier(parser: *Parser) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + const start = parser.offset; + const first = parser.code[start]; + if (!is_ident_char(first)) + return error.InvalidCharacter; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (!is_ident_char(c)) + break; + parser.offset += 1; + } + + return parser.slice(start, parser.offset); + } + + /// Accepts a word token (a sequence of non-whitespace characters). + pub fn accept_word(parser: *Parser) error{UnexpectedEndOfFile}!Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + const start = parser.offset; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (is_space(c)) + break; + switch (c) { + // These are word-terminating characters: + '{', '}', '\\' => break, + else => {}, + } + parser.offset += 1; + } + + return parser.slice(start, parser.offset); + } + + /// Skips forward until the first non-whitespace character. 
+ pub fn skip_whitespace(parser: *Parser) void {
+ while (!parser.at_end()) {
+ const c = parser.code[parser.offset];
+ if (!is_space(c)) {
+ break;
+ }
+ parser.offset += 1;
+ }
+ }
+
+ pub fn at_end(parser: *Parser) bool {
+ return parser.offset >= parser.code.len;
+ }
+
+ /// Returns the contents of a string literal token with the surrounding quotes stripped; escape sequences are not decoded yet.
+ pub fn unescape_string(parser: *Parser, token: Token) error{OutOfMemory}![]const u8 {
+ std.debug.assert(token.text.len >= 2);
+ std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"');
+
+ _ = parser;
+ // TODO: Implement unescaping logic here.
+
+ // For now, we just return the raw text.
+ return token.text[1 .. token.text.len - 1];
+ }
+
+ pub fn location(parser: *Parser, start: usize, end: ?usize) Location {
+ return .{ .offset = start, .length = (end orelse parser.offset) - start };
+ }
+
+ pub fn slice(parser: *Parser, start: usize, end: usize) Token {
+ return .{
+ .text = parser.code[start..end],
+ .position = .{ .offset = start, .length = end - start },
+ };
+ }
+
+ pub fn make_diagnostic_location(parser: Parser, offset: usize) Diagnostic.Location {
+ var line: u32 = 1;
+ var column: u32 = 1;
+
+ var i: usize = 0;
+ while (i < offset and i < parser.code.len) : (i += 1) {
+ if (parser.code[i] == '\n') {
+ line += 1;
+ column = 1;
+ } else {
+ column += 1;
+ }
+ }
+
+ return .{ .line = line, .column = column };
+ }
+
+ pub fn is_space(c: u8) bool {
+ return switch (c) {
+ ' ', '\t', '\n', '\r' => true,
+ else => false,
+ };
+ }
+
+ pub fn is_ident_char(c: u8) bool {
+ return switch (c) {
+ 'a'...'z', 'A'...'Z', '0'...'9', '_', '\\' => true,
+ else => false,
+ };
+ }
+
+ pub const Token = struct {
+ text: []const u8,
+ position: Location,
+ };
+
+ pub const Location = struct {
+ offset: usize,
+ length: usize,
+ };
+
+ pub const NodeType = enum {
+ hdoc,
+ h1,
+ h2,
+ h3,
+ p,
+ note,
+ warning,
+ danger,
+ tip,
+ quote,
+ spoiler,
+ ul,
+ ol,
+ img,
+ pre,
+ toc,
+ table,
+ columns,
+ group,
+ row,
+ td, + li, + + text, + @"\\em", + @"\\mono", + @"\\strike", + @"\\sub", + @"\\sup", + @"\\link", + @"\\date", + @"\\time", + @"\\datetime", + + unknown_block, + unknown_inline, + + pub fn is_inline(node_type: NodeType) bool { + return switch (node_type) { + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", + .@"\\date", + .@"\\time", + .@"\\datetime", + .unknown_inline, + .text, + => true, + + .hdoc, + .h1, + .h2, + .h3, + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + .ul, + .ol, + .img, + .pre, + .toc, + .table, + .columns, + .group, + .row, + .td, + .li, + .unknown_block, + => false, + }; + } + + pub fn has_inline_body(node_type: NodeType) bool { + return switch (node_type) { + .h1, + .h2, + .h3, + + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + + .img, + .pre, + .toc, + .group, + + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", + .@"\\date", + .@"\\time", + .@"\\datetime", + + .unknown_inline, + => true, + + .hdoc, + .ul, + .ol, + .table, + .columns, + .row, + .td, + .li, + + .text, + .unknown_block, + => false, + }; + } + }; + + pub const Node = struct { + location: Location, + type: NodeType, + attributes: std.StringArrayHashMapUnmanaged(Attribute), + + body: Body, + + pub const Body = union(enum) { + empty, + string: Token, + verbatim: []Token, + list: []Node, + }; + }; + + pub const Attribute = struct { + location: Location, + value: []const u8, + }; +}; + /// A diagnostic message. pub const Diagnostic = struct { pub const Severity = enum { warning, @"error" }; @@ -50,6 +702,10 @@ pub const Diagnostic = struct { pub const Location = struct { line: u32, column: u32, + + pub fn format(loc: Location, w: *std.Io.Writer) !void { + try w.print("{d}:{d}", .{ loc.line, loc.column }); + } }; /// An diagnostic code encoded as a 16 bit integer. 
diff --git a/src/main.zig b/src/main.zig index 44013de..32e30a0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -26,7 +26,8 @@ pub fn main() !u8 { const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); defer allocator.free(document); - // TODO: Parse document + var parsed = try hdoc.parse(allocator, document, null); + defer parsed.deinit(); return 0; } diff --git a/test/parser/stress.hdoc b/test/parser/stress.hdoc index aca067a..bec3b0f 100644 --- a/test/parser/stress.hdoc +++ b/test/parser/stress.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0") +hdoc(version="2.0"); p { On Monday at 09:07, the support desk logged a ticket titled "Login loop (again)". The user wrote, "I click From 0bd2f1ae884ece34510b062b0d81753f4e432955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 23 Dec 2025 12:01:48 +0100 Subject: [PATCH 010/116] Adds functions for test suite, adds fuzzer code --- build.zig | 16 ++- src/hyperdoc.zig | 30 +++++- src/testsuite.zig | 265 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 308 insertions(+), 3 deletions(-) diff --git a/build.zig b/build.zig index 0c845f2..4c71fd2 100644 --- a/build.zig +++ b/build.zig @@ -41,9 +41,23 @@ pub fn build(b: *std.Build) void { .target = target, .optimize = optimize, .imports = &.{ - .{ .name = "hyperdoc", .module = hyperdoc }, + rawFileMod(b, "examples/tables.hdoc"), + rawFileMod(b, "examples/featurematrix.hdoc"), + rawFileMod(b, "examples/demo.hdoc"), + rawFileMod(b, "examples/guide.hdoc"), + rawFileMod(b, "test/parser/stress.hdoc"), }, }), + .use_llvm = true, }); test_step.dependOn(&b.addRunArtifact(exe_tests).step); } + +fn rawFileMod(b: *std.Build, path: []const u8) std.Build.Module.Import { + return .{ + .name = path, + .module = b.createModule(.{ + .root_source_file = b.path(path), + }), + }; +} diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 5bf47e0..b55dbf2 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -743,7 +743,7 @@ pub 
const Diagnostics = struct { arena: std.heap.ArenaAllocator, items: std.ArrayList(Diagnostic) = .empty, - pub fn init(allocator: std.mem.Allocator) Diagnostic { + pub fn init(allocator: std.mem.Allocator) Diagnostics { return .{ .arena = .init(allocator) }; } @@ -765,3 +765,31 @@ pub const Diagnostics = struct { }); } }; + +test "fuzz parser" { + const Impl = struct { + fn testOne(impl: @This(), data: []const u8) !void { + _ = impl; + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = parse(std.testing.allocator, data, &diagnostics) catch return; + defer doc.deinit(); + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + "hdoc(version=\"2.0\");", + @embedFile("examples/tables.hdoc"), + @embedFile("examples/featurematrix.hdoc"), + @embedFile("examples/demo.hdoc"), + @embedFile("examples/guide.hdoc"), + @embedFile("test/parser/stress.hdoc"), + }, + }); +} diff --git a/src/testsuite.zig b/src/testsuite.zig index 961cef5..052e95f 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -1,7 +1,270 @@ const std = @import("std"); -const hdoc = @import("hyperdoc"); +const hdoc = @import("./hyperdoc.zig"); fn testAcceptDocument(document: []const u8) !void { var doc = try hdoc.parse(std.testing.allocator, document, null); defer doc.deinit(); } + +fn parseFile(path: []const u8) !void { + const source = try std.fs.cwd().readFileAlloc(std.testing.allocator, path, 10 * 1024 * 1024); + defer std.testing.allocator.free(source); + try testAcceptDocument(source); +} + +fn parseDirectoryTree(path: []const u8) !void { + var dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); + defer dir.close(); + + var walker = try dir.walk(std.testing.allocator); + defer walker.deinit(); + + while (try walker.next()) |entry| { + if (entry.kind != .file) + continue; + if (!std.mem.endsWith(u8, entry.path, ".hdoc")) + 
continue; + + const full_path = try std.fs.path.join(std.testing.allocator, &.{ path, entry.path }); + defer std.testing.allocator.free(full_path); + + try parseFile(full_path); + } +} + +test "parser accepts examples and test documents" { + try parseDirectoryTree("examples"); + try parseDirectoryTree("test"); +} + +test "parser accept identifier and word tokens" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "h1 word\\em{test}", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const ident = try parser.accept_identifier(); + try std.testing.expectEqualStrings("h1", ident.text); + try std.testing.expectEqual(@as(usize, 0), ident.position.offset); + try std.testing.expectEqual(@as(usize, 2), ident.position.length); + + const word = try parser.accept_word(); + try std.testing.expectEqualStrings("word", word.text); + try std.testing.expectEqual(@as(usize, 3), word.position.offset); + try std.testing.expectEqual(@as(usize, 4), word.position.length); + try std.testing.expectEqual(@as(usize, 7), parser.offset); +} + +test "parser rejects identifiers with invalid start characters" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "-abc", + .arena = arena.allocator(), + .diagnostics = null, + }; + + try std.testing.expectError(error.InvalidCharacter, parser.accept_identifier()); +} + +test "parser accept string literals and unescape" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "\"hello\\\\n\"", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const token = try parser.accept_string(); + try std.testing.expectEqualStrings("\"hello\\\\n\"", token.text); +} + +test "parser reports unterminated string literals" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer 
arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "\"unterminated\n", + .arena = arena.allocator(), + .diagnostics = null, + }; + + try std.testing.expectError(error.UnterminatedStringLiteral, parser.accept_string()); +} + +test "parser handles attributes and empty bodies" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "h1(title=\"Hello\",author=\"World\");", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.h1, node.type); + try std.testing.expectEqual(@as(usize, 2), node.attributes.count()); + + const title = node.attributes.get("title") orelse return error.TestExpectedEqual; + try std.testing.expectEqualStrings("Hello", title.value); + + const author = node.attributes.get("author") orelse return error.TestExpectedEqual; + try std.testing.expectEqualStrings("World", author.value); + + try std.testing.expect(node.body == .empty); +} + +test "parser handles string bodies" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "p \"Hello world\"", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.p, node.type); + switch (node.body) { + .string => |token| try std.testing.expectEqualStrings("\"Hello world\"", token.text), + else => return error.TestExpectedEqual, + } +} + +test "parser handles verbatim blocks" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "pre:\n|line one\n|line two\n", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.pre, node.type); + switch (node.body) { 
+ .verbatim => |lines| { + try std.testing.expectEqual(@as(usize, 2), lines.len); + try std.testing.expectEqualStrings("|line one", lines[0].text); + try std.testing.expectEqualStrings("|line two", lines[1].text); + }, + else => return error.TestExpectedEqual, + } +} + +test "parser handles block node lists" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "hdoc{h1 \"Title\" p \"Body\"}", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.hdoc, node.type); + switch (node.body) { + .list => |children| { + try std.testing.expectEqual(@as(usize, 2), children.len); + try std.testing.expectEqual(hdoc.Parser.NodeType.h1, children[0].type); + try std.testing.expectEqual(hdoc.Parser.NodeType.p, children[1].type); + }, + else => return error.TestExpectedEqual, + } +} + +test "parser handles inline node lists" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "p { Hello \\em{world} }", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.p, node.type); + switch (node.body) { + .list => |children| { + try std.testing.expectEqual(@as(usize, 2), children.len); + try std.testing.expectEqual(hdoc.Parser.NodeType.text, children[0].type); + try std.testing.expectEqual(@as(usize, 5), children[0].location.length); + + try std.testing.expectEqual(hdoc.Parser.NodeType.@"\\em", children[1].type); + switch (children[1].body) { + .list => |inline_children| { + try std.testing.expectEqual(@as(usize, 1), inline_children.len); + try std.testing.expectEqual(hdoc.Parser.NodeType.text, inline_children[0].type); + try std.testing.expectEqual(@as(usize, 5), inline_children[0].location.length); + }, + else => 
return error.TestExpectedEqual, + } + }, + else => return error.TestExpectedEqual, + } +} + +test "parser handles unknown node types" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "\\madeup{} mystery{}", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const inline_node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.unknown_inline, inline_node.type); + switch (inline_node.body) { + .list => |children| try std.testing.expectEqual(@as(usize, 0), children.len), + else => return error.TestExpectedEqual, + } + + const block_node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.unknown_block, block_node.type); + switch (block_node.body) { + .list => |children| try std.testing.expectEqual(@as(usize, 0), children.len), + else => return error.TestExpectedEqual, + } +} + +test "parser reports unterminated inline lists" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "p { word", + .arena = arena.allocator(), + .diagnostics = null, + }; + + try std.testing.expectError(error.UnterminatedList, parser.accept_node(.top_level)); +} + +test "parser maps diagnostic locations" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "a\nb\nc", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const loc = parser.make_diagnostic_location(4); + try std.testing.expectEqual(@as(u32, 3), loc.line); + try std.testing.expectEqual(@as(u32, 1), loc.column); +} From 715f53f9784af090f3c6f36371a9eefae3b1c680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 12:46:26 +0100 Subject: [PATCH 011/116] Vibecoded: Implements DOM datastructure, adds diagnostic error messages and very crude semantic 
analysis --- docs/specification.md | 3 +- src/hyperdoc.zig | 357 +++++++++++++++++++++++++++++++++++++----- src/main.zig | 8 +- src/testsuite.zig | 71 +++++++++ 4 files changed, 396 insertions(+), 43 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 996c00a..fd0e433 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -67,7 +67,7 @@ WORD := /[^\s\{\}\\\"(),=:]+/ | Element | Element Type | Allowed Children | Attributes | | ----------------------------------------------------------- | ------------ | ---------------------------- | ------------------------------------ | | *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author` | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | | `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | | `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | | `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | @@ -105,6 +105,7 @@ Notes: | `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | | `title` | No | *Any* | Sets the title of the document or the table row. | | `author` | No | *Any* | Sets the author of the document. | +| `date` | No | [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) timestamp | Sets the authoring date of the document. | | `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | | `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | | `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. 
| diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b55dbf2..d557866 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -6,6 +6,7 @@ const parser_toolkit = @import("parser-toolkit"); pub const Document = struct { arena: std.heap.ArenaAllocator, contents: []Block, + ids: []?[]const u8, pub fn deinit(doc: *Document) void { doc.arena.deinit(); @@ -20,12 +21,129 @@ pub const Document = struct { /// from the full document size. pub const Block = union(enum) { header: Header, + heading: Heading, + paragraph: Paragraph, + list: List, + image: Image, + preformatted: Preformatted, + toc: TableOfContents, + table: Table, pub const Header = struct { + lang: ?[]const u8, title: ?[]const u8, + version: ?[]const u8, author: ?[]const u8, date: ?[]const u8, }; + + pub const Heading = struct { + level: HeadingLevel, + lang: ?[]const u8, + content: []Span, + }; + + pub const HeadingLevel = enum { h1, h2, h3 }; + + pub const Paragraph = struct { + kind: ParagraphKind, + lang: ?[]const u8, + content: []Span, + }; + + pub const ParagraphKind = enum { p, note, warning, danger, tip, quote, spoiler }; + + pub const List = struct { + lang: ?[]const u8, + first: ?u32, + items: []ListItem, + }; + + pub const ListItem = struct { + lang: ?[]const u8, + content: []Span, + }; + + pub const Image = struct { + lang: ?[]const u8, + alt: ?[]const u8, + path: ?[]const u8, + content: []Span, + }; + + pub const Preformatted = struct { + lang: ?[]const u8, + syntax: ?[]const u8, + content: []Span, + }; + + pub const TableOfContents = struct { + lang: ?[]const u8, + depth: ?u8, + }; + + pub const Table = struct { + lang: ?[]const u8, + rows: []TableRow, + }; + + pub const TableRow = union(enum) { + columns: TableColumns, + row: TableDataRow, + group: TableGroup, + }; + + pub const TableColumns = struct { + lang: ?[]const u8, + cells: []TableCell, + }; + + pub const TableDataRow = struct { + lang: ?[]const u8, + title: ?[]const u8, + cells: []TableCell, + }; + + pub const TableGroup = struct { + 
lang: ?[]const u8, + content: []Span, + }; + + pub const TableCell = struct { + lang: ?[]const u8, + colspan: ?u32, + content: []Span, + }; +}; + +pub const SpanContent = union(enum) { + text: []const u8, + date: DateTime, + time: DateTime, + datetime: DateTime, +}; + +pub const DateTime = struct { + value: []const u8, + format: ?[]const u8 = null, +}; + +pub const Span = struct { + content: SpanContent, + lang: ?[]const u8 = null, + em: bool = false, + mono: bool = false, + strike: bool = false, + sub: bool = false, + sup: bool = false, + link: Link = .none, + syntax: ?[]const u8 = null, +}; + +pub const Link = union(enum) { + none, + ref: []const u8, + uri: []const u8, }; /// Parses a HyperDoc document. @@ -49,9 +167,11 @@ pub fn parse( var sema: SemanticAnalyzer = .{ .arena = arena.allocator(), .diagnostics = diagnostics, + .code = plain_text, }; var blocks: std.ArrayList(Block) = .empty; + var ids: std.ArrayList(?[]const u8) = .empty; while (true) { errdefer |err| { @@ -81,25 +201,78 @@ pub fn parse( }; try blocks.append(arena.allocator(), block); + try ids.append(arena.allocator(), null); } return .{ .arena = arena, .contents = try blocks.toOwnedSlice(arena.allocator()), + .ids = try ids.toOwnedSlice(arena.allocator()), }; } pub const SemanticAnalyzer = struct { arena: std.mem.Allocator, diagnostics: ?*Diagnostics, + code: []const u8, + seen_header: bool = false, fn translate_toplevel_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType }!Block { - _ = sema; - switch (node.type) { - else => { + if (!sema.seen_header) { + sema.seen_header = true; + if (node.type != .hdoc) { + sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); return error.InvalidNodeType; - }, + } + } else if (node.type == .hdoc) { + sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); + return error.InvalidNodeType; + } + + if (node.type == .hdoc) { + return .{ + .header = .{ + .lang = null, + .title = 
sema.attr_value(node.attributes, "title"), + .version = sema.attr_value(node.attributes, "version"), + .author = sema.attr_value(node.attributes, "author"), + .date = sema.attr_value(node.attributes, "date"), + }, + }; + } + + return error.InvalidNodeType; + } + + fn attr_value(sema: *SemanticAnalyzer, attrs: std.StringArrayHashMapUnmanaged(Parser.Attribute), name: []const u8) ?[]const u8 { + _ = sema; + if (attrs.get(name)) |attr| { + return attr.value; + } + return null; + } + + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) void { + if (sema.diagnostics) |diag| { + diag.add(code, sema.make_location(offset)) catch {}; + } + } + + fn make_location(sema: *SemanticAnalyzer, offset: usize) Diagnostic.Location { + var line: u32 = 1; + var column: u32 = 1; + + var i: usize = 0; + while (i < offset and i < sema.code.len) : (i += 1) { + if (sema.code[i] == '\n') { + line += 1; + column = 1; + } else { + column += 1; + } } + + return .{ .line = line, .column = column }; } }; @@ -112,7 +285,22 @@ pub const Parser = struct { pub const ScopeType = enum { top_level, nested }; + fn emitDiagnostic( + parser: *Parser, + code: Diagnostic.Code, + diag_location: Diagnostic.Location, + ) void { + if (parser.diagnostics) |diag| { + diag.add(code, diag_location) catch {}; + } + } + pub fn accept_node(parser: *Parser, comptime scope_type: ScopeType) !Node { + parser.skip_whitespace(); + if (scope_type == .top_level and parser.at_end()) { + return error.EndOfFile; + } + const type_ident = parser.accept_identifier() catch |err| switch (err) { error.UnexpectedEndOfFile => |e| switch (scope_type) { .nested => return e, @@ -144,7 +332,7 @@ pub const Parser = struct { const gop_entry = try attributes.getOrPut(parser.arena, attr_name.text); if (gop_entry.found_existing) { - // TODO: Emit diagnostic + emitDiagnostic(parser, .{ .duplicate_attribute = .{ .name = attr_name.text } }, parser.make_diagnostic_location(attr_location.offset)); } gop_entry.value_ptr.* 
= .{ .location = attr_location, @@ -179,7 +367,7 @@ pub const Parser = struct { } if (lines.items.len == 0) { - // TODO: Emit diagnostic about verbatim block with no lines + emitDiagnostic(parser, .empty_verbatim_block, parser.make_diagnostic_location(type_ident.position.offset)); } return .{ @@ -230,6 +418,11 @@ pub const Parser = struct { while (true) { parser.skip_whitespace(); + if (parser.at_end()) { + emitDiagnostic(parser, .unterminated_block_list, parser.make_diagnostic_location(parser.offset)); + return error.UnterminatedList; + } + if (parser.try_accept_char('}')) break; @@ -259,7 +452,7 @@ pub const Parser = struct { parser.skip_whitespace(); const head = parser.peek_char() orelse { - // TODO: Emit diagnostic + emitDiagnostic(parser, .unterminated_inline_list, parser.make_diagnostic_location(parser.offset)); return error.UnterminatedList; }; @@ -322,6 +515,13 @@ pub const Parser = struct { return null; } + const after_pipe = if (!parser.at_end()) parser.code[parser.offset] else null; + if (after_pipe) |c| { + if (c != ' ' and c != '\n') { + emitDiagnostic(parser, .verbatim_missing_space, parser.make_diagnostic_location(head)); + } + } + while (!parser.at_end()) { const c = parser.code[parser.offset]; if (c == '\n') { @@ -332,11 +532,17 @@ pub const Parser = struct { parser.offset += 1; } if (parser.at_end()) { - // TODO: Emit diagnostic about verbatim lines should have an empty line feed at the end of the file. 
+ emitDiagnostic(parser, .verbatim_missing_trailing_newline, parser.make_diagnostic_location(parser.offset)); } const token = parser.slice(head, parser.offset); std.debug.assert(std.mem.startsWith(u8, token.text, "|")); + if (token.text.len > 0) { + const last = token.text[token.text.len - 1]; + if (last == ' ' or last == '\t') { + emitDiagnostic(parser, .trailing_whitespace, parser.make_diagnostic_location(parser.offset - 1)); + } + } return token; } @@ -350,9 +556,12 @@ pub const Parser = struct { if (parser.try_accept_char(expected)) return; - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "character", .expected_char = expected } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } + emitDiagnostic(parser, .{ .unexpected_character = .{ .expected = expected, .found = parser.code[parser.offset] } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedCharacter; } @@ -373,8 +582,10 @@ pub const Parser = struct { pub fn try_accept_string(parser: *Parser) !?Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "string literal" } }, parser.make_diagnostic_location(parser.offset)); return null; + } if (parser.code[parser.offset] != '"') return null; @@ -385,12 +596,16 @@ pub const Parser = struct { pub fn accept_string(parser: *Parser) error{ OutOfMemory, UnexpectedEndOfFile, UnexpectedCharacter, UnterminatedStringLiteral }!Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "string literal" } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } const start = parser.offset; - if (parser.code[start] != '"') + if (parser.code[start] != '"') { + emitDiagnostic(parser, .{ .unexpected_character = .{ .expected = '"', .found = parser.code[start] } }, 
parser.make_diagnostic_location(parser.offset)); return error.UnexpectedCharacter; + } parser.offset += 1; @@ -419,19 +634,24 @@ pub const Parser = struct { } } + emitDiagnostic(parser, .unterminated_string, parser.make_diagnostic_location(start)); return error.UnterminatedStringLiteral; } pub fn accept_identifier(parser: *Parser) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "identifier" } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } const start = parser.offset; const first = parser.code[start]; - if (!is_ident_char(first)) + if (!is_ident_char(first)) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); return error.InvalidCharacter; + } while (parser.offset < parser.code.len) { const c = parser.code[parser.offset]; @@ -447,8 +667,10 @@ pub const Parser = struct { pub fn accept_word(parser: *Parser) error{UnexpectedEndOfFile}!Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "word" } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } const start = parser.offset; @@ -708,34 +930,77 @@ pub const Diagnostic = struct { } }; - /// An diagnostic code encoded as a 16 bit integer. - /// The upper 4 bit encode the severity of the code, the lower 12 bit the number. 
- pub const Code = enum(u16) { - // bitmasks: - const ERROR = 0x1000; - const WARNING = 0x2000; - - // TODO: Add other diagnostic codes + pub const UnexpectedEof = struct { context: []const u8, expected_char: ?u8 = null }; + pub const UnexpectedCharacter = struct { expected: u8, found: u8 }; + pub const InvalidIdentifierStart = struct { char: u8 }; + pub const DuplicateAttribute = struct { name: []const u8 }; + pub const MissingHdocHeader = struct {}; + pub const DuplicateHdocHeader = struct {}; + pub const Code = union(enum) { // errors: - invalid_character = ERROR | 1, + unterminated_inline_list, + unexpected_eof: UnexpectedEof, + unexpected_character: UnexpectedCharacter, + unterminated_string, + invalid_identifier_start: InvalidIdentifierStart, + unterminated_block_list, + missing_hdoc_header: MissingHdocHeader, + duplicate_hdoc_header: DuplicateHdocHeader, // warnings: - missing_space_in_literal = WARNING | 1, - - pub fn get_severity(code: Code) Severity { - const num = @intFromEnum(code); - return switch (num & 0xF000) { - ERROR => .@"error", - WARNING => .warning, - else => @panic("invalid error code!"), + duplicate_attribute: DuplicateAttribute, + empty_verbatim_block, + verbatim_missing_trailing_newline, + verbatim_missing_space, + trailing_whitespace, + + pub fn severity(code: Code) Severity { + return switch (code) { + .unterminated_inline_list => .@"error", + .unexpected_eof => .@"error", + .unexpected_character => .@"error", + .unterminated_string => .@"error", + .invalid_identifier_start => .@"error", + .unterminated_block_list => .@"error", + .missing_hdoc_header => .@"error", + .duplicate_hdoc_header => .@"error", + + .duplicate_attribute => .warning, + .empty_verbatim_block => .warning, + .verbatim_missing_trailing_newline => .warning, + .verbatim_missing_space => .warning, + .trailing_whitespace => .warning, }; } + + pub fn format(code: Code, w: anytype) !void { + switch (code) { + .unterminated_inline_list => try w.writeAll("Inline list body is 
unterminated (missing '}' before end of file)."), + .unexpected_eof => |ctx| { + if (ctx.expected_char) |ch| { + try w.print("Unexpected end of file while expecting '{c}'.", .{ch}); + } else { + try w.print("Unexpected end of file while expecting {s}.", .{ctx.context}); + } + }, + .unexpected_character => |ctx| try w.print("Expected '{c}' but found '{c}'.", .{ ctx.expected, ctx.found }), + .unterminated_string => try w.writeAll("Unterminated string literal (missing closing \")."), + .invalid_identifier_start => |ctx| try w.print("Invalid identifier start character: '{c}'.", .{ctx.char}), + .unterminated_block_list => try w.writeAll("Block list body is unterminated (missing '}' before end of file)."), + .missing_hdoc_header => try w.writeAll("Document must start with an 'hdoc' header."), + .duplicate_hdoc_header => try w.writeAll("Only one 'hdoc' header is allowed; additional header found."), + .duplicate_attribute => |ctx| try w.print("Duplicate attribute '{s}' will overwrite the earlier value.", .{ctx.name}), + .empty_verbatim_block => try w.writeAll("Verbatim block has no lines."), + .verbatim_missing_trailing_newline => try w.writeAll("Verbatim line should end with a newline."), + .verbatim_missing_space => try w.writeAll("Expected a space after '|' in verbatim line."), + .trailing_whitespace => try w.writeAll("Trailing whitespace at end of line."), + } + } }; code: Code, location: Location, - message: []const u8, }; /// A collection of diagnostic messages. 
@@ -752,18 +1017,28 @@ pub const Diagnostics = struct { diag.* = undefined; } - pub fn add(diag: *Diagnostics, code: Diagnostic.Code, location: Diagnostic.Location, comptime fmt: []const u8, args: anytype) !void { - const allocator = diag.arena.allocator(); - - const msg = try std.fmt.allocPrint(allocator, fmt, args); - errdefer allocator.free(msg); - - try diag.items.append(allocator, .{ + pub fn add(diag: *Diagnostics, code: Diagnostic.Code, location: Diagnostic.Location) !void { + try diag.items.append(diag.arena.allocator(), .{ .location = location, .code = code, - .message = msg, }); } + + pub fn has_error(diag: Diagnostics) bool { + for (diag.items.items) |item| { + if (item.code.severity() == .@"error") + return true; + } + return false; + } + + pub fn has_warning(diag: Diagnostics) bool { + for (diag.items.items) |item| { + if (item.code.severity() == .warning) + return true; + } + return false; + } }; test "fuzz parser" { diff --git a/src/main.zig b/src/main.zig index 32e30a0..d9ecaf0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -26,8 +26,14 @@ pub fn main() !u8 { const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); defer allocator.free(document); - var parsed = try hdoc.parse(allocator, document, null); + var diagnostics: hdoc.Diagnostics = .init(allocator); + defer diagnostics.deinit(); + + var parsed = try hdoc.parse(allocator, document, &diagnostics); defer parsed.deinit(); + if (diagnostics.has_error()) + return 1; + return 0; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 052e95f..634c428 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -241,6 +241,77 @@ test "parser handles unknown node types" { } } +fn diagnosticsContain(diag: *const hdoc.Diagnostics, expected: hdoc.Diagnostic.Code) bool { + for (diag.items.items) |item| { + if (std.meta.activeTag(item.code) == std.meta.activeTag(expected)) { + return true; + } + } + return false; +} + +test "parsing valid document yields empty diagnostics" { + 
var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, "hdoc(version=\"2.0\");", &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expect(!diagnostics.has_warning()); + try std.testing.expectEqual(@as(usize, 0), diagnostics.items.items.len); +} + +test "diagnostic codes are emitted for expected samples" { + const Case = struct { + code: hdoc.Diagnostic.Code, + samples: []const []const u8, + }; + + const cases = [_]Case{ + .{ .code = .{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }, .samples = &.{"hdoc(version=\"2.0\"); h1("} }, + .{ .code = .{ .unexpected_character = .{ .expected = '{', .found = '1' } }, .samples = &.{"hdoc(version=\"2.0\"); h1 123"} }, + .{ .code = .unterminated_string, .samples = &.{"hdoc(version=\"2.0\"); h1 \"unterminated"} }, + .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, + .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, + .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, + .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, + .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, + .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, + .{ .code = .verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, + .{ .code = .trailing_whitespace, .samples = &.{"hdoc(version=\"2.0\"); pre:\n| trailing \n"} }, + .{ .code = .missing_hdoc_header, .samples = &.{"h1 \"Title\""} }, + .{ .code = .duplicate_hdoc_header, .samples = &.{"hdoc(version=\"2.0\"); hdoc(version=\"2.0\");"} }, + }; + + inline for (cases) |case| { + for (case.samples) |sample| { + var diagnostics: 
hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const maybe_doc = hdoc.parse(std.testing.allocator, sample, &diagnostics) catch |err| switch (err) { + error.OutOfMemory => return err, + else => null, + }; + + if (maybe_doc) |doc| { + var owned_doc = doc; + defer owned_doc.deinit(); + } + + try std.testing.expect(diagnosticsContain(&diagnostics, case.code)); + + const expected_severity = case.code.severity(); + if (expected_severity == .@"error") { + try std.testing.expect(diagnostics.has_error()); + } else { + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expect(diagnostics.has_warning()); + } + } + } +} + test "parser reports unterminated inline lists" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); From 46cdc65e79b2cc86a8e25d5724726e3b9ce7ddc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 13:58:07 +0100 Subject: [PATCH 012/116] Introduces more types into the DOM, adds basics for attribute parsing. 
--- docs/specification.md | 85 +++++----- examples/demo.hdoc | 2 +- src/hyperdoc.zig | 360 +++++++++++++++++++++++++++++++++--------- src/main.zig | 2 + 4 files changed, 334 insertions(+), 115 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index fd0e433..4c00749 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -64,32 +64,32 @@ WORD := /[^\s\{\}\\\"(),=:]+/ ## Element Overview -| Element | Element Type | Allowed Children | Attributes | -| ----------------------------------------------------------- | ------------ | ---------------------------- | ------------------------------------ | -| *Document* | Document | `hdoc`, Blocks | | +| Element | Element Type | Allowed Children | Attributes | +| ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------- | +| *Document* | Document | `hdoc`, Blocks | | | `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | -| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | -| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | -| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | -| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | -| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | -| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | -| `toc` | Block | - | `lang`, \[`id`\], `depth` | -| `table` | Block | Table Rows | `lang`, \[`id`\] | -| `columns` | Table Row | `td` ≥ 1 | `lang` | -| `group` | Table Row | Text Body | `lang`, | -| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `li` | List Item | Blocks, String, Verbatim | `lang` | -| `\em` | Text Body | Text Body | `lang` | -| `\mono` | Text Body | Text Body | `lang`, `syntax` | -| `\strike` | Text Body | Text Body | `lang` | -| `\sub`, `\sup` | Text Body | Text Body | `lang` | -| `\link` | 
Text Body | Text Body | `lang`, (`ref` \| `uri`) | -| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | -| *Plain Text* | Text Body | - | | -| *String* | Text Body | - | | -| *Verbatim* | Text Body | - | | +| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | +| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | +| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | +| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | +| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | +| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | +| `toc` | Block | - | `lang`, \[`id`\], `depth` | +| `table` | Block | Table Rows | `lang`, \[`id`\] | +| `columns` | Table Row | `td` ≥ 1 | `lang` | +| `group` | Table Row | Text Body | `lang`, | +| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `\em` | Text Body | Text Body | `lang` | +| `\mono` | Text Body | Text Body | `lang`, `syntax` | +| `\strike` | Text Body | Text Body | `lang` | +| `\sub`, `\sup` | Text Body | Text Body | `lang` | +| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | +| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | +| *Plain Text* | Text Body | - | | +| *String* | Text Body | - | | +| *Verbatim* | Text Body | - | | Notes: @@ -99,23 +99,23 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) timestamp | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | | +| Attribute | Required | Allowed Values | Description | +| --------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| +| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | +| `title` | No | *Any* | Sets the title of the document or the table row. | +| `author` | No | *Any* | Sets the author of the document. | +| `date` | No | A date-time value using the format specified below (intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601)) | Sets the authoring date of the document. | +| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | +| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | | ## Semantic Structure @@ -341,3 +341,4 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically | `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. 
+ diff --git a/examples/demo.hdoc b/examples/demo.hdoc index 68ef189..a092e91 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0" , lang="en", title="HyperDoc \"2.0\" Demonstration"); +hdoc(version="2.0", lang="en", title="HyperDoc \"2.0\" Demonstration"); h1{HyperDoc 2.0} diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index d557866..32aacc9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -5,9 +5,19 @@ const parser_toolkit = @import("parser-toolkit"); /// tree structure of the document. pub const Document = struct { arena: std.heap.ArenaAllocator, + + version: Version, + + // document contents: contents: []Block, ids: []?[]const u8, + // header information + lang: ?[]const u8, + title: ?[]const u8, + author: ?[]const u8, + date: ?DateTime, + pub fn deinit(doc: *Document) void { doc.arena.deinit(); doc.* = undefined; @@ -20,7 +30,6 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. 
pub const Block = union(enum) { - header: Header, heading: Heading, paragraph: Paragraph, list: List, @@ -29,14 +38,6 @@ pub const Block = union(enum) { toc: TableOfContents, table: Table, - pub const Header = struct { - lang: ?[]const u8, - title: ?[]const u8, - version: ?[]const u8, - author: ?[]const u8, - date: ?[]const u8, - }; - pub const Heading = struct { level: HeadingLevel, lang: ?[]const u8, @@ -118,15 +119,17 @@ pub const Block = union(enum) { pub const SpanContent = union(enum) { text: []const u8, - date: DateTime, - time: DateTime, - datetime: DateTime, + date: FormattedDateTime(Date), + time: FormattedDateTime(Time), + datetime: FormattedDateTime(DateTime), }; -pub const DateTime = struct { - value: []const u8, - format: ?[]const u8 = null, -}; +pub fn FormattedDateTime(comptime DT: type) type { + return struct { + value: DT, + format: DT.Format = .default, + }; +} pub const Span = struct { content: SpanContent, @@ -146,6 +149,95 @@ pub const Link = union(enum) { uri: []const u8, }; +/// HyperDoc Version Number +pub const Version = struct { + major: u16, + minor: u16, + + pub fn parse(text: []const u8) !Version { + const split_index = std.mem.indexOfScalar(u8, text, '.') orelse return error.InvalidValue; + + const head = text[0..split_index]; + const tail = text[split_index + 1 ..]; + + return .{ + .major = std.fmt.parseInt(u16, head, 10) catch return error.InvalidValue, + .minor = std.fmt.parseInt(u16, tail, 10) catch return error.InvalidValue, + }; + } +}; + +pub const DateTime = struct { + pub const Format = enum { + pub const default: Format = .short; + + short, + long, + relative, + iso, + }; + + date: Date, + time: Time, + + pub fn parse(text: []const u8) !DateTime { + const split_index = std.mem.indexOfScalar(u8, text, 'T') orelse return error.InvalidValue; + + const head = text[0..split_index]; + const tail = text[split_index + 1 ..]; + + return .{ + .date = try Date.parse(head), + .time = try Time.parse(tail), + }; + } +}; + +pub const Date 
= struct { + pub const Format = enum { + pub const default: Format = .short; + year, + month, + day, + weekday, + short, + long, + relative, + iso, + }; + + year: i32, // e.g., 2024 + month: u4, // 1-12 + day: u5, // 1-31 + + pub fn parse(text: []const u8) !Date { + _ = text; + @panic("TODO: Implement this"); + } +}; + +pub const Time = struct { + pub const Format = enum { + pub const default: Format = .short; + + long, + short, + rough, + relative, + iso, + }; + + hour: u5, // 0-23 + minute: u6, // 0-59 + second: u6, // 0-59 + microsecond: u20, // 0-999999 + + pub fn parse(text: []const u8) !Time { + _ = text; + @panic("TODO: Implement this"); + } +}; + /// Parses a HyperDoc document. pub fn parse( allocator: std.mem.Allocator, @@ -154,7 +246,7 @@ pub fn parse( /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) error{ OutOfMemory, SyntaxError }!Document { +) error{ OutOfMemory, SyntaxError, MalformedDocument }!Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); @@ -170,9 +262,6 @@ pub fn parse( .code = plain_text, }; - var blocks: std.ArrayList(Block) = .empty; - var ids: std.ArrayList(?[]const u8) = .empty; - while (true) { errdefer |err| { std.log.debug("error at examples/demo.hdoc:{f}: {t}", .{ @@ -194,67 +283,185 @@ pub fn parse( => return error.SyntaxError, }; - const block = sema.translate_toplevel_node(node) catch |err| switch (err) { - error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), - - error.InvalidNodeType => continue, - }; - - try blocks.append(arena.allocator(), block); - try ids.append(arena.allocator(), null); + try sema.append_node(node); } + const header = sema.header orelse return error.MalformedDocument; + return .{ .arena = arena, - .contents = try blocks.toOwnedSlice(arena.allocator()), - .ids = try ids.toOwnedSlice(arena.allocator()), + .contents = try 
sema.blocks.toOwnedSlice(arena.allocator()), + .ids = try sema.ids.toOwnedSlice(arena.allocator()), + + .lang = header.lang, + .title = header.title, + .version = header.version, + .author = header.author, + .date = header.date, }; } pub const SemanticAnalyzer = struct { + const Header = struct { + version: Version, + lang: ?[]const u8, + title: ?[]const u8, + author: ?[]const u8, + date: ?DateTime, + }; + arena: std.mem.Allocator, diagnostics: ?*Diagnostics, code: []const u8, - seen_header: bool = false, - - fn translate_toplevel_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType }!Block { - if (!sema.seen_header) { - sema.seen_header = true; - if (node.type != .hdoc) { - sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); - return error.InvalidNodeType; + + header: ?Header = null, + blocks: std.ArrayList(Block) = .empty, + ids: std.ArrayList(?[]const u8) = .empty, + + fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{OutOfMemory}!void { + switch (node.type) { + .hdoc => { + if (sema.header != null) { + try sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); + } + sema.header = sema.translate_header_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.BadAttributes => null, + }; + }, + + else => { + if (sema.header == null) { + if (sema.blocks.items.len == 0) { + // Emit error for the first encountered block. + // This can only happen exactly once, as we either: + // - have already set a header block when the first non-header nodes arrives. + // - we have processed another block already, so the previous block would've emitted the warning already. 
+ try sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); + } + } + + const block, const id = sema.translate_block_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.InvalidNodeType, error.BadAttributes => { + return; + }, + }; + + try sema.blocks.append(sema.arena, block); + try sema.ids.append(sema.arena, id); + }, + } + } + + fn translate_header_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }!Header { + std.debug.assert(node.type == .hdoc); + + const attrs = try sema.get_attributes(node, struct { + version: Version, + title: ?[]const u8 = null, + author: ?[]const u8 = null, + date: ?DateTime = null, + lang: ?[]const u8 = null, + }); + + return .{ + .version = attrs.version, + .lang = attrs.lang, + .title = attrs.title, + .author = attrs.author, + .date = attrs.date, + }; + } + + fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes }!struct { Block, ?[]const u8 } { + std.debug.assert(node.type != .hdoc); + + _ = sema; + + return error.InvalidNodeType; + } + + fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { + const Fields = std.meta.FieldEnum(Attrs); + const fields = @typeInfo(Attrs).@"struct".fields; + + var required: std.EnumSet(Fields) = .initEmpty(); + + var attrs: Attrs = undefined; + inline for (fields) |fld| { + if (fld.default_value_ptr) |default_value_ptr| { + @field(attrs, fld.name) = @as(*const fld.type, @ptrCast(@alignCast(default_value_ptr))).*; + } else { + @field(attrs, fld.name) = undefined; + required.insert(@field(Fields, fld.name)); } - } else if (node.type == .hdoc) { - sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); - return error.InvalidNodeType; } - if (node.type == .hdoc) { - return .{ - .header = .{ - .lang = null, - .title = sema.attr_value(node.attributes, "title"), - .version = 
sema.attr_value(node.attributes, "version"), - .author = sema.attr_value(node.attributes, "author"), - .date = sema.attr_value(node.attributes, "date"), - }, + var any_invalid = false; + var found: std.EnumSet(Fields) = .initEmpty(); + for (node.attributes.keys(), node.attributes.values()) |key, attrib| { + const fld = std.meta.stringToEnum(Fields, key) orelse { + try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + continue; }; + if (found.contains(fld)) { + try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, node.location.offset); + } + found.insert(fld); + + switch (fld) { + inline else => |tag| @field(attrs, @tagName(tag)) = sema.cast_value(attrib, @FieldType(Attrs, @tagName(tag))) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + + else => { + any_invalid = true; + + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + + continue; + }, + }, + } } - return error.InvalidNodeType; + // Check if we have any required attributes missing: + var any_missing = false; + { + var iter = required.iterator(); + while (iter.next()) |req_field| { + if (!found.contains(req_field)) { + try sema.emit_diagnostic(.{ .missing_attribute = .{ .type = node.type, .name = @tagName(req_field) } }, node.location.offset); + any_missing = true; + } + } + } + if (any_missing or any_invalid) + return error.BadAttributes; + + return attrs; } - fn attr_value(sema: *SemanticAnalyzer, attrs: std.StringArrayHashMapUnmanaged(Parser.Attribute), name: []const u8) ?[]const u8 { - _ = sema; - if (attrs.get(name)) |attr| { - return attr.value; + fn cast_value(sema: *SemanticAnalyzer, attrib: Parser.Attribute, comptime T: type) error{ OutOfMemory, InvalidValue }!T { + if (@typeInfo(T) == .optional) { + return try sema.cast_value(attrib, @typeInfo(T).optional.child); } - return null; + + return switch (T) { + []const u8 => attrib.value, + + Version => 
Version.parse(attrib.value) catch return error.InvalidValue, + DateTime => DateTime.parse(attrib.value) catch return error.InvalidValue, + Date => Date.parse(attrib.value) catch return error.InvalidValue, + Time => Time.parse(attrib.value) catch return error.InvalidValue, + + else => @compileError("Unsupported attribute type: " ++ @typeName(T)), + }; } - fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) void { + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) !void { if (sema.diagnostics) |diag| { - diag.add(code, sema.make_location(offset)) catch {}; + try diag.add(code, sema.make_location(offset)); } } @@ -934,6 +1141,7 @@ pub const Diagnostic = struct { pub const UnexpectedCharacter = struct { expected: u8, found: u8 }; pub const InvalidIdentifierStart = struct { char: u8 }; pub const DuplicateAttribute = struct { name: []const u8 }; + pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; @@ -947,8 +1155,11 @@ pub const Diagnostic = struct { unterminated_block_list, missing_hdoc_header: MissingHdocHeader, duplicate_hdoc_header: DuplicateHdocHeader, + missing_attribute: NodeAttributeError, + invalid_attribute: NodeAttributeError, // warnings: + unknown_attribute: NodeAttributeError, duplicate_attribute: DuplicateAttribute, empty_verbatim_block, verbatim_missing_trailing_newline, @@ -957,20 +1168,25 @@ pub const Diagnostic = struct { pub fn severity(code: Code) Severity { return switch (code) { - .unterminated_inline_list => .@"error", - .unexpected_eof => .@"error", - .unexpected_character => .@"error", - .unterminated_string => .@"error", - .invalid_identifier_start => .@"error", - .unterminated_block_list => .@"error", - .missing_hdoc_header => .@"error", - .duplicate_hdoc_header => .@"error", - - .duplicate_attribute => .warning, - .empty_verbatim_block => .warning, - 
.verbatim_missing_trailing_newline => .warning, - .verbatim_missing_space => .warning, - .trailing_whitespace => .warning, + .unterminated_inline_list, + .unexpected_eof, + .unexpected_character, + .unterminated_string, + .invalid_identifier_start, + .unterminated_block_list, + .missing_hdoc_header, + .duplicate_hdoc_header, + .invalid_attribute, + .missing_attribute, + => .@"error", + + .unknown_attribute, + .duplicate_attribute, + .empty_verbatim_block, + .verbatim_missing_trailing_newline, + .verbatim_missing_space, + .trailing_whitespace, + => .warning, }; } diff --git a/src/main.zig b/src/main.zig index d9ecaf0..ccc0786 100644 --- a/src/main.zig +++ b/src/main.zig @@ -35,5 +35,7 @@ pub fn main() !u8 { if (diagnostics.has_error()) return 1; + // TODO: Implement dumping of "parsed " + return 0; } From 986cf055921e2204ad9507993e23a8983c89cb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 14:15:38 +0100 Subject: [PATCH 013/116] Vibecoded: Adds dump code in main. 
--- build.zig | 13 ++ src/main.zig | 452 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 464 insertions(+), 1 deletion(-) diff --git a/build.zig b/build.zig index 4c71fd2..5018027 100644 --- a/build.zig +++ b/build.zig @@ -51,6 +51,19 @@ pub fn build(b: *std.Build) void { .use_llvm = true, }); test_step.dependOn(&b.addRunArtifact(exe_tests).step); + + const main_tests = b.addTest(.{ + .root_module = b.createModule(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, + }), + .use_llvm = true, + }); + test_step.dependOn(&b.addRunArtifact(main_tests).step); } fn rawFileMod(b: *std.Build, path: []const u8) std.Build.Module.Import { diff --git a/src/main.zig b/src/main.zig index ccc0786..98ea655 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,6 +4,455 @@ const hdoc = @import("hyperdoc"); var debug_allocator: std.heap.DebugAllocator(.{}) = .init; +const indent_step: usize = 2; + +fn writeIndent(writer: anytype, indent: usize) !void { + var i: usize = 0; + while (i < indent) : (i += 1) { + try writer.writeByte(' '); + } +} + +fn writeStringValue(writer: anytype, value: []const u8) !void { + try writer.print("\"{f}\"", .{std.zig.fmtString(value)}); +} + +fn writeOptionalStringValue(writer: anytype, value: ?[]const u8) !void { + if (value) |text| { + try writeStringValue(writer, text); + } else { + try writer.writeAll("null"); + } +} + +fn writeOptionalIntValue(writer: anytype, value: anytype) !void { + if (value) |number| { + try writer.print("{}", .{number}); + } else { + try writer.writeAll("null"); + } +} + +fn dumpOptionalStringField(writer: anytype, indent: usize, key: []const u8, value: ?[]const u8) !void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpOptionalNumberField(writer: anytype, indent: usize, key: []const u8, 
value: anytype) !void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalIntValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpBoolField(writer: anytype, indent: usize, key: []const u8, value: bool) !void { + try writeIndent(writer, indent); + try writer.print("{s}: {}\n", .{ key, value }); +} + +fn dumpEnumField(writer: anytype, indent: usize, key: []const u8, value: anytype) !void { + try writeIndent(writer, indent); + try writer.print("{s}: {s}\n", .{ key, @tagName(value) }); +} + +fn dumpVersion(writer: anytype, indent: usize, version: hdoc.Version) !void { + try writeIndent(writer, indent); + try writer.writeAll("version:\n"); + try writeIndent(writer, indent + indent_step); + try writer.print("major: {}\n", .{version.major}); + try writeIndent(writer, indent + indent_step); + try writer.print("minor: {}\n", .{version.minor}); +} + +fn dumpDate(writer: anytype, indent: usize, date: hdoc.Date) !void { + try writeIndent(writer, indent); + try writer.print("year: {}\n", .{date.year}); + try writeIndent(writer, indent); + try writer.print("month: {}\n", .{date.month}); + try writeIndent(writer, indent); + try writer.print("day: {}\n", .{date.day}); +} + +fn dumpTime(writer: anytype, indent: usize, time: hdoc.Time) !void { + try writeIndent(writer, indent); + try writer.print("hour: {}\n", .{time.hour}); + try writeIndent(writer, indent); + try writer.print("minute: {}\n", .{time.minute}); + try writeIndent(writer, indent); + try writer.print("second: {}\n", .{time.second}); + try writeIndent(writer, indent); + try writer.print("microsecond: {}\n", .{time.microsecond}); +} + +fn dumpDateTime(writer: anytype, indent: usize, datetime: hdoc.DateTime) !void { + try writeIndent(writer, indent); + try writer.writeAll("date:\n"); + try dumpDate(writer, indent + indent_step, datetime.date); + try writeIndent(writer, indent); + try writer.writeAll("time:\n"); + try dumpTime(writer, indent + indent_step, datetime.time); 
+} + +fn dumpFormattedDate(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { + try writeIndent(writer, indent); + try writer.writeAll("value:\n"); + try dumpDate(writer, indent + indent_step, formatted.value); + try writeIndent(writer, indent); + try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +} + +fn dumpFormattedTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { + try writeIndent(writer, indent); + try writer.writeAll("value:\n"); + try dumpTime(writer, indent + indent_step, formatted.value); + try writeIndent(writer, indent); + try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +} + +fn dumpFormattedDateTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.DateTime)) !void { + try writeIndent(writer, indent); + try writer.writeAll("value:\n"); + try dumpDateTime(writer, indent + indent_step, formatted.value); + try writeIndent(writer, indent); + try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +} + +fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.SpanContent) !void { + switch (content) { + .text => |text| { + try writeIndent(writer, indent); + try writer.writeAll("text: "); + try writeStringValue(writer, text); + try writer.writeByte('\n'); + }, + .date => |date| { + try writeIndent(writer, indent); + try writer.writeAll("date:\n"); + try dumpFormattedDate(writer, indent + indent_step, date); + }, + .time => |time| { + try writeIndent(writer, indent); + try writer.writeAll("time:\n"); + try dumpFormattedTime(writer, indent + indent_step, time); + }, + .datetime => |datetime| { + try writeIndent(writer, indent); + try writer.writeAll("datetime:\n"); + try dumpFormattedDateTime(writer, indent + indent_step, datetime); + }, + } +} + +fn dumpLink(writer: anytype, indent: usize, link: hdoc.Link) !void { + switch (link) { + .none => { + try writeIndent(writer, indent); + try writer.writeAll("link: 
none\n"); + }, + .ref => |value| { + try writeIndent(writer, indent); + try writer.writeAll("link:\n"); + try writeIndent(writer, indent + indent_step); + try writer.writeAll("ref: "); + try writeStringValue(writer, value); + try writer.writeByte('\n'); + }, + .uri => |value| { + try writeIndent(writer, indent); + try writer.writeAll("link:\n"); + try writeIndent(writer, indent + indent_step); + try writer.writeAll("uri: "); + try writeStringValue(writer, value); + try writer.writeByte('\n'); + }, + } +} + +fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { + try writeIndent(writer, indent); + try writer.writeAll("content:\n"); + try dumpSpanContent(writer, indent + indent_step, span.content); + try dumpOptionalStringField(writer, indent, "lang", span.lang); + try dumpBoolField(writer, indent, "em", span.em); + try dumpBoolField(writer, indent, "mono", span.mono); + try dumpBoolField(writer, indent, "strike", span.strike); + try dumpBoolField(writer, indent, "sub", span.sub); + try dumpBoolField(writer, indent, "sup", span.sup); + try dumpLink(writer, indent, span.link); + try dumpOptionalStringField(writer, indent, "syntax", span.syntax); +} + +fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { + try writeIndent(writer, indent); + if (spans.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (spans) |span| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpSpan(writer, indent + indent_step * 2, span); + } +} + +fn dumpListItem(writer: anytype, indent: usize, item: hdoc.Block.ListItem) !void { + try dumpOptionalStringField(writer, indent, "lang", item.lang); + try dumpSpanListField(writer, indent, "content", item.content); +} + +fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) !void { + try writeIndent(writer, indent); + if (items.len == 0) 
{ + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (items) |item| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpListItem(writer, indent + indent_step * 2, item); + } +} + +fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { + try dumpOptionalStringField(writer, indent, "lang", cell.lang); + try dumpOptionalNumberField(writer, indent, "colspan", cell.colspan); + try dumpSpanListField(writer, indent, "content", cell.content); +} + +fn dumpTableCellsField(writer: anytype, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) !void { + try writeIndent(writer, indent); + if (cells.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (cells) |cell| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpTableCell(writer, indent + indent_step * 2, cell); + } +} + +fn dumpTableColumns(writer: anytype, indent: usize, columns: hdoc.Block.TableColumns) !void { + try dumpOptionalStringField(writer, indent, "lang", columns.lang); + try dumpTableCellsField(writer, indent, "cells", columns.cells); +} + +fn dumpTableDataRow(writer: anytype, indent: usize, row: hdoc.Block.TableDataRow) !void { + try dumpOptionalStringField(writer, indent, "lang", row.lang); + try dumpOptionalStringField(writer, indent, "title", row.title); + try dumpTableCellsField(writer, indent, "cells", row.cells); +} + +fn dumpTableGroup(writer: anytype, indent: usize, group: hdoc.Block.TableGroup) !void { + try dumpOptionalStringField(writer, indent, "lang", group.lang); + try dumpSpanListField(writer, indent, "content", group.content); +} + +fn dumpTableRow(writer: anytype, indent: usize, row: hdoc.Block.TableRow) !void { + switch (row) { + .columns => |columns| { + try writeIndent(writer, indent); + try writer.writeAll("columns:\n"); + try dumpTableColumns(writer, indent + 
indent_step, columns); + }, + .row => |data_row| { + try writeIndent(writer, indent); + try writer.writeAll("row:\n"); + try dumpTableDataRow(writer, indent + indent_step, data_row); + }, + .group => |group| { + try writeIndent(writer, indent); + try writer.writeAll("group:\n"); + try dumpTableGroup(writer, indent + indent_step, group); + }, + } +} + +fn dumpTableRowsField(writer: anytype, indent: usize, key: []const u8, rows: []const hdoc.Block.TableRow) !void { + try writeIndent(writer, indent); + if (rows.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (rows) |row| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpTableRow(writer, indent + indent_step * 2, row); + } +} + +fn dumpBlock(writer: anytype, indent: usize, block: hdoc.Block) !void { + switch (block) { + .heading => |heading| { + try writeIndent(writer, indent); + try writer.writeAll("heading:\n"); + try dumpEnumField(writer, indent + indent_step, "level", heading.level); + try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); + try dumpSpanListField(writer, indent + indent_step, "content", heading.content); + }, + .paragraph => |paragraph| { + try writeIndent(writer, indent); + try writer.writeAll("paragraph:\n"); + try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); + try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); + try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); + }, + .list => |list| { + try writeIndent(writer, indent); + try writer.writeAll("list:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); + try dumpListItemsField(writer, indent + indent_step, "items", list.items); + }, + .image => |image| { + try writeIndent(writer, indent); + try 
writer.writeAll("image:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); + try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); + try dumpOptionalStringField(writer, indent + indent_step, "path", image.path); + try dumpSpanListField(writer, indent + indent_step, "content", image.content); + }, + .preformatted => |preformatted| { + try writeIndent(writer, indent); + try writer.writeAll("preformatted:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); + try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); + try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); + }, + .toc => |toc| { + try writeIndent(writer, indent); + try writer.writeAll("toc:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); + }, + .table => |table| { + try writeIndent(writer, indent); + try writer.writeAll("table:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); + try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); + }, + } +} + +fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) !void { + try writeIndent(writer, indent); + if (blocks.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (blocks) |block| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpBlock(writer, indent + indent_step * 2, block); + } +} + +fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, values: []?[]const u8) !void { + try writeIndent(writer, indent); + if (values.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (values) |value| { + try 
writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); + } +} + +fn dumpOptionalDateTimeField(writer: anytype, indent: usize, key: []const u8, value: ?hdoc.DateTime) !void { + try writeIndent(writer, indent); + if (value) |datetime| { + try writer.print("{s}:\n", .{key}); + try dumpDateTime(writer, indent + indent_step, datetime); + } else { + try writer.print("{s}: null\n", .{key}); + } +} + +fn dumpDocument(writer: anytype, doc: *const hdoc.Document) !void { + try writer.writeAll("document:\n"); + try dumpVersion(writer, indent_step, doc.version); + try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); + try dumpOptionalStringField(writer, indent_step, "title", doc.title); + try dumpOptionalStringField(writer, indent_step, "author", doc.author); + try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); + try dumpBlockListField(writer, indent_step, "contents", doc.contents); + try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); +} + +test "dumpDocument escapes string values" { + const title = "Doc \"Title\"\n"; + const span_text = "Hello \"world\"\n"; + const link_ref = "section \"A\""; + const id_value = "id:1\n"; + + var doc: hdoc.Document = .{ + .arena = std.heap.ArenaAllocator.init(std.testing.allocator), + .version = .{ .major = 1, .minor = 2 }, + .contents = &.{}, + .ids = &.{}, + .lang = null, + .title = title, + .author = null, + .date = null, + }; + defer doc.deinit(); + + const arena_alloc = doc.arena.allocator(); + + const spans = try arena_alloc.alloc(hdoc.Span, 1); + spans[0] = .{ + .content = .{ .text = span_text }, + .link = .{ .ref = link_ref }, + }; + + const blocks = try arena_alloc.alloc(hdoc.Block, 1); + blocks[0] = .{ + .heading = .{ + .level = .h1, + .lang = null, + .content = spans, + }, + }; + doc.contents = blocks; + + const ids = try arena_alloc.alloc(?[]const u8, 1); + ids[0] = id_value; + doc.ids = 
ids; + + var buffer: std.ArrayList(u8) = .empty; + defer buffer.deinit(std.testing.allocator); + + try dumpDocument(buffer.writer(std.testing.allocator), &doc); + const output = buffer.items; + + const expected_title = try std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); + defer std.testing.allocator.free(expected_title); + try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); + + const expected_span = try std.fmt.allocPrint(std.testing.allocator, "text: \"{f}\"\n", .{std.zig.fmtString(span_text)}); + defer std.testing.allocator.free(expected_span); + try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); + + const expected_link = try std.fmt.allocPrint(std.testing.allocator, "ref: \"{f}\"\n", .{std.zig.fmtString(link_ref)}); + defer std.testing.allocator.free(expected_link); + try std.testing.expect(std.mem.indexOf(u8, output, expected_link) != null); + + const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value)}); + defer std.testing.allocator.free(expected_id); + try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); +} + pub fn main() !u8 { defer if (builtin.mode == .Debug) { std.debug.assert(debug_allocator.deinit() == .ok); @@ -35,7 +484,8 @@ pub fn main() !u8 { if (diagnostics.has_error()) return 1; - // TODO: Implement dumping of "parsed " + const stdout = std.fs.File.stdout().deprecatedWriter(); + try dumpDocument(stdout, &parsed); return 0; } From 5ccfcbf3fd5ee8ed6e3ee989b9d84b4f18e5d368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 14:22:39 +0100 Subject: [PATCH 014/116] Prepares node translation by stubbing out block layer. 
--- src/hyperdoc.zig | 107 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 32aacc9..2956179 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -377,11 +377,109 @@ pub const SemanticAnalyzer = struct { fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes }!struct { Block, ?[]const u8 } { std.debug.assert(node.type != .hdoc); - _ = sema; + switch (node.type) { + .hdoc => unreachable, + + .h1, .h2, .h3 => { + const heading, const id = try sema.translate_heading_node(node); + return .{ .{ .heading = heading }, id }; + }, + .p, .note, .warning, .danger, .tip, .quote, .spoiler => { + const paragraph, const id = try sema.translate_paragraph_node(node); + return .{ .{ .paragraph = paragraph }, id }; + }, + .ul, .ol => { + const list, const id = try sema.translate_list_node(node); + return .{ .{ .list = list }, id }; + }, + .img => { + const image, const id = try sema.translate_image_node(node); + return .{ .{ .image = image }, id }; + }, + .pre => { + const preformatted, const id = try sema.translate_preformatted_node(node); + return .{ .{ .preformatted = preformatted }, id }; + }, + .toc => { + const toc, const id = try sema.translate_toc_node(node); + return .{ .{ .toc = toc }, id }; + }, + .table => { + const table, const id = try sema.translate_table_node(node); + return .{ .{ .table = table }, id }; + }, + + .unknown_block, .unknown_inline => { + try sema.emit_diagnostic(.{ .unknown_block_type = .{ .name = sema.code[node.location.offset .. 
node.location.offset + node.location.length] } }, node.location.offset); + return error.InvalidNodeType; + }, + + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", + .@"\\time", + .@"\\date", + .@"\\datetime", + .text, + .columns, + .group, + .row, + .td, + .li, + => { + try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location.offset); + return error.InvalidNodeType; + }, + } return error.InvalidNodeType; } + fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; 
@@ -448,7 +546,7 @@ pub const SemanticAnalyzer = struct { } return switch (T) { - []const u8 => attrib.value, + []const u8 => attrib.value, Version => Version.parse(attrib.value) catch return error.InvalidValue, DateTime => DateTime.parse(attrib.value) catch return error.InvalidValue, @@ -1144,6 +1242,7 @@ pub const Diagnostic = struct { pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; + pub const InvalidBlockError = struct { name: []const u8 }; pub const Code = union(enum) { // errors: @@ -1157,6 +1256,8 @@ pub const Diagnostic = struct { duplicate_hdoc_header: DuplicateHdocHeader, missing_attribute: NodeAttributeError, invalid_attribute: NodeAttributeError, + unknown_block_type: InvalidBlockError, + invalid_block_type: InvalidBlockError, // warnings: unknown_attribute: NodeAttributeError, @@ -1178,6 +1279,8 @@ pub const Diagnostic = struct { .duplicate_hdoc_header, .invalid_attribute, .missing_attribute, + .unknown_block_type, + .invalid_block_type, => .@"error", .unknown_attribute, From c9b32d521dbe283f1fa6a5cfb79fd1f212e60ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 25 Dec 2025 23:22:36 +0100 Subject: [PATCH 015/116] Streamlines some parts of the processor to improve the code structure. 
--- src/hyperdoc.zig | 117 +++++++++++++++++++++++++++++++++++----------- src/testsuite.zig | 4 +- 2 files changed, 91 insertions(+), 30 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2956179..08c1b6f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -346,6 +346,10 @@ pub const SemanticAnalyzer = struct { error.InvalidNodeType, error.BadAttributes => { return; }, + error.Unimplemented => { + std.log.warn("unimplemented translation of {} node", .{node.type}); + return; + }, }; try sema.blocks.append(sema.arena, block); @@ -374,7 +378,7 @@ pub const SemanticAnalyzer = struct { }; } - fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes }!struct { Block, ?[]const u8 } { + fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?[]const u8 } { std.debug.assert(node.type != .hdoc); switch (node.type) { @@ -439,45 +443,100 @@ pub const SemanticAnalyzer = struct { } fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?[]const u8 } { + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?[]const u8 = null, + }); + + const heading: Block.Heading = .{ + .level = switch (node.type) { + .h1 => .h1, + .h2 => .h2, + .h3 => .h3, + else => unreachable, + }, + .lang = attrs.lang, + .content = try sema.translate_inline_list(node.body), + }; + + return .{ heading, attrs.id }; + } + + fn translate_inline_list(sema: *SemanticAnalyzer, body: Parser.Node.Body) error{ OutOfMemory, Unimplemented }![]Span { + switch (body) { + .empty => return &.{}, + + .string => { + std.log.warn("TODO: Implement string span translation", .{}); + return error.Unimplemented; + }, + .verbatim => { + std.log.warn("TODO: Implement verbatim span translation", .{}); + return error.Unimplemented; + }, + + .list => { + var spans: std.ArrayList(Span) = 
.empty; + errdefer spans.deinit(sema.arena); + + // TODO: Insert a space span between two regular text spans if they are not consecutive to each other. + + for (body.list) |child_node| { + const span = try sema.translate_span_node(child_node); + try spans.append(sema.arena, span); + } + + // TODO: Compact spans by joining spans with equal properties + + return try spans.toOwnedSlice(sema.arena); + }, + } + } + + fn translate_span_node(sema: *SemanticAnalyzer, node: Parser.Node) !Span { + // _ = sema; - _ = node; - @panic("Not yet implemented"); + std.log.warn("TODO: Translate spans of type {}", .{node.type}); + + return .{ + .content = .{ .text = "???" }, + }; } fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return 
error.Unimplemented; // TODO: Implement this node type } fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { @@ -545,13 +604,15 @@ pub const SemanticAnalyzer = struct { return try sema.cast_value(attrib, @typeInfo(T).optional.child); } + const value = try sema.unescape_string(attrib.value); + return switch (T) { - []const u8 => attrib.value, + []const u8 => value, - Version => Version.parse(attrib.value) catch return error.InvalidValue, - DateTime => DateTime.parse(attrib.value) catch return error.InvalidValue, - Date => Date.parse(attrib.value) catch return error.InvalidValue, - Time => Time.parse(attrib.value) catch return error.InvalidValue, + Version => Version.parse(value) catch return error.InvalidValue, + DateTime => DateTime.parse(value) catch return error.InvalidValue, + Date => Date.parse(value) catch return error.InvalidValue, + Time => Time.parse(value) catch return error.InvalidValue, else => @compileError("Unsupported attribute type: " ++ @typeName(T)), }; @@ -579,6 +640,18 @@ pub const SemanticAnalyzer = struct { return .{ .line = line, .column = column }; } + + /// Accepts a string literal, including the surrounding quotes. + pub fn unescape_string(sema: *SemanticAnalyzer, token: Parser.Token) error{OutOfMemory}![]const u8 { + std.debug.assert(token.text.len >= 2); + std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"'); + + _ = sema; + // TODO: Implement unescaping logic here. + + // For now, we just return the raw text. + return token.text[1 .. 
token.text.len - 1]; + } }; pub const Parser = struct { @@ -641,7 +714,7 @@ pub const Parser = struct { } gop_entry.value_ptr.* = .{ .location = attr_location, - .value = try parser.unescape_string(attr_value), + .value = attr_value, }; if (!parser.try_accept_char(',')) { @@ -1009,18 +1082,6 @@ pub const Parser = struct { return parser.offset >= parser.code.len; } - /// Accepts a string literal, including the surrounding quotes. - pub fn unescape_string(parser: *Parser, token: Token) error{OutOfMemory}![]const u8 { - std.debug.assert(token.text.len >= 2); - std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"'); - - _ = parser; - // TODO: Implement unescaping logic here. - - // For now, we just return the raw text. - return token.text[1 .. token.text.len - 1]; - } - pub fn location(parser: *Parser, start: usize, end: ?usize) Location { return .{ .offset = start, .length = (end orelse parser.offset) - start }; } @@ -1218,7 +1279,7 @@ pub const Parser = struct { pub const Attribute = struct { location: Location, - value: []const u8, + value: Token, }; }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 634c428..a0700b4 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -114,10 +114,10 @@ test "parser handles attributes and empty bodies" { try std.testing.expectEqual(@as(usize, 2), node.attributes.count()); const title = node.attributes.get("title") orelse return error.TestExpectedEqual; - try std.testing.expectEqualStrings("Hello", title.value); + try std.testing.expectEqualStrings("\"Hello\"", title.value.text); const author = node.attributes.get("author") orelse return error.TestExpectedEqual; - try std.testing.expectEqualStrings("World", author.value); + try std.testing.expectEqualStrings("\"World\"", author.value.text); try std.testing.expect(node.body == .empty); } From 5979ab79ab27e54bdb02ff80b3376d1ae8a655b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 25 Dec 2025 23:56:33 +0100 
Subject: [PATCH 016/116] Vibecoded: Implements date/time parsing, improves spec --- docs/specification.md | 72 ++++++++++++++++++++++------- src/hyperdoc.zig | 103 ++++++++++++++++++++++++++++++++++++++++-- src/testsuite.zig | 66 +++++++++++++++++++++++++++ 3 files changed, 220 insertions(+), 21 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 4c00749..4494022 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -99,23 +99,23 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. | -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | A date-time value using the format specified below (intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601)) | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. 
| -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | | +| Attribute | Required | Allowed Values | Description | +| --------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. | +| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | +| `title` | No | *Any* | Sets the title of the document or the table row. | +| `author` | No | *Any* | Sets the author of the document. | +| `date` | No | A date-time value using the format specified below (a conservative intersection of [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), compatible with both) | Sets the authoring date of the document. | +| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. 
| +| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | | ## Semantic Structure @@ -342,3 +342,41 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. +## Date/Time Formatting + +All date/time values MUST use the formats defined in this section. This is a conservative, interoperable intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), so values that conform here are valid under both specifications. Digits are ASCII decimal unless stated otherwise. + +### Date Format + +Date strings MUST follow `YYYY-MM-DD`. + +- `YYYY` is a year with one or more digits. +- `MM` is a two-digit month in the range `01` to `12`. +- `DD` is a two-digit day in the range `01` to `31`. +- The `-` separators are mandatory. + +Examples: `2025-12-25`, `1-01-01`. + +### Time Format + +Time strings MUST follow `hh:mm:ss` with a required time zone. + +- `hh`, `mm`, `ss` are two-digit hour, minute, second fields. +- Hour MUST be in `00` to `23`, minute and second MUST be in `00` to `59`. +- An optional fractional seconds component MAY follow the seconds field as `.` plus + 1, 2, 3, 6, or 9 digits. +- The fractional separator MUST be `.`. Comma is not allowed. 
+- A time zone is required and MUST be either `Z` (UTC) or a numeric offset + in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. +- Offset hours MUST be in `00` to `23`, offset minutes MUST be in `00` to `59`. + +Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`. + +### Date/Time Format + +Date/time strings MUST combine a date and time with a literal `T`. + +- Format: `YYYY-MM-DD` + `T` + `hh:mm:ss` (with optional fraction and required zone). + +Examples: `2025-12-25T22:31:50.13+01:00`, `2025-12-25T21:31:43Z`. + diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 08c1b6f..84514ee 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -211,8 +211,31 @@ pub const Date = struct { day: u5, // 1-31 pub fn parse(text: []const u8) !Date { - _ = text; - @panic("TODO: Implement this"); + const first_dash = std.mem.indexOfScalar(u8, text, '-') orelse return error.InvalidValue; + const tail = text[first_dash + 1 ..]; + const second_dash_rel = std.mem.indexOfScalar(u8, tail, '-') orelse return error.InvalidValue; + const second_dash = first_dash + 1 + second_dash_rel; + + const year_text = text[0..first_dash]; + const month_text = text[first_dash + 1 .. 
second_dash]; + const day_text = text[second_dash + 1 ..]; + + if (year_text.len == 0 or month_text.len != 2 or day_text.len != 2) return error.InvalidValue; + + const year_value = std.fmt.parseInt(u32, year_text, 10) catch return error.InvalidValue; + if (year_value > std.math.maxInt(i32)) return error.InvalidValue; + + const month_value = std.fmt.parseInt(u8, month_text, 10) catch return error.InvalidValue; + const day_value = std.fmt.parseInt(u8, day_text, 10) catch return error.InvalidValue; + + if (month_value < 1 or month_value > 12) return error.InvalidValue; + if (day_value < 1 or day_value > 31) return error.InvalidValue; + + return .{ + .year = @intCast(year_value), + .month = @intCast(month_value), + .day = @intCast(day_value), + }; } }; @@ -231,10 +254,82 @@ pub const Time = struct { minute: u6, // 0-59 second: u6, // 0-59 microsecond: u20, // 0-999999 + zone_offset: i32, // in minutes pub fn parse(text: []const u8) !Time { - _ = text; - @panic("TODO: Implement this"); + if (text.len < 9) return error.InvalidValue; + + const hour = std.fmt.parseInt(u8, text[0..2], 10) catch return error.InvalidValue; + if (text[2] != ':') return error.InvalidValue; + const minute = std.fmt.parseInt(u8, text[3..5], 10) catch return error.InvalidValue; + if (text[5] != ':') return error.InvalidValue; + const second = std.fmt.parseInt(u8, text[6..8], 10) catch return error.InvalidValue; + + if (hour > 23 or minute > 59 or second > 59) return error.InvalidValue; + + var index: usize = 8; + var microsecond: u20 = 0; + + if (index >= text.len) return error.InvalidValue; + + if (text[index] == '.') { + const start = index + 1; + var end = start; + while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {} + if (end == start) return error.InvalidValue; + + const fraction_value = std.fmt.parseInt(u64, text[start..end], 10) catch return error.InvalidValue; + microsecond = fractionToMicrosecond(end - start, fraction_value) orelse return error.InvalidValue; + index = 
end; + } + + if (index >= text.len) return error.InvalidValue; + + if (text[index] == 'Z') { + if (index + 1 != text.len) return error.InvalidValue; + return .{ + .hour = @intCast(hour), + .minute = @intCast(minute), + .second = @intCast(second), + .microsecond = microsecond, + .zone_offset = 0, + }; + } + + const sign_char = text[index]; + if (sign_char != '+' and sign_char != '-') return error.InvalidValue; + const sign: i32 = if (sign_char == '+') 1 else -1; + + if (text.len - index != 6) return error.InvalidValue; + const zone_hour = std.fmt.parseInt(u8, text[index + 1 .. index + 3], 10) catch return error.InvalidValue; + if (text[index + 3] != ':') return error.InvalidValue; + const zone_minute = std.fmt.parseInt(u8, text[index + 4 .. index + 6], 10) catch return error.InvalidValue; + + if (zone_hour > 23 or zone_minute > 59) return error.InvalidValue; + + const zone_total: u16 = @as(u16, zone_hour) * 60 + zone_minute; + const offset_minutes: i32 = sign * @as(i32, zone_total); + + return .{ + .hour = @intCast(hour), + .minute = @intCast(minute), + .second = @intCast(second), + .microsecond = microsecond, + .zone_offset = offset_minutes, + }; + } + + fn fractionToMicrosecond(len: usize, value: u64) ?u20 { + const micro: u64 = switch (len) { + 1 => value * 100_000, + 2 => value * 10_000, + 3 => value * 1_000, + 6 => value, + 9 => value / 1_000, + else => return null, + }; + if (micro > 999_999) return null; + return @intCast(micro); } }; diff --git a/src/testsuite.zig b/src/testsuite.zig index a0700b4..0212271 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -339,3 +339,69 @@ test "parser maps diagnostic locations" { try std.testing.expectEqual(@as(u32, 3), loc.line); try std.testing.expectEqual(@as(u32, 1), loc.column); } + +test "Version.parse accepts dotted versions" { + const version = try hdoc.Version.parse("2.0"); + try std.testing.expectEqual(@as(u16, 2), version.major); + try std.testing.expectEqual(@as(u16, 0), version.minor); + + try 
std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2.")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2.0.1")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse(".1")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2.a")); +} + +test "Date.parse accepts ISO dates" { + const date = try hdoc.Date.parse("2025-12-25"); + try std.testing.expectEqual(@as(i32, 2025), date.year); + try std.testing.expectEqual(@as(u4, 12), date.month); + try std.testing.expectEqual(@as(u5, 25), date.day); + + const short_year = try hdoc.Date.parse("1-01-01"); + try std.testing.expectEqual(@as(i32, 1), short_year.year); + try std.testing.expectEqual(@as(u4, 1), short_year.month); + try std.testing.expectEqual(@as(u5, 1), short_year.day); + + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-1-01")); + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-13-01")); + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-12-32")); +} + +test "Time.parse accepts ISO times with zones" { + const utc = try hdoc.Time.parse("22:30:46Z"); + try std.testing.expectEqual(@as(u5, 22), utc.hour); + try std.testing.expectEqual(@as(u6, 30), utc.minute); + try std.testing.expectEqual(@as(u6, 46), utc.second); + try std.testing.expectEqual(@as(u20, 0), utc.microsecond); + try std.testing.expectEqual(@as(i32, 0), utc.zone_offset); + + const fractional = try hdoc.Time.parse("22:30:46.136+01:00"); + try std.testing.expectEqual(@as(u20, 136_000), fractional.microsecond); + try std.testing.expectEqual(@as(i32, 60), fractional.zone_offset); + + const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30"); + try std.testing.expectEqual(@as(u20, 136_797), nanos.microsecond); + try std.testing.expectEqual(@as(i32, -330), nanos.zone_offset); + + try std.testing.expectError(error.InvalidValue, 
hdoc.Time.parse("21:30:46,1Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("22:30:46")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("24:00:00Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:60:00Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:60Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:59.1234Z")); +} + +test "DateTime.parse accepts ISO date-time" { + const datetime = try hdoc.DateTime.parse("2025-12-25T22:31:50.13+01:00"); + try std.testing.expectEqual(@as(i32, 2025), datetime.date.year); + try std.testing.expectEqual(@as(u4, 12), datetime.date.month); + try std.testing.expectEqual(@as(u5, 25), datetime.date.day); + try std.testing.expectEqual(@as(u5, 22), datetime.time.hour); + try std.testing.expectEqual(@as(u6, 31), datetime.time.minute); + try std.testing.expectEqual(@as(u6, 50), datetime.time.second); + try std.testing.expectEqual(@as(u20, 130_000), datetime.time.microsecond); + try std.testing.expectEqual(@as(i32, 60), datetime.time.zone_offset); + + try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z")); +} From 53f09469bea45299ceb14b24bdde991b6d60b5ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 00:15:15 +0100 Subject: [PATCH 017/116] Improves spec a lot --- docs/specification.md | 429 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 397 insertions(+), 32 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 4494022..d3c0959 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -2,7 +2,7 @@ This specification describes the document markup language "HyperDoc 2.0", that tries to be a simple to parse, easy to write markup language for hypertext documents. -It sits somewhat between LaTeX and Markdown and tries to be way simpler to parse than Markdown, but keep useful semantics around. 
+It sits in a space where it's unambiguous to parse, but still relatively convenient to write.
 
 ## Syntax Overview
 
@@ -23,51 +23,416 @@ pre(syntax="c"):
 | }
 ```
 
-## Grammar
+## Document encoding
 
-This grammar describes the hypertext format.
+This section defines the required byte-level encoding and line structure of HyperDoc documents.
 
-Short notes on grammar notation:
+### Character encoding
 
-- `{ ... }` is a repetition
-- `[ ... ]` is an option
-- `a | b | c` is alternatives
-- `( ... )` is a group
-- `"foo"` is a literal token sequence, no escape sequences (So `"\"` is a single backslash)
-- `/.../` is a regex
-- Whitespace is assumed to be ignored between tokens unless matched by a literal or regex, so tokens are typically separated by whitespace
-- Upper case elements are roughly tokens, while lowercase elements are rules.
+- A HyperDoc document **MUST** be encoded as **UTF-8**.
+- A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences.
+
+**Byte Order Mark (BOM):**
+
+- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as U+FEFF at the beginning of the document.
+
+### Line endings
+
+- Lines **MUST** be terminated by either:
+  - `LF` (U+000A), or
+  - `CRLF` (U+000D U+000A).
+- A bare `CR` **MUST NOT** appear except as part of a `CRLF` sequence.
+
+A document **MAY** mix `LF` and `CRLF` line endings, but tooling **SHOULD** normalize to a single convention when rewriting documents.
+
+The canonical line ending emitted by tooling **SHOULD** be `LF`.
+
+### Control characters
+
+- The only permitted control character **within a line** is:
+  - `TAB` (U+0009).
+- Apart from line terminators (`LF`, and `CR` only as part of `CRLF`), all other Unicode control characters (General Category `Cc`) **MUST NOT** appear anywhere in a HyperDoc document.
+
+### Unicode text
+
+- Apart from the restrictions above, arbitrary Unicode text is allowed. 
+ +### Recommendations for writing systems and directionality (non-normative) + +HyperDoc does not define special handling for right-to-left scripts, bidirectional layout, or writing system segmentation. For readability and to reduce ambiguity across renderers and editors: + +- Authors **SHOULD** keep each paragraph primarily in a **single writing system/directionality** where practical. +- Tooling **MAY** warn when a paragraph mixes strongly different directional scripts or contains invisible bidirectional formatting characters (e.g., bidi overrides/isolates), since these can be confusing in editors and reviews. + +## Syntax + +This chapter defines the **syntactic structure** of HyperDoc documents: how characters form tokens, how tokens form **nodes**, and how nodes nest. It intentionally does **not** define meaning (required elements, allowed attributes per node type, ID/refs, allowed escape sequences, etc.). Those are handled in later chapters as **semantic validity** rules. + +A HyperDoc document is a sequence of **nodes**. Each node has: + +- a **node name** (identifier), +- an optional **attribute list** `(key="value", ...)`, +- and a mandatory **body**, which is one of: + - `;` empty body, + - `"..."` string literal body, + - `:` verbatim body (one or more `|` lines), + - `{ ... }` list body. + +A list body `{ ... }` is parsed in one of two modes: + +- **Block-list mode**: the list contains nested nodes. +- **Inline-list mode**: the list contains a token stream of text items, escape tokens, inline nodes, and balanced brace groups. + +The grammar below is syntax-only and intentionally leaves the choice between block-list and inline-list content to an **external disambiguation rule**. 
+ +### Grammar (EBNF) ```ebnf -document := { block } +(* ---------- Top level ---------- *) + +document ::= ws , { node , ws } , EOF ; + +(* ---------- Nodes ---------- *) + +node ::= node_name , ws , [ attribute_list , ws ] , body ; + +body ::= empty_body + | string_body + | verbatim_body + | list_body ; + +empty_body ::= ";" ; + +string_body ::= string_literal ; + +verbatim_body ::= ":" , { ws , piped_line } ; + +list_body ::= "{" , list_content , "}" ; + +(* + IMPORTANT: list_content is intentionally ambiguous. + A conforming parser chooses either inline_content or block_content by an + EXTERNAL rule (see “Disambiguation for list bodies”). +*) +list_content ::= inline_content | block_content ; + + +(* ---------- Attributes ---------- *) + +attribute_list ::= "(" , ws , + [ attribute , + { ws , "," , ws , attribute } , + [ ws , "," ] (* trailing comma allowed *) + ] , + ws , ")" ; + +attribute ::= attr_key , ws , "=" , ws , string_literal ; + +(* + Attribute keys may include '-' and ':' in addition to node-name characters. +*) +attr_key ::= attr_key_char , { attr_key_char } ; + +attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | ":" | "\" ; + + +(* ---------- Block-list content ---------- *) -block := WORD [ attribute_list ] body +block_content ::= ws , { node , ws } ; -body := ";" | list | verbatim | STRING -verbatim := ":" "\n" { VERBATIM_LINE } -list := "{" { escape | inline | block | WORD } "}" -escape := "\\" | "\{" | "\}" -inline := "\" WORD [ attribute_list ] body +(* ---------- Inline-list content ---------- *) -attribute_list := "(" [ attribute { "," attribute } ] ")" -attribute := WORD "=" STRING +inline_content ::= ws , { inline_item , ws } ; -STRING := /"(\\.|[^"\r\n])*"/ -VERBATIM_LINE := /^\s*\|(.*)$/ -WORD := /[^\s\{\}\\\"(),=:]+/ +inline_item ::= word + | escape_text + | inline_node + | inline_group ; + +(* + Balanced braces in inline content are represented as inline_group. 
+ If braces cannot be balanced, they must be written as \{ and \}. +*) +inline_group ::= "{" , inline_content , "}" ; + +(* + Backslash dispatch inside inline content: + - If next char is one of '\', '{', '}', emit escape_text. + - Otherwise begin an inline_node. +*) +escape_text ::= "\" , ( "\" | "{" | "}" ) ; + +inline_node ::= inline_name , ws , [ attribute_list , ws ] , body ; + +(* + Inline node names start with '\' and then continue with node-name characters. +*) +inline_name ::= "\" , node_name_char_no_backslash , { node_name_char } ; + + +(* ---------- Words / node names ---------- *) + +(* + Node names intentionally do NOT include ':' because ':' is also a body marker + (e.g. 'p:' for verbatim body) and adjacency is allowed. +*) +node_name ::= node_name_char , { node_name_char } ; + +node_name_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; + +node_name_char_no_backslash + ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" ; + +word ::= word_char , { word_char } ; + +(* + word_char matches any Unicode scalar value except: + - whitespace + - '{' or '}' + - '\' (because '\' begins escape_text or inline_node) +*) +word_char ::= ? any scalar value except WS, "{", "}", "\" ? ; + + +(* ---------- String literals (syntax only; no escape validation here) ---------- *) + +string_literal ::= "\"" , { string_unit } , "\"" ; + +(* + string_unit is permissive enough that malformed escapes remain parsable, + BUT forbids escaping control characters (including LF/CR/TAB). + Raw TAB is allowed as a normal string_char. +*) +string_unit ::= string_char | "\" , escaped_noncontrol ; + +string_char ::= ? any scalar value except '"', '\', LF, CR ? ; + +escaped_noncontrol + ::= ? any scalar value except control chars (Unicode category Cc) ? ; + + +(* ---------- Verbatim lines ---------- *) + +piped_line ::= "|" , { not_line_end } , line_terminator ; + +not_line_end ::= ? any scalar value except CR and LF ? 
; + +line_terminator ::= LF | CR , LF | EOF ; + + +(* ---------- Whitespace ---------- *) + +ws ::= { WS } ; + +WS ::= " " | "\t" | CR | LF ; + +CR ::= "\r" ; +LF ::= "\n" ; ``` -**NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. +### Additional syntax rules and notes (normative) + +#### 1) Maximal-munch for identifiers + +When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the **longest possible** sequence of allowed identifier characters (maximal munch). This is required because `\` is a legal identifier character and must not be arbitrarily split. + +#### 2) Disambiguation for list bodies (external chooser) + +The production `list_content ::= inline_content | block_content` is resolved by a deterministic, non-backtracking rule: + +1. Before parsing the content of a `{ ... }` list body, the parser **MUST** choose exactly one list mode: **Inline-list mode** or **Block-list mode**. +2. The mode is determined solely from the syntactic **node name token** (not attributes, not body contents, not document state). +3. Required behavior (recovery-friendly): + - If the node name begins with `\`, the parser **MUST** choose **Inline-list mode**. + - If the node name is recognized as a built-in name with a specified list mode, the parser **MUST** choose that mode. + - Otherwise (unknown / misspelled / unsupported node name), the parser **MUST** choose **Inline-list mode**. + +This rule ensures unknown nodes accept rich inline content for typo recovery (e.g. `prre { ... }`). + +#### 3) Inline-list mode: brace balancing and escape-text tokens + +In **Inline-list mode**: + +- `{` and `}` that appear as literal characters in the inline stream are represented structurally as `inline_group` and therefore **must be balanced**. 
+- If braces cannot be balanced, they **must** be written using the escape-text tokens `\{` and `\}`. +- A backslash in inline content is interpreted as: + - one of the three **escape-text tokens** `\\`, `\{`, `\}`, or + - the start of an `inline_node` otherwise. + +The escape-text tokens exist primarily so the three characters `\`, `{`, `}` can be represented literally within inline content without always starting an inline node. -**NOTE:** All attribute values are strings, so numeric-looking values are still expressed as strings (e.g. `depth="1"`). +#### 4) String literals are syntax-only at this stage + +String literals are delimited by `"` and parsed without interpreting escape meanings. This is intentional: documents with malformed or unknown escape sequences remain **syntactically valid**, allowing formatters and other tooling to round-trip source reliably. + +However, the following are **syntactically invalid** inside string literals: + +- raw LF or CR characters (line breaks are not allowed within `"..."`), +- a backslash immediately followed by a **control character** (Unicode General Category `Cc`), which includes TAB. + +(Separately: which escape sequences are *semantically* valid is defined later.) + +#### 5) Verbatim bodies are line-oriented + +In a verbatim body (`:`): + +- The body consists of zero or more `piped_line` entries. +- Each `piped_line` starts with `|` after optional whitespace skipping. +- The content of a verbatim line is everything up to the line terminator; it is not tokenized into nodes. + +A file ending without a final newline is syntactically allowed (`EOF` as a line terminator), though tooling may warn. + +#### 6) Syntactic validity vs semantic validity + +A document is **syntactically valid** if it matches the grammar and the additional syntax rules above (maximal munch, list-mode disambiguation, inline brace balancing, and the string-literal constraints). + +A syntactically valid document may still be **semantically invalid**. 
Semantic validation is defined later and may include rules such as required header nodes, attribute constraints, reference resolution, allowed escape sequences, encoding policy, and disallowed control characters in source text. + +## Escape encoding + +This chapter defines how **escape sequences are interpreted** to produce decoded Unicode text. Escape processing is part of **semantic validation**: a document may be syntactically valid even if it contains unknown or malformed escapes, but it is not semantically valid unless all escapes decode successfully under the rules below. + +HyperDoc documents are UTF-8 text. Unless explicitly stated otherwise, all “characters” in this chapter refer to Unicode scalar values. + +### Scope + +Escape sequences are recognized in two places: + +1. **STRING literals** (the `"..."` body form, and attribute values which are also STRING literals). +2. **Inline escape-text tokens** inside inline-list bodies: `\\`, `\{`, `\}` (these are emitted as text spans by the parser and can be decoded to literal characters during semantic processing). + +No other part of the syntax performs escape decoding (not node names, not verbatim bodies, not block-list structure). + +## Control character policy + +HyperDoc forbids control characters except **LF** and **CR**. + +- A semantically valid document **MUST NOT** contain any Unicode control characters (General Category `Cc`) anywhere **except**: + - U+000A LINE FEED (LF) + - U+000D CARRIAGE RETURN (CR) + +This rule applies both to: + +- the raw document text (source), and +- any decoded text produced from escapes. + +Implications: + +- TAB (U+0009) is forbidden, including if introduced via `\u{9}`. +- NUL (U+0000) is forbidden, including if introduced via `\u{0}`. + +(Structural line breaks in the file may be LF or CRLF or CR as allowed by the syntax rules; decoded strings may contain LF/CR only via escapes.) 
+ +### String literal escape sequences + +#### Overview + +Within a STRING literal, a backslash (`\`) begins an escape sequence. The set of valid escapes is deliberately small. + +A semantic validator/decoder **MUST** accept exactly the escape forms listed below and **MUST** reject all others. + +#### Supported escapes (STRING literals) + +The following escapes are valid inside STRING literals: + +| Escape | Decodes to | +| ---------- | ---------------------------- | +| `\\` | U+005C REVERSE SOLIDUS (`\`) | +| `\"` | U+0022 QUOTATION MARK (`"`) | +| `\n` | U+000A LINE FEED (LF) | +| `\r` | U+000D CARRIAGE RETURN (CR) | +| `\u{H...}` | Unicode scalar value U+H... | + +No other escapes exist. In particular, `\0`, `\xHH`, `\e`, and similar are not part of HyperDoc. + +#### Unicode escape `\u{H...}` + +`H...` is a non-empty sequence of hexadecimal digits (`0–9`, `A–F`, `a–f`) representing a Unicode code point in hexadecimal. + +Rules: + +- The hex sequence **MUST** contain **1 to 6** hex digits. +- The value **MUST** be within `0x0 .. 0x10FFFF` inclusive. +- The value **MUST NOT** be in the surrogate range `0xD800 .. 0xDFFF`. +- The value **MUST NOT** decode to a forbidden control character (see Control character policy). The only allowed controls are LF and CR. + +Notes: + +- Leading zeros are allowed (`\u{000041}` is `A`). +- `\u{20}` is ASCII space. (`\u{032}` is U+0032, the digit `"2"`, because the digits are hexadecimal.) + +#### Invalid escapes (STRING literals) + +A semantic validator/decoder **MUST** reject a document (or at least reject that literal) if any STRING literal contains: + +- an unknown escape (e.g. `\q`, `\uFFFF`, `\x20`, `\t`, `\b`, …), +- an unterminated escape (string ends immediately after `\`), +- a malformed Unicode escape (`\u{}`, missing `{`/`}`, non-hex digits, more than 6 hex digits), +- a Unicode escape outside the valid scalar range or within the surrogate range, +- a Unicode escape that produces a forbidden control character. 
+ +#### Canonical encoding recommendations (non-normative) + +For authors and formatters: + +- Prefer `\\` and `\"` for literal backslash and quote. +- Prefer `\n` and `\r` for LF/CR instead of `\u{A}` / `\u{D}`. +- Prefer the shortest hex form for `\u{...}` without leading zeros unless alignment/readability benefits. + +### Inline escape-text tokens in inline-list bodies + +Inside **inline-list bodies**, the syntax defines three special two-character text tokens: + +- `\\` +- `\{` +- `\}` + +These exist so that inline content can contain literal `\`, `{`, and `}` without always starting an inline node (`\name{...}`) or requiring brace balancing. + +#### Decoding rule + +During semantic text construction, an implementation **MAY** decode these tokens as: + +- `\\` → `\` +- `\{` → `{` +- `\}` → `}` + +This decoding is independent of STRING literal escapes: these tokens occur in inline text streams, not inside `"..."` literals. + +#### Round-tripping note (normative intent) + +A formatter or tooling that aims to preserve the author’s intent **SHOULD** preserve the distinction between: + +- a literal `{`/`}` that is part of a balanced inline group, and +- an escaped brace token `\{`/`\}` that was used to avoid imbalance. + +This distinction matters for reliable reconstruction and for edits that may reflow or restructure inline content. + +### Interaction with syntax + +- Escape decoding is performed **after** syntactic parsing. +- Syntactic parsing of STRING literals is delimiter-based and does not validate escape *meaning*. +- Semantic validation determines whether escapes are valid and produces the decoded Unicode text. + +This separation is intentional: it allows autoformatters to parse and rewrite documents that may contain malformed escapes without losing information, while still allowing strict validators to enforce the escape rules above. + +## Semantic Validity + +> TO BE DONE. 
+> +> - Attribute uniqueness +> - Attribute must be defined on a node +> - Non-optional attributes must be present +> - id is only valid on top-level nodes +> - id must be unique +> - id is case sensitive +> - ref must point to an existing id ## Element Overview | Element | Element Type | Allowed Children | Attributes | | ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------- | -| *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | | `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | | `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | | `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | @@ -76,11 +441,13 @@ WORD := /[^\s\{\}\\\"(),=:]+/ | `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | | `toc` | Block | - | `lang`, \[`id`\], `depth` | | `table` | Block | Table Rows | `lang`, \[`id`\] | +| *Document* | Document | `hdoc`, Blocks | | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | | `columns` | Table Row | `td` ≥ 1 | `lang` | | `group` | Table Row | Text Body | `lang`, | | `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `li` | List Item | Blocks, String, Verbatim | `lang` | | `\em` | Text Body | Text Body | `lang` | | `\mono` | Text Body | Text Body | `lang`, `syntax` | | `\strike` | Text Body | Text Body | `lang` | @@ -125,7 +492,6 @@ All elements have these attributes: | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | | `lang` | Marks the (human) language of the contents of that element. 
This must be an [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag). | - ## Top-Level / Block Elements All top-level elements have these attributes: @@ -379,4 +745,3 @@ Date/time strings MUST combine a date and time with a literal `T`. - Format: `YYYY-MM-DD` + `T` + `hh:mm:ss` (with optional fraction and required zone). Examples: `2025-12-25T22:31:50.13+01:00`, `2025-12-25T21:31:43Z`. - From ba40f2c34f54b1f67fdee3694bfb8075da13aa69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 07:52:19 +0100 Subject: [PATCH 018/116] Refactors Parser.Node.attributes from map to list, so the lexical structure is retained --- src/hyperdoc.zig | 50 ++++++++++++++++++++++------------------------- src/testsuite.zig | 20 +++++++++++-------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 84514ee..d2b7f0f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -652,24 +652,26 @@ pub const SemanticAnalyzer = struct { var any_invalid = false; var found: std.EnumSet(Fields) = .initEmpty(); - for (node.attributes.keys(), node.attributes.values()) |key, attrib| { + for (node.attributes.items) |attrib| { + const key = attrib.name.text; + const fld = std.meta.stringToEnum(Fields, key) orelse { - try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, attrib.name.location.offset); continue; }; if (found.contains(fld)) { - try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, node.location.offset); + try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, attrib.name.location.offset); } found.insert(fld); switch (fld) { - inline else => |tag| @field(attrs, @tagName(tag)) = sema.cast_value(attrib, @FieldType(Attrs, @tagName(tag))) catch |err| switch (err) { + inline else => |tag| @field(attrs, 
@tagName(tag)) = sema.cast_value(attrib.value, @FieldType(Attrs, @tagName(tag))) catch |err| switch (err) { error.OutOfMemory => |e| return e, else => { any_invalid = true; - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, attrib.value.location.offset); continue; }, @@ -694,12 +696,12 @@ pub const SemanticAnalyzer = struct { return attrs; } - fn cast_value(sema: *SemanticAnalyzer, attrib: Parser.Attribute, comptime T: type) error{ OutOfMemory, InvalidValue }!T { + fn cast_value(sema: *SemanticAnalyzer, attrib: Parser.Token, comptime T: type) error{ OutOfMemory, InvalidValue }!T { if (@typeInfo(T) == .optional) { return try sema.cast_value(attrib, @typeInfo(T).optional.child); } - const value = try sema.unescape_string(attrib.value); + const value = try sema.unescape_string(attrib); return switch (T) { []const u8 => value, @@ -788,7 +790,7 @@ pub const Parser = struct { else .unknown_block; - var attributes: std.StringArrayHashMapUnmanaged(Attribute) = .empty; + var attributes: std.ArrayList(Attribute) = .empty; errdefer attributes.deinit(parser.arena); if (parser.try_accept_char('(')) { @@ -797,20 +799,14 @@ pub const Parser = struct { // so we know that the next token must be the attribute name. 
while (true) { - const start = parser.offset; const attr_name = try parser.accept_identifier(); _ = try parser.accept_char('='); const attr_value = try parser.accept_string(); - const attr_location = parser.location(start, parser.offset); - const gop_entry = try attributes.getOrPut(parser.arena, attr_name.text); - if (gop_entry.found_existing) { - emitDiagnostic(parser, .{ .duplicate_attribute = .{ .name = attr_name.text } }, parser.make_diagnostic_location(attr_location.offset)); - } - gop_entry.value_ptr.* = .{ - .location = attr_location, + try attributes.append(parser.arena, .{ + .name = attr_name, .value = attr_value, - }; + }); if (!parser.try_accept_char(',')) { break; @@ -823,7 +819,7 @@ pub const Parser = struct { if (parser.try_accept_char(';')) { // block has empty content return .{ - .location = parser.location(type_ident.position.offset, null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .empty, @@ -840,11 +836,11 @@ pub const Parser = struct { } if (lines.items.len == 0) { - emitDiagnostic(parser, .empty_verbatim_block, parser.make_diagnostic_location(type_ident.position.offset)); + emitDiagnostic(parser, .empty_verbatim_block, parser.make_diagnostic_location(type_ident.location.offset)); } return .{ - .location = parser.location(type_ident.position.offset, null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .{ .verbatim = try lines.toOwnedSlice(parser.arena) }, @@ -855,7 +851,7 @@ pub const Parser = struct { // block has string content return .{ - .location = parser.location(type_ident.position.offset, null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .{ .string = string_body }, @@ -868,7 +864,7 @@ pub const Parser = struct { try parser.accept_block_node_list(); return .{ - .location = parser.location(type_ident.position.offset, 
null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .{ .list = try children.toOwnedSlice(parser.arena) }, @@ -968,7 +964,7 @@ pub const Parser = struct { else => { const word = try parser.accept_word(); try children.append(parser.arena, .{ - .location = word.position, + .location = word.location, .type = .text, .attributes = .empty, .body = .empty, @@ -1184,7 +1180,7 @@ pub const Parser = struct { pub fn slice(parser: *Parser, start: usize, end: usize) Token { return .{ .text = parser.code[start..end], - .position = .{ .offset = start, .length = end - start }, + .location = .{ .offset = start, .length = end - start }, }; } @@ -1221,7 +1217,7 @@ pub const Parser = struct { pub const Token = struct { text: []const u8, - position: Location, + location: Location, }; pub const Location = struct { @@ -1360,7 +1356,7 @@ pub const Parser = struct { pub const Node = struct { location: Location, type: NodeType, - attributes: std.StringArrayHashMapUnmanaged(Attribute), + attributes: std.ArrayList(Attribute) = .empty, body: Body, @@ -1373,7 +1369,7 @@ pub const Parser = struct { }; pub const Attribute = struct { - location: Location, + name: Token, value: Token, }; }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 0212271..5b7ed99 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -49,13 +49,13 @@ test "parser accept identifier and word tokens" { const ident = try parser.accept_identifier(); try std.testing.expectEqualStrings("h1", ident.text); - try std.testing.expectEqual(@as(usize, 0), ident.position.offset); - try std.testing.expectEqual(@as(usize, 2), ident.position.length); + try std.testing.expectEqual(@as(usize, 0), ident.location.offset); + try std.testing.expectEqual(@as(usize, 2), ident.location.length); const word = try parser.accept_word(); try std.testing.expectEqualStrings("word", word.text); - try std.testing.expectEqual(@as(usize, 3), word.position.offset); - try 
std.testing.expectEqual(@as(usize, 4), word.position.length); + try std.testing.expectEqual(@as(usize, 3), word.location.offset); + try std.testing.expectEqual(@as(usize, 4), word.location.length); try std.testing.expectEqual(@as(usize, 7), parser.offset); } @@ -111,12 +111,16 @@ test "parser handles attributes and empty bodies" { const node = try parser.accept_node(.top_level); try std.testing.expectEqual(hdoc.Parser.NodeType.h1, node.type); - try std.testing.expectEqual(@as(usize, 2), node.attributes.count()); + try std.testing.expectEqual(@as(usize, 2), node.attributes.items.len); - const title = node.attributes.get("title") orelse return error.TestExpectedEqual; + const attribs = node.attributes.items; + + const title = attribs[0]; + try std.testing.expectEqualStrings("title", title.name.text); try std.testing.expectEqualStrings("\"Hello\"", title.value.text); - const author = node.attributes.get("author") orelse return error.TestExpectedEqual; + const author = attribs[1]; + try std.testing.expectEqualStrings("author", author.name.text); try std.testing.expectEqualStrings("\"World\"", author.value.text); try std.testing.expect(node.body == .empty); @@ -275,7 +279,7 @@ test "diagnostic codes are emitted for expected samples" { .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, - .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, + // TODO: .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, .{ .code = 
.verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, From 50b409371b2b8aa7113752c889359584876f1711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 07:59:10 +0100 Subject: [PATCH 019/116] Fixes failing test. --- src/hyperdoc.zig | 6 ++++++ src/testsuite.zig | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index d2b7f0f..4a3c94c 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1466,6 +1466,12 @@ pub const Diagnostic = struct { .verbatim_missing_trailing_newline => try w.writeAll("Verbatim line should end with a newline."), .verbatim_missing_space => try w.writeAll("Expected a space after '|' in verbatim line."), .trailing_whitespace => try w.writeAll("Trailing whitespace at end of line."), + + .missing_attribute => |ctx| try w.print("Missing required attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .invalid_attribute => |ctx| try w.print("Invalid value for attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .unknown_attribute => |ctx| try w.print("Unknown attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .unknown_block_type => |ctx| try w.print("Unknown block type '{s}'.", .{ctx.name}), + .invalid_block_type => |ctx| try w.print("Invalid block type '{s}' in this context.", .{ctx.name}), } } }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 5b7ed99..6a79530 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -279,7 +279,7 @@ test "diagnostic codes are emitted for expected samples" { .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, - // TODO: .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = 
&.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, + .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");"} }, .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, .{ .code = .verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, @@ -303,7 +303,13 @@ test "diagnostic codes are emitted for expected samples" { defer owned_doc.deinit(); } - try std.testing.expect(diagnosticsContain(&diagnostics, case.code)); + if (!diagnosticsContain(&diagnostics, case.code)) { + std.log.err("Diagnostics did not contain expected code: '{t}'", .{case.code}); + for (diagnostics.items.items) |item| { + std.log.err(" Emitted diagnostic: {f}", .{item.code}); + } + return error.MissingDiagnosticCode; + } const expected_severity = case.code.severity(); if (expected_severity == .@"error") { From 8f83120190ebc7dd1b863289d9c344baffd41b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 14:05:57 +0100 Subject: [PATCH 020/116] Implements translation of H1 nodes and basic inline contents. 
--- src/hyperdoc.zig | 260 +++++++++++++++++++++++++++++++-------- src/main.zig | 20 +-- test/parser/workset.hdoc | 4 + 3 files changed, 225 insertions(+), 59 deletions(-) create mode 100644 test/parser/workset.hdoc diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 4a3c94c..43b1ffe 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -117,13 +117,6 @@ pub const Block = union(enum) { }; }; -pub const SpanContent = union(enum) { - text: []const u8, - date: FormattedDateTime(Date), - time: FormattedDateTime(Time), - datetime: FormattedDateTime(DateTime), -}; - pub fn FormattedDateTime(comptime DT: type) type { return struct { value: DT, @@ -132,15 +125,47 @@ pub fn FormattedDateTime(comptime DT: type) type { } pub const Span = struct { - content: SpanContent, - lang: ?[]const u8 = null, - em: bool = false, - mono: bool = false, - strike: bool = false, - sub: bool = false, - sup: bool = false, - link: Link = .none, - syntax: ?[]const u8 = null, + pub const Content = union(enum) { + text: []const u8, + date: FormattedDateTime(Date), + time: FormattedDateTime(Time), + datetime: FormattedDateTime(DateTime), + }; + + pub const Attributes = struct { + lang: ?[]const u8 = null, + em: bool = false, + mono: bool = false, + strike: bool = false, + sub: bool = false, + sup: bool = false, + link: Link = .none, + syntax: ?[]const u8 = null, + + pub const Overrides = struct { + lang: ?[]const u8 = null, + em: ?bool = null, + mono: ?bool = null, + strike: ?bool = null, + sub: ?bool = null, + sup: ?bool = null, + link: ?Link = null, + syntax: ?[]const u8 = null, + }; + + pub fn derive(base: Attributes, overlay: Overrides) Attributes { + var new = base; + inline for (@typeInfo(Attributes).@"struct".fields) |fld| { + if (@field(overlay, fld.name)) |new_value| { + @field(new, fld.name) = new_value; + } + } + return new; + } + }; + + content: Content, + attribs: Attributes, }; pub const Link = union(enum) { @@ -551,51 +576,148 @@ pub const SemanticAnalyzer = struct { else => 
unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline_list(node.body), + .content = try sema.translate_inline(node), }; return .{ heading, attrs.id }; } - fn translate_inline_list(sema: *SemanticAnalyzer, body: Parser.Node.Body) error{ OutOfMemory, Unimplemented }![]Span { - switch (body) { - .empty => return &.{}, + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, Unimplemented }![]Span { + var spans: std.ArrayList(Span) = .empty; + errdefer spans.deinit(sema.arena); + + // TODO: Implement automatic space insertion. + // This must be done when two consecutive nodes are separated by a space + + try sema.translate_inline_body(&spans, node.body, .{}); + + // TODO: Compact spans by joining spans with equal properties + + return try spans.toOwnedSlice(sema.arena); + } + + fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes) !void { + switch (node.type) { + .unknown_inline, + .text, + => { + try sema.translate_inline_body(spans, node.body, attribs); + }, + + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", - .string => { - std.log.warn("TODO: Implement string span translation", .{}); - return error.Unimplemented; + .@"\\date", + .@"\\time", + .@"\\datetime", + => { + // TODO: Implement date/time translation }, - .verbatim => { - std.log.warn("TODO: Implement verbatim span translation", .{}); - return error.Unimplemented; + + .hdoc, + .h1, + .h2, + .h3, + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + .ul, + .ol, + .img, + .pre, + .toc, + .table, + .columns, + .group, + .row, + .td, + .li, + .unknown_block, + => @panic("PARSER ERROR: The parser emitted a block node inside an inline context"), + } + } + + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, Unimplemented }!void { + switch (body) { + 
.empty => |location| { + try sema.emit_diagnostic(.empty_inline_body, location.offset); }, - .list => { - var spans: std.ArrayList(Span) = .empty; - errdefer spans.deinit(sema.arena); + .string => |string_body| { + const text = try sema.unescape_string(string_body); - // TODO: Insert a space span between two regular text spans if they are not consecutive to each other. + try spans.append(sema.arena, .{ + .content = .{ .text = text }, + .attribs = attribs, + }); + }, + + .verbatim => |verbatim_lines| { + var text_buffer: std.ArrayList(u8) = .empty; + defer text_buffer.deinit(sema.arena); - for (body.list) |child_node| { - const span = try sema.translate_span_node(child_node); - try spans.append(sema.arena, span); + var size: usize = verbatim_lines.len -| 1; + for (verbatim_lines) |line| { + size += line.text.len; } + try text_buffer.ensureTotalCapacityPrecise(sema.arena, size); + + var first_unpadded = true; + for (verbatim_lines, 0..) |line, index| { + if (index != 0) { + try text_buffer.append(sema.arena, '\n'); + } + std.debug.assert(std.mem.startsWith(u8, line.text, "|")); + + const is_padded = std.mem.startsWith(u8, line.text, "| "); - // TODO: Compact spans by joining spans with equal properties + if (!is_padded) { + if (first_unpadded) { + try sema.emit_diagnostic(.unpadded_verbatim_line, line.location.offset); + first_unpadded = false; + } + } + + const text = if (is_padded) + line.text[2..] 
+ else + line.text[1..]; + + const stripped = std.mem.trimRight(u8, text, " \t"); + if (text.len != stripped.len) { + try sema.emit_diagnostic(.trailing_whitespace_in_verbatim_line, line.location.offset + stripped.len); + } - return try spans.toOwnedSlice(sema.arena); + text_buffer.appendSliceAssumeCapacity(stripped); + } + + try spans.append(sema.arena, .{ + .content = .{ .text = try text_buffer.toOwnedSlice(sema.arena) }, + .attribs = attribs, + }); }, - } - } - fn translate_span_node(sema: *SemanticAnalyzer, node: Parser.Node) !Span { - // - _ = sema; - std.log.warn("TODO: Translate spans of type {}", .{node.type}); + .list => |list| { + for (list) |child_node| { + try sema.translate_inline_node(spans, child_node, attribs); + } + }, - return .{ - .content = .{ .text = "???" }, - }; + .text_span => |text_span| { + try spans.append(sema.arena, .{ + .content = .{ .text = text_span.text }, + .attribs = attribs, + }); + }, + } } fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { @@ -822,7 +944,7 @@ pub const Parser = struct { .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, - .body = .empty, + .body = .{ .empty = parser.location(parser.offset - 1, null) }, }; } @@ -929,6 +1051,15 @@ pub const Parser = struct { '{' => { nesting += 1; parser.offset += 1; + + const token = parser.slice(parser.offset - 1, parser.offset); + try children.append(parser.arena, .{ + .location = token.location, + .type = .text, + .body = .{ + .text_span = token, + }, + }); }, '}' => { @@ -938,6 +1069,15 @@ pub const Parser = struct { break; nesting -= 1; + + const token = parser.slice(parser.offset - 1, parser.offset); + try children.append(parser.arena, .{ + .location = token.location, + .type = .text, + .body = .{ + .text_span = token, + }, + }); }, '\\' => backslash: { @@ -946,7 +1086,18 @@ pub const Parser = struct { switch (next_char) { '{', '}', '\\' => { // Escaped 
brace + + const token = parser.slice(parser.offset, parser.offset + 2); + try children.append(parser.arena, .{ + .location = token.location, + .type = .text, + .body = .{ + .text_span = token, + }, + }); + parser.offset += 2; + break :backslash; }, else => {}, @@ -966,8 +1117,7 @@ pub const Parser = struct { try children.append(parser.arena, .{ .location = word.location, .type = .text, - .attributes = .empty, - .body = .empty, + .body = .{ .text_span = word }, }); }, } @@ -1335,6 +1485,7 @@ pub const Parser = struct { .@"\\datetime", .unknown_inline, + .unknown_block, // Unknown blocks must also have inline bodies to optimally retain body contents => true, .hdoc, @@ -1347,7 +1498,6 @@ pub const Parser = struct { .li, .text, - .unknown_block, => false, }; } @@ -1361,10 +1511,11 @@ pub const Parser = struct { body: Body, pub const Body = union(enum) { - empty, + empty: Location, string: Token, verbatim: []Token, list: []Node, + text_span: Token, }; }; @@ -1418,6 +1569,9 @@ pub const Diagnostic = struct { verbatim_missing_trailing_newline, verbatim_missing_space, trailing_whitespace, + empty_inline_body, + unpadded_verbatim_line, + trailing_whitespace_in_verbatim_line, pub fn severity(code: Code) Severity { return switch (code) { @@ -1441,6 +1595,9 @@ pub const Diagnostic = struct { .verbatim_missing_trailing_newline, .verbatim_missing_space, .trailing_whitespace, + .empty_inline_body, + .unpadded_verbatim_line, + .trailing_whitespace_in_verbatim_line, => .warning, }; } @@ -1472,6 +1629,11 @@ pub const Diagnostic = struct { .unknown_attribute => |ctx| try w.print("Unknown attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), .unknown_block_type => |ctx| try w.print("Unknown block type '{s}'.", .{ctx.name}), .invalid_block_type => |ctx| try w.print("Invalid block type '{s}' in this context.", .{ctx.name}), + + .empty_inline_body => try w.writeAll("Inline body is empty."), + + .unpadded_verbatim_line => try w.writeAll("Verbatim line is not properly padded 
with a space character at the start."), + .trailing_whitespace_in_verbatim_line => try w.writeAll("Trailing whitespace at end of verbatim line."), } } }; diff --git a/src/main.zig b/src/main.zig index 98ea655..df3e962 100644 --- a/src/main.zig +++ b/src/main.zig @@ -119,7 +119,7 @@ fn dumpFormattedDateTime(writer: anytype, indent: usize, formatted: hdoc.Formatt try writer.print("format: {s}\n", .{@tagName(formatted.format)}); } -fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.SpanContent) !void { +fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.Span.Content) !void { switch (content) { .text => |text| { try writeIndent(writer, indent); @@ -174,14 +174,14 @@ fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { try writeIndent(writer, indent); try writer.writeAll("content:\n"); try dumpSpanContent(writer, indent + indent_step, span.content); - try dumpOptionalStringField(writer, indent, "lang", span.lang); - try dumpBoolField(writer, indent, "em", span.em); - try dumpBoolField(writer, indent, "mono", span.mono); - try dumpBoolField(writer, indent, "strike", span.strike); - try dumpBoolField(writer, indent, "sub", span.sub); - try dumpBoolField(writer, indent, "sup", span.sup); - try dumpLink(writer, indent, span.link); - try dumpOptionalStringField(writer, indent, "syntax", span.syntax); + try dumpOptionalStringField(writer, indent, "lang", span.attribs.lang); + try dumpBoolField(writer, indent, "em", span.attribs.em); + try dumpBoolField(writer, indent, "mono", span.attribs.mono); + try dumpBoolField(writer, indent, "strike", span.attribs.strike); + try dumpBoolField(writer, indent, "sub", span.attribs.sub); + try dumpBoolField(writer, indent, "sup", span.attribs.sup); + try dumpLink(writer, indent, span.attribs.link); + try dumpOptionalStringField(writer, indent, "syntax", span.attribs.syntax); } fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { @@ -413,7 +413,7 
@@ test "dumpDocument escapes string values" { const spans = try arena_alloc.alloc(hdoc.Span, 1); spans[0] = .{ .content = .{ .text = span_text }, - .link = .{ .ref = link_ref }, + .attribs = .{ .link = .{ .ref = link_ref } }, }; const blocks = try arena_alloc.alloc(hdoc.Block, 1); diff --git a/test/parser/workset.hdoc b/test/parser/workset.hdoc new file mode 100644 index 0000000..e4b2e8f --- /dev/null +++ b/test/parser/workset.hdoc @@ -0,0 +1,4 @@ +hdoc(version="2.0"); + +h1: +| Hello, World! From c3ff7bcb78520458de79ea3123723a664b33130b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 14:32:36 +0100 Subject: [PATCH 021/116] Implements most guts of the inline composition system, but only exposes \em right now. --- src/hyperdoc.zig | 139 +++++++++++++++++++++++++++++++-------- src/main.zig | 3 +- test/parser/workset.hdoc | 2 + 3 files changed, 114 insertions(+), 30 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 43b1ffe..dd44a87 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -133,41 +133,25 @@ pub const Span = struct { }; pub const Attributes = struct { - lang: ?[]const u8 = null, + lang: []const u8 = "", // empty is absence + position: ScriptPosition = .baseline, em: bool = false, mono: bool = false, strike: bool = false, - sub: bool = false, - sup: bool = false, link: Link = .none, - syntax: ?[]const u8 = null, - - pub const Overrides = struct { - lang: ?[]const u8 = null, - em: ?bool = null, - mono: ?bool = null, - strike: ?bool = null, - sub: ?bool = null, - sup: ?bool = null, - link: ?Link = null, - syntax: ?[]const u8 = null, - }; - - pub fn derive(base: Attributes, overlay: Overrides) Attributes { - var new = base; - inline for (@typeInfo(Attributes).@"struct".fields) |fld| { - if (@field(overlay, fld.name)) |new_value| { - @field(new, fld.name) = new_value; - } - } - return new; - } + syntax: []const u8 = "", // empty is absence }; content: Content, attribs: Attributes, }; +pub 
const ScriptPosition = enum { + baseline, + superscript, + subscript, +}; + pub const Link = union(enum) { none, ref: []const u8, @@ -596,15 +580,91 @@ pub const SemanticAnalyzer = struct { return try spans.toOwnedSlice(sema.arena); } + pub const AttribOverrides = struct { + lang: ?[]const u8 = null, + em: ?bool = null, + mono: ?bool = null, + strike: ?bool = null, + position: ?ScriptPosition = null, + link: ?Link = null, + syntax: []const u8 = "", + }; + + fn derive_attribute(sema: *SemanticAnalyzer, location: Parser.Location, old: Span.Attributes, overlay: AttribOverrides) !Span.Attributes { + comptime std.debug.assert(@typeInfo(Span.Attributes).@"struct".fields.len == @typeInfo(AttribOverrides).@"struct".fields.len); + + var new = old; + if (overlay.lang) |lang| { + new.lang = lang; + } + + if (overlay.em) |v| { + if (old.em) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .em } }, location.offset); + } + new.em = v; + } + + if (overlay.mono) |mono| { + if (old.mono) { + if (std.mem.eql(u8, old.syntax, overlay.syntax)) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .mono } }, location.offset); + } + } + new.mono = mono; + new.syntax = overlay.syntax; + } else { + // can't override syntax without also enabling mono! + std.debug.assert(overlay.syntax.len == 0); + } + + if (overlay.strike) |strike| { + if (old.strike) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .strike } }, location.offset); + } + new.strike = strike; + } + + if (overlay.position) |new_pos| { + std.debug.assert(new_pos != .baseline); // we can never return to baseline script. 
+ if (old.position == new_pos) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = if (new_pos == .subscript) .sub else .sup } }, location.offset); + } else if (old.position != .baseline) { + try sema.emit_diagnostic(.{ .invalid_inline_combination = .{ + .first = switch (old.position) { + .superscript => .sup, + .subscript => .sub, + .baseline => unreachable, + }, + .second = switch (new_pos) { + .superscript => .sup, + .subscript => .sub, + .baseline => unreachable, + }, + } }, location.offset); + } + new.position = new_pos; + } + + if (overlay.link) |link| { + if (old.link != .none) { + try sema.emit_diagnostic(.link_not_nestable, location.offset); + } + new.link = link; + } + + return new; + } + fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes) !void { switch (node.type) { .unknown_inline, .text, - => { - try sema.translate_inline_body(spans, node.body, attribs); - }, + => try sema.translate_inline_body(spans, node.body, attribs), .@"\\em", + => try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .em = true })), + .@"\\mono", .@"\\strike", .@"\\sub", @@ -1546,6 +1606,8 @@ pub const Diagnostic = struct { pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; pub const InvalidBlockError = struct { name: []const u8 }; + pub const InlineUsageError = struct { attribute: InlineAttribute }; + pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; pub const Code = union(enum) { // errors: @@ -1561,6 +1623,8 @@ pub const Diagnostic = struct { invalid_attribute: NodeAttributeError, unknown_block_type: InvalidBlockError, invalid_block_type: InvalidBlockError, + invalid_inline_combination: InlineCombinationError, + link_not_nestable, // warnings: unknown_attribute: NodeAttributeError, @@ -1572,6 +1636,7 @@ pub const Diagnostic = struct { empty_inline_body, unpadded_verbatim_line, 
trailing_whitespace_in_verbatim_line, + redundant_inline: InlineUsageError, pub fn severity(code: Code) Severity { return switch (code) { @@ -1587,6 +1652,8 @@ pub const Diagnostic = struct { .missing_attribute, .unknown_block_type, .invalid_block_type, + .invalid_inline_combination, + .link_not_nestable, => .@"error", .unknown_attribute, @@ -1598,6 +1665,7 @@ pub const Diagnostic = struct { .empty_inline_body, .unpadded_verbatim_line, .trailing_whitespace_in_verbatim_line, + .redundant_inline, => .warning, }; } @@ -1634,6 +1702,10 @@ pub const Diagnostic = struct { .unpadded_verbatim_line => try w.writeAll("Verbatim line is not properly padded with a space character at the start."), .trailing_whitespace_in_verbatim_line => try w.writeAll("Trailing whitespace at end of verbatim line."), + + .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), + .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), + .link_not_nestable => try w.writeAll("Links are not nestable"), } } }; @@ -1680,6 +1752,17 @@ pub const Diagnostics = struct { } }; +pub const InlineAttribute = enum { + lang, + em, + mono, + strike, + sub, + sup, + link, + syntax, +}; + test "fuzz parser" { const Impl = struct { fn testOne(impl: @This(), data: []const u8) !void { diff --git a/src/main.zig b/src/main.zig index df3e962..b9e8e7a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -178,8 +178,7 @@ fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { try dumpBoolField(writer, indent, "em", span.attribs.em); try dumpBoolField(writer, indent, "mono", span.attribs.mono); try dumpBoolField(writer, indent, "strike", span.attribs.strike); - try dumpBoolField(writer, indent, "sub", span.attribs.sub); - try dumpBoolField(writer, indent, "sup", span.attribs.sup); + try dumpEnumField(writer, indent, "position", span.attribs.position); try dumpLink(writer, indent, span.attribs.link); try 
dumpOptionalStringField(writer, indent, "syntax", span.attribs.syntax); } diff --git a/test/parser/workset.hdoc b/test/parser/workset.hdoc index e4b2e8f..fbfaf77 100644 --- a/test/parser/workset.hdoc +++ b/test/parser/workset.hdoc @@ -2,3 +2,5 @@ hdoc(version="2.0"); h1: | Hello, World! + +h2{Hello \em{World}!} From 349ada5367eb481683e6049deb4b3f32582fd13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 14:48:24 +0100 Subject: [PATCH 022/116] Implements most inline node translations except for the date/time parts. --- src/hyperdoc.zig | 97 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index dd44a87..b2f1fe1 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -566,7 +566,7 @@ pub const SemanticAnalyzer = struct { return .{ heading, attrs.id }; } - fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, Unimplemented }![]Span { + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); @@ -662,20 +662,96 @@ pub const SemanticAnalyzer = struct { .text, => try sema.translate_inline_body(spans, node.body, attribs), - .@"\\em", - => try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .em = true })), + .@"\\em" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); - .@"\\mono", - .@"\\strike", - .@"\\sub", - .@"\\sup", - .@"\\link", + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .em = true, + })); + }, + + .@"\\strike" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + try sema.translate_inline_body(spans, node.body, try 
sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .strike = true, + })); + }, + + .@"\\sub" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .position = .subscript, + })); + }, + + .@"\\sup" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .position = .superscript, + })); + }, + + .@"\\link" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + uri: ?[]const u8 = null, + ref: ?[]const u8 = null, + }); + + if (props.uri != null and props.ref != null) { + try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + } + + const link: Link = if (props.uri) |uri| blk: { + // TODO: Figure out where to put URI validation (not empty, no leading/trailing whitespace) + break :blk .{ .uri = uri }; + } else if (props.ref) |ref| blk: { + // TODO: Figure out where to put reference validation (no leading/trailing whitespace) + // TODO: Reference validation must also happen for "id" attribute + break :blk .{ .ref = ref }; + } else blk: { + try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + break :blk .none; + }; + + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .link = link, + })); + }, + + .@"\\mono" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + syntax: []const u8 = "", + }); + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .mono = true, + .lang = props.lang, + .syntax = props.syntax, + })); + }, .@"\\date", .@"\\time", 
.@"\\datetime", => { // TODO: Implement date/time translation + std.log.err("TODO: Implement {t}", .{node.type}); }, .hdoc, @@ -705,7 +781,7 @@ pub const SemanticAnalyzer = struct { } } - fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, Unimplemented }!void { + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { try sema.emit_diagnostic(.empty_inline_body, location.offset); @@ -1625,6 +1701,7 @@ pub const Diagnostic = struct { invalid_block_type: InvalidBlockError, invalid_inline_combination: InlineCombinationError, link_not_nestable, + invalid_link, // warnings: unknown_attribute: NodeAttributeError, @@ -1654,6 +1731,7 @@ pub const Diagnostic = struct { .invalid_block_type, .invalid_inline_combination, .link_not_nestable, + .invalid_link, => .@"error", .unknown_attribute, @@ -1706,6 +1784,7 @@ pub const Diagnostic = struct { .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), .link_not_nestable => try w.writeAll("Links are not nestable"), + .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), } } }; From 0f6815576f8836c00d1fb208ebe8b780f533a9f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 15:12:23 +0100 Subject: [PATCH 023/116] Vibecoded: Reduces the verbosity of the debug dump output --- src/main.zig | 196 +++++++++++++++++++++++++++++---------------------- 1 file changed, 112 insertions(+), 84 deletions(-) diff --git a/src/main.zig b/src/main.zig index b9e8e7a..3791de7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -95,92 +95,126 @@ fn dumpDateTime(writer: 
anytype, indent: usize, datetime: hdoc.DateTime) !void { try dumpTime(writer, indent + indent_step, datetime.time); } -fn dumpFormattedDate(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { - try writeIndent(writer, indent); - try writer.writeAll("value:\n"); - try dumpDate(writer, indent + indent_step, formatted.value); - try writeIndent(writer, indent); - try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +fn writeAttrSeparator(writer: anytype, first: *bool) !void { + if (first.*) { + first.* = false; + } else { + try writer.writeByte(' '); + } } -fn dumpFormattedTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { - try writeIndent(writer, indent); - try writer.writeAll("value:\n"); - try dumpTime(writer, indent + indent_step, formatted.value); - try writeIndent(writer, indent); - try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +fn writeSpanAttributes(writer: anytype, span: hdoc.Span) !void { + try writer.writeByte('['); + var first = true; + if (span.attribs.em) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("em"); + } + if (span.attribs.mono) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("mono"); + } + if (span.attribs.strike) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("strike"); + } + if (span.attribs.position != .baseline) { + try writeAttrSeparator(writer, &first); + try writer.print("position=\"{s}\"", .{@tagName(span.attribs.position)}); + } + switch (span.attribs.link) { + .none => {}, + .ref => |value| { + try writeAttrSeparator(writer, &first); + try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value)}); + }, + .uri => |value| { + try writeAttrSeparator(writer, &first); + try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value)}); + }, + } + if (span.attribs.lang.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("lang=\"{f}\"", 
.{std.zig.fmtString(span.attribs.lang)}); + } + if (span.attribs.syntax.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("syntax=\"{f}\"", .{std.zig.fmtString(span.attribs.syntax)}); + } + try writer.writeByte(']'); } -fn dumpFormattedDateTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.DateTime)) !void { - try writeIndent(writer, indent); - try writer.writeAll("value:\n"); - try dumpDateTime(writer, indent + indent_step, formatted.value); - try writeIndent(writer, indent); - try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +fn writeDateValue(writer: anytype, date: hdoc.Date) !void { + try writer.print("{d:0>4}-{d:0>2}-{d:0>2}", .{ date.year, date.month, date.day }); +} + +fn writeTimeValue(writer: anytype, time: hdoc.Time) !void { + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ time.hour, time.minute, time.second }); + if (time.microsecond != 0) { + try writer.print(".{d:0>6}", .{time.microsecond}); + } +} + +fn writeDateTimeValue(writer: anytype, datetime: hdoc.DateTime) !void { + try writeDateValue(writer, datetime.date); + try writer.writeByte('T'); + try writeTimeValue(writer, datetime.time); +} + +fn writeFormattedDateInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { + try writer.writeAll("date:"); + try writeDateValue(writer, formatted.value); + if (formatted.format != hdoc.Date.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } } -fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.Span.Content) !void { +fn writeFormattedTimeInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { + try writer.writeAll("time:"); + try writeTimeValue(writer, formatted.value); + if (formatted.format != hdoc.Time.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeFormattedDateTimeInline(writer: anytype, formatted: 
hdoc.FormattedDateTime(hdoc.DateTime)) !void { + try writer.writeAll("datetime:"); + try writeDateTimeValue(writer, formatted.value); + if (formatted.format != hdoc.DateTime.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeSpanContentInline(writer: anytype, content: hdoc.Span.Content) !void { switch (content) { .text => |text| { - try writeIndent(writer, indent); - try writer.writeAll("text: "); try writeStringValue(writer, text); - try writer.writeByte('\n'); }, .date => |date| { - try writeIndent(writer, indent); - try writer.writeAll("date:\n"); - try dumpFormattedDate(writer, indent + indent_step, date); + try writer.writeByte('"'); + try writeFormattedDateInline(writer, date); + try writer.writeByte('"'); }, .time => |time| { - try writeIndent(writer, indent); - try writer.writeAll("time:\n"); - try dumpFormattedTime(writer, indent + indent_step, time); + try writer.writeByte('"'); + try writeFormattedTimeInline(writer, time); + try writer.writeByte('"'); }, .datetime => |datetime| { - try writeIndent(writer, indent); - try writer.writeAll("datetime:\n"); - try dumpFormattedDateTime(writer, indent + indent_step, datetime); + try writer.writeByte('"'); + try writeFormattedDateTimeInline(writer, datetime); + try writer.writeByte('"'); }, } } -fn dumpLink(writer: anytype, indent: usize, link: hdoc.Link) !void { - switch (link) { - .none => { - try writeIndent(writer, indent); - try writer.writeAll("link: none\n"); - }, - .ref => |value| { - try writeIndent(writer, indent); - try writer.writeAll("link:\n"); - try writeIndent(writer, indent + indent_step); - try writer.writeAll("ref: "); - try writeStringValue(writer, value); - try writer.writeByte('\n'); - }, - .uri => |value| { - try writeIndent(writer, indent); - try writer.writeAll("link:\n"); - try writeIndent(writer, indent + indent_step); - try writer.writeAll("uri: "); - try writeStringValue(writer, value); - try writer.writeByte('\n'); - }, 
- } -} - -fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { - try writeIndent(writer, indent); - try writer.writeAll("content:\n"); - try dumpSpanContent(writer, indent + indent_step, span.content); - try dumpOptionalStringField(writer, indent, "lang", span.attribs.lang); - try dumpBoolField(writer, indent, "em", span.attribs.em); - try dumpBoolField(writer, indent, "mono", span.attribs.mono); - try dumpBoolField(writer, indent, "strike", span.attribs.strike); - try dumpEnumField(writer, indent, "position", span.attribs.position); - try dumpLink(writer, indent, span.attribs.link); - try dumpOptionalStringField(writer, indent, "syntax", span.attribs.syntax); +fn dumpSpanInline(writer: anytype, span: hdoc.Span) !void { + try writeSpanAttributes(writer, span); + try writer.writeByte(' '); + try writeSpanContentInline(writer, span.content); } fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { @@ -192,8 +226,9 @@ fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []c try writer.print("{s}:\n", .{key}); for (spans) |span| { try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpSpan(writer, indent + indent_step * 2, span); + try writer.writeAll("- "); + try dumpSpanInline(writer, span); + try writer.writeByte('\n'); } } @@ -286,31 +321,27 @@ fn dumpTableRowsField(writer: anytype, indent: usize, key: []const u8, rows: []c } } -fn dumpBlock(writer: anytype, indent: usize, block: hdoc.Block) !void { +fn dumpBlockInline(writer: anytype, indent: usize, block: hdoc.Block) !void { switch (block) { .heading => |heading| { - try writeIndent(writer, indent); try writer.writeAll("heading:\n"); try dumpEnumField(writer, indent + indent_step, "level", heading.level); try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); try dumpSpanListField(writer, indent + indent_step, "content", heading.content); }, .paragraph => |paragraph| { 
- try writeIndent(writer, indent); try writer.writeAll("paragraph:\n"); try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); }, .list => |list| { - try writeIndent(writer, indent); try writer.writeAll("list:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); try dumpListItemsField(writer, indent + indent_step, "items", list.items); }, .image => |image| { - try writeIndent(writer, indent); try writer.writeAll("image:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); @@ -318,20 +349,17 @@ fn dumpBlock(writer: anytype, indent: usize, block: hdoc.Block) !void { try dumpSpanListField(writer, indent + indent_step, "content", image.content); }, .preformatted => |preformatted| { - try writeIndent(writer, indent); try writer.writeAll("preformatted:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); }, .toc => |toc| { - try writeIndent(writer, indent); try writer.writeAll("toc:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); }, .table => |table| { - try writeIndent(writer, indent); try writer.writeAll("table:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); @@ -348,8 +376,8 @@ fn dumpBlockListField(writer: anytype, indent: usize, key: []const 
u8, blocks: [ try writer.print("{s}:\n", .{key}); for (blocks) |block| { try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpBlock(writer, indent + indent_step * 2, block); + try writer.writeAll("- "); + try dumpBlockInline(writer, indent + indent_step, block); } } @@ -439,14 +467,14 @@ test "dumpDocument escapes string values" { defer std.testing.allocator.free(expected_title); try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); - const expected_span = try std.fmt.allocPrint(std.testing.allocator, "text: \"{f}\"\n", .{std.zig.fmtString(span_text)}); + const expected_span = try std.fmt.allocPrint( + std.testing.allocator, + "- [link=\"ref:{f}\"] \"{f}\"\n", + .{ std.zig.fmtString(link_ref), std.zig.fmtString(span_text) }, + ); defer std.testing.allocator.free(expected_span); try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); - const expected_link = try std.fmt.allocPrint(std.testing.allocator, "ref: \"{f}\"\n", .{std.zig.fmtString(link_ref)}); - defer std.testing.allocator.free(expected_link); - try std.testing.expect(std.mem.indexOf(u8, output, expected_link) != null); - const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value)}); defer std.testing.allocator.free(expected_id); try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); From 16123edff0c3c8f09386fe45950ac07a0bb232d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 12:47:36 +0100 Subject: [PATCH 024/116] Implements basic validation of reference and uri attributes. 
--- build.zig.zon | 1 + src/hyperdoc.zig | 114 ++++++++++++++++++++++++++++++----------------- src/main.zig | 18 ++++---- 3 files changed, 84 insertions(+), 49 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 00a368a..e098508 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -2,6 +2,7 @@ .name = .hyperdoc, .version = "0.1.0", .fingerprint = 0xfd1a4802abc4739e, + .minimum_zig_version = "0.15.0", .dependencies = .{ // .parser_toolkit = .{ diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b2f1fe1..dc2fe0b 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -10,7 +10,7 @@ pub const Document = struct { // document contents: contents: []Block, - ids: []?[]const u8, + ids: []?Reference, // header information lang: ?[]const u8, @@ -154,8 +154,8 @@ pub const ScriptPosition = enum { pub const Link = union(enum) { none, - ref: []const u8, - uri: []const u8, + ref: Reference, + uri: Uri, }; /// HyperDoc Version Number @@ -342,6 +342,30 @@ pub const Time = struct { } }; +/// Type-safe wrapper around a URI attribute. +pub const Uri = struct { + pub const empty: Uri = .{ .text = "" }; + + text: []const u8, + + pub fn init(text: []const u8) Uri { + // TODO: Add correctness validation here + return .{ .text = text }; + } +}; + +/// Type-safe wrapper around a reference value (id/ref) attribute. +pub const Reference = struct { + pub const empty: Reference = .{ .text = "" }; + + text: []const u8, + + pub fn init(text: []const u8) Reference { + // TODO: Add correctness validation here + return .{ .text = text }; + } +}; + /// Parses a HyperDoc document. 
pub fn parse( allocator: std.mem.Allocator, @@ -406,6 +430,8 @@ pub fn parse( } pub const SemanticAnalyzer = struct { + const whitespace_chars = " \t"; + const Header = struct { version: Version, lang: ?[]const u8, @@ -420,7 +446,7 @@ pub const SemanticAnalyzer = struct { header: ?Header = null, blocks: std.ArrayList(Block) = .empty, - ids: std.ArrayList(?[]const u8) = .empty, + ids: std.ArrayList(?Reference) = .empty, fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{OutOfMemory}!void { switch (node.type) { @@ -482,7 +508,7 @@ pub const SemanticAnalyzer = struct { }; } - fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?[]const u8 } { + fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?Reference } { std.debug.assert(node.type != .hdoc); switch (node.type) { @@ -546,10 +572,10 @@ pub const SemanticAnalyzer = struct { return error.InvalidNodeType; } - fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?[]const u8 } { + fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?Reference } { const attrs = try sema.get_attributes(node, struct { lang: ?[]const u8 = null, - id: ?[]const u8 = null, + id: ?Reference = null, }); const heading: Block.Heading = .{ @@ -709,23 +735,20 @@ pub const SemanticAnalyzer = struct { .@"\\link" => { const props = try sema.get_attributes(node, struct { lang: ?[]const u8 = null, - uri: ?[]const u8 = null, - ref: ?[]const u8 = null, + uri: ?Uri = null, + ref: ?Reference = null, }); if (props.uri != null and props.ref != null) { - try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + try sema.emit_diagnostic(.invalid_link, node.location.offset); } const link: Link = if (props.uri) |uri| blk: { 
- // TODO: Figure out where to put URI validation (not empty, no leading/trailing whitespace) break :blk .{ .uri = uri }; } else if (props.ref) |ref| blk: { - // TODO: Figure out where to put reference validation (no leading/trailing whitespace) - // TODO: Reference validation must also happen for "id" attribute break :blk .{ .ref = ref }; } else blk: { - try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + try sema.emit_diagnostic(.invalid_link, node.location.offset); break :blk .none; }; @@ -806,7 +829,6 @@ pub const SemanticAnalyzer = struct { } try text_buffer.ensureTotalCapacityPrecise(sema.arena, size); - var first_unpadded = true; for (verbatim_lines, 0..) |line, index| { if (index != 0) { try text_buffer.append(sema.arena, '\n'); @@ -814,23 +836,12 @@ pub const SemanticAnalyzer = struct { std.debug.assert(std.mem.startsWith(u8, line.text, "|")); const is_padded = std.mem.startsWith(u8, line.text, "| "); - - if (!is_padded) { - if (first_unpadded) { - try sema.emit_diagnostic(.unpadded_verbatim_line, line.location.offset); - first_unpadded = false; - } - } - const text = if (is_padded) line.text[2..] 
else line.text[1..]; - const stripped = std.mem.trimRight(u8, text, " \t"); - if (text.len != stripped.len) { - try sema.emit_diagnostic(.trailing_whitespace_in_verbatim_line, line.location.offset + stripped.len); - } + const stripped = std.mem.trimRight(u8, text, whitespace_chars); text_buffer.appendSliceAssumeCapacity(stripped); } @@ -856,42 +867,52 @@ pub const SemanticAnalyzer = struct { } } - fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?[]const u8 } { + fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?[]const u8 } { + fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?[]const u8 } { + fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?[]const u8 } { + fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_table_node(sema: 
*SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?[]const u8 } { + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } + fn get_attribute_location(node: Parser.Node, attrib_name: []const u8, comptime key: enum { name, value }) ?Parser.Location { + var i = node.attributes.items.len; + while (i > 0) { + i -= 1; + if (std.mem.eql(u8, node.attributes.items[i].name.text, attrib_name)) + return @field(node.attributes.items[i], @tagName(key)).location; + } + return null; + } + fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; @@ -964,6 +985,22 @@ pub const SemanticAnalyzer = struct { return switch (T) { []const u8 => value, + Reference => { + const stripped = std.mem.trim(u8, value, whitespace_chars); + if (stripped.len != value.len) { + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + } + return .init(stripped); + }, + + Uri => { + const stripped = std.mem.trim(u8, value, whitespace_chars); + if (stripped.len != value.len) { + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + } + return .init(stripped); + }, + Version => Version.parse(value) catch return error.InvalidValue, DateTime => DateTime.parse(value) catch return error.InvalidValue, Date => Date.parse(value) catch return error.InvalidValue, @@ -1711,9 +1748,8 @@ pub const Diagnostic = struct { verbatim_missing_space, trailing_whitespace, empty_inline_body, - unpadded_verbatim_line, - trailing_whitespace_in_verbatim_line, redundant_inline: InlineUsageError, + attribute_leading_trailing_whitespace, pub fn severity(code: Code) Severity { return switch (code) { @@ -1741,9 +1777,8 @@ pub const Diagnostic = struct { 
.verbatim_missing_space, .trailing_whitespace, .empty_inline_body, - .unpadded_verbatim_line, - .trailing_whitespace_in_verbatim_line, .redundant_inline, + .attribute_leading_trailing_whitespace, => .warning, }; } @@ -1778,13 +1813,12 @@ pub const Diagnostic = struct { .empty_inline_body => try w.writeAll("Inline body is empty."), - .unpadded_verbatim_line => try w.writeAll("Verbatim line is not properly padded with a space character at the start."), - .trailing_whitespace_in_verbatim_line => try w.writeAll("Trailing whitespace at end of verbatim line."), - .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), .link_not_nestable => try w.writeAll("Links are not nestable"), .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), + + .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), } } }; diff --git a/src/main.zig b/src/main.zig index 3791de7..2f9b602 100644 --- a/src/main.zig +++ b/src/main.zig @@ -126,11 +126,11 @@ fn writeSpanAttributes(writer: anytype, span: hdoc.Span) !void { .none => {}, .ref => |value| { try writeAttrSeparator(writer, &first); - try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value)}); + try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); }, .uri => |value| { try writeAttrSeparator(writer, &first); - try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value)}); + try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); }, } if (span.attribs.lang.len != 0) { @@ -381,7 +381,7 @@ fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: [ } } -fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, values: []?[]const u8) !void { +fn dumpOptionalStringListField(writer: anytype, indent: usize, key: 
[]const u8, values: []?hdoc.Reference) !void { try writeIndent(writer, indent); if (values.len == 0) { try writer.print("{s}: []\n", .{key}); @@ -391,7 +391,7 @@ fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, for (values) |value| { try writeIndent(writer, indent + indent_step); try writer.writeAll("- "); - try writeOptionalStringValue(writer, value); + try writeOptionalStringValue(writer, if (value) |val| val.text else null); try writer.writeByte('\n'); } } @@ -420,8 +420,8 @@ fn dumpDocument(writer: anytype, doc: *const hdoc.Document) !void { test "dumpDocument escapes string values" { const title = "Doc \"Title\"\n"; const span_text = "Hello \"world\"\n"; - const link_ref = "section \"A\""; - const id_value = "id:1\n"; + const link_ref: hdoc.Reference = .init("section \"A\""); + const id_value: hdoc.Reference = .init("id:1\n"); var doc: hdoc.Document = .{ .arena = std.heap.ArenaAllocator.init(std.testing.allocator), @@ -453,7 +453,7 @@ test "dumpDocument escapes string values" { }; doc.contents = blocks; - const ids = try arena_alloc.alloc(?[]const u8, 1); + const ids = try arena_alloc.alloc(?hdoc.Reference, 1); ids[0] = id_value; doc.ids = ids; @@ -470,12 +470,12 @@ test "dumpDocument escapes string values" { const expected_span = try std.fmt.allocPrint( std.testing.allocator, "- [link=\"ref:{f}\"] \"{f}\"\n", - .{ std.zig.fmtString(link_ref), std.zig.fmtString(span_text) }, + .{ std.zig.fmtString(link_ref.text), std.zig.fmtString(span_text) }, ); defer std.testing.allocator.free(expected_span); try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); - const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value)}); + const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value.text)}); defer std.testing.allocator.free(expected_id); try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); } From 
088c51eda3fb46aa851f54f7d958b08a28b61cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 15:03:22 +0100 Subject: [PATCH 025/116] Prepares most of the date/time inline parsing, except for the span to string conversion --- src/hyperdoc.zig | 73 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index dc2fe0b..2977cb9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -773,8 +773,35 @@ pub const SemanticAnalyzer = struct { .@"\\time", .@"\\datetime", => { - // TODO: Implement date/time translation - std.log.err("TODO: Implement {t}", .{node.type}); + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + fmt: []const u8 = "", + }); + + var content_spans: std.ArrayList(Span) = .empty; + defer content_spans.deinit(sema.arena); + + // TODO: Implement automatic space insertion. + // This must be done when two consecutive nodes are separated by a space + + try sema.translate_inline_body(&content_spans, node.body, .{}); + + // TODO: Convert the content_spans into a "rendered string". 
+ const content_text = ""; + + const content: Span.Content = switch (node.type) { + .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), + .@"\\time" => try sema.parse_date_body(node, .time, Time, content_text, props.fmt), + .@"\\datetime" => try sema.parse_date_body(node, .datetime, DateTime, content_text, props.fmt), + else => unreachable, + }; + + try spans.append(sema.arena, .{ + .content = content, + .attribs = try sema.derive_attribute(node.location, attribs, .{ + .lang = attribs.lang, + }), + }); }, .hdoc, @@ -804,6 +831,40 @@ pub const SemanticAnalyzer = struct { } } + fn parse_date_body( + sema: *SemanticAnalyzer, + node: Parser.Node, + comptime body: enum { date, time, datetime }, + comptime DTValue: type, + value_str: []const u8, + format_str: []const u8, + ) !Span.Content { + const Format: type = DTValue.Format; + + const value: DTValue = if (DTValue.parse(value_str)) |value| + value + else |_| blk: { + // TODO: Report error for invalid value + try sema.emit_diagnostic(.invalid_date_time, node.location.offset); + break :blk std.mem.zeroes(DTValue); + }; + + const format: Format = if (format_str.len == 0) + .default + else if (std.meta.stringToEnum(Format, format_str)) |format| + format + else blk: { + // TODO: Report error about invalid format + try sema.emit_diagnostic(.invalid_date_time_fmt, (get_attribute_location(node, "fmt", .value) orelse node.location).offset); + break :blk .default; + }; + + return @unionInit(Span.Content, @tagName(body), .{ + .format = format, + .value = value, + }); + } + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { @@ -1739,6 +1800,8 @@ pub const Diagnostic = struct { invalid_inline_combination: InlineCombinationError, link_not_nestable, invalid_link, + invalid_date_time, + invalid_date_time_fmt, // warnings: unknown_attribute: 
NodeAttributeError, @@ -1768,6 +1831,8 @@ pub const Diagnostic = struct { .invalid_inline_combination, .link_not_nestable, .invalid_link, + .invalid_date_time, + .invalid_date_time_fmt, => .@"error", .unknown_attribute, @@ -1819,6 +1884,10 @@ pub const Diagnostic = struct { .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), + + .invalid_date_time => try w.writeAll("Invalid date/time value."), + + .invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' for date/time value."), } } }; From fa5d31ab26bee94f8ff864221abe94f7fbb18a07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 15:06:57 +0100 Subject: [PATCH 026/116] Simplifies sema.emit_diagnostic by taking a Parser.Location --- src/hyperdoc.zig | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2977cb9..09ca70f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -452,7 +452,7 @@ pub const SemanticAnalyzer = struct { switch (node.type) { .hdoc => { if (sema.header != null) { - try sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); + try sema.emit_diagnostic(.duplicate_hdoc_header, node.location); } sema.header = sema.translate_header_node(node) catch |err| switch (err) { error.OutOfMemory => |e| return e, @@ -467,7 +467,7 @@ pub const SemanticAnalyzer = struct { // This can only happen exactly once, as we either: // - have already set a header block when the first non-header nodes arrives. // - we have processed another block already, so the previous block would've emitted the warning already. 
- try sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); + try sema.emit_diagnostic(.missing_hdoc_header, node.location); } } @@ -544,7 +544,7 @@ pub const SemanticAnalyzer = struct { }, .unknown_block, .unknown_inline => { - try sema.emit_diagnostic(.{ .unknown_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location.offset); + try sema.emit_diagnostic(.{ .unknown_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location); return error.InvalidNodeType; }, @@ -564,7 +564,7 @@ pub const SemanticAnalyzer = struct { .td, .li, => { - try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location.offset); + try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location); return error.InvalidNodeType; }, } @@ -626,7 +626,7 @@ pub const SemanticAnalyzer = struct { if (overlay.em) |v| { if (old.em) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .em } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .em } }, location); } new.em = v; } @@ -634,7 +634,7 @@ pub const SemanticAnalyzer = struct { if (overlay.mono) |mono| { if (old.mono) { if (std.mem.eql(u8, old.syntax, new.syntax)) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .mono } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .mono } }, location); } } new.mono = mono; @@ -646,7 +646,7 @@ pub const SemanticAnalyzer = struct { if (overlay.strike) |strike| { if (old.strike) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .strike } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .strike } }, location); } new.strike = 
strike; } @@ -654,7 +654,7 @@ pub const SemanticAnalyzer = struct { if (overlay.position) |new_pos| { std.debug.assert(new_pos != .baseline); // we can never return to baseline script. if (old.position == new_pos) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .sub } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .sub } }, location); } else if (old.position != .baseline) { try sema.emit_diagnostic(.{ .invalid_inline_combination = .{ .first = switch (old.position) { @@ -667,14 +667,14 @@ pub const SemanticAnalyzer = struct { .subscript => .sub, .baseline => unreachable, }, - } }, location.offset); + } }, location); } new.position = new_pos; } if (overlay.link) |link| { if (old.link != .none) { - try sema.emit_diagnostic(.link_not_nestable, location.offset); + try sema.emit_diagnostic(.link_not_nestable, location); } new.link = link; } @@ -740,7 +740,7 @@ pub const SemanticAnalyzer = struct { }); if (props.uri != null and props.ref != null) { - try sema.emit_diagnostic(.invalid_link, node.location.offset); + try sema.emit_diagnostic(.invalid_link, node.location); } const link: Link = if (props.uri) |uri| blk: { @@ -748,7 +748,7 @@ pub const SemanticAnalyzer = struct { } else if (props.ref) |ref| blk: { break :blk .{ .ref = ref }; } else blk: { - try sema.emit_diagnostic(.invalid_link, node.location.offset); + try sema.emit_diagnostic(.invalid_link, node.location); break :blk .none; }; @@ -845,7 +845,7 @@ pub const SemanticAnalyzer = struct { value else |_| blk: { // TODO: Report error for invalid value - try sema.emit_diagnostic(.invalid_date_time, node.location.offset); + try sema.emit_diagnostic(.invalid_date_time, node.location); break :blk std.mem.zeroes(DTValue); }; @@ -855,7 +855,7 @@ pub const SemanticAnalyzer = struct { format else blk: { // TODO: Report error about invalid format - try sema.emit_diagnostic(.invalid_date_time_fmt, (get_attribute_location(node, "fmt", .value) orelse 
node.location).offset); + try sema.emit_diagnostic(.invalid_date_time_fmt, get_attribute_location(node, "fmt", .value) orelse node.location); break :blk .default; }; @@ -868,7 +868,7 @@ pub const SemanticAnalyzer = struct { fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { - try sema.emit_diagnostic(.empty_inline_body, location.offset); + try sema.emit_diagnostic(.empty_inline_body, location); }, .string => |string_body| { @@ -996,11 +996,11 @@ pub const SemanticAnalyzer = struct { const key = attrib.name.text; const fld = std.meta.stringToEnum(Fields, key) orelse { - try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, attrib.name.location.offset); + try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, attrib.name.location); continue; }; if (found.contains(fld)) { - try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, attrib.name.location.offset); + try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, attrib.name.location); } found.insert(fld); @@ -1011,7 +1011,7 @@ pub const SemanticAnalyzer = struct { else => { any_invalid = true; - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, attrib.value.location.offset); + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, attrib.value.location); continue; }, @@ -1025,7 +1025,7 @@ pub const SemanticAnalyzer = struct { var iter = required.iterator(); while (iter.next()) |req_field| { if (!found.contains(req_field)) { - try sema.emit_diagnostic(.{ .missing_attribute = .{ .type = node.type, .name = @tagName(req_field) } }, node.location.offset); + try sema.emit_diagnostic(.{ .missing_attribute = .{ .type = node.type, .name = @tagName(req_field) } }, node.location); any_missing = true; } } 
@@ -1049,7 +1049,7 @@ pub const SemanticAnalyzer = struct { Reference => { const stripped = std.mem.trim(u8, value, whitespace_chars); if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); } return .init(stripped); }, @@ -1057,7 +1057,7 @@ pub const SemanticAnalyzer = struct { Uri => { const stripped = std.mem.trim(u8, value, whitespace_chars); if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); } return .init(stripped); }, @@ -1071,9 +1071,9 @@ pub const SemanticAnalyzer = struct { }; } - fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) !void { + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { - try diag.add(code, sema.make_location(offset)); + try diag.add(code, sema.make_location(location.offset)); } } From 67f9a10e1c0a8781702200d76a4717ea5e283113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 15:07:52 +0100 Subject: [PATCH 027/116] Moves code around --- src/hyperdoc.zig | 72 ++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 09ca70f..e2c197d 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -592,6 +592,42 @@ pub const SemanticAnalyzer = struct { return .{ heading, attrs.id }; } + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, 
?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); @@ -928,42 +964,6 @@ pub const SemanticAnalyzer = struct { } } - fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this 
node type - } - - fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - fn get_attribute_location(node: Parser.Node, attrib_name: []const u8, comptime key: enum { name, value }) ?Parser.Location { var i = node.attributes.items.len; while (i > 0) { From 98ee9090aa11864f085d4d896ac6499a40920205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 19:52:06 +0100 Subject: [PATCH 028/116] Implements SemanticAnalyzer.unescape_string --- AGENTS.md | 5 + src/hyperdoc.zig | 233 ++++++++++++++++++++++++++++++++-- src/testsuite.zig | 312 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 484 insertions(+), 66 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0bb6695..2579445 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,3 +10,8 @@ - Run `zig build` to validate the main application still compiles - Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. - Avoid editing documentation unless the request explicitly asks for it. + +## Zig Programming Style + +- Do not use "inline functions" like `const func = struct { fn func(…) {} }.func;` +- Zig has no methods. Functions used by "method like" functions can still be placed next to them, no need to put them into global scope nor into local scope. 
diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index e2c197d..b226fc1 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -370,16 +370,18 @@ pub const Reference = struct { pub fn parse( allocator: std.mem.Allocator, /// The source code to be parsed - plain_text: []const u8, + raw_plain_text: []const u8, /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) error{ OutOfMemory, SyntaxError, MalformedDocument }!Document { +) error{ OutOfMemory, SyntaxError, MalformedDocument, InvalidUtf8 }!Document { + const source_text = try remove_byte_order_mark(diagnostics, raw_plain_text); + var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); var parser: Parser = .{ - .code = plain_text, + .code = source_text, .arena = arena.allocator(), .diagnostics = diagnostics, }; @@ -387,7 +389,7 @@ pub fn parse( var sema: SemanticAnalyzer = .{ .arena = arena.allocator(), .diagnostics = diagnostics, - .code = plain_text, + .code = source_text, }; while (true) { @@ -429,6 +431,27 @@ pub fn parse( }; } +pub fn remove_byte_order_mark(diagnostics: ?*Diagnostics, plain_text: []const u8) error{ OutOfMemory, InvalidUtf8 }![]const u8 { + // First check if all of our code is valid UTF-8 + // and if it potentially starts with a BOM. 
+ var view = std.unicode.Utf8View.init(plain_text) catch { + return error.InvalidUtf8; + }; + + var iter = view.iterator(); + + if (iter.nextCodepointSlice()) |slice| { + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + if (codepoint == 0xFEFF) { + if (diagnostics) |diag| { + try diag.add(.document_starts_with_bom, .{ .column = 1, .line = 1 }); + } + return plain_text[slice.len..]; + } + } + return plain_text; +} + pub const SemanticAnalyzer = struct { const whitespace_chars = " \t"; @@ -1099,11 +1122,184 @@ pub const SemanticAnalyzer = struct { std.debug.assert(token.text.len >= 2); std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"'); - _ = sema; - // TODO: Implement unescaping logic here. + const base_offset = token.location.offset + 1; // skip leading quote + const content = token.text[1 .. token.text.len - 1]; + + const Source = struct { + char: u8, + location: Parser.Location, + }; + + var output_buffer: std.MultiArrayList(Source) = .empty; + defer output_buffer.deinit(sema.arena); + + try output_buffer.ensureTotalCapacity(sema.arena, content.len); + + { + var out_chars_buffer: [4]u8 = undefined; + + var i: usize = 0; + while (i < content.len) { + const start = i; + + // We process bytes, even thought the input is UTF-8. + // This is fine as we only process ASCII-range escape sequences + const in_char = content[i]; + + // We process our in_char into 1..4 bytes, depending + // on the escape sequence. Worst input is \u{10FFFF}, which is + // encoded as {F4 8F BF BF}, so 4 bytes. 
+ const out_chars: []const u8 = blk: { + i += 1; + if (in_char != '\\') { + // Just return the actual character + break :blk content[start..i]; + } + + // This would mean an uinterminated escape sequence, and + // must be processed by the parser already: + std.debug.assert(i < content.len); + + const esc_char = content[i]; + + switch (esc_char) { + '"' => { + i += 1; + break :blk "\""; + }, + '\\' => { + i += 1; + break :blk "\\"; + }, + 'n' => { + i += 1; + break :blk "\n"; + }, + 'r' => { + i += 1; + break :blk "\r"; + }, + + 'u' => { + while (content[i] != '}') { + i += 1; + if (i >= content.len) { + try sema.emit_diagnostic(.invalid_unicode_string_escape, .{ .offset = start, .length = i - start }); + break :blk content[start..i]; + } + } + i += 1; + const escape_part = content[start..i]; + std.debug.assert(escape_part.len > 2); + std.debug.assert(escape_part[0] == '\\'); + std.debug.assert(escape_part[1] == 'u'); + std.debug.assert(escape_part[escape_part.len - 1] == '}'); + + const location: Parser.Location = .{ .offset = start, .length = escape_part.len }; + + if (escape_part[2] != '{') { + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + } + + const codepoint = std.fmt.parseInt(u21, escape_part[3 .. escape_part.len - 1], 16) catch { + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + }; + + const out_len = std.unicode.utf8Encode(codepoint, &out_chars_buffer) catch |err| switch (err) { + error.Utf8CannotEncodeSurrogateHalf => { + try sema.emit_diagnostic(.{ .illegal_character = .{ .codepoint = codepoint } }, location); + break :blk "???"; + }, + error.CodepointTooLarge => { + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + }, + }; + break :blk out_chars_buffer[0..out_len]; + }, + + else => { + // Unknown escape sequence, emit escaped char verbatim: + // TODO: How to handle something like "\😭", which is + // definitly valid and in-scope. 
+ + const len = std.unicode.utf8ByteSequenceLength(esc_char) catch unreachable; + + const esc_codepoint = std.unicode.utf8Decode(content[i .. i + len]) catch unreachable; + + i += len; - // For now, we just return the raw text. - return token.text[1 .. token.text.len - 1]; + try sema.emit_diagnostic(.{ + .invalid_string_escape = .{ .codepoint = esc_codepoint }, + }, .{ .offset = start, .length = i - start + 1 }); + + break :blk content[start..i]; + }, + } + @compileError("The switch above must be exhaustive and break to :blk for each code path."); + }; + + const loc: Parser.Location = .{ + .offset = base_offset + start, + .length = i - start + 1, + }; + for (out_chars) |out_char| { + output_buffer.appendAssumeCapacity(.{ + .char = out_char, + .location = loc, + }); + } + } + } + + var output = output_buffer.toOwnedSlice(); + errdefer output.deinit(sema.arena); + + const view = std.unicode.Utf8View.init(output.items(.char)) catch { + std.log.err("invalid utf-8 input: \"{f}\"", .{std.zig.fmtString(output.items(.char))}); + @panic("String unescape produced invalid UTF-8 sequence. This should not be possible."); + }; + + var iter = view.iterator(); + while (iter.nextCodepointSlice()) |slice| { + const start = iter.i - slice.len; + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + + if (is_illegal_character(codepoint)) { + try sema.emit_diagnostic( + .{ .illegal_character = .{ .codepoint = codepoint } }, + output.get(start).location, + ); + } + } + + return view.bytes; + } + + // TODO: Also validate the whole document against this rules. + fn is_illegal_character(codepoint: u21) bool { + // Surrogate codepoints are illegal, we're only ever using UTF-8 which doesn't need them. 
+ if (std.unicode.isSurrogateCodepoint(codepoint)) + return true; + + // CR and LF are the only allowed control characters: + if (codepoint == std.ascii.control_code.cr) + return false; + if (codepoint == std.ascii.control_code.lf) + return false; + + // Disallow characters from the "Control" category: + // + if (codepoint <= 0x1F) // C0 control characters + return true; + if (codepoint == 0x7F) // DEL + return true; + if (codepoint >= 0x80 and codepoint <= 0x9F) // C1 control characters + return true; + + // All other characters are fine + return false; } }; @@ -1782,6 +1978,8 @@ pub const Diagnostic = struct { pub const InvalidBlockError = struct { name: []const u8 }; pub const InlineUsageError = struct { attribute: InlineAttribute }; pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; + pub const InvalidStringEscape = struct { codepoint: u21 }; + pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const Code = union(enum) { // errors: @@ -1802,8 +2000,12 @@ pub const Diagnostic = struct { invalid_link, invalid_date_time, invalid_date_time_fmt, + invalid_unicode_string_escape, + invalid_string_escape: InvalidStringEscape, + illegal_character: ForbiddenControlCharacter, // warnings: + document_starts_with_bom, unknown_attribute: NodeAttributeError, duplicate_attribute: DuplicateAttribute, empty_verbatim_block, @@ -1833,6 +2035,9 @@ pub const Diagnostic = struct { .invalid_link, .invalid_date_time, .invalid_date_time_fmt, + .invalid_string_escape, + .illegal_character, + .invalid_unicode_string_escape, => .@"error", .unknown_attribute, @@ -1844,12 +2049,15 @@ pub const Diagnostic = struct { .empty_inline_body, .redundant_inline, .attribute_leading_trailing_whitespace, + .document_starts_with_bom, => .warning, }; } pub fn format(code: Code, w: anytype) !void { switch (code) { + .document_starts_with_bom => try w.writeAll("Document starts with BOM (U+FEFF). 
HyperDoc recommends not using the BOM with UTF-8."), + .unterminated_inline_list => try w.writeAll("Inline list body is unterminated (missing '}' before end of file)."), .unexpected_eof => |ctx| { if (ctx.expected_char) |ch| { @@ -1888,6 +2096,15 @@ pub const Diagnostic = struct { .invalid_date_time => try w.writeAll("Invalid date/time value."), .invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' for date/time value."), + + .invalid_string_escape => |ctx| if (ctx.codepoint > 0x20 and ctx.codepoint <= 0x7F) + try w.print("\\{u} is not a valid escape sequence.", .{ctx.codepoint}) + else + try w.print("U+{X:0>2} is not a valid escape sequence.", .{ctx.codepoint}), + + .invalid_unicode_string_escape => try w.writeAll("Invalid unicode escape sequence"), + + .illegal_character => |ctx| try w.print("Forbidden control character U+{X:0>4}.", .{ctx.codepoint}), } } }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 6a79530..034501b 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -86,6 +86,96 @@ test "parser accept string literals and unescape" { try std.testing.expectEqualStrings("\"hello\\\\n\"", token.text); } +test "semantic analyzer unescapes string literals" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"line\\\\break\\nquote \\\" unicode \\u{1F600}\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code = source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + + const text = try sema.unescape_string(token); + try std.testing.expectEqualStrings("line\\break\nquote \" unicode 😀", text); + try std.testing.expect(!diagnostics.has_error()); +} + +test "semantic analyzer reports invalid string escapes" { + var arena = 
std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"oops\\q\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code = source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + + const text = try sema.unescape_string(token); + try std.testing.expectEqualStrings("oops\\q", text); + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .invalid_string_escape = .{ .codepoint = 'q' } })); +} + +test "semantic analyzer flags forbidden control characters" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"tab\\u{9}\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code = source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + + const text = try sema.unescape_string(token); + try std.testing.expectEqualStrings("tab\t", text); + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .illegal_character = .{ .codepoint = 0x9 } })); +} + +test "semantic analyzer forbids raw control characters" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"bad\tvalue\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code 
= source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + _ = try sema.unescape_string(token); + + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .illegal_character = .{ .codepoint = 0x9 } })); +} + test "parser reports unterminated string literals" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); @@ -245,81 +335,187 @@ test "parser handles unknown node types" { } } -fn diagnosticsContain(diag: *const hdoc.Diagnostics, expected: hdoc.Diagnostic.Code) bool { +fn diagnosticCodesEqual(a: hdoc.Diagnostic.Code, b: hdoc.Diagnostic.Code) bool { + if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false; + + return switch (a) { + .document_starts_with_bom, + .unterminated_inline_list, + .unterminated_string, + .unterminated_block_list, + .missing_hdoc_header, + .duplicate_hdoc_header, + .link_not_nestable, + .invalid_link, + .invalid_date_time, + .invalid_date_time_fmt, + .empty_verbatim_block, + .verbatim_missing_trailing_newline, + .verbatim_missing_space, + .trailing_whitespace, + .empty_inline_body, + .attribute_leading_trailing_whitespace, + .invalid_unicode_string_escape, + => true, + + .unexpected_eof => |ctx| blk: { + const other = b.unexpected_eof; + break :blk ctx.expected_char == other.expected_char and std.mem.eql(u8, ctx.context, other.context); + }, + + .unexpected_character => |ctx| blk: { + const other = b.unexpected_character; + break :blk ctx.expected == other.expected and ctx.found == other.found; + }, + + .invalid_identifier_start => |ctx| blk: { + const other = b.invalid_identifier_start; + break :blk ctx.char == other.char; + }, + + .missing_attribute => |ctx| blk: { + const other = b.missing_attribute; + break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); + }, + + .invalid_attribute => |ctx| blk: { + 
const other = b.invalid_attribute; + break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); + }, + + .unknown_block_type => |ctx| blk: { + const other = b.unknown_block_type; + break :blk std.mem.eql(u8, ctx.name, other.name); + }, + + .invalid_block_type => |ctx| blk: { + const other = b.invalid_block_type; + break :blk std.mem.eql(u8, ctx.name, other.name); + }, + + .invalid_inline_combination => |ctx| blk: { + const other = b.invalid_inline_combination; + break :blk ctx.first == other.first and ctx.second == other.second; + }, + + .duplicate_attribute => |ctx| blk: { + const other = b.duplicate_attribute; + break :blk std.mem.eql(u8, ctx.name, other.name); + }, + + .unknown_attribute => |ctx| blk: { + const other = b.unknown_attribute; + break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); + }, + + .redundant_inline => |ctx| blk: { + const other = b.redundant_inline; + break :blk ctx.attribute == other.attribute; + }, + + .invalid_string_escape => |ctx| blk: { + break :blk b.invalid_string_escape.codepoint == ctx.codepoint; + }, + + .illegal_character => |ctx| blk: { + const other = b.illegal_character; + break :blk ctx.codepoint == other.codepoint; + }, + }; +} + +fn logDiagnostics(diag: *const hdoc.Diagnostics) void { for (diag.items.items) |item| { - if (std.meta.activeTag(item.code) == std.meta.activeTag(expected)) { - return true; + var buf: [256]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + item.code.format(stream.writer()) catch {}; + std.log.err("Diagnostic {d}:{d}: {s}", .{ item.location.line, item.location.column, stream.getWritten() }); + } +} + +fn validateDiagnostics(code: []const u8, expected: []const hdoc.Diagnostic.Code) !void { + try std.testing.expect(expected.len > 0); + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const maybe_doc = hdoc.parse(std.testing.allocator, code, &diagnostics) catch |err| switch (err) { + 
error.OutOfMemory => return err, + else => null, + }; + if (maybe_doc) |doc| { + var owned = doc; + defer owned.deinit(); + } + + if (diagnostics.items.items.len != expected.len) { + logDiagnostics(&diagnostics); + } + try std.testing.expectEqual(expected.len, diagnostics.items.items.len); + for (expected, 0..) |exp, idx| { + const actual = diagnostics.items.items[idx].code; + if (!diagnosticCodesEqual(actual, exp)) { + logDiagnostics(&diagnostics); + return error.MissingDiagnosticCode; } } - return false; } -test "parsing valid document yields empty diagnostics" { +fn expectParseOk(code: []const u8) !void { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); - var doc = try hdoc.parse(std.testing.allocator, "hdoc(version=\"2.0\");", &diagnostics); + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); defer doc.deinit(); - try std.testing.expect(!diagnostics.has_error()); - try std.testing.expect(!diagnostics.has_warning()); - try std.testing.expectEqual(@as(usize, 0), diagnostics.items.items.len); + if (diagnostics.has_error() or diagnostics.has_warning()) { + logDiagnostics(&diagnostics); + return error.TestExpectedEqual; + } } -test "diagnostic codes are emitted for expected samples" { - const Case = struct { - code: hdoc.Diagnostic.Code, - samples: []const []const u8, - }; +fn expectParseNoFail(code: []const u8) !void { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); - const cases = [_]Case{ - .{ .code = .{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }, .samples = &.{"hdoc(version=\"2.0\"); h1("} }, - .{ .code = .{ .unexpected_character = .{ .expected = '{', .found = '1' } }, .samples = &.{"hdoc(version=\"2.0\"); h1 123"} }, - .{ .code = .unterminated_string, .samples = &.{"hdoc(version=\"2.0\"); h1 \"unterminated"} }, - .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, 
- .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, - .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, - .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");"} }, - .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, - .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, - .{ .code = .verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, - .{ .code = .trailing_whitespace, .samples = &.{"hdoc(version=\"2.0\"); pre:\n| trailing \n"} }, - .{ .code = .missing_hdoc_header, .samples = &.{"h1 \"Title\""} }, - .{ .code = .duplicate_hdoc_header, .samples = &.{"hdoc(version=\"2.0\"); hdoc(version=\"2.0\");"} }, + var doc = hdoc.parse(std.testing.allocator, code, &diagnostics) catch |err| switch (err) { + error.OutOfMemory => return err, + else => { + logDiagnostics(&diagnostics); + return error.TestExpectedEqual; + }, }; + defer doc.deinit(); - inline for (cases) |case| { - for (case.samples) |sample| { - var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const maybe_doc = hdoc.parse(std.testing.allocator, sample, &diagnostics) catch |err| switch (err) { - error.OutOfMemory => return err, - else => null, - }; - - if (maybe_doc) |doc| { - var owned_doc = doc; - defer owned_doc.deinit(); - } + if (diagnostics.has_error()) { + logDiagnostics(&diagnostics); + return error.TestExpectedEqual; + } +} - if (!diagnosticsContain(&diagnostics, case.code)) { - std.log.err("Diagnostics did not contain expected code: '{t}'", .{case.code}); - for (diagnostics.items.items) |item| { - std.log.err(" Emitted diagnostic: {f}", .{item.code}); - } - return error.MissingDiagnosticCode; - } +test "parsing valid document yields empty diagnostics" { + try expectParseOk("hdoc(version=\"2.0\");"); +} - const 
expected_severity = case.code.severity(); - if (expected_severity == .@"error") { - try std.testing.expect(diagnostics.has_error()); - } else { - try std.testing.expect(!diagnostics.has_error()); - try std.testing.expect(diagnostics.has_warning()); - } - } - } +test "diagnostic codes are emitted for expected samples" { + try validateDiagnostics("hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); + try validateDiagnostics("hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); + try validateDiagnostics("hdoc{h1 \"x\"", &.{.unterminated_block_list}); + try validateDiagnostics("hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); + try validateDiagnostics( + "hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");", + &.{ .{ .duplicate_attribute = .{ .name = "lang" } }, .empty_inline_body }, + ); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); + try validateDiagnostics("h1 \"Title\"", &.{.missing_hdoc_header}); + try validateDiagnostics("hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } test "parser reports unterminated inline lists" { From 
01e548c576829457af9431bbe4118b28b1f4f1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 20:00:31 +0100 Subject: [PATCH 029/116] Simplifies diagnosticCodesEqual with metaprogramming. --- src/testsuite.zig | 117 +++++++++++++--------------------------------- 1 file changed, 32 insertions(+), 85 deletions(-) diff --git a/src/testsuite.zig b/src/testsuite.zig index 034501b..cc5ad4a 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -335,93 +335,40 @@ test "parser handles unknown node types" { } } -fn diagnosticCodesEqual(a: hdoc.Diagnostic.Code, b: hdoc.Diagnostic.Code) bool { - if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false; - - return switch (a) { - .document_starts_with_bom, - .unterminated_inline_list, - .unterminated_string, - .unterminated_block_list, - .missing_hdoc_header, - .duplicate_hdoc_header, - .link_not_nestable, - .invalid_link, - .invalid_date_time, - .invalid_date_time_fmt, - .empty_verbatim_block, - .verbatim_missing_trailing_newline, - .verbatim_missing_space, - .trailing_whitespace, - .empty_inline_body, - .attribute_leading_trailing_whitespace, - .invalid_unicode_string_escape, - => true, - - .unexpected_eof => |ctx| blk: { - const other = b.unexpected_eof; - break :blk ctx.expected_char == other.expected_char and std.mem.eql(u8, ctx.context, other.context); - }, - - .unexpected_character => |ctx| blk: { - const other = b.unexpected_character; - break :blk ctx.expected == other.expected and ctx.found == other.found; - }, - - .invalid_identifier_start => |ctx| blk: { - const other = b.invalid_identifier_start; - break :blk ctx.char == other.char; - }, - - .missing_attribute => |ctx| blk: { - const other = b.missing_attribute; - break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); - }, - - .invalid_attribute => |ctx| blk: { - const other = b.invalid_attribute; - break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); - }, - - 
.unknown_block_type => |ctx| blk: { - const other = b.unknown_block_type; - break :blk std.mem.eql(u8, ctx.name, other.name); - }, - - .invalid_block_type => |ctx| blk: { - const other = b.invalid_block_type; - break :blk std.mem.eql(u8, ctx.name, other.name); - }, - - .invalid_inline_combination => |ctx| blk: { - const other = b.invalid_inline_combination; - break :blk ctx.first == other.first and ctx.second == other.second; - }, - - .duplicate_attribute => |ctx| blk: { - const other = b.duplicate_attribute; - break :blk std.mem.eql(u8, ctx.name, other.name); - }, - - .unknown_attribute => |ctx| blk: { - const other = b.unknown_attribute; - break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); - }, - - .redundant_inline => |ctx| blk: { - const other = b.redundant_inline; - break :blk ctx.attribute == other.attribute; - }, - - .invalid_string_escape => |ctx| blk: { - break :blk b.invalid_string_escape.codepoint == ctx.codepoint; - }, +fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bool { + if (std.meta.activeTag(lhs) != std.meta.activeTag(rhs)) + return false; + + switch (lhs) { + inline else => |_, tag_value| { + const tag = @tagName(tag_value); + const a_struct = @field(lhs, tag); + const b_struct = @field(rhs, tag); + + const TagField = @FieldType(hdoc.Diagnostic.Code, tag); + const info = @typeInfo(TagField); + + switch (info) { + .void => return true, + + .@"struct" => |struct_info| { + inline for (struct_info.fields) |fld| { + const a = @field(a_struct, fld.name); + const b = @field(b_struct, fld.name); + const eql = switch (fld.type) { + []const u8 => std.mem.eql(u8, a, b), + else => (a == b), + }; + if (!eql) + return false; + } + return true; + }, - .illegal_character => |ctx| blk: { - const other = b.illegal_character; - break :blk ctx.codepoint == other.codepoint; + else => @compileError("Unsupported type: " ++ @typeName(TagField)), + } }, - }; + } } fn logDiagnostics(diag: *const hdoc.Diagnostics) 
void { From 02379dbfc40a0b79ce53fb09c6e3249df6eb8e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 20:33:09 +0100 Subject: [PATCH 030/116] Fixes an edge case in unescape_string. Adds fuzzing for string unescaper. --- build.zig | 2 +- src/hyperdoc.zig | 139 ++++++++++++++++++++++++++- src/testsuite.zig | 24 ++--- test/{parser => accept}/stress.hdoc | 0 test/{parser => accept}/workset.hdoc | 0 5 files changed, 147 insertions(+), 18 deletions(-) rename test/{parser => accept}/stress.hdoc (100%) rename test/{parser => accept}/workset.hdoc (100%) diff --git a/build.zig b/build.zig index 5018027..1d265f5 100644 --- a/build.zig +++ b/build.zig @@ -45,7 +45,7 @@ pub fn build(b: *std.Build) void { rawFileMod(b, "examples/featurematrix.hdoc"), rawFileMod(b, "examples/demo.hdoc"), rawFileMod(b, "examples/guide.hdoc"), - rawFileMod(b, "test/parser/stress.hdoc"), + rawFileMod(b, "test/accept/stress.hdoc"), }, }), .use_llvm = true, diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b226fc1..378d971 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1190,7 +1190,7 @@ pub const SemanticAnalyzer = struct { } i += 1; const escape_part = content[start..i]; - std.debug.assert(escape_part.len > 2); + std.debug.assert(escape_part.len >= 3); std.debug.assert(escape_part[0] == '\\'); std.debug.assert(escape_part[1] == 'u'); std.debug.assert(escape_part[escape_part.len - 1] == '}'); @@ -1199,6 +1199,14 @@ pub const SemanticAnalyzer = struct { if (escape_part[2] != '{') { try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + } + + if (escape_part.len == 4) { + // Empty escape: \u{} + std.debug.assert(std.mem.eql(u8, escape_part, "\\u{}")); + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; } const codepoint = std.fmt.parseInt(u21, escape_part[3 .. 
escape_part.len - 1], 16) catch { @@ -2185,7 +2193,134 @@ test "fuzz parser" { @embedFile("examples/featurematrix.hdoc"), @embedFile("examples/demo.hdoc"), @embedFile("examples/guide.hdoc"), - @embedFile("test/parser/stress.hdoc"), + @embedFile("test/accept/stress.hdoc"), + }, + }); +} + +test "fuzz string unescape" { + const Impl = struct { + fn testOne(impl: @This(), string_literal: []const u8) !void { + // Don't test if the string doesn't follow our rules: + if (string_literal.len < 2) + return; + if (string_literal[0] != '"' or string_literal[string_literal.len - 1] != '"') + return; + if (string_literal.len >= 3 and string_literal[string_literal.len - 2] == '\\') + return; + + // Check for valid UTF-8 + _ = std.unicode.utf8CountCodepoints(string_literal) catch return; + + _ = impl; + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: SemanticAnalyzer = .{ + .arena = arena.allocator(), + .code = string_literal, + .diagnostics = &diagnostics, + }; + + const output = try sema.unescape_string(.{ + .location = .{ .offset = 0, .length = string_literal.len }, + .text = string_literal, + }); + + _ = output; + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + \\"" + , + \\"hello" + , + \\"simple ASCII 123" + , + \\"quote: \"inside\" ok" + , + \\"backslash: \\ path" + , + \\"mixed: \"a\" and \\b\\" + , + \\"line1\nline2" + , + \\"windows\r\nnew line" + , + \\"unicode snowman: \u{2603} yay" + , + \\"emoji: \u{1F642} smile" + , + \\"CJK: \u{65E5}\u{672C}\u{8A9E}" + , + \\"math: \u{221E} infinity" + , + \\"euro: \u{20AC} symbol" + , + \\"accented: café" + , + \\"escaped braces: \u{7B} \u{7D}" + , + \\"leading zeros: \u{000041} is A" + , + \\"json-ish: {\"k\":\"v\"}" + , + \\"literal sequence: \\\" done" + , + \\"multiple lines:\n- one\n- two" + , + \\"CR only:\rreturn" + , + \\"mix: \u{1F4A1} idea 
\"quoted\" \\slash" + , + // + // Adversarial ones: + // + \\"tab escape: \t is not allowed" + , + \\"backspace: \b not allowed" + , + \\"null: \0 not allowed" + , + \\"hex escape: \x20 not allowed" + , + \\"octal-ish: \123 not allowed" + , + \\"single quote escape: \' not allowed" + , + \\"unicode short form: \u0041 not allowed" + , + \\"empty unicode: \u{} not allowed" + , + \\"missing closing brace: \u{41 not closed" + , + \\"missing opening brace: \u41} not opened" + , + \\"non-hex digit: \u{12G} invalid" + , + \\"too many digits: \u{1234567} invalid" + , + \\"out of range: \u{110000} invalid" + , + \\"surrogate: \u{D800} invalid" + , + \\"forbidden NUL via unicode: \u{0} invalid" + , + \\"forbidden TAB via unicode: \u{9} invalid" + , + \\"forbidden C1 control: \u{80} invalid" + , + \\"unknown escape: \q invalid" + , + \\"backslash-space escape: \ a invalid" + , + \\"bad hex tail: \u{1F60Z} invalid" }, }); } diff --git a/src/testsuite.zig b/src/testsuite.zig index cc5ad4a..7a5d640 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -1,15 +1,12 @@ const std = @import("std"); const hdoc = @import("./hyperdoc.zig"); -fn testAcceptDocument(document: []const u8) !void { - var doc = try hdoc.parse(std.testing.allocator, document, null); - defer doc.deinit(); +test "validate examples directory" { + try parseDirectoryTree("examples"); } -fn parseFile(path: []const u8) !void { - const source = try std.fs.cwd().readFileAlloc(std.testing.allocator, path, 10 * 1024 * 1024); - defer std.testing.allocator.free(source); - try testAcceptDocument(source); +test "validate tests directory" { + try parseDirectoryTree("test/accept"); } fn parseDirectoryTree(path: []const u8) !void { @@ -25,16 +22,13 @@ fn parseDirectoryTree(path: []const u8) !void { if (!std.mem.endsWith(u8, entry.path, ".hdoc")) continue; - const full_path = try std.fs.path.join(std.testing.allocator, &.{ path, entry.path }); - defer std.testing.allocator.free(full_path); + errdefer std.log.err("failed to 
process \"{f}/{f}\"", .{ std.zig.fmtString(entry.path), std.zig.fmtString(entry.basename) }); - try parseFile(full_path); - } -} + const source = try entry.dir.readFileAlloc(std.testing.allocator, entry.basename, 10 * 1024 * 1024); + defer std.testing.allocator.free(source); -test "parser accepts examples and test documents" { - try parseDirectoryTree("examples"); - try parseDirectoryTree("test"); + try expectParseOk(source); + } } test "parser accept identifier and word tokens" { diff --git a/test/parser/stress.hdoc b/test/accept/stress.hdoc similarity index 100% rename from test/parser/stress.hdoc rename to test/accept/stress.hdoc diff --git a/test/parser/workset.hdoc b/test/accept/workset.hdoc similarity index 100% rename from test/parser/workset.hdoc rename to test/accept/workset.hdoc From f993c3e95653b4dea01d16f9ce828725d514143b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 20:51:08 +0100 Subject: [PATCH 031/116] Implements translation of paragraph nodes. 
--- src/hyperdoc.zig | 24 ++++++++++++++--- src/testsuite.zig | 65 ++++++++++++++++++++++++++++------------------- 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 378d971..97ef573 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -616,9 +616,27 @@ pub const SemanticAnalyzer = struct { } fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + }); + + const heading: Block.Paragraph = .{ + .kind = switch (node.type) { + .p => .p, + .note => .note, + .warning => .warning, + .danger => .danger, + .tip => .tip, + .quote => .quote, + .spoiler => .spoiler, + else => unreachable, + }, + .lang = attrs.lang, + .content = try sema.translate_inline(node), + }; + + return .{ heading, attrs.id }; } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { diff --git a/src/testsuite.zig b/src/testsuite.zig index 7a5d640..d7d985e 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -16,18 +16,26 @@ fn parseDirectoryTree(path: []const u8) !void { var walker = try dir.walk(std.testing.allocator); defer walker.deinit(); + var path_buffer: std.array_list.Managed(u8) = .init(std.testing.allocator); + defer path_buffer.deinit(); + while (try walker.next()) |entry| { if (entry.kind != .file) continue; if (!std.mem.endsWith(u8, entry.path, ".hdoc")) continue; - errdefer std.log.err("failed to process \"{f}/{f}\"", .{ std.zig.fmtString(entry.path), std.zig.fmtString(entry.basename) }); + errdefer std.log.err("failed to process \"{f}/{f}\"", .{ std.zig.fmtString(path), std.zig.fmtString(entry.path) }); const source = try entry.dir.readFileAlloc(std.testing.allocator, entry.basename, 10 * 1024 * 1024); defer 
std.testing.allocator.free(source); - try expectParseOk(source); + path_buffer.clearRetainingCapacity(); + try path_buffer.appendSlice(path); + try path_buffer.append('/'); + try path_buffer.appendSlice(entry.path); + + try expectParseOk(.{ .file_path = path_buffer.items }, source); } } @@ -365,16 +373,20 @@ fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bo } } -fn logDiagnostics(diag: *const hdoc.Diagnostics) void { +const LogDiagOptions = struct { + file_path: []const u8 = "", +}; + +fn logDiagnostics(diag: *const hdoc.Diagnostics, opts: LogDiagOptions) void { for (diag.items.items) |item| { var buf: [256]u8 = undefined; var stream = std.io.fixedBufferStream(&buf); item.code.format(stream.writer()) catch {}; - std.log.err("Diagnostic {d}:{d}: {s}", .{ item.location.line, item.location.column, stream.getWritten() }); + std.log.err("Diagnostic {s}:{d}:{d}: {s}", .{ opts.file_path, item.location.line, item.location.column, stream.getWritten() }); } } -fn validateDiagnostics(code: []const u8, expected: []const hdoc.Diagnostic.Code) !void { +fn validateDiagnostics(opts: LogDiagOptions, code: []const u8, expected: []const hdoc.Diagnostic.Code) !void { try std.testing.expect(expected.len > 0); var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); @@ -390,19 +402,19 @@ fn validateDiagnostics(code: []const u8, expected: []const hdoc.Diagnostic.Code) } if (diagnostics.items.items.len != expected.len) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); } try std.testing.expectEqual(expected.len, diagnostics.items.items.len); for (expected, 0..) 
|exp, idx| { const actual = diagnostics.items.items[idx].code; if (!diagnosticCodesEqual(actual, exp)) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); return error.MissingDiagnosticCode; } } } -fn expectParseOk(code: []const u8) !void { +fn expectParseOk(opts: LogDiagOptions, code: []const u8) !void { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); @@ -410,12 +422,12 @@ fn expectParseOk(code: []const u8) !void { defer doc.deinit(); if (diagnostics.has_error() or diagnostics.has_warning()) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); return error.TestExpectedEqual; } } -fn expectParseNoFail(code: []const u8) !void { +fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); @@ -429,34 +441,35 @@ fn expectParseNoFail(code: []const u8) !void { defer doc.deinit(); if (diagnostics.has_error()) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); return error.TestExpectedEqual; } } test "parsing valid document yields empty diagnostics" { - try expectParseOk("hdoc(version=\"2.0\");"); + try expectParseOk(.{}, "hdoc(version=\"2.0\");"); } test "diagnostic codes are emitted for expected samples" { - try validateDiagnostics("hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); - try validateDiagnostics("hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); - try validateDiagnostics("hdoc{h1 \"x\"", &.{.unterminated_block_list}); - try validateDiagnostics("hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); + try 
validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); + try validateDiagnostics(.{}, "hdoc{h1 \"x\"", &.{.unterminated_block_list}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); try validateDiagnostics( + .{}, "hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");", &.{ .{ .duplicate_attribute = .{ .name = "lang" } }, .empty_inline_body }, ); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); - try validateDiagnostics("h1 \"Title\"", &.{.missing_hdoc_header}); - try validateDiagnostics("hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); + try validateDiagnostics(.{}, 
"hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); + try validateDiagnostics(.{}, "h1 \"Title\"", &.{.missing_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } test "parser reports unterminated inline lists" { From 709423cf0dd1f3f8a6efb68b1218857fc4c60fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 21:02:46 +0100 Subject: [PATCH 032/116] Implements join_spans to enable date/time parsing, fixes bad example. --- examples/guide.hdoc | 2 +- src/hyperdoc.zig | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 50f7b64..3f939f4 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -72,7 +72,7 @@ img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/d h2(id="dates") { Dates and Times } p { - The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00}. + The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00Z}. A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 97ef573..01655f7 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -863,8 +863,8 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(&content_spans, node.body, .{}); - // TODO: Convert the content_spans into a "rendered string". - const content_text = ""; + // Convert the content_spans into a "rendered string". 
+ const content_text = try sema.join_spans(content_spans.items, .no_space); const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -942,6 +942,40 @@ pub const SemanticAnalyzer = struct { }); } + const JoinStyle = enum { no_space, one_space }; + fn join_spans(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { + var len: usize = switch (style) { + .no_space => 0, + .one_space => (source_spans.len -| 1), + }; + for (source_spans) |span| { + len += switch (span.content) { + .text => |str| str.len, + .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + }; + } + + var output_str: std.ArrayList(u8) = .empty; + defer output_str.deinit(sema.arena); + + try output_str.ensureTotalCapacityPrecise(sema.arena, len); + + for (source_spans, 0..) |span, index| { + switch (style) { + .no_space => {}, + .one_space => if (index > 0) + output_str.appendAssumeCapacity(' '), + } + + switch (span.content) { + .text => |str| output_str.appendSliceAssumeCapacity(str), + .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + } + } + + return try output_str.toOwnedSlice(sema.arena); + } + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { From 535c4119d468eb2ea960d4e37d524a50be4e9762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 21:42:56 +0100 Subject: [PATCH 033/116] Implements new attribute hdoc(tz) which provides a timezone hint for all time/datetime values --- docs/specification.md | 93 ++++++++++++++++++++-------------------- src/hyperdoc.zig | 99 ++++++++++++++++++++++++++++++------------- src/main.zig | 1 + src/testsuite.zig | 33 ++++++++++----- 4 files changed, 139 insertions(+), 87 deletions(-) diff --git 
a/docs/specification.md b/docs/specification.md index d3c0959..ce20dd7 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -431,32 +431,32 @@ This separation is intentional: it allows autoformatters to parse and rewrite do ## Element Overview -| Element | Element Type | Allowed Children | Attributes | -| ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------- | -| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | -| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | -| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | -| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | -| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | -| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | -| `toc` | Block | - | `lang`, \[`id`\], `depth` | -| `table` | Block | Table Rows | `lang`, \[`id`\] | -| *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | -| `li` | List Item | Blocks, String, Verbatim | `lang` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `columns` | Table Row | `td` ≥ 1 | `lang` | -| `group` | Table Row | Text Body | `lang`, | -| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `\em` | Text Body | Text Body | `lang` | -| `\mono` | Text Body | Text Body | `lang`, `syntax` | -| `\strike` | Text Body | Text Body | `lang` | -| `\sub`, `\sup` | Text Body | Text Body | `lang` | -| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | -| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | -| *Plain Text* | Text Body | - | | -| *String* | Text Body | - | | -| *Verbatim* | Text Body | - | | +| Element | Element Type | Allowed Children | Attributes | +| ----------------------------------------------------------- | ------------ | ---------------------------- | 
-------------------------------------------------- | +| *Document* | Document | `hdoc`, Blocks | | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date`, `tz` | +| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | +| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | +| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | +| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | +| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | +| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | +| `toc` | Block | - | `lang`, \[`id`\], `depth` | +| `table` | Block | Table Rows | `lang`, \[`id`\] | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | +| `columns` | Table Row | `td` ≥ 1 | `lang` | +| `group` | Table Row | Text Body | `lang`, | +| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | +| `\em` | Text Body | Text Body | `lang` | +| `\mono` | Text Body | Text Body | `lang`, `syntax` | +| `\strike` | Text Body | Text Body | `lang` | +| `\sub`, `\sup` | Text Body | Text Body | `lang` | +| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | +| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | +| *Plain Text* | Text Body | - | | +| *String* | Text Body | - | | +| *Verbatim* | Text Body | - | | Notes: @@ -466,23 +466,24 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | A date-time value using the format specified below (a conservative intersection of [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), compatible with both) | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | | +| Attribute | Required | Allowed Values | Description | +| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| +| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | +| `title` | No | *Any* | Sets the title of the document or the table row. | +| `author` | No | *Any* | Sets the author of the document. | +| `date` | No | A date-time value using the format specified below | Sets the authoring date of the document. | +| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | +| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | Defines how the date/time value shall be displayed. | +| `tz` | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | ## Semantic Structure @@ -732,11 +733,11 @@ Time strings MUST follow `hh:mm:ss` with a required time zone. - An optional fractional seconds component MAY follow the seconds field as `.` plus 1, 2, 3, 6, or 9 digits. - The fractional separator MUST be `.`. Comma is not allowed. 
-- A time zone is required and MUST be either `Z` (UTC) or a numeric offset - in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. +- A time zone is required when no `tz` attribute is present on the header node and + MUST be either `Z` (UTC) or a numeric offset in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. - Offset hours MUST be in `00` to `23`, offset minutes MUST be in `00` to `59`. -Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`. +Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`, `22:30:46` (only with `tz` attribute). ### Date/Time Format diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 01655f7..2ca670a 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -17,6 +17,7 @@ pub const Document = struct { title: ?[]const u8, author: ?[]const u8, date: ?DateTime, + timezone: ?[]const u8, pub fn deinit(doc: *Document) void { doc.arena.deinit(); @@ -189,7 +190,7 @@ pub const DateTime = struct { date: Date, time: Time, - pub fn parse(text: []const u8) !DateTime { + pub fn parse(text: []const u8, default_timezone: ?[]const u8) !DateTime { const split_index = std.mem.indexOfScalar(u8, text, 'T') orelse return error.InvalidValue; const head = text[0..split_index]; @@ -197,7 +198,7 @@ pub const DateTime = struct { return .{ .date = try Date.parse(head), - .time = try Time.parse(tail), + .time = try Time.parse(tail, default_timezone), }; } }; @@ -265,8 +266,9 @@ pub const Time = struct { microsecond: u20, // 0-999999 zone_offset: i32, // in minutes - pub fn parse(text: []const u8) !Time { - if (text.len < 9) return error.InvalidValue; + pub fn parse(text: []const u8, default_timezone: ?[]const u8) !Time { + if (text.len < 8) // "HH:MM:SS" + return error.InvalidValue; const hour = std.fmt.parseInt(u8, text[0..2], 10) catch return error.InvalidValue; if (text[2] != ':') return error.InvalidValue; @@ -279,23 +281,30 @@ pub const Time = struct { var index: usize = 8; var microsecond: u20 
= 0; - if (index >= text.len) return error.InvalidValue; + if (index < text.len) { + if (text[index] == '.') { + const start = index + 1; + var end = start; + while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {} + if (end == start) return error.InvalidValue; - if (text[index] == '.') { - const start = index + 1; - var end = start; - while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {} - if (end == start) return error.InvalidValue; - - const fraction_value = std.fmt.parseInt(u64, text[start..end], 10) catch return error.InvalidValue; - microsecond = fractionToMicrosecond(end - start, fraction_value) orelse return error.InvalidValue; - index = end; + const fraction_value = std.fmt.parseInt(u64, text[start..end], 10) catch return error.InvalidValue; + microsecond = fractionToMicrosecond(end - start, fraction_value) orelse return error.InvalidValue; + index = end; + } } - if (index >= text.len) return error.InvalidValue; + const timezone = if (index == text.len) + default_timezone orelse return error.MissingTimezone + else + text[index..]; + + if (timezone.len != 1 and timezone.len != 6) // "Z" or "±HH:MM" + return error.InvalidValue; - if (text[index] == 'Z') { - if (index + 1 != text.len) return error.InvalidValue; + if (timezone.len == 1) { + if (timezone[0] != 'Z') + return error.InvalidValue; return .{ .hour = @intCast(hour), .minute = @intCast(minute), @@ -304,15 +313,19 @@ pub const Time = struct { .zone_offset = 0, }; } + std.debug.assert(timezone.len == 6); - const sign_char = text[index]; - if (sign_char != '+' and sign_char != '-') return error.InvalidValue; - const sign: i32 = if (sign_char == '+') 1 else -1; + const sign_char = timezone[0]; + const sign: i32 = switch (sign_char) { + '+' => 1, + '-' => -1, + else => return error.InvalidValue, + }; + if (timezone[3] != ':') + return error.InvalidValue; - if (text.len - index != 6) return error.InvalidValue; - const zone_hour = std.fmt.parseInt(u8, text[index + 1 .. 
index + 3], 10) catch return error.InvalidValue; - if (text[index + 3] != ':') return error.InvalidValue; - const zone_minute = std.fmt.parseInt(u8, text[index + 4 .. index + 6], 10) catch return error.InvalidValue; + const zone_hour = std.fmt.parseInt(u8, timezone[1..3], 10) catch return error.InvalidValue; + const zone_minute = std.fmt.parseInt(u8, timezone[4..6], 10) catch return error.InvalidValue; if (zone_hour > 23 or zone_minute > 59) return error.InvalidValue; @@ -428,6 +441,7 @@ pub fn parse( .version = header.version, .author = header.author, .date = header.date, + .timezone = header.timezone, }; } @@ -460,6 +474,7 @@ pub const SemanticAnalyzer = struct { lang: ?[]const u8, title: ?[]const u8, author: ?[]const u8, + timezone: ?[]const u8, date: ?DateTime, }; @@ -520,6 +535,7 @@ pub const SemanticAnalyzer = struct { author: ?[]const u8 = null, date: ?DateTime = null, lang: ?[]const u8 = null, + tz: ?[]const u8 = null, }); return .{ @@ -528,6 +544,7 @@ pub const SemanticAnalyzer = struct { .title = attrs.title, .author = attrs.author, .date = attrs.date, + .timezone = attrs.tz, }; } @@ -918,11 +935,28 @@ pub const SemanticAnalyzer = struct { ) !Span.Content { const Format: type = DTValue.Format; - const value: DTValue = if (DTValue.parse(value_str)) |value| + const timezone_hint: ?[]const u8 = if (sema.header) |header| header.timezone else null; + + const value_or_err: error{ InvalidValue, MissingTimezone }!DTValue = switch (DTValue) { + Date => Date.parse(value_str), + Time => Time.parse(value_str, timezone_hint), + DateTime => DateTime.parse(value_str, timezone_hint), + else => unreachable, + }; + + const value: DTValue = if (value_or_err) |value| value - else |_| blk: { - // TODO: Report error for invalid value - try sema.emit_diagnostic(.invalid_date_time, node.location); + else |err| blk: { + switch (err) { + error.InvalidValue => { + try sema.emit_diagnostic(.invalid_date_time, node.location); + }, + error.MissingTimezone => { + // TODO: Use 
(timezone_hint != null) to emit diagnostic for hint with + // adding `tz` attribute when all date/time values share a common base. + try sema.emit_diagnostic(.invalid_date_time, node.location); + }, + } break :blk std.mem.zeroes(DTValue); }; @@ -1118,6 +1152,11 @@ pub const SemanticAnalyzer = struct { const value = try sema.unescape_string(attrib); + const timezone_hint = if (sema.header) |header| + header.timezone + else + null; + return switch (T) { []const u8 => value, @@ -1138,9 +1177,9 @@ pub const SemanticAnalyzer = struct { }, Version => Version.parse(value) catch return error.InvalidValue, - DateTime => DateTime.parse(value) catch return error.InvalidValue, Date => Date.parse(value) catch return error.InvalidValue, - Time => Time.parse(value) catch return error.InvalidValue, + Time => Time.parse(value, timezone_hint) catch return error.InvalidValue, + DateTime => DateTime.parse(value, timezone_hint) catch return error.InvalidValue, else => @compileError("Unsupported attribute type: " ++ @typeName(T)), }; diff --git a/src/main.zig b/src/main.zig index 2f9b602..645041c 100644 --- a/src/main.zig +++ b/src/main.zig @@ -432,6 +432,7 @@ test "dumpDocument escapes string values" { .title = title, .author = null, .date = null, + .timezone = null, }; defer doc.deinit(); diff --git a/src/testsuite.zig b/src/testsuite.zig index d7d985e..111649f 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -529,31 +529,42 @@ test "Date.parse accepts ISO dates" { } test "Time.parse accepts ISO times with zones" { - const utc = try hdoc.Time.parse("22:30:46Z"); + const utc = try hdoc.Time.parse("22:30:46Z", null); try std.testing.expectEqual(@as(u5, 22), utc.hour); try std.testing.expectEqual(@as(u6, 30), utc.minute); try std.testing.expectEqual(@as(u6, 46), utc.second); try std.testing.expectEqual(@as(u20, 0), utc.microsecond); try std.testing.expectEqual(@as(i32, 0), utc.zone_offset); - const fractional = try hdoc.Time.parse("22:30:46.136+01:00"); + const utc_hint = try 
hdoc.Time.parse("22:30:46", "Z"); + try std.testing.expectEqual(@as(u5, 22), utc_hint.hour); + try std.testing.expectEqual(@as(u6, 30), utc_hint.minute); + try std.testing.expectEqual(@as(u6, 46), utc_hint.second); + try std.testing.expectEqual(@as(u20, 0), utc_hint.microsecond); + try std.testing.expectEqual(@as(i32, 0), utc_hint.zone_offset); + + const fractional = try hdoc.Time.parse("22:30:46.136+01:00", null); try std.testing.expectEqual(@as(u20, 136_000), fractional.microsecond); try std.testing.expectEqual(@as(i32, 60), fractional.zone_offset); - const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30"); + const fractional_hint = try hdoc.Time.parse("22:30:46.136", "+01:30"); + try std.testing.expectEqual(@as(u20, 136_000), fractional_hint.microsecond); + try std.testing.expectEqual(@as(i32, 90), fractional_hint.zone_offset); + + const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30", null); try std.testing.expectEqual(@as(u20, 136_797), nanos.microsecond); try std.testing.expectEqual(@as(i32, -330), nanos.zone_offset); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("21:30:46,1Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("22:30:46")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("24:00:00Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:60:00Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:60Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:59.1234Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("21:30:46,1Z", null)); + try std.testing.expectError(error.MissingTimezone, hdoc.Time.parse("22:30:46", null)); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("24:00:00Z", null)); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:60:00Z", null)); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:60Z", null)); + try 
std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:59.1234Z", null)); } test "DateTime.parse accepts ISO date-time" { - const datetime = try hdoc.DateTime.parse("2025-12-25T22:31:50.13+01:00"); + const datetime = try hdoc.DateTime.parse("2025-12-25T22:31:50.13+01:00", null); try std.testing.expectEqual(@as(i32, 2025), datetime.date.year); try std.testing.expectEqual(@as(u4, 12), datetime.date.month); try std.testing.expectEqual(@as(u5, 25), datetime.date.day); @@ -563,5 +574,5 @@ test "DateTime.parse accepts ISO date-time" { try std.testing.expectEqual(@as(u20, 130_000), datetime.time.microsecond); try std.testing.expectEqual(@as(i32, 60), datetime.time.zone_offset); - try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z")); + try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z", null)); } From 93fc34b2cefd83f1e5691bd9326072bbf98b8b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 22:01:00 +0100 Subject: [PATCH 034/116] Adds tests for Date/Time/DateTime.parse --- src/hyperdoc.zig | 179 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2ca670a..0715b6d 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -221,6 +221,8 @@ pub const Date = struct { day: u5, // 1-31 pub fn parse(text: []const u8) !Date { + if (text.len < 7) // "Y-MM-DD" + return error.InvalidValue; const first_dash = std.mem.indexOfScalar(u8, text, '-') orelse return error.InvalidValue; const tail = text[first_dash + 1 ..]; const second_dash_rel = std.mem.indexOfScalar(u8, tail, '-') orelse return error.InvalidValue; @@ -2415,3 +2417,180 @@ test "fuzz string unescape" { }, }); } + +test "fuzz Date.parse" { + const Impl = struct { + fn testOne(impl: @This(), string_literal: []const u8) !void { + _ = impl; + _ = Date.parse(string_literal) catch return; + } + }; + + const 
corpus: []const []const u8 = &.{ + "", + // good input: + "2025-12-25", + "1-01-01", + "0-01-01", + "1999-11-30", + "2024-02-29", + "2025-02-31", + "9999-12-31", + "10000-01-01", + "123456-07-04", + "42-03-15", + "2025-01-31", + "2025-04-30", + "2025-06-01", + "2025-10-10", + "2025-09-09", + "2025-08-08", + "2025-07-07", + "2025-05-05", + "2025-12-01", + "2025-11-11", + // bad input: + "2025-1-01", + "2025-01-1", + "2025/01/01", + "2025-00-10", + "2025-13-10", + "2025-12-00", + "2025-12-32", + "2025-12-3a", + "20a5-12-25", + "-2025-12-25", + "+2025-12-25", + "20251225", + "2025--12-25", + "2025-12-25 ", + " 2025-12-25", + "٢٠٢٥-١٢-٢٥", + "2025-12", + "2025-12-250", + "2025-12-25T00:00:00Z", + "2025-12-25\n", + }; + + for (corpus) |item| { + try Impl.testOne(.{}, item); + } + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = corpus, + }); +} + +test "fuzz Time.parse" { + const Impl = struct { + fn testOne(impl: @This(), string_literal: []const u8) !void { + _ = impl; + _ = Time.parse(string_literal, null) catch return; + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + "", + // good input: + "00:00:00Z", + "23:59:59Z", + "12:34:56Z", + "01:02:03+00:00", + "22:30:46+01:00", + "22:30:46-05:30", + "08:15:00+14:00", + "19:45:30-00:45", + "05:06:07.1Z", + "05:06:07.12Z", + "05:06:07.123Z", + "05:06:07.123456Z", + "05:06:07.123456789Z", + "23:59:59.000+02:00", + "10:20:30.000000-03:00", + "10:20:30.000000000+03:00", + "00:00:00.9-12:34", + "14:00:00+23:59", + "09:09:09.6+09:00", + "16:17:18.136+01:00", + // bad input: + "24:00:00Z", + "23:60:00Z", + "23:59:60Z", + "9:00:00Z", + "09:0:00Z", + "09:00:0Z", + "09:00Z", + "09:00:00", + "09:00:00z", + "09:00:00+1:00", + "09:00:00+01:0", + "09:00:00+0100", + "09:00:00+25:00", + "09:00:00+01:60", + "09:00:00,+01:00", + "09:00:00,123Z", + "09:00:00.1234Z", + "09:00:00.12345Z", + "09:00:00.1234567Z", + "٠٩:٠٠:٠٠Z", + }, + }); +} + +test "fuzz DateTime.parse" { + const Impl = struct { + fn 
testOne(impl: @This(), string_literal: []const u8) !void { + _ = impl; + _ = DateTime.parse(string_literal, null) catch return; + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + "", + // good input: + "2025-12-25T22:31:50Z", + "2025-12-25T22:31:50.1Z", + "2025-12-25T22:31:50.12+01:00", + "2025-12-25T22:31:50.123-05:30", + "1-01-01T00:00:00Z", + "0-01-01T00:00:00+00:00", + "1999-11-30T23:59:59-00:45", + "2024-02-29T12:00:00Z", + "2025-02-31T08:15:00+14:00", + "9999-12-31T23:59:59.123456Z", + "10000-01-01T00:00:00.123456789+03:00", + "42-03-15T01:02:03+23:59", + "2025-01-31T10:20:30.000000-03:00", + "2025-04-30T10:20:30.000+02:00", + "2025-06-01T16:17:18.136+01:00", + "2025-10-10T09:09:09.6+09:00", + "2025-09-09T19:45:30-00:45", + "2025-08-08T05:06:07.123Z", + "2025-07-07T05:06:07.123456789Z", + "123456-07-04T14:00:00Z", + // bad input: + "2025-12-25 22:31:50Z", + "2025-12-25t22:31:50Z", + "2025-12-25T22:31:50", + "2025-12-25T22:31Z", + "2025-12-25T24:00:00Z", + "2025-12-25T23:60:00Z", + "2025-12-25T23:59:60Z", + "2025-12-25T23:59:59.1234Z", + "2025-12-25T23:59:59,123Z", + "2025-12-25T23:59:59+0100", + "2025-12-25T23:59:59+01:60", + "2025-12-25T23:59:59+25:00", + "2025-00-25T23:59:59Z", + "2025-13-25T23:59:59Z", + "2025-12-00T23:59:59Z", + "2025-12-32T23:59:59Z", + "2025-12-25TT23:59:59Z", + "2025-12-25T23:59:59Z ", + "٢٠٢٥-١٢-٢٥T٢٢:٣١:٥٠Z", + "2025-12-25T23:59:59+01", + }, + }); +} From 103bdecf53ff4c3e9e24ee1637cd9b74672d5562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 22:32:50 +0100 Subject: [PATCH 035/116] Adds parsing of lists and list nodes. 
--- src/hyperdoc.zig | 127 +++++++++++++++++++++++++++++++++++++-- src/main.zig | 6 +- test/accept/workset.hdoc | 7 +++ 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 0715b6d..c45cd29 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -63,7 +63,7 @@ pub const Block = union(enum) { pub const ListItem = struct { lang: ?[]const u8, - content: []Span, + content: []Block, }; pub const Image = struct { @@ -114,7 +114,7 @@ pub const Block = union(enum) { pub const TableCell = struct { lang: ?[]const u8, colspan: ?u32, - content: []Span, + content: []Block, }; }; @@ -550,6 +550,7 @@ pub const SemanticAnalyzer = struct { }; } + /// Translates a top-level block node. fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?Reference } { std.debug.assert(node.type != .hdoc); @@ -659,9 +660,46 @@ pub const SemanticAnalyzer = struct { } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + first: ?u32 = null, + }); + + if (attrs.first != null and node.type != .ol) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "first" } }, get_attribute_location(node, "first", .name).?); + } + + var children: std.ArrayList(Block.ListItem) = .empty; + defer children.deinit(sema.arena); + + switch (node.body) { + .list => |child_nodes| { + try children.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + for (child_nodes) |child_node| { + const list_item = sema.translate_list_item_node(child_node) catch |err| switch (err) { + error.InvalidNodeType => { + try sema.emit_diagnostic(.illegal_child_item, node.location); + continue; + }, + else => |e| return e, 
+ }; + children.appendAssumeCapacity(list_item); + } + }, + + .empty, .string, .text_span, .verbatim => { + try sema.emit_diagnostic(.list_body_required, node.location); + }, + } + + const list: Block.List = .{ + .first = attrs.first, + .lang = attrs.lang, + .items = try children.toOwnedSlice(sema.arena), + }; + + return .{ list, attrs.id }; } fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { @@ -688,6 +726,67 @@ pub const SemanticAnalyzer = struct { return error.Unimplemented; // TODO: Implement this node type } + fn translate_list_item_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.ListItem { + switch (node.type) { + .li => {}, + else => return error.InvalidNodeType, + } + + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + return .{ + .lang = attrs.lang, + .content = try sema.translate_block_list(node, .text_to_p), + }; + } + + const BlockTextUpgrade = enum { no_upgrade, text_to_p }; + + fn translate_block_list(sema: *SemanticAnalyzer, node: Parser.Node, upgrade: BlockTextUpgrade) error{ Unimplemented, InvalidNodeType, OutOfMemory, BadAttributes }![]Block { + switch (node.body) { + .list => |child_nodes| { + var blocks: std.ArrayList(Block) = .empty; + defer blocks.deinit(sema.arena); + + try blocks.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + + for (child_nodes) |child_node| { + const block, const id = try sema.translate_block_node(child_node); + if (id != null) { + try sema.emit_diagnostic(.illegal_id_attribute, get_attribute_location(child_node, "id", .name).?); + } + blocks.appendAssumeCapacity(block); + } + + return try blocks.toOwnedSlice(sema.arena); + }, + + .empty, .string, .verbatim, .text_span => switch (upgrade) { + .no_upgrade => { + try sema.emit_diagnostic(.list_body_required, node.location); // TODO: Use better diagnostic + return &.{}; + }, + .text_to_p => { + const spans = try sema.translate_inline(node); + + const blocks = 
try sema.arena.alloc(Block, 1); + blocks[0] = .{ + .paragraph = .{ + .kind = .p, + .lang = null, + .content = spans, + }, + }; + + return blocks; + }, + }, + } + } + + /// Translates a node into a sequence of inline spans. fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); @@ -923,7 +1022,10 @@ pub const SemanticAnalyzer = struct { .td, .li, .unknown_block, - => @panic("PARSER ERROR: The parser emitted a block node inside an inline context"), + => { + std.log.err("type: {t} location: {}", .{ node.type, node.location }); + @panic("PARSER ERROR: The parser emitted a block node inside an inline context"); + }, } } @@ -1162,6 +1264,8 @@ pub const SemanticAnalyzer = struct { return switch (T) { []const u8 => value, + u32 => std.fmt.parseInt(u32, value, 10) catch return error.InvalidValue, + Reference => { const stripped = std.mem.trim(u8, value, whitespace_chars); if (stripped.len != value.len) { @@ -2104,6 +2208,9 @@ pub const Diagnostic = struct { invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, illegal_character: ForbiddenControlCharacter, + illegal_child_item, + list_body_required, + illegal_id_attribute, // warnings: document_starts_with_bom, @@ -2139,6 +2246,9 @@ pub const Diagnostic = struct { .invalid_string_escape, .illegal_character, .invalid_unicode_string_escape, + .illegal_child_item, + .list_body_required, + .illegal_id_attribute, => .@"error", .unknown_attribute, @@ -2206,6 +2316,11 @@ pub const Diagnostic = struct { .invalid_unicode_string_escape => try w.writeAll("Invalid unicode escape sequence"), .illegal_character => |ctx| try w.print("Forbidden control character U+{X:0>4}.", .{ctx.codepoint}), + + .list_body_required => try w.writeAll("Node requires list body."), + .illegal_child_item => try w.writeAll("Node not allowed here."), + + .illegal_id_attribute => try w.writeAll("Attribute 'id' not 
allowed here."), } } }; diff --git a/src/main.zig b/src/main.zig index 645041c..e7e83a5 100644 --- a/src/main.zig +++ b/src/main.zig @@ -234,7 +234,7 @@ fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []c fn dumpListItem(writer: anytype, indent: usize, item: hdoc.Block.ListItem) !void { try dumpOptionalStringField(writer, indent, "lang", item.lang); - try dumpSpanListField(writer, indent, "content", item.content); + try dumpBlockListField(writer, indent, "content", item.content); } fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) !void { @@ -254,7 +254,7 @@ fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: [] fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { try dumpOptionalStringField(writer, indent, "lang", cell.lang); try dumpOptionalNumberField(writer, indent, "colspan", cell.colspan); - try dumpSpanListField(writer, indent, "content", cell.content); + try dumpBlockListField(writer, indent, "content", cell.content); } fn dumpTableCellsField(writer: anytype, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) !void { @@ -367,7 +367,7 @@ fn dumpBlockInline(writer: anytype, indent: usize, block: hdoc.Block) !void { } } -fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) !void { +fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) @TypeOf(writer).Error!void { try writeIndent(writer, indent); if (blocks.len == 0) { try writer.print("{s}: []\n", .{key}); diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index fbfaf77..26561fb 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -4,3 +4,10 @@ h1: | Hello, World! h2{Hello \em{World}!} + +p { This is a span. 
} + +ul { + li "Item 1" + li { p { Item 2 } } +} \ No newline at end of file From fa37e8b2e3d56068574cc6a76dc28b8e73994cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 22:34:14 +0100 Subject: [PATCH 036/116] Fixes missing fallback for ol having first==1 --- src/hyperdoc.zig | 2 +- test/accept/workset.hdoc | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c45cd29..2257d0b 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -694,7 +694,7 @@ pub const SemanticAnalyzer = struct { } const list: Block.List = .{ - .first = attrs.first, + .first = attrs.first orelse if (node.type == .ol) 1 else null, .lang = attrs.lang, .items = try children.toOwnedSlice(sema.arena), }; diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 26561fb..696b944 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -10,4 +10,8 @@ p { This is a span. } ul { li "Item 1" li { p { Item 2 } } +} +ol { + li "Item 1" + li { p { Item 2 } } } \ No newline at end of file From 035ac7c37d10d11ca5dbbe1a9625b499bd1c6afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 23:25:33 +0100 Subject: [PATCH 037/116] Vibecoded: Implements rest of the node types, adds TODO comments for further tasks --- AGENTS.md | 3 + examples/featurematrix.hdoc | 18 +-- examples/tables.hdoc | 2 +- src/hyperdoc.zig | 221 ++++++++++++++++++++++++++++++++---- src/main.zig | 2 +- test/accept/workset.hdoc | 1 + 6 files changed, 216 insertions(+), 31 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 2579445..2ab16dd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,6 +10,9 @@ - Run `zig build` to validate the main application still compiles - Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. - Avoid editing documentation unless the request explicitly asks for it. 
+- Treat `docs/specification.md` as the authoritative source of behavior; examples may be outdated or incorrect. +- If the spec is unclear or conflicts with code/tests, ask before changing behavior. +- Do not implement "just make it work" fallbacks that alter semantics to satisfy examples. ## Zig Programming Style diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index 3600e02..d6dd2a3 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -5,10 +5,10 @@ h1 { Small Computer Feature Matrix } table { columns { td "Ashet Home Computer" - td { \link(uri="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } - td { \link(uri="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } - td { \link(uri="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } - td { \link(uri="https://www.codycomputer.org/") "Cody Computer" } + td { p { \link(uri="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } } + td { p { \link(uri="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } } + td { p { \link(uri="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } } + td { p { \link(uri="https://www.codycomputer.org/") "Cody Computer" } } } row(title="CPU Bus Width") { td "32 bit" @@ -25,11 +25,11 @@ table { td "6502" } row(title="CPU Model") { - td { \link(uri="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } - td { \link(uri="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } - td { \link(uri="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } - td { \link(uri="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } - td { \link(uri="https://wdc65xx.com/integrated-circuit") "W65C02S" } + td { p { \link(uri="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } } + td { p { 
\link(uri="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } } + td { p { \link(uri="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } } + td { p { \link(uri="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } } + td { p { \link(uri="https://wdc65xx.com/integrated-circuit") "W65C02S" } } } row(title="CPU Cores") { td "2" diff --git a/examples/tables.hdoc b/examples/tables.hdoc index 28f73f9..70d0e8a 100644 --- a/examples/tables.hdoc +++ b/examples/tables.hdoc @@ -21,7 +21,7 @@ table(id="inventory") { row(title="Vegetables") { td "Carrots" td "7" - td { p { Store at \time(fmt="rough"){08:00:00}. } } + td { p { Store at \time(fmt="rough"){08:00:00Z}. } } } group { Pantry } row(title="Dry Goods") { diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2257d0b..370c357 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -113,7 +113,7 @@ pub const Block = union(enum) { pub const TableCell = struct { lang: ?[]const u8, - colspan: ?u32, + colspan: u32, content: []Block, }; }; @@ -364,7 +364,7 @@ pub const Uri = struct { text: []const u8, pub fn init(text: []const u8) Uri { - // TODO: Add correctness validation here + // TODO: Add correctness validation here (IRI syntax, non-empty). return .{ .text = text }; } }; @@ -376,7 +376,7 @@ pub const Reference = struct { text: []const u8, pub fn init(text: []const u8) Reference { - // TODO: Add correctness validation here + // TODO: Add correctness validation here (non-empty, allowed characters). return .{ .text = text }; } }; @@ -433,6 +433,7 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; + // TODO: Validate document-level semantic constraints (unique ids, ref resolution, table shape). 
return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), @@ -703,27 +704,205 @@ pub const SemanticAnalyzer = struct { } fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + alt: ?[]const u8 = null, + path: []const u8, + }); + + // TODO: Enforce non-empty "path" (required) and "alt" (if provided). + const content = switch (node.body) { + .empty => @constCast(&[_]Span{}), + else => try sema.translate_inline(node), + }; + + const image: Block.Image = .{ + .lang = attrs.lang, + .alt = attrs.alt, + .path = attrs.path, + .content = content, + }; + + return .{ image, attrs.id }; } fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + syntax: ?[]const u8 = null, + }); + + const preformatted: Block.Preformatted = .{ + .lang = attrs.lang, + .syntax = attrs.syntax, + .content = try sema.translate_inline(node), + }; + + return .{ preformatted, attrs.id }; } fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + depth: ?u32 = null, + }); + + var depth: ?u8 = null; + if (attrs.depth) |depth_value| { + if (depth_value < 1 or depth_value > 3) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "depth" } }, get_attribute_location(node, "depth", .value) orelse 
node.location); + } else { + depth = @intCast(depth_value); + } + } + + switch (node.body) { + .empty => {}, + .list => |child_nodes| { + for (child_nodes) |child_node| { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + } + }, + .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.illegal_child_item, node.location); + }, + } + + const toc: Block.TableOfContents = .{ + .lang = attrs.lang, + .depth = depth, + }; + + return .{ toc, attrs.id }; } fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + }); + + var rows: std.ArrayList(Block.TableRow) = .empty; + defer rows.deinit(sema.arena); + + switch (node.body) { + .list => |child_nodes| { + try rows.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + for (child_nodes) |child_node| { + switch (child_node.type) { + .columns => { + const row_attrs = try sema.get_attributes(child_node, struct { + lang: ?[]const u8 = null, + }); + + const cells = try sema.translate_table_cells(child_node); + + rows.appendAssumeCapacity(.{ + .columns = .{ + .lang = row_attrs.lang, + .cells = cells, + }, + }); + }, + .row => { + const row_attrs = try sema.get_attributes(child_node, struct { + lang: ?[]const u8 = null, + title: ?[]const u8 = null, + }); + + const cells = try sema.translate_table_cells(child_node); + + rows.appendAssumeCapacity(.{ + .row = .{ + .lang = row_attrs.lang, + .title = row_attrs.title, + .cells = cells, + }, + }); + }, + .group => { + const row_attrs = try sema.get_attributes(child_node, struct { + lang: ?[]const u8 = null, + }); + + rows.appendAssumeCapacity(.{ + .group = .{ + .lang = row_attrs.lang, + .content = try sema.translate_inline(child_node), + }, + }); + }, + else => { + try sema.emit_diagnostic(.illegal_child_item, 
child_node.location); + }, + } + } + }, + .empty, .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.list_body_required, node.location); + }, + } + + // TODO: Validate column counts after colspan and title/group leading column rules. + const table: Block.Table = .{ + .lang = attrs.lang, + .rows = try rows.toOwnedSlice(sema.arena), + }; + + return .{ table, attrs.id }; + } + + fn translate_table_cells(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, InvalidNodeType, Unimplemented }![]Block.TableCell { + var cells: std.ArrayList(Block.TableCell) = .empty; + defer cells.deinit(sema.arena); + + switch (node.body) { + .list => |child_nodes| { + try cells.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + for (child_nodes) |child_node| { + const cell = sema.translate_table_cell_node(child_node) catch |err| switch (err) { + error.InvalidNodeType => { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + continue; + }, + else => |e| return e, + }; + cells.appendAssumeCapacity(cell); + } + }, + .empty, .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.list_body_required, node.location); + }, + } + + return try cells.toOwnedSlice(sema.arena); + } + + fn translate_table_cell_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, InvalidNodeType, Unimplemented }!Block.TableCell { + switch (node.type) { + .td => {}, + else => return error.InvalidNodeType, + } + + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + colspan: ?u32 = null, + }); + + var colspan = attrs.colspan orelse 1; + if (colspan < 1) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "colspan" } }, get_attribute_location(node, "colspan", .value) orelse node.location); + colspan = 1; + } + + return .{ + .lang = attrs.lang, + .colspan = colspan, + .content = try sema.translate_block_list(node, .text_to_p), + }; } fn 
translate_list_item_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.ListItem { @@ -979,10 +1158,11 @@ pub const SemanticAnalyzer = struct { // TODO: Implement automatic space insertion. // This must be done when two consecutive nodes are separated by a space + // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. try sema.translate_inline_body(&content_spans, node.body, .{}); // Convert the content_spans into a "rendered string". - const content_text = try sema.join_spans(content_spans.items, .no_space); + const content_text = try sema.render_spans_to_plaintext(content_spans.items, .no_space); const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1081,7 +1261,7 @@ pub const SemanticAnalyzer = struct { } const JoinStyle = enum { no_space, one_space }; - fn join_spans(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { + fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { var len: usize = switch (style) { .no_space => 0, .one_space => (source_spans.len -| 1), @@ -1191,6 +1371,7 @@ pub const SemanticAnalyzer = struct { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; + // TODO: Enforce per-attribute constraints from the spec (non-empty strings, lang tag format, etc). var required: std.EnumSet(Fields) = .initEmpty(); var attrs: Attrs = undefined; @@ -1425,9 +1606,9 @@ pub const SemanticAnalyzer = struct { }, else => { - // Unknown escape sequence, emit escaped char verbatim: - // TODO: How to handle something like "\😭", which is - // definitly valid and in-scope. + // Unknown escape sequence, emit escaped char verbatim. 
Use the full UTF-8 codepoint + // inside the error message, so we can tell that "\😢" is not a valid escape sequence + // instead of saying that "\{F0}" is not a valid escape sequence const len = std.unicode.utf8ByteSequenceLength(esc_char) catch unreachable; diff --git a/src/main.zig b/src/main.zig index e7e83a5..fb5fb83 100644 --- a/src/main.zig +++ b/src/main.zig @@ -253,7 +253,7 @@ fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: [] fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { try dumpOptionalStringField(writer, indent, "lang", cell.lang); - try dumpOptionalNumberField(writer, indent, "colspan", cell.colspan); + try dumpOptionalNumberField(writer, indent, "colspan", @as(?u32, cell.colspan)); try dumpBlockListField(writer, indent, "content", cell.content); } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 696b944..19ffba1 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -11,6 +11,7 @@ ul { li "Item 1" li { p { Item 2 } } } + ol { li "Item 1" li { p { Item 2 } } From 40d82be6047f9b29ef38ff8a9f233a51cbff0fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 14:18:07 +0100 Subject: [PATCH 038/116] Moves dump code into src/render/dump.zig --- src/hyperdoc.zig | 5 +- src/main.zig | 483 +----------------------------------------- src/render/dump.zig | 497 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 505 insertions(+), 480 deletions(-) create mode 100644 src/render/dump.zig diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 370c357..b752b72 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1,5 +1,8 @@ const std = @import("std"); -const parser_toolkit = @import("parser-toolkit"); + +pub const render = struct { + pub const yaml = @import("render/dump.zig").render; +}; /// A HyperDoc document. Contains both memory and /// tree structure of the document. 
diff --git a/src/main.zig b/src/main.zig index fb5fb83..19161d5 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,483 +4,6 @@ const hdoc = @import("hyperdoc"); var debug_allocator: std.heap.DebugAllocator(.{}) = .init; -const indent_step: usize = 2; - -fn writeIndent(writer: anytype, indent: usize) !void { - var i: usize = 0; - while (i < indent) : (i += 1) { - try writer.writeByte(' '); - } -} - -fn writeStringValue(writer: anytype, value: []const u8) !void { - try writer.print("\"{f}\"", .{std.zig.fmtString(value)}); -} - -fn writeOptionalStringValue(writer: anytype, value: ?[]const u8) !void { - if (value) |text| { - try writeStringValue(writer, text); - } else { - try writer.writeAll("null"); - } -} - -fn writeOptionalIntValue(writer: anytype, value: anytype) !void { - if (value) |number| { - try writer.print("{}", .{number}); - } else { - try writer.writeAll("null"); - } -} - -fn dumpOptionalStringField(writer: anytype, indent: usize, key: []const u8, value: ?[]const u8) !void { - try writeIndent(writer, indent); - try writer.print("{s}: ", .{key}); - try writeOptionalStringValue(writer, value); - try writer.writeByte('\n'); -} - -fn dumpOptionalNumberField(writer: anytype, indent: usize, key: []const u8, value: anytype) !void { - try writeIndent(writer, indent); - try writer.print("{s}: ", .{key}); - try writeOptionalIntValue(writer, value); - try writer.writeByte('\n'); -} - -fn dumpBoolField(writer: anytype, indent: usize, key: []const u8, value: bool) !void { - try writeIndent(writer, indent); - try writer.print("{s}: {}\n", .{ key, value }); -} - -fn dumpEnumField(writer: anytype, indent: usize, key: []const u8, value: anytype) !void { - try writeIndent(writer, indent); - try writer.print("{s}: {s}\n", .{ key, @tagName(value) }); -} - -fn dumpVersion(writer: anytype, indent: usize, version: hdoc.Version) !void { - try writeIndent(writer, indent); - try writer.writeAll("version:\n"); - try writeIndent(writer, indent + indent_step); - try 
writer.print("major: {}\n", .{version.major}); - try writeIndent(writer, indent + indent_step); - try writer.print("minor: {}\n", .{version.minor}); -} - -fn dumpDate(writer: anytype, indent: usize, date: hdoc.Date) !void { - try writeIndent(writer, indent); - try writer.print("year: {}\n", .{date.year}); - try writeIndent(writer, indent); - try writer.print("month: {}\n", .{date.month}); - try writeIndent(writer, indent); - try writer.print("day: {}\n", .{date.day}); -} - -fn dumpTime(writer: anytype, indent: usize, time: hdoc.Time) !void { - try writeIndent(writer, indent); - try writer.print("hour: {}\n", .{time.hour}); - try writeIndent(writer, indent); - try writer.print("minute: {}\n", .{time.minute}); - try writeIndent(writer, indent); - try writer.print("second: {}\n", .{time.second}); - try writeIndent(writer, indent); - try writer.print("microsecond: {}\n", .{time.microsecond}); -} - -fn dumpDateTime(writer: anytype, indent: usize, datetime: hdoc.DateTime) !void { - try writeIndent(writer, indent); - try writer.writeAll("date:\n"); - try dumpDate(writer, indent + indent_step, datetime.date); - try writeIndent(writer, indent); - try writer.writeAll("time:\n"); - try dumpTime(writer, indent + indent_step, datetime.time); -} - -fn writeAttrSeparator(writer: anytype, first: *bool) !void { - if (first.*) { - first.* = false; - } else { - try writer.writeByte(' '); - } -} - -fn writeSpanAttributes(writer: anytype, span: hdoc.Span) !void { - try writer.writeByte('['); - var first = true; - if (span.attribs.em) { - try writeAttrSeparator(writer, &first); - try writer.writeAll("em"); - } - if (span.attribs.mono) { - try writeAttrSeparator(writer, &first); - try writer.writeAll("mono"); - } - if (span.attribs.strike) { - try writeAttrSeparator(writer, &first); - try writer.writeAll("strike"); - } - if (span.attribs.position != .baseline) { - try writeAttrSeparator(writer, &first); - try writer.print("position=\"{s}\"", .{@tagName(span.attribs.position)}); - } - 
switch (span.attribs.link) { - .none => {}, - .ref => |value| { - try writeAttrSeparator(writer, &first); - try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); - }, - .uri => |value| { - try writeAttrSeparator(writer, &first); - try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); - }, - } - if (span.attribs.lang.len != 0) { - try writeAttrSeparator(writer, &first); - try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang)}); - } - if (span.attribs.syntax.len != 0) { - try writeAttrSeparator(writer, &first); - try writer.print("syntax=\"{f}\"", .{std.zig.fmtString(span.attribs.syntax)}); - } - try writer.writeByte(']'); -} - -fn writeDateValue(writer: anytype, date: hdoc.Date) !void { - try writer.print("{d:0>4}-{d:0>2}-{d:0>2}", .{ date.year, date.month, date.day }); -} - -fn writeTimeValue(writer: anytype, time: hdoc.Time) !void { - try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ time.hour, time.minute, time.second }); - if (time.microsecond != 0) { - try writer.print(".{d:0>6}", .{time.microsecond}); - } -} - -fn writeDateTimeValue(writer: anytype, datetime: hdoc.DateTime) !void { - try writeDateValue(writer, datetime.date); - try writer.writeByte('T'); - try writeTimeValue(writer, datetime.time); -} - -fn writeFormattedDateInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { - try writer.writeAll("date:"); - try writeDateValue(writer, formatted.value); - if (formatted.format != hdoc.Date.Format.default) { - try writer.writeByte('@'); - try writer.writeAll(@tagName(formatted.format)); - } -} - -fn writeFormattedTimeInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { - try writer.writeAll("time:"); - try writeTimeValue(writer, formatted.value); - if (formatted.format != hdoc.Time.Format.default) { - try writer.writeByte('@'); - try writer.writeAll(@tagName(formatted.format)); - } -} - -fn writeFormattedDateTimeInline(writer: anytype, formatted: 
hdoc.FormattedDateTime(hdoc.DateTime)) !void { - try writer.writeAll("datetime:"); - try writeDateTimeValue(writer, formatted.value); - if (formatted.format != hdoc.DateTime.Format.default) { - try writer.writeByte('@'); - try writer.writeAll(@tagName(formatted.format)); - } -} - -fn writeSpanContentInline(writer: anytype, content: hdoc.Span.Content) !void { - switch (content) { - .text => |text| { - try writeStringValue(writer, text); - }, - .date => |date| { - try writer.writeByte('"'); - try writeFormattedDateInline(writer, date); - try writer.writeByte('"'); - }, - .time => |time| { - try writer.writeByte('"'); - try writeFormattedTimeInline(writer, time); - try writer.writeByte('"'); - }, - .datetime => |datetime| { - try writer.writeByte('"'); - try writeFormattedDateTimeInline(writer, datetime); - try writer.writeByte('"'); - }, - } -} - -fn dumpSpanInline(writer: anytype, span: hdoc.Span) !void { - try writeSpanAttributes(writer, span); - try writer.writeByte(' '); - try writeSpanContentInline(writer, span.content); -} - -fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { - try writeIndent(writer, indent); - if (spans.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (spans) |span| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("- "); - try dumpSpanInline(writer, span); - try writer.writeByte('\n'); - } -} - -fn dumpListItem(writer: anytype, indent: usize, item: hdoc.Block.ListItem) !void { - try dumpOptionalStringField(writer, indent, "lang", item.lang); - try dumpBlockListField(writer, indent, "content", item.content); -} - -fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) !void { - try writeIndent(writer, indent); - if (items.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (items) |item| { - try 
writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpListItem(writer, indent + indent_step * 2, item); - } -} - -fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { - try dumpOptionalStringField(writer, indent, "lang", cell.lang); - try dumpOptionalNumberField(writer, indent, "colspan", @as(?u32, cell.colspan)); - try dumpBlockListField(writer, indent, "content", cell.content); -} - -fn dumpTableCellsField(writer: anytype, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) !void { - try writeIndent(writer, indent); - if (cells.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (cells) |cell| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpTableCell(writer, indent + indent_step * 2, cell); - } -} - -fn dumpTableColumns(writer: anytype, indent: usize, columns: hdoc.Block.TableColumns) !void { - try dumpOptionalStringField(writer, indent, "lang", columns.lang); - try dumpTableCellsField(writer, indent, "cells", columns.cells); -} - -fn dumpTableDataRow(writer: anytype, indent: usize, row: hdoc.Block.TableDataRow) !void { - try dumpOptionalStringField(writer, indent, "lang", row.lang); - try dumpOptionalStringField(writer, indent, "title", row.title); - try dumpTableCellsField(writer, indent, "cells", row.cells); -} - -fn dumpTableGroup(writer: anytype, indent: usize, group: hdoc.Block.TableGroup) !void { - try dumpOptionalStringField(writer, indent, "lang", group.lang); - try dumpSpanListField(writer, indent, "content", group.content); -} - -fn dumpTableRow(writer: anytype, indent: usize, row: hdoc.Block.TableRow) !void { - switch (row) { - .columns => |columns| { - try writeIndent(writer, indent); - try writer.writeAll("columns:\n"); - try dumpTableColumns(writer, indent + indent_step, columns); - }, - .row => |data_row| { - try writeIndent(writer, indent); - try 
writer.writeAll("row:\n"); - try dumpTableDataRow(writer, indent + indent_step, data_row); - }, - .group => |group| { - try writeIndent(writer, indent); - try writer.writeAll("group:\n"); - try dumpTableGroup(writer, indent + indent_step, group); - }, - } -} - -fn dumpTableRowsField(writer: anytype, indent: usize, key: []const u8, rows: []const hdoc.Block.TableRow) !void { - try writeIndent(writer, indent); - if (rows.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (rows) |row| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpTableRow(writer, indent + indent_step * 2, row); - } -} - -fn dumpBlockInline(writer: anytype, indent: usize, block: hdoc.Block) !void { - switch (block) { - .heading => |heading| { - try writer.writeAll("heading:\n"); - try dumpEnumField(writer, indent + indent_step, "level", heading.level); - try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); - try dumpSpanListField(writer, indent + indent_step, "content", heading.content); - }, - .paragraph => |paragraph| { - try writer.writeAll("paragraph:\n"); - try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); - try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); - try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); - }, - .list => |list| { - try writer.writeAll("list:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); - try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); - try dumpListItemsField(writer, indent + indent_step, "items", list.items); - }, - .image => |image| { - try writer.writeAll("image:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); - try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); - try dumpOptionalStringField(writer, indent + indent_step, 
"path", image.path); - try dumpSpanListField(writer, indent + indent_step, "content", image.content); - }, - .preformatted => |preformatted| { - try writer.writeAll("preformatted:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); - try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); - try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); - }, - .toc => |toc| { - try writer.writeAll("toc:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); - try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); - }, - .table => |table| { - try writer.writeAll("table:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); - try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); - }, - } -} - -fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) @TypeOf(writer).Error!void { - try writeIndent(writer, indent); - if (blocks.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (blocks) |block| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("- "); - try dumpBlockInline(writer, indent + indent_step, block); - } -} - -fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, values: []?hdoc.Reference) !void { - try writeIndent(writer, indent); - if (values.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (values) |value| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("- "); - try writeOptionalStringValue(writer, if (value) |val| val.text else null); - try writer.writeByte('\n'); - } -} - -fn dumpOptionalDateTimeField(writer: anytype, indent: usize, key: []const u8, value: ?hdoc.DateTime) !void { - try writeIndent(writer, indent); - if 
(value) |datetime| { - try writer.print("{s}:\n", .{key}); - try dumpDateTime(writer, indent + indent_step, datetime); - } else { - try writer.print("{s}: null\n", .{key}); - } -} - -fn dumpDocument(writer: anytype, doc: *const hdoc.Document) !void { - try writer.writeAll("document:\n"); - try dumpVersion(writer, indent_step, doc.version); - try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); - try dumpOptionalStringField(writer, indent_step, "title", doc.title); - try dumpOptionalStringField(writer, indent_step, "author", doc.author); - try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); - try dumpBlockListField(writer, indent_step, "contents", doc.contents); - try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); -} - -test "dumpDocument escapes string values" { - const title = "Doc \"Title\"\n"; - const span_text = "Hello \"world\"\n"; - const link_ref: hdoc.Reference = .init("section \"A\""); - const id_value: hdoc.Reference = .init("id:1\n"); - - var doc: hdoc.Document = .{ - .arena = std.heap.ArenaAllocator.init(std.testing.allocator), - .version = .{ .major = 1, .minor = 2 }, - .contents = &.{}, - .ids = &.{}, - .lang = null, - .title = title, - .author = null, - .date = null, - .timezone = null, - }; - defer doc.deinit(); - - const arena_alloc = doc.arena.allocator(); - - const spans = try arena_alloc.alloc(hdoc.Span, 1); - spans[0] = .{ - .content = .{ .text = span_text }, - .attribs = .{ .link = .{ .ref = link_ref } }, - }; - - const blocks = try arena_alloc.alloc(hdoc.Block, 1); - blocks[0] = .{ - .heading = .{ - .level = .h1, - .lang = null, - .content = spans, - }, - }; - doc.contents = blocks; - - const ids = try arena_alloc.alloc(?hdoc.Reference, 1); - ids[0] = id_value; - doc.ids = ids; - - var buffer: std.ArrayList(u8) = .empty; - defer buffer.deinit(std.testing.allocator); - - try dumpDocument(buffer.writer(std.testing.allocator), &doc); - const output = buffer.items; - - const expected_title = try 
std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); - defer std.testing.allocator.free(expected_title); - try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); - - const expected_span = try std.fmt.allocPrint( - std.testing.allocator, - "- [link=\"ref:{f}\"] \"{f}\"\n", - .{ std.zig.fmtString(link_ref.text), std.zig.fmtString(span_text) }, - ); - defer std.testing.allocator.free(expected_span); - try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); - - const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value.text)}); - defer std.testing.allocator.free(expected_id); - try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); -} - pub fn main() !u8 { defer if (builtin.mode == .Debug) { std.debug.assert(debug_allocator.deinit() == .ok); @@ -512,8 +35,10 @@ pub fn main() !u8 { if (diagnostics.has_error()) return 1; - const stdout = std.fs.File.stdout().deprecatedWriter(); - try dumpDocument(stdout, &parsed); + var stdout_buffer: [4096]u8 = undefined; + var stdout = std.fs.File.stdout().writer(&stdout_buffer); + + try hdoc.render.yaml(parsed, &stdout.interface); return 0; } diff --git a/src/render/dump.zig b/src/render/dump.zig new file mode 100644 index 0000000..347e90e --- /dev/null +++ b/src/render/dump.zig @@ -0,0 +1,497 @@ +const std = @import("std"); +const hdoc = @import("../hyperdoc.zig"); + +const Writer = std.Io.Writer; +const indent_step: usize = 2; + +fn writeIndent(writer: *Writer, indent: usize) Writer.Error!void { + var i: usize = 0; + while (i < indent) : (i += 1) { + try writer.writeByte(' '); + } +} + +fn writeStringValue(writer: *Writer, value: []const u8) Writer.Error!void { + try writer.print("\"{f}\"", .{std.zig.fmtString(value)}); +} + +fn writeOptionalStringValue(writer: *Writer, value: ?[]const u8) Writer.Error!void { + if (value) |text| { + try writeStringValue(writer, text); + } else { + try 
writer.writeAll("null"); + } +} + +fn writeOptionalIntValue(writer: *Writer, value: anytype) Writer.Error!void { + if (value) |number| { + try writer.print("{}", .{number}); + } else { + try writer.writeAll("null"); + } +} + +fn dumpOptionalStringField(writer: *Writer, indent: usize, key: []const u8, value: ?[]const u8) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpOptionalStringFieldInline(writer: *Writer, key: []const u8, value: ?[]const u8) Writer.Error!void { + try writer.print("{s}: ", .{key}); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpOptionalStringFieldWithIndent(writer: *Writer, indent: usize, key: []const u8, value: ?[]const u8) Writer.Error!void { + try writeIndent(writer, indent); + try dumpOptionalStringFieldInline(writer, key, value); +} + +fn dumpOptionalNumberField(writer: *Writer, indent: usize, key: []const u8, value: anytype) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalIntValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpBoolField(writer: *Writer, indent: usize, key: []const u8, value: bool) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: {}\n", .{ key, value }); +} + +fn dumpEnumField(writer: *Writer, indent: usize, key: []const u8, value: anytype) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: {s}\n", .{ key, @tagName(value) }); +} + +fn dumpVersion(writer: *Writer, indent: usize, version: hdoc.Version) Writer.Error!void { + try writeIndent(writer, indent); + try writer.writeAll("version:\n"); + try writeIndent(writer, indent + indent_step); + try writer.print("major: {}\n", .{version.major}); + try writeIndent(writer, indent + indent_step); + try writer.print("minor: {}\n", .{version.minor}); +} + +fn dumpDate(writer: 
*Writer, indent: usize, date: hdoc.Date) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("year: {}\n", .{date.year}); + try writeIndent(writer, indent); + try writer.print("month: {}\n", .{date.month}); + try writeIndent(writer, indent); + try writer.print("day: {}\n", .{date.day}); +} + +fn dumpTime(writer: *Writer, indent: usize, time: hdoc.Time) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("hour: {}\n", .{time.hour}); + try writeIndent(writer, indent); + try writer.print("minute: {}\n", .{time.minute}); + try writeIndent(writer, indent); + try writer.print("second: {}\n", .{time.second}); + try writeIndent(writer, indent); + try writer.print("microsecond: {}\n", .{time.microsecond}); +} + +fn dumpDateTime(writer: *Writer, indent: usize, datetime: hdoc.DateTime) Writer.Error!void { + try writeIndent(writer, indent); + try writer.writeAll("date:\n"); + try dumpDate(writer, indent + indent_step, datetime.date); + try writeIndent(writer, indent); + try writer.writeAll("time:\n"); + try dumpTime(writer, indent + indent_step, datetime.time); +} + +fn writeAttrSeparator(writer: *Writer, first: *bool) Writer.Error!void { + if (first.*) { + first.* = false; + } else { + try writer.writeByte(' '); + } +} + +fn writeSpanAttributes(writer: *Writer, span: hdoc.Span) Writer.Error!void { + try writer.writeByte('['); + var first = true; + if (span.attribs.em) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("em"); + } + if (span.attribs.mono) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("mono"); + } + if (span.attribs.strike) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("strike"); + } + if (span.attribs.position != .baseline) { + try writeAttrSeparator(writer, &first); + try writer.print("position=\"{s}\"", .{@tagName(span.attribs.position)}); + } + switch (span.attribs.link) { + .none => {}, + .ref => |value| { + try writeAttrSeparator(writer, &first); + try 
writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); + }, + .uri => |value| { + try writeAttrSeparator(writer, &first); + try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); + }, + } + if (span.attribs.lang.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang)}); + } + if (span.attribs.syntax.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("syntax=\"{f}\"", .{std.zig.fmtString(span.attribs.syntax)}); + } + try writer.writeByte(']'); +} + +fn writeDateValue(writer: *Writer, date: hdoc.Date) Writer.Error!void { + try writer.print("{d:0>4}-{d:0>2}-{d:0>2}", .{ date.year, date.month, date.day }); +} + +fn writeTimeValue(writer: *Writer, time: hdoc.Time) Writer.Error!void { + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ time.hour, time.minute, time.second }); + if (time.microsecond != 0) { + try writer.print(".{d:0>6}", .{time.microsecond}); + } +} + +fn writeDateTimeValue(writer: *Writer, datetime: hdoc.DateTime) Writer.Error!void { + try writeDateValue(writer, datetime.date); + try writer.writeByte('T'); + try writeTimeValue(writer, datetime.time); +} + +fn writeFormattedDateInline(writer: *Writer, formatted: hdoc.FormattedDateTime(hdoc.Date)) Writer.Error!void { + try writer.writeAll("date:"); + try writeDateValue(writer, formatted.value); + if (formatted.format != hdoc.Date.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeFormattedTimeInline(writer: *Writer, formatted: hdoc.FormattedDateTime(hdoc.Time)) Writer.Error!void { + try writer.writeAll("time:"); + try writeTimeValue(writer, formatted.value); + if (formatted.format != hdoc.Time.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeFormattedDateTimeInline(writer: *Writer, formatted: hdoc.FormattedDateTime(hdoc.DateTime)) Writer.Error!void { + try 
writer.writeAll("datetime:"); + try writeDateTimeValue(writer, formatted.value); + if (formatted.format != hdoc.DateTime.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeSpanContentInline(writer: *Writer, content: hdoc.Span.Content) Writer.Error!void { + switch (content) { + .text => |text| { + try writeStringValue(writer, text); + }, + .date => |date| { + try writer.writeByte('"'); + try writeFormattedDateInline(writer, date); + try writer.writeByte('"'); + }, + .time => |time| { + try writer.writeByte('"'); + try writeFormattedTimeInline(writer, time); + try writer.writeByte('"'); + }, + .datetime => |datetime| { + try writer.writeByte('"'); + try writeFormattedDateTimeInline(writer, datetime); + try writer.writeByte('"'); + }, + } +} + +fn dumpSpanInline(writer: *Writer, span: hdoc.Span) Writer.Error!void { + try writeSpanAttributes(writer, span); + try writer.writeByte(' '); + try writeSpanContentInline(writer, span.content); +} + +fn writeTypeTag(writer: *Writer, tag: []const u8) Writer.Error!void { + try writer.print("{s}:\n", .{tag}); +} + +fn dumpSpanListField(writer: *Writer, indent: usize, key: []const u8, spans: []const hdoc.Span) Writer.Error!void { + try writeIndent(writer, indent); + if (spans.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (spans) |span| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpSpanInline(writer, span); + try writer.writeByte('\n'); + } +} + +fn dumpBlockListField(writer: *Writer, indent: usize, key: []const u8, blocks: []const hdoc.Block) Writer.Error!void { + try writeIndent(writer, indent); + if (blocks.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (blocks) |block| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpBlockInline(writer, indent + 
indent_step, block); + } +} + +fn dumpOptionalStringListField(writer: *Writer, indent: usize, key: []const u8, values: []?hdoc.Reference) Writer.Error!void { + try writeIndent(writer, indent); + if (values.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (values) |value| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try writeOptionalStringValue(writer, if (value) |val| val.text else null); + try writer.writeByte('\n'); + } +} + +fn dumpListItem(writer: *Writer, indent: usize, item: hdoc.Block.ListItem) Writer.Error!void { + try dumpOptionalStringFieldInline(writer, "lang", item.lang); + try dumpBlockListField(writer, indent + indent_step, "content", item.content); +} + +fn dumpListItemsField(writer: *Writer, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) Writer.Error!void { + try writeIndent(writer, indent); + if (items.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (items) |item| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpListItem(writer, indent + indent_step, item); + } +} + +fn dumpTableCell(writer: *Writer, indent: usize, cell: hdoc.Block.TableCell) Writer.Error!void { + try dumpOptionalStringFieldInline(writer, "lang", cell.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "colspan", @as(?u32, cell.colspan)); + try dumpBlockListField(writer, indent + indent_step, "content", cell.content); +} + +fn dumpTableCellsField(writer: *Writer, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) Writer.Error!void { + try writeIndent(writer, indent); + if (cells.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (cells) |cell| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpTableCell(writer, indent + indent_step, 
cell); + } +} + +fn dumpTableColumns(writer: *Writer, indent: usize, columns: hdoc.Block.TableColumns) Writer.Error!void { + try dumpOptionalStringField(writer, indent, "lang", columns.lang); + try dumpTableCellsField(writer, indent, "cells", columns.cells); +} + +fn dumpTableDataRow(writer: *Writer, indent: usize, row: hdoc.Block.TableDataRow) Writer.Error!void { + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", row.lang); + try dumpOptionalStringField(writer, indent, "title", row.title); + try dumpTableCellsField(writer, indent, "cells", row.cells); +} + +fn dumpTableGroup(writer: *Writer, indent: usize, group: hdoc.Block.TableGroup) Writer.Error!void { + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", group.lang); + try dumpSpanListField(writer, indent, "content", group.content); +} + +fn dumpTableRow(writer: *Writer, indent: usize, row: hdoc.Block.TableRow) Writer.Error!void { + switch (row) { + .columns => |columns| { + try writeTypeTag(writer, "columns"); + try dumpTableColumns(writer, indent + indent_step, columns); + }, + .row => |data_row| { + try writeTypeTag(writer, "row"); + try dumpTableDataRow(writer, indent + indent_step, data_row); + }, + .group => |group| { + try writeTypeTag(writer, "group"); + try dumpTableGroup(writer, indent + indent_step, group); + }, + } +} + +fn dumpTableRowsField(writer: *Writer, indent: usize, key: []const u8, rows: []const hdoc.Block.TableRow) Writer.Error!void { + try writeIndent(writer, indent); + if (rows.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (rows) |row| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpTableRow(writer, indent + indent_step, row); + } +} + +fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Error!void { + switch (block) { + .heading => |heading| { + try writeTypeTag(writer, "heading"); + try dumpEnumField(writer, indent + indent_step, 
"level", heading.level); + try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); + try dumpSpanListField(writer, indent + indent_step, "content", heading.content); + }, + .paragraph => |paragraph| { + try writeTypeTag(writer, "paragraph"); + try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); + try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); + try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); + }, + .list => |list| { + try writeTypeTag(writer, "list"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); + try dumpListItemsField(writer, indent + indent_step, "items", list.items); + }, + .image => |image| { + try writeTypeTag(writer, "image"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); + try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); + try dumpOptionalStringField(writer, indent + indent_step, "path", image.path); + try dumpSpanListField(writer, indent + indent_step, "content", image.content); + }, + .preformatted => |preformatted| { + try writeTypeTag(writer, "preformatted"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); + try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); + try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); + }, + .toc => |toc| { + try writeTypeTag(writer, "toc"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); + }, + .table => |table| { + try writeTypeTag(writer, "table"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); + try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); + }, + } 
+} + +fn dumpOptionalDateTimeField(writer: *Writer, indent: usize, key: []const u8, value: ?hdoc.DateTime) Writer.Error!void { + try writeIndent(writer, indent); + if (value) |datetime| { + try writer.print("{s}:\n", .{key}); + try dumpDateTime(writer, indent + indent_step, datetime); + } else { + try writer.print("{s}: null\n", .{key}); + } +} + +fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { + try writer.writeAll("document:\n"); + try dumpVersion(writer, indent_step, doc.version); + try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); + try dumpOptionalStringField(writer, indent_step, "title", doc.title); + try dumpOptionalStringField(writer, indent_step, "author", doc.author); + try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); + try dumpBlockListField(writer, indent_step, "contents", doc.contents); + try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); +} + +pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { + try dumpDocument(writer, &doc); +} + +test "render escapes string values" { + const title = "Doc \"Title\"\n"; + const span_text = "Hello \"world\"\n"; + const link_ref: hdoc.Reference = .init("section \"A\""); + const id_value: hdoc.Reference = .init("id:1\n"); + + var doc: hdoc.Document = .{ + .arena = std.heap.ArenaAllocator.init(std.testing.allocator), + .version = .{ .major = 1, .minor = 2 }, + .contents = &.{}, + .ids = &.{}, + .lang = null, + .title = title, + .author = null, + .date = null, + .timezone = null, + }; + defer doc.deinit(); + + const arena_alloc = doc.arena.allocator(); + + const spans = try arena_alloc.alloc(hdoc.Span, 1); + spans[0] = .{ + .content = .{ .text = span_text }, + .attribs = .{ .link = .{ .ref = link_ref } }, + }; + + const blocks = try arena_alloc.alloc(hdoc.Block, 1); + blocks[0] = .{ + .heading = .{ + .level = .h1, + .lang = null, + .content = spans, + }, + }; + doc.contents = blocks; + + const ids = try 
arena_alloc.alloc(?hdoc.Reference, 1); + ids[0] = id_value; + doc.ids = ids; + + var buffer = Writer.Allocating.init(std.testing.allocator); + defer buffer.deinit(); + + try render(doc, &buffer.writer); + try buffer.writer.flush(); + const output = buffer.writer.buffered(); + + const expected_title = try std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); + defer std.testing.allocator.free(expected_title); + try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); + + const expected_span = try std.fmt.allocPrint( + std.testing.allocator, + "- [link=\"ref:{f}\"] \"{f}\"\n", + .{ std.zig.fmtString(link_ref.text), std.zig.fmtString(span_text) }, + ); + defer std.testing.allocator.free(expected_span); + try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); + + const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value.text)}); + defer std.testing.allocator.free(expected_id); + try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); +} From 98d386ed31781ae58a5b0f4868025dcea1005224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 14:31:06 +0100 Subject: [PATCH 039/116] Cleans up main a bit and implements diagnostic printing. 
--- src/main.zig | 52 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/src/main.zig b/src/main.zig index 19161d5..5b6cd6f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -13,32 +13,62 @@ pub fn main() !u8 { else std.heap.smp_allocator; + var stderr_buffer: [4096]u8 = undefined; + var stderr = std.fs.File.stderr().writer(&stderr_buffer); + + var stdout_buffer: [4096]u8 = undefined; + var stdout = std.fs.File.stdout().writer(&stdout_buffer); + const args = try std.process.argsAlloc(allocator); defer std.process.argsFree(allocator, args); if (args.len < 2) { - const stderr = std.fs.File.stderr().deprecatedWriter(); - try stderr.print("usage: {s} \n", .{args[0]}); + try stderr.interface.print("usage: {s} \n", .{args[0]}); + try stderr.interface.flush(); return 1; } const path = args[1]; - const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); - defer allocator.free(document); var diagnostics: hdoc.Diagnostics = .init(allocator); defer diagnostics.deinit(); - var parsed = try hdoc.parse(allocator, document, &diagnostics); - defer parsed.deinit(); + const parse_result = parse_and_process( + allocator, + &diagnostics, + &stdout.interface, + path, + ); - if (diagnostics.has_error()) - return 1; + for (diagnostics.items.items) |diag| { + try stderr.interface.print("{s}:{f}: {f}\n", .{ + path, + diag.location, + diag.code, + }); + } + try stderr.interface.flush(); - var stdout_buffer: [4096]u8 = undefined; - var stdout = std.fs.File.stdout().writer(&stdout_buffer); + parse_result catch |err| { + std.log.err("failed to parse \"{s}\": {t}", .{ path, err }); + return 1; + }; - try hdoc.render.yaml(parsed, &stdout.interface); + try stdout.interface.flush(); return 0; } + +fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostics, output_stream: *std.Io.Writer, path: []const u8) !void { + const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 
1024 * 10); + defer allocator.free(document); + + var parsed = try hdoc.parse(allocator, document, diagnostics); + defer parsed.deinit(); + + if (diagnostics.has_error()) { + return error.InvalidFile; + } + + try hdoc.render.yaml(parsed, output_stream); +} From 76d40851ba3d1847582e7e2c61ee22b574b82697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 14:40:27 +0100 Subject: [PATCH 040/116] Unifies handling for empty spans --- src/hyperdoc.zig | 55 ++++++++++++++++++---------------------- test/accept/workset.hdoc | 28 ++++++++++---------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b752b72..85605b5 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -633,7 +633,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node), + .content = try sema.translate_inline(node, .emit_diagnostic), }; return .{ heading, attrs.id }; @@ -657,7 +657,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node), + .content = try sema.translate_inline(node, .emit_diagnostic), }; return .{ heading, attrs.id }; @@ -715,16 +715,12 @@ pub const SemanticAnalyzer = struct { }); // TODO: Enforce non-empty "path" (required) and "alt" (if provided). 
- const content = switch (node.body) { - .empty => @constCast(&[_]Span{}), - else => try sema.translate_inline(node), - }; const image: Block.Image = .{ .lang = attrs.lang, .alt = attrs.alt, .path = attrs.path, - .content = content, + .content = try sema.translate_inline(node, .allow_empty), }; return .{ image, attrs.id }; @@ -740,7 +736,7 @@ pub const SemanticAnalyzer = struct { const preformatted: Block.Preformatted = .{ .lang = attrs.lang, .syntax = attrs.syntax, - .content = try sema.translate_inline(node), + .content = try sema.translate_inline(node, .emit_diagnostic), }; return .{ preformatted, attrs.id }; @@ -834,7 +830,7 @@ pub const SemanticAnalyzer = struct { rows.appendAssumeCapacity(.{ .group = .{ .lang = row_attrs.lang, - .content = try sema.translate_inline(child_node), + .content = try sema.translate_inline(child_node, .emit_diagnostic), }, }); }, @@ -951,7 +947,7 @@ pub const SemanticAnalyzer = struct { return &.{}; }, .text_to_p => { - const spans = try sema.translate_inline(node); + const spans = try sema.translate_inline(node, .emit_diagnostic); const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ @@ -969,14 +965,14 @@ pub const SemanticAnalyzer = struct { } /// Translates a node into a sequence of inline spans. - fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); // TODO: Implement automatic space insertion. 
// This must be done when two consecutive nodes are separated by a space - try sema.translate_inline_body(&spans, node.body, .{}); + try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); // TODO: Compact spans by joining spans with equal properties @@ -1063,7 +1059,7 @@ pub const SemanticAnalyzer = struct { switch (node.type) { .unknown_inline, .text, - => try sema.translate_inline_body(spans, node.body, attribs), + => try sema.translate_inline_body(spans, node.body, attribs, .emit_diagnostic), .@"\\em" => { const props = try sema.get_attributes(node, struct { @@ -1073,7 +1069,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .em = true, - })); + }), .emit_diagnostic); }, .@"\\strike" => { @@ -1084,7 +1080,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .strike = true, - })); + }), .emit_diagnostic); }, .@"\\sub" => { @@ -1095,7 +1091,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .superscript, - })); + }), .emit_diagnostic); }, .@"\\sup" => { @@ -1106,7 +1102,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .subscript, - })); + }), .emit_diagnostic); }, .@"\\link" => { @@ -1131,7 +1127,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .link = link, - })); + }), .emit_diagnostic); }, .@"\\mono" => { @@ -1143,7 +1139,7 @@ pub const SemanticAnalyzer = struct { .mono = true, .lang = props.lang, .syntax = props.syntax, - })); + }), .emit_diagnostic); }, .@"\\date", @@ -1155,17 
+1151,11 @@ pub const SemanticAnalyzer = struct { fmt: []const u8 = "", }); - var content_spans: std.ArrayList(Span) = .empty; - defer content_spans.deinit(sema.arena); - - // TODO: Implement automatic space insertion. - // This must be done when two consecutive nodes are separated by a space - // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. - try sema.translate_inline_body(&content_spans, node.body, .{}); + const content_spans = try sema.translate_inline(node, .emit_diagnostic); // Convert the content_spans into a "rendered string". - const content_text = try sema.render_spans_to_plaintext(content_spans.items, .no_space); + const content_text = try sema.render_spans_to_plaintext(content_spans, .no_space); const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1297,10 +1287,15 @@ pub const SemanticAnalyzer = struct { return try output_str.toOwnedSlice(sema.arena); } - fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { + const EmptyHandling = enum { + allow_empty, + emit_diagnostic, + }; + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }!void { switch (body) { - .empty => |location| { - try sema.emit_diagnostic(.empty_inline_body, location); + .empty => |location| switch (empty_handling) { + .allow_empty => {}, + .emit_diagnostic => try sema.emit_diagnostic(.empty_inline_body, location), }, .string => |string_body| { diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 19ffba1..b8717d6 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,18 +1,18 @@ hdoc(version="2.0"); -h1: -| Hello, World! - -h2{Hello \em{World}!} - -p { This is a span. 
} - -ul { - li "Item 1" - li { p { Item 2 } } +p { + In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed + steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained + parentheses): "Use the staging key (not the production key (seriously))". A different person pasted a pseudo-path, + /var/tmp/builds/2025-12-23/, and then warned, "If you see `NULL` in the output, don't 'fix' it by replacing it with + '0'—that's how we broke reporting last time." } -ol { - li "Item 1" - li { p { Item 2 } } -} \ No newline at end of file +pre(syntax="zig") { + pub fn FormattedDateTime(comptime DT: type) type { + return struct { + value: DT, + format: DT.Format = .default, + }; + } +} From 0c77468e712ac6c9d532cdd38e036a2dd8887e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:03:52 +0100 Subject: [PATCH 041/116] Implements span compaction for regular 'single whitespace' join spans. The parser now also emits whitespace-only spans separately from the word spans. pre compaction is not implemented yet. 
--- src/hyperdoc.zig | 181 +++++++++++++++++++++++++++++++++++++-- src/testsuite.zig | 29 ++++--- test/accept/workset.hdoc | 2 +- 3 files changed, 196 insertions(+), 16 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 85605b5..9b3e528 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -144,10 +144,35 @@ pub const Span = struct { strike: bool = false, link: Link = .none, syntax: []const u8 = "", // empty is absence + + pub fn eql(lhs: Attributes, rhs: Attributes) bool { + // Trivial comparisons: + if (lhs.position != rhs.position) + return false; + if (lhs.em != rhs.em) + return false; + if (lhs.mono != rhs.mono) + return false; + if (lhs.strike != rhs.strike) + return false; + + // string comparison: + if (!std.mem.eql(u8, lhs.lang, rhs.lang)) + return false; + if (!std.mem.eql(u8, lhs.syntax, rhs.syntax)) + return false; + + // complex comparison + if (!lhs.link.eql(rhs.link)) + return false; + + return true; + } }; content: Content, attribs: Attributes, + location: Parser.Location, }; pub const ScriptPosition = enum { @@ -160,6 +185,14 @@ pub const Link = union(enum) { none, ref: Reference, uri: Uri, + + pub fn eql(lhs: Link, rhs: Link) bool { + return switch (lhs) { + .none => (rhs == .none), + .ref => (rhs == .ref) and std.mem.eql(u8, lhs.ref.text, rhs.ref.text), + .uri => (rhs == .uri) and std.mem.eql(u8, lhs.uri.text, rhs.uri.text), + }; + } }; /// HyperDoc Version Number @@ -473,7 +506,7 @@ pub fn remove_byte_order_mark(diagnostics: ?*Diagnostics, plain_text: []const u8 } pub const SemanticAnalyzer = struct { - const whitespace_chars = " \t"; + const whitespace_chars = " \t\r\n"; const Header = struct { version: Version, @@ -967,18 +1000,120 @@ pub const SemanticAnalyzer = struct { /// Translates a node into a sequence of inline spans. 
fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; - errdefer spans.deinit(sema.arena); + defer spans.deinit(sema.arena); // TODO: Implement automatic space insertion. // This must be done when two consecutive nodes are separated by a space try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); - // TODO: Compact spans by joining spans with equal properties + // TODO: Use different whitespace strategies here: + return try sema.compact_spans(spans.items, .one_space); + } + + const Whitespace = enum { + one_space, + keep_space, + }; + + /// Compacts and merges spans of equal attributes by `whitespace` ruling. + fn compact_spans(sema: *SemanticAnalyzer, input: []const Span, whitespace: Whitespace) ![]Span { + var merger: SpanMerger = .{ + .arena = sema.arena, + .whitespace = whitespace, + }; + + for (input) |span| { + try merger.push(span); + } + + try merger.flush(); - return try spans.toOwnedSlice(sema.arena); + return try merger.output.toOwnedSlice(sema.arena); } + /// Checks if only + fn is_only_whitespace(str: []const u8) bool { + return std.mem.indexOfNone(u8, str, whitespace_chars) == null; + } + + const SpanMerger = struct { + arena: std.mem.Allocator, + whitespace: Whitespace, + + output: std.ArrayList(Span) = .empty, + + span_start: usize = 0, + current_span: std.ArrayList(u8) = .empty, + attribs: Span.Attributes = .{}, + last_end: usize = std.math.maxInt(usize), + + fn push(merger: *SpanMerger, span: Span) !void { + if (merger.last_end == std.math.maxInt(usize)) { + merger.last_end = span.location.offset; + } + + if (!span.attribs.eql(merger.attribs)) { + try merger.flush_internal(.keep); + std.debug.assert(merger.current_span.items.len == 0); + merger.attribs = span.attribs; + std.debug.assert(span.attribs.eql(merger.attribs)); + } + switch (span.content) { + .date, .time, .datetime => { + // All 
date/time/datetime require to be passed verbatim into the output + try merger.flush_internal(.keep); + std.debug.assert(merger.current_span.items.len == 0); + + try merger.output.append(merger.arena, span); + }, + .text => |text_content| { + std.debug.assert(span.attribs.eql(merger.attribs)); + + const append_text, const skip_head = if (is_only_whitespace(text_content)) + switch (merger.whitespace) { + .one_space => .{ " ", true }, + .keep_space => .{ text_content, false }, + } + else + .{ text_content, false }; + + // check if we already have text, and if not, if we should keep the whitespace + if (merger.current_span.items.len > 0 or !skip_head) { + try merger.current_span.appendSlice(merger.arena, append_text); + } + }, + } + merger.last_end = span.location.offset_one_after(); + } + + pub fn flush(merger: *SpanMerger) !void { + return merger.flush_internal(.strip); + } + + fn flush_internal(merger: *SpanMerger, mode: enum { strip, keep }) !void { + if (merger.current_span.items.len == 0) + return; + + const raw_string = try merger.current_span.toOwnedSlice(merger.arena); + + const string = switch (mode) { + .strip => std.mem.trimRight(u8, raw_string, whitespace_chars), + .keep => raw_string, + }; + + try merger.output.append(merger.arena, .{ + .attribs = merger.attribs, + .content = .{ .text = string }, + .location = .{ + .offset = merger.span_start, + .length = merger.last_end - merger.span_start, + }, + }); + merger.span_start = merger.last_end; + } + }; + pub const AttribOverrides = struct { lang: ?[]const u8 = null, em: ?bool = null, @@ -1169,6 +1304,7 @@ pub const SemanticAnalyzer = struct { .attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = attribs.lang, }), + .location = node.location, }); }, @@ -1224,11 +1360,13 @@ pub const SemanticAnalyzer = struct { const value: DTValue = if (value_or_err) |value| value else |err| blk: { + std.log.warn("failed to parse {t}: \"{s}\"", .{ body, value_str }); switch (err) { error.InvalidValue => { 
try sema.emit_diagnostic(.invalid_date_time, node.location); }, error.MissingTimezone => { + std.log.err("emit missing timezone for {}", .{node.location}); // TODO: Use (timezone_hint != null) to emit diagnostic for hint with // adding `tz` attribute when all date/time values share a common base. try sema.emit_diagnostic(.invalid_date_time, node.location); @@ -1304,6 +1442,7 @@ pub const SemanticAnalyzer = struct { try spans.append(sema.arena, .{ .content = .{ .text = text }, .attribs = attribs, + .location = string_body.location, }); }, @@ -1334,9 +1473,19 @@ pub const SemanticAnalyzer = struct { text_buffer.appendSliceAssumeCapacity(stripped); } + const location: Parser.Location = if (verbatim_lines.len > 0) blk: { + const head = verbatim_lines[0].location.offset; + const tail = verbatim_lines[verbatim_lines.len - 1].location.offset_one_after(); + break :blk .{ + .offset = head, + .length = tail - head, + }; + } else .{ .offset = 0, .length = 0 }; + try spans.append(sema.arena, .{ .content = .{ .text = try text_buffer.toOwnedSlice(sema.arena) }, .attribs = attribs, + .location = location, }); }, @@ -1350,6 +1499,7 @@ pub const SemanticAnalyzer = struct { try spans.append(sema.arena, .{ .content = .{ .text = text_span.text }, .attribs = attribs, + .location = text_span.location, }); }, } @@ -1854,7 +2004,24 @@ pub const Parser = struct { var nesting: usize = 0; while (true) { - parser.skip_whitespace(); + // If necessary, emit a whitespace span: + { + const before = parser.offset; + parser.skip_whitespace(); + const after = parser.offset; + std.debug.assert(after >= before); + if (after > before) { + // We've skipped over whitespace, so we emit a "whitespace" node here: + const whitespace = parser.slice(before, after); + try children.append(parser.arena, .{ + .location = whitespace.location, + .type = .text, + .body = .{ + .text_span = whitespace, + }, + }); + } + } const head = parser.peek_char() orelse { emitDiagnostic(parser, .unterminated_inline_list, 
parser.make_diagnostic_location(parser.offset)); @@ -2187,6 +2354,10 @@ pub const Parser = struct { pub const Location = struct { offset: usize, length: usize, + + pub fn offset_one_after(loc: Location) usize { + return loc.offset + loc.length; + } }; pub const NodeType = enum { diff --git a/src/testsuite.zig b/src/testsuite.zig index 111649f..d366816 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -294,16 +294,25 @@ test "parser handles inline node lists" { try std.testing.expectEqual(hdoc.Parser.NodeType.p, node.type); switch (node.body) { .list => |children| { - try std.testing.expectEqual(@as(usize, 2), children.len); - try std.testing.expectEqual(hdoc.Parser.NodeType.text, children[0].type); - try std.testing.expectEqual(@as(usize, 5), children[0].location.length); + try std.testing.expectEqual(@as(usize, 5), children.len); + + try std.testing.expectEqual(.text, children[0].type); + try std.testing.expectEqual(.text, children[1].type); + try std.testing.expectEqual(.text, children[2].type); + try std.testing.expectEqual(.@"\\em", children[3].type); + try std.testing.expectEqual(.text, children[4].type); + + try std.testing.expectEqual(" ".len, children[0].location.length); + try std.testing.expectEqual("Hello".len, children[1].location.length); + try std.testing.expectEqual(" ".len, children[2].location.length); + try std.testing.expectEqual("\\em{world}".len, children[3].location.length); + try std.testing.expectEqual(" ".len, children[4].location.length); - try std.testing.expectEqual(hdoc.Parser.NodeType.@"\\em", children[1].type); - switch (children[1].body) { + switch (children[3].body) { .list => |inline_children| { - try std.testing.expectEqual(@as(usize, 1), inline_children.len); - try std.testing.expectEqual(hdoc.Parser.NodeType.text, inline_children[0].type); - try std.testing.expectEqual(@as(usize, 5), inline_children[0].location.length); + try std.testing.expectEqual(1, inline_children.len); + try std.testing.expectEqual(.text, 
inline_children[0].type); + try std.testing.expectEqual("world".len, inline_children[0].location.length); }, else => return error.TestExpectedEqual, } @@ -423,7 +432,7 @@ fn expectParseOk(opts: LogDiagOptions, code: []const u8) !void { if (diagnostics.has_error() or diagnostics.has_warning()) { logDiagnostics(&diagnostics, opts); - return error.TestExpectedEqual; + return error.TestExpectedNoDiagnostics; } } @@ -442,7 +451,7 @@ fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { if (diagnostics.has_error()) { logDiagnostics(&diagnostics, opts); - return error.TestExpectedEqual; + return error.TestExpectedNoErrors; } } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index b8717d6..65ffc31 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -4,7 +4,7 @@ p { In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained parentheses): "Use the staging key (not the production key (seriously))". A different person pasted a pseudo-path, - /var/tmp/builds/2025-12-23/, and then warned, "If you see `NULL` in the output, don't 'fix' it by replacing it with + /var/tmp/builds/\date(fmt="iso"){2025-12-23}/, and then warned, "If you see \mono{NULL} in the output, don't 'fix' it by replacing it with '0'—that's how we broke reporting last time." 
} From 923d01f1dc9a8440e175aca75e7992f70a150623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:09:08 +0100 Subject: [PATCH 042/116] Implements p/pre split for whitespace compaction --- src/hyperdoc.zig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 9b3e528..bc92d96 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -666,7 +666,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), }; return .{ heading, attrs.id }; @@ -690,7 +690,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), }; return .{ heading, attrs.id }; @@ -753,7 +753,7 @@ pub const SemanticAnalyzer = struct { .lang = attrs.lang, .alt = attrs.alt, .path = attrs.path, - .content = try sema.translate_inline(node, .allow_empty), + .content = try sema.translate_inline(node, .allow_empty, .one_space), }; return .{ image, attrs.id }; @@ -769,7 +769,7 @@ pub const SemanticAnalyzer = struct { const preformatted: Block.Preformatted = .{ .lang = attrs.lang, .syntax = attrs.syntax, - .content = try sema.translate_inline(node, .emit_diagnostic), + .content = try sema.translate_inline(node, .emit_diagnostic, .keep_space), }; return .{ preformatted, attrs.id }; @@ -863,7 +863,7 @@ pub const SemanticAnalyzer = struct { rows.appendAssumeCapacity(.{ .group = .{ .lang = row_attrs.lang, - .content = try sema.translate_inline(child_node, .emit_diagnostic), + .content = try sema.translate_inline(child_node, .emit_diagnostic, .one_space), }, }); }, @@ -980,7 +980,7 @@ pub const SemanticAnalyzer = struct { return &.{}; }, .text_to_p => { - const 
spans = try sema.translate_inline(node, .emit_diagnostic); + const spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ @@ -998,7 +998,7 @@ pub const SemanticAnalyzer = struct { } /// Translates a node into a sequence of inline spans. - fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }![]Span { + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling, whitespace_handling: Whitespace) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; defer spans.deinit(sema.arena); @@ -1008,7 +1008,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); // TODO: Use different whitespace strategies here: - return try sema.compact_spans(spans.items, .one_space); + return try sema.compact_spans(spans.items, whitespace_handling); } const Whitespace = enum { @@ -1287,7 +1287,7 @@ pub const SemanticAnalyzer = struct { }); // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. - const content_spans = try sema.translate_inline(node, .emit_diagnostic); + const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); // Convert the content_spans into a "rendered string". const content_text = try sema.render_spans_to_plaintext(content_spans, .no_space); From 73763b7cd2f61a7429ee9e2f8149db1bb3ce6ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:29:48 +0100 Subject: [PATCH 043/116] Resolves several TODOs: Table shape (column count) validation, date/time/datetime nesting detection, image attribute validation. 
--- src/hyperdoc.zig | 118 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 31 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index bc92d96..07440ca 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -71,8 +71,8 @@ pub const Block = union(enum) { pub const Image = struct { lang: ?[]const u8, - alt: ?[]const u8, - path: ?[]const u8, + alt: []const u8, // empty means none + path: []const u8, content: []Span, }; @@ -469,7 +469,7 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; - // TODO: Validate document-level semantic constraints (unique ids, ref resolution, table shape). + // TODO: Validate document-level semantic constraints (unique ids, ref resolution). return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), @@ -747,12 +747,30 @@ pub const SemanticAnalyzer = struct { path: []const u8, }); - // TODO: Enforce non-empty "path" (required) and "alt" (if provided). + const alt = if (attrs.alt) |alt| + std.mem.trim(u8, alt, whitespace_chars) + else + ""; + + const path = std.mem.trim(u8, attrs.path, whitespace_chars); + if (path.len == 0) { + // The path must be non-empty. + + // TODO: Implement better diagnostic message + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "path" } }, get_attribute_location(node, "path", .value).?); + } + + if (attrs.alt != null and alt.len == 0) { + // If alt is present, it must be non-empty, and not fully whitespace. 
+ + // TODO: Implement better diagnostic message + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "alt" } }, get_attribute_location(node, "alt", .value).?); + } const image: Block.Image = .{ .lang = attrs.lang, - .alt = attrs.alt, - .path = attrs.path, + .alt = alt, + .path = path, .content = try sema.translate_inline(node, .allow_empty, .one_space), }; @@ -820,6 +838,8 @@ pub const SemanticAnalyzer = struct { var rows: std.ArrayList(Block.TableRow) = .empty; defer rows.deinit(sema.arena); + var column_count: ?usize = null; + switch (node.body) { .list => |child_nodes| { try rows.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); @@ -831,13 +851,26 @@ pub const SemanticAnalyzer = struct { }); const cells = try sema.translate_table_cells(child_node); - rows.appendAssumeCapacity(.{ .columns = .{ .lang = row_attrs.lang, .cells = cells, }, }); + + var width: usize = 0; + for (cells) |cell| { + std.debug.assert(cell.colspan > 0); + width += cell.colspan; + } + + column_count = column_count orelse width; + if (width != column_count) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.?, + .actual = width, + } }, child_node.location); + } }, .row => { const row_attrs = try sema.get_attributes(child_node, struct { @@ -854,6 +887,20 @@ pub const SemanticAnalyzer = struct { .cells = cells, }, }); + + var width: usize = 0; + for (cells) |cell| { + std.debug.assert(cell.colspan > 0); + width += cell.colspan; + } + + column_count = column_count orelse width; + if (width != column_count) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.?, + .actual = width, + } }, child_node.location); + } }, .group => { const row_attrs = try sema.get_attributes(child_node, struct { @@ -878,7 +925,6 @@ pub const SemanticAnalyzer = struct { }, } - // TODO: Validate column counts after colspan and title/group leading column rules. 
const table: Block.Table = .{ .lang = attrs.lang, .rows = try rows.toOwnedSlice(sema.arena), @@ -1002,12 +1048,8 @@ pub const SemanticAnalyzer = struct { var spans: std.ArrayList(Span) = .empty; defer spans.deinit(sema.arena); - // TODO: Implement automatic space insertion. - // This must be done when two consecutive nodes are separated by a space - try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); - // TODO: Use different whitespace strategies here: return try sema.compact_spans(spans.items, whitespace_handling); } @@ -1280,17 +1322,33 @@ pub const SemanticAnalyzer = struct { .@"\\date", .@"\\time", .@"\\datetime", - => { + => blk: { const props = try sema.get_attributes(node, struct { lang: ?[]const u8 = null, fmt: []const u8 = "", }); - // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + // Enforce that date/time bodies only contain plain text/string/verbatim. + // HyperDoc cannot format date/time values on it's own so we can't render + // \date, \time and \datetime into a string. It also doesn't make any sense + // to nest them. + for (content_spans) |span| { + switch (span.content) { + .text => {}, + .date, .time, .datetime => { + try sema.emit_diagnostic(.nested_date_time, span.location); + break :blk; + }, + } + } + // Convert the content_spans into a "rendered string". 
- const content_text = try sema.render_spans_to_plaintext(content_spans, .no_space); + const content_text = sema.render_spans_to_plaintext(content_spans) catch |err| switch (err) { + error.DateTimeRenderingUnsupported => unreachable, + else => |e| return e, + }; const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1391,16 +1449,12 @@ pub const SemanticAnalyzer = struct { }); } - const JoinStyle = enum { no_space, one_space }; - fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { - var len: usize = switch (style) { - .no_space => 0, - .one_space => (source_spans.len -| 1), - }; + fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span) error{ OutOfMemory, DateTimeRenderingUnsupported }![]const u8 { + var len: usize = 0; for (source_spans) |span| { len += switch (span.content) { .text => |str| str.len, - .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + .date, .time, .datetime => return error.DateTimeRenderingUnsupported, }; } @@ -1409,16 +1463,10 @@ pub const SemanticAnalyzer = struct { try output_str.ensureTotalCapacityPrecise(sema.arena, len); - for (source_spans, 0..) |span, index| { - switch (style) { - .no_space => {}, - .one_space => if (index > 0) - output_str.appendAssumeCapacity(' '), - } - + for (source_spans) |span| { switch (span.content) { .text => |str| output_str.appendSliceAssumeCapacity(str), - .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + .date, .time, .datetime => unreachable, } } @@ -1519,7 +1567,6 @@ pub const SemanticAnalyzer = struct { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; - // TODO: Enforce per-attribute constraints from the spec (non-empty strings, lang tag format, etc). 
var required: std.EnumSet(Fields) = .initEmpty(); var attrs: Attrs = undefined; @@ -2535,6 +2582,7 @@ pub const Diagnostic = struct { pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; + pub const TableShapeError = struct { actual: usize, expected: usize }; pub const Code = union(enum) { // errors: @@ -2554,6 +2602,7 @@ pub const Diagnostic = struct { link_not_nestable, invalid_link, invalid_date_time, + nested_date_time, invalid_date_time_fmt, invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, @@ -2561,6 +2610,7 @@ pub const Diagnostic = struct { illegal_child_item, list_body_required, illegal_id_attribute, + column_count_mismatch: TableShapeError, // warnings: document_starts_with_bom, @@ -2599,6 +2649,8 @@ pub const Diagnostic = struct { .illegal_child_item, .list_body_required, .illegal_id_attribute, + .nested_date_time, + .column_count_mismatch, => .@"error", .unknown_attribute, @@ -2671,6 +2723,10 @@ pub const Diagnostic = struct { .illegal_child_item => try w.writeAll("Node not allowed here."), .illegal_id_attribute => try w.writeAll("Attribute 'id' not allowed here."), + + .nested_date_time => try w.writeAll("Nesting \\date, \\time and \\datetime is not allowed."), + + .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), } } }; From 68eb43e039c77446037d9e9fe4c3abfac9afcc16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:36:30 +0100 Subject: [PATCH 044/116] Implements id uniqueness check. 
--- src/hyperdoc.zig | 38 +++++++++++++++++++++++++++++++++++--- src/render/dump.zig | 3 ++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 07440ca..441ba35 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -13,7 +13,8 @@ pub const Document = struct { // document contents: contents: []Block, - ids: []?Reference, + content_ids: []?Reference, + id_map: std.StringArrayHashMapUnmanaged(usize), // id -> index // header information lang: ?[]const u8, @@ -469,11 +470,34 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; - // TODO: Validate document-level semantic constraints (unique ids, ref resolution). + const content_ids = try sema.ids.toOwnedSlice(arena.allocator()); + + var id_map: std.StringArrayHashMapUnmanaged(usize) = .empty; + errdefer id_map.deinit(arena.allocator()); + + try id_map.ensureTotalCapacity(arena.allocator(), content_ids.len); + + for (content_ids, 0..) |id_or_null, index| { + const id = id_or_null orelse continue; + + const gop = id_map.getOrPutAssumeCapacity(id.text); + if (gop.found_existing) { + try sema.emit_diagnostic( + .{ .duplicate_id = .{ .ref = id.text } }, + .{ .offset = 0, .length = 0 }, // TODO: Figure out proper node location + ); + continue; + } + gop.value_ptr.* = index; + } + + // TODO: Validate document-level semantic constraints (ref resolution). 
+ return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), - .ids = try sema.ids.toOwnedSlice(arena.allocator()), + .content_ids = content_ids, + .id_map = id_map, .lang = header.lang, .title = header.title, @@ -2583,6 +2607,7 @@ pub const Diagnostic = struct { pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; + pub const ReferenceError = struct { ref: []const u8 }; pub const Code = union(enum) { // errors: @@ -2611,6 +2636,8 @@ pub const Diagnostic = struct { list_body_required, illegal_id_attribute, column_count_mismatch: TableShapeError, + duplicate_id: ReferenceError, + unknown_id: ReferenceError, // warnings: document_starts_with_bom, @@ -2651,6 +2678,8 @@ pub const Diagnostic = struct { .illegal_id_attribute, .nested_date_time, .column_count_mismatch, + .duplicate_id, + .unknown_id, => .@"error", .unknown_attribute, @@ -2727,6 +2756,9 @@ pub const Diagnostic = struct { .nested_date_time => try w.writeAll("Nesting \\date, \\time and \\datetime is not allowed."), .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), + + .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), + .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index 347e90e..bedf742 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -424,7 +424,8 @@ fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); try dumpBlockListField(writer, indent_step, "contents", doc.contents); - try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); + 
try dumpOptionalStringListField(writer, indent_step, "ids", doc.content_ids); + // TODO: Dump ID map } pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { From cdd8245fb029d7a843d0d100501cc74ed8ce779c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 17:19:19 +0100 Subject: [PATCH 045/116] Spec/code alignment: Allows trailing commas in attribute lists, allows 'fmt=iso' for \date and \time as well, checks document version, also allows '-' and ':' inside identifiers, adds more TODO comments --- docs/specification.md | 6 +-- src/hyperdoc.zig | 95 ++++++++++++++++++++++++++++++++-------- src/testsuite.zig | 15 ++++++- test/accept/workset.hdoc | 2 +- 4 files changed, 93 insertions(+), 25 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index ce20dd7..4e0d0f7 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -133,7 +133,7 @@ attribute ::= attr_key , ws , "=" , ws , string_literal ; *) attr_key ::= attr_key_char , { attr_key_char } ; -attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | ":" | "\" ; +attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; (* ---------- Block-list content ---------- *) @@ -703,8 +703,8 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically | Element | Attribute | Function | | ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | -| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | -| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | +| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` (raw ISO 8601). | +| `time` | `fmt` | `short`, `long`, `rough`, `relative`, `iso` (raw ISO 8601). | | `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). 
| Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 441ba35..12b8cee 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -426,8 +426,10 @@ pub fn parse( /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) error{ OutOfMemory, SyntaxError, MalformedDocument, InvalidUtf8 }!Document { - const source_text = try remove_byte_order_mark(diagnostics, raw_plain_text); +) error{ OutOfMemory, SyntaxError, MalformedDocument, UnsupportedVersion, InvalidUtf8 }!Document { + const source_text = try clean_utf8_input(diagnostics, raw_plain_text); + + // We now know that the source code is 'fine' and var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); @@ -508,25 +510,39 @@ pub fn parse( }; } -pub fn remove_byte_order_mark(diagnostics: ?*Diagnostics, plain_text: []const u8) error{ OutOfMemory, InvalidUtf8 }![]const u8 { +pub fn clean_utf8_input(diagnostics: ?*Diagnostics, raw_plain_text: []const u8) error{ OutOfMemory, InvalidUtf8 }![]const u8 { + // First check if all of our code is valid UTF-8 // and if it potentially starts with a BOM. 
- var view = std.unicode.Utf8View.init(plain_text) catch { + var view = std.unicode.Utf8View.init(raw_plain_text) catch { return error.InvalidUtf8; }; var iter = view.iterator(); - if (iter.nextCodepointSlice()) |slice| { const codepoint = std.unicode.utf8Decode(slice) catch unreachable; if (codepoint == 0xFEFF) { if (diagnostics) |diag| { try diag.add(.document_starts_with_bom, .{ .column = 1, .line = 1 }); } - return plain_text[slice.len..]; + std.debug.assert(iter.i == slice.len); + } else { + iter.i = 0; // Reset iterator to start position } } - return plain_text; + const source_head = iter.i; + + while (iter.nextCodepointSlice()) |slice| { + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + + // TODO: Write codepoint validation which rejects the file if invalid codepoints are detected and + // emits warnings for TAB characters. + // Bare CR is forbidden, just CR LF or LF is allowed. + + _ = codepoint; + } + + return raw_plain_text[source_head..]; } pub const SemanticAnalyzer = struct { @@ -549,16 +565,26 @@ pub const SemanticAnalyzer = struct { blocks: std.ArrayList(Block) = .empty, ids: std.ArrayList(?Reference) = .empty, - fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{OutOfMemory}!void { + fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { switch (node.type) { .hdoc => { + const header = sema.translate_header_node(node) catch |err| switch (err) { + error.OutOfMemory, error.UnsupportedVersion => |e| return e, + error.BadAttributes => null, + }; if (sema.header != null) { try sema.emit_diagnostic(.duplicate_hdoc_header, node.location); + } else { + sema.header = header orelse .{ + .version = .{ .major = 2, .minor = 0 }, + .lang = null, + .title = null, + .author = null, + .timezone = null, + .date = null, + }; } - sema.header = sema.translate_header_node(node) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - error.BadAttributes => null, - }; + 
std.debug.assert(sema.header != null); }, else => { @@ -589,7 +615,7 @@ pub const SemanticAnalyzer = struct { } } - fn translate_header_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }!Header { + fn translate_header_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, UnsupportedVersion }!Header { std.debug.assert(node.type == .hdoc); const attrs = try sema.get_attributes(node, struct { @@ -597,10 +623,17 @@ pub const SemanticAnalyzer = struct { title: ?[]const u8 = null, author: ?[]const u8 = null, date: ?DateTime = null, - lang: ?[]const u8 = null, + lang: ?[]const u8 = null, // TODO: Introduce with "LanguageTag" type for all "lang" attributes which performs proper validation tz: ?[]const u8 = null, }); + if (attrs.version.major != 2) + return error.UnsupportedVersion; + if (attrs.version.minor != 0) + return error.UnsupportedVersion; + + // TODO: Validate TZ format + return .{ .version = attrs.version, .lang = attrs.lang, @@ -754,6 +787,8 @@ pub const SemanticAnalyzer = struct { }, } + // TODO: Validate `children.items.len >= 1` + const list: Block.List = .{ .first = attrs.first orelse if (node.type == .ol) 1 else null, .lang = attrs.lang, @@ -980,6 +1015,8 @@ pub const SemanticAnalyzer = struct { }, } + // TODO: Validate `children.items.len >= 1` + return try cells.toOwnedSlice(sema.arena); } @@ -1291,7 +1328,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, - .position = .superscript, + .position = .subscript, }), .emit_diagnostic); }, @@ -1302,7 +1339,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, - .position = .subscript, + .position = .superscript, }), .emit_diagnostic); }, @@ -1327,6 +1364,7 @@ pub const SemanticAnalyzer = struct { }; try 
sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, .link = link, }), .emit_diagnostic); }, @@ -1799,13 +1837,22 @@ pub const SemanticAnalyzer = struct { break :blk "???"; } - if (escape_part.len == 4) { + const min_len = "\\u{}".len; + const max_len = "\\u{123456}".len; + + if (escape_part.len == min_len) { // Empty escape: \u{} std.debug.assert(std.mem.eql(u8, escape_part, "\\u{}")); try sema.emit_diagnostic(.invalid_unicode_string_escape, location); break :blk "???"; } + if (escape_part.len > max_len) { + // Escape sequence is more than 6 chars long + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + } + const codepoint = std.fmt.parseInt(u21, escape_part[3 .. escape_part.len - 1], 16) catch { try sema.emit_diagnostic(.invalid_unicode_string_escape, location); break :blk "???"; @@ -1956,6 +2003,8 @@ pub const Parser = struct { // so we know that the next token must be the attribute name. 
while (true) { + if (parser.try_accept_char(')')) + break; const attr_name = try parser.accept_identifier(); _ = try parser.accept_char('='); const attr_value = try parser.accept_string(); @@ -1966,10 +2015,10 @@ pub const Parser = struct { }); if (!parser.try_accept_char(',')) { + try parser.accept_char(')'); break; } } - try parser.accept_char(')'); } } @@ -2285,6 +2334,8 @@ pub const Parser = struct { parser.offset += 1; switch (c) { + '\n' => return error.UnterminatedStringLiteral, + '"' => return parser.slice(start, parser.offset), '\\' => { @@ -2412,7 +2463,13 @@ pub const Parser = struct { pub fn is_ident_char(c: u8) bool { return switch (c) { - 'a'...'z', 'A'...'Z', '0'...'9', '_', '\\' => true, + 'a'...'z', + 'A'...'Z', + '0'...'9', + '_', + '-', + '\\', + => true, else => false, }; } diff --git a/src/testsuite.zig b/src/testsuite.zig index d366816..bfa3d17 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -1,6 +1,17 @@ const std = @import("std"); const hdoc = @import("./hyperdoc.zig"); +// TODO: Write unit test for trailing comma in attribute lists +// TODO: Write unit test for invalid escape sequence detection when more than 6 (hex) chars are used +// TODO: Write unit test for invalid version detection (must be 2.0) +// TODO: Write unit test for duplicate header recognition +// TODO: Write unit test for clean_utf8_input() passthrough +// TODO: Write unit test for clean_utf8_input() BOM detection +// TODO: Write unit test for clean_utf8_input() invalid UTF-8 detection +// TODO: Write unit test for clean_utf8_input() illegal codepoint detection (bare CR -> error) +// TODO: Write unit test for clean_utf8_input() illegal codepoint detection (TAB -> warning) +// TODO: Write unit test for clean_utf8_input() illegal codepoint detection (any other control character -> error) + test "validate examples directory" { try parseDirectoryTree("examples"); } @@ -66,7 +77,7 @@ test "parser rejects identifiers with invalid start characters" { defer arena.deinit(); 
var parser: hdoc.Parser = .{ - .code = "-abc", + .code = "*abc", .arena = arena.allocator(), .diagnostics = null, }; @@ -463,7 +474,7 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); *abc", &.{.{ .invalid_identifier_start = .{ .char = '*' } }}); try validateDiagnostics(.{}, "hdoc{h1 \"x\"", &.{.unterminated_block_list}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); try validateDiagnostics( diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 65ffc31..cf91f7b 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0"); +hdoc(version="2.0",); p { In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed From b8f36e2aaa17212b09df92d11a1ea4caf80e1189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 20:40:59 +0100 Subject: [PATCH 046/116] Refactored code to use LanguageTag and TimeZoneOffset instead of ?[]const u8/i32 to increase type safety. 
--- docs/specification.md | 2 +- src/hyperdoc.zig | 228 ++++++++++++++++++++++++------------------ src/render/dump.zig | 30 +++--- src/testsuite.zig | 18 ++-- 4 files changed, 158 insertions(+), 120 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 4e0d0f7..50c255d 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -129,7 +129,7 @@ attribute_list ::= "(" , ws , attribute ::= attr_key , ws , "=" , ws , string_literal ; (* - Attribute keys may include '-' and ':' in addition to node-name characters. + Attribute keys may include '-' in addition to node-name characters. *) attr_key ::= attr_key_char , { attr_key_char } ; diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 12b8cee..f6d4386 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -17,11 +17,11 @@ pub const Document = struct { id_map: std.StringArrayHashMapUnmanaged(usize), // id -> index // header information - lang: ?[]const u8, + lang: LanguageTag = .inherit, // inherit here means "unset" title: ?[]const u8, author: ?[]const u8, date: ?DateTime, - timezone: ?[]const u8, + timezone: ?TimeZoneOffset, pub fn deinit(doc: *Document) void { doc.arena.deinit(); @@ -45,7 +45,7 @@ pub const Block = union(enum) { pub const Heading = struct { level: HeadingLevel, - lang: ?[]const u8, + lang: LanguageTag, content: []Span, }; @@ -53,43 +53,43 @@ pub const Block = union(enum) { pub const Paragraph = struct { kind: ParagraphKind, - lang: ?[]const u8, + lang: LanguageTag, content: []Span, }; pub const ParagraphKind = enum { p, note, warning, danger, tip, quote, spoiler }; pub const List = struct { - lang: ?[]const u8, + lang: LanguageTag, first: ?u32, items: []ListItem, }; pub const ListItem = struct { - lang: ?[]const u8, + lang: LanguageTag, content: []Block, }; pub const Image = struct { - lang: ?[]const u8, + lang: LanguageTag, alt: []const u8, // empty means none path: []const u8, content: []Span, }; pub const Preformatted = struct { - lang: ?[]const u8, + lang: 
LanguageTag, syntax: ?[]const u8, content: []Span, }; pub const TableOfContents = struct { - lang: ?[]const u8, + lang: LanguageTag, depth: ?u8, }; pub const Table = struct { - lang: ?[]const u8, + lang: LanguageTag, rows: []TableRow, }; @@ -100,23 +100,23 @@ pub const Block = union(enum) { }; pub const TableColumns = struct { - lang: ?[]const u8, + lang: LanguageTag, cells: []TableCell, }; pub const TableDataRow = struct { - lang: ?[]const u8, + lang: LanguageTag, title: ?[]const u8, cells: []TableCell, }; pub const TableGroup = struct { - lang: ?[]const u8, + lang: LanguageTag, content: []Span, }; pub const TableCell = struct { - lang: ?[]const u8, + lang: LanguageTag, colspan: u32, content: []Block, }; @@ -138,7 +138,7 @@ pub const Span = struct { }; pub const Attributes = struct { - lang: []const u8 = "", // empty is absence + lang: LanguageTag = .inherit, position: ScriptPosition = .baseline, em: bool = false, mono: bool = false, @@ -158,12 +158,12 @@ pub const Span = struct { return false; // string comparison: - if (!std.mem.eql(u8, lhs.lang, rhs.lang)) - return false; if (!std.mem.eql(u8, lhs.syntax, rhs.syntax)) return false; // complex comparison + if (!lhs.lang.eql(rhs.lang)) + return false; if (!lhs.link.eql(rhs.link)) return false; @@ -227,7 +227,7 @@ pub const DateTime = struct { date: Date, time: Time, - pub fn parse(text: []const u8, default_timezone: ?[]const u8) !DateTime { + pub fn parse(text: []const u8, timezone_hint: ?TimeZoneOffset) !DateTime { const split_index = std.mem.indexOfScalar(u8, text, 'T') orelse return error.InvalidValue; const head = text[0..split_index]; @@ -235,7 +235,7 @@ pub const DateTime = struct { return .{ .date = try Date.parse(head), - .time = try Time.parse(tail, default_timezone), + .time = try Time.parse(tail, timezone_hint), }; } }; @@ -303,9 +303,9 @@ pub const Time = struct { minute: u6, // 0-59 second: u6, // 0-59 microsecond: u20, // 0-999999 - zone_offset: i32, // in minutes + timezone: TimeZoneOffset, - pub fn 
parse(text: []const u8, default_timezone: ?[]const u8) !Time { + pub fn parse(text: []const u8, timezone_hint: ?TimeZoneOffset) !Time { if (text.len < 8) // "HH:MM:SS" return error.InvalidValue; @@ -334,23 +334,59 @@ pub const Time = struct { } const timezone = if (index == text.len) - default_timezone orelse return error.MissingTimezone + timezone_hint orelse return error.MissingTimezone else - text[index..]; + try TimeZoneOffset.parse(text[index..]); + + return .{ + .hour = @intCast(hour), + .minute = @intCast(minute), + .second = @intCast(second), + .microsecond = microsecond, + .timezone = timezone, + }; + } + fn fractionToMicrosecond(len: usize, value: u64) ?u20 { + const micro: u64 = switch (len) { + 1 => value * 100_000, + 2 => value * 10_000, + 3 => value * 1_000, + 6 => value, + 9 => value / 1_000, + else => return null, + }; + if (micro > 999_999) return null; + return @intCast(micro); + } +}; + +/// A time offset to timezones in minutes. +pub const TimeZoneOffset = enum(i32) { + utc = 0, + + _, + + pub fn from_hhmm(hour: i8, minute: u8) error{InvalidValue}!TimeZoneOffset { + const hour_pos = @abs(hour); + const sign = std.math.sign(hour); + + if (hour < -23 and hour > 23) + return error.InvalidValue; + if (minute >= 60) + return error.InvalidValue; + + return @enumFromInt(@as(i32, sign) * (hour_pos * @as(i32, 60) + minute)); + } + + pub fn parse(timezone: []const u8) error{InvalidValue}!TimeZoneOffset { if (timezone.len != 1 and timezone.len != 6) // "Z" or "±HH:MM" return error.InvalidValue; if (timezone.len == 1) { if (timezone[0] != 'Z') return error.InvalidValue; - return .{ - .hour = @intCast(hour), - .minute = @intCast(minute), - .second = @intCast(second), - .microsecond = microsecond, - .zone_offset = 0, - }; + return .utc; } std.debug.assert(timezone.len == 6); @@ -371,26 +407,7 @@ pub const Time = struct { const zone_total: u16 = @as(u16, zone_hour) * 60 + zone_minute; const offset_minutes: i32 = sign * @as(i32, zone_total); - return .{ - .hour 
= @intCast(hour), - .minute = @intCast(minute), - .second = @intCast(second), - .microsecond = microsecond, - .zone_offset = offset_minutes, - }; - } - - fn fractionToMicrosecond(len: usize, value: u64) ?u20 { - const micro: u64 = switch (len) { - 1 => value * 100_000, - 2 => value * 10_000, - 3 => value * 1_000, - 6 => value, - 9 => value / 1_000, - else => return null, - }; - if (micro > 999_999) return null; - return @intCast(micro); + return @enumFromInt(offset_minutes); } }; @@ -412,10 +429,33 @@ pub const Reference = struct { text: []const u8, - pub fn init(text: []const u8) Reference { + pub fn parse(text: []const u8) !Reference { // TODO: Add correctness validation here (non-empty, allowed characters). return .{ .text = text }; } + + pub fn eql(lhs: Reference, rhs: Reference) bool { + return std.mem.eql(u8, lhs.text, rhs.text); + } +}; + +/// A BCP 47 language tag. +pub const LanguageTag = struct { + //! https://datatracker.ietf.org/doc/html/rfc5646 + + /// The empty language tag means that the language is inherited from the parent. + pub const inherit: LanguageTag = .{ .text = "" }; + + text: []const u8, + + pub fn parse(tag_str: []const u8) !LanguageTag { + // TODO: Implement proper BCP 47 tag verification + return .{ .text = tag_str }; + } + + pub fn eql(lhs: LanguageTag, rhs: LanguageTag) bool { + return std.mem.eql(u8, lhs.text, rhs.text); + } }; /// Parses a HyperDoc document. @@ -495,13 +535,18 @@ pub fn parse( // TODO: Validate document-level semantic constraints (ref resolution). + const doc_lang = header.lang orelse blk: { + // TODO: Emit diagnostic warning for missing document language. 
+ break :blk LanguageTag.inherit; + }; + return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), .content_ids = content_ids, .id_map = id_map, - .lang = header.lang, + .lang = doc_lang, .title = header.title, .version = header.version, .author = header.author, @@ -550,10 +595,10 @@ pub const SemanticAnalyzer = struct { const Header = struct { version: Version, - lang: ?[]const u8, + lang: ?LanguageTag, title: ?[]const u8, author: ?[]const u8, - timezone: ?[]const u8, + timezone: ?TimeZoneOffset, date: ?DateTime, }; @@ -623,8 +668,8 @@ pub const SemanticAnalyzer = struct { title: ?[]const u8 = null, author: ?[]const u8 = null, date: ?DateTime = null, - lang: ?[]const u8 = null, // TODO: Introduce with "LanguageTag" type for all "lang" attributes which performs proper validation - tz: ?[]const u8 = null, + lang: LanguageTag = .inherit, + tz: ?TimeZoneOffset = null, }); if (attrs.version.major != 2) @@ -632,8 +677,6 @@ pub const SemanticAnalyzer = struct { if (attrs.version.minor != 0) return error.UnsupportedVersion; - // TODO: Validate TZ format - return .{ .version = attrs.version, .lang = attrs.lang, @@ -711,7 +754,7 @@ pub const SemanticAnalyzer = struct { fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, }); @@ -731,7 +774,7 @@ pub const SemanticAnalyzer = struct { fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, }); @@ -755,7 +798,7 @@ pub const SemanticAnalyzer = struct { fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: 
?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, first: ?u32 = null, }); @@ -800,7 +843,7 @@ pub const SemanticAnalyzer = struct { fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, alt: ?[]const u8 = null, path: []const u8, @@ -838,7 +881,7 @@ pub const SemanticAnalyzer = struct { fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, syntax: ?[]const u8 = null, }); @@ -854,7 +897,7 @@ pub const SemanticAnalyzer = struct { fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, depth: ?u32 = null, }); @@ -890,7 +933,7 @@ pub const SemanticAnalyzer = struct { fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, }); @@ -906,7 +949,7 @@ pub const SemanticAnalyzer = struct { switch (child_node.type) { .columns => { const row_attrs = try sema.get_attributes(child_node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); const cells = try sema.translate_table_cells(child_node); @@ -933,7 +976,7 @@ pub const SemanticAnalyzer = struct { }, .row => { const row_attrs = try sema.get_attributes(child_node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, title: ?[]const u8 = null, }); @@ -963,7 +1006,7 @@ pub const SemanticAnalyzer = struct { }, 
.group => { const row_attrs = try sema.get_attributes(child_node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); rows.appendAssumeCapacity(.{ @@ -1027,7 +1070,7 @@ pub const SemanticAnalyzer = struct { } const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, colspan: ?u32 = null, }); @@ -1051,7 +1094,7 @@ pub const SemanticAnalyzer = struct { } const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); return .{ @@ -1093,7 +1136,7 @@ pub const SemanticAnalyzer = struct { blocks[0] = .{ .paragraph = .{ .kind = .p, - .lang = null, + .lang = .inherit, .content = spans, }, }; @@ -1218,7 +1261,7 @@ pub const SemanticAnalyzer = struct { }; pub const AttribOverrides = struct { - lang: ?[]const u8 = null, + lang: ?LanguageTag = null, em: ?bool = null, mono: ?bool = null, strike: ?bool = null, @@ -1301,7 +1344,7 @@ pub const SemanticAnalyzer = struct { .@"\\em" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1312,7 +1355,7 @@ pub const SemanticAnalyzer = struct { .@"\\strike" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1323,7 +1366,7 @@ pub const SemanticAnalyzer = struct { .@"\\sub" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1334,7 +1377,7 @@ pub const SemanticAnalyzer = struct { .@"\\sup" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = 
.inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1345,7 +1388,7 @@ pub const SemanticAnalyzer = struct { .@"\\link" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, uri: ?Uri = null, ref: ?Reference = null, }); @@ -1371,7 +1414,7 @@ pub const SemanticAnalyzer = struct { .@"\\mono" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, syntax: []const u8 = "", }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1386,7 +1429,7 @@ pub const SemanticAnalyzer = struct { .@"\\datetime", => blk: { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, fmt: []const u8 = "", }); @@ -1468,7 +1511,7 @@ pub const SemanticAnalyzer = struct { ) !Span.Content { const Format: type = DTValue.Format; - const timezone_hint: ?[]const u8 = if (sema.header) |header| header.timezone else null; + const timezone_hint: ?TimeZoneOffset = if (sema.header) |header| header.timezone else null; const value_or_err: error{ InvalidValue, MissingTimezone }!DTValue = switch (DTValue) { Date => Date.parse(value_str), @@ -1692,7 +1735,12 @@ pub const SemanticAnalyzer = struct { return try sema.cast_value(attrib, @typeInfo(T).optional.child); } - const value = try sema.unescape_string(attrib); + const unstripped_value = try sema.unescape_string(attrib); + + const value = std.mem.trim(u8, unstripped_value, whitespace_chars); + if (value.len != unstripped_value.len) { + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); + } const timezone_hint = if (sema.header) |header| header.timezone @@ -1704,26 +1752,16 @@ pub const SemanticAnalyzer = struct { u32 => std.fmt.parseInt(u32, value, 10) catch return error.InvalidValue, - Reference => { - const stripped = 
std.mem.trim(u8, value, whitespace_chars); - if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); - } - return .init(stripped); - }, + Reference => Reference.parse(value) catch return error.InvalidValue, - Uri => { - const stripped = std.mem.trim(u8, value, whitespace_chars); - if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); - } - return .init(stripped); - }, + Uri => Uri.init(value), Version => Version.parse(value) catch return error.InvalidValue, Date => Date.parse(value) catch return error.InvalidValue, Time => Time.parse(value, timezone_hint) catch return error.InvalidValue, DateTime => DateTime.parse(value, timezone_hint) catch return error.InvalidValue, + LanguageTag => LanguageTag.parse(value) catch return error.InvalidValue, + TimeZoneOffset => TimeZoneOffset.parse(value) catch return error.InvalidValue, else => @compileError("Unsupported attribute type: " ++ @typeName(T)), }; diff --git a/src/render/dump.zig b/src/render/dump.zig index bedf742..94e25da 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -142,9 +142,9 @@ fn writeSpanAttributes(writer: *Writer, span: hdoc.Span) Writer.Error!void { try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); }, } - if (span.attribs.lang.len != 0) { + if (span.attribs.lang.text.len != 0) { try writeAttrSeparator(writer, &first); - try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang)}); + try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang.text)}); } if (span.attribs.syntax.len != 0) { try writeAttrSeparator(writer, &first); @@ -275,7 +275,7 @@ fn dumpOptionalStringListField(writer: *Writer, indent: usize, key: []const u8, } fn dumpListItem(writer: *Writer, indent: usize, item: hdoc.Block.ListItem) Writer.Error!void { - try dumpOptionalStringFieldInline(writer, "lang", item.lang); + try 
dumpOptionalStringFieldInline(writer, "lang", item.lang.text); try dumpBlockListField(writer, indent + indent_step, "content", item.content); } @@ -294,7 +294,7 @@ fn dumpListItemsField(writer: *Writer, indent: usize, key: []const u8, items: [] } fn dumpTableCell(writer: *Writer, indent: usize, cell: hdoc.Block.TableCell) Writer.Error!void { - try dumpOptionalStringFieldInline(writer, "lang", cell.lang); + try dumpOptionalStringFieldInline(writer, "lang", cell.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "colspan", @as(?u32, cell.colspan)); try dumpBlockListField(writer, indent + indent_step, "content", cell.content); } @@ -314,18 +314,18 @@ fn dumpTableCellsField(writer: *Writer, indent: usize, key: []const u8, cells: [ } fn dumpTableColumns(writer: *Writer, indent: usize, columns: hdoc.Block.TableColumns) Writer.Error!void { - try dumpOptionalStringField(writer, indent, "lang", columns.lang); + try dumpOptionalStringField(writer, indent, "lang", columns.lang.text); try dumpTableCellsField(writer, indent, "cells", columns.cells); } fn dumpTableDataRow(writer: *Writer, indent: usize, row: hdoc.Block.TableDataRow) Writer.Error!void { - try dumpOptionalStringFieldWithIndent(writer, indent, "lang", row.lang); + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", row.lang.text); try dumpOptionalStringField(writer, indent, "title", row.title); try dumpTableCellsField(writer, indent, "cells", row.cells); } fn dumpTableGroup(writer: *Writer, indent: usize, group: hdoc.Block.TableGroup) Writer.Error!void { - try dumpOptionalStringFieldWithIndent(writer, indent, "lang", group.lang); + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", group.lang.text); try dumpSpanListField(writer, indent, "content", group.content); } @@ -365,42 +365,42 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err .heading => |heading| { try writeTypeTag(writer, "heading"); try dumpEnumField(writer, indent + indent_step, 
"level", heading.level); - try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", heading.content); }, .paragraph => |paragraph| { try writeTypeTag(writer, "paragraph"); try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); - try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); }, .list => |list| { try writeTypeTag(writer, "list"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); try dumpListItemsField(writer, indent + indent_step, "items", list.items); }, .image => |image| { try writeTypeTag(writer, "image"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang.text); try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); try dumpOptionalStringField(writer, indent + indent_step, "path", image.path); try dumpSpanListField(writer, indent + indent_step, "content", image.content); }, .preformatted => |preformatted| { try writeTypeTag(writer, "preformatted"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang.text); try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); }, .toc => |toc| { try writeTypeTag(writer, 
"toc"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); }, .table => |table| { try writeTypeTag(writer, "table"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang.text); try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); }, } @@ -419,7 +419,7 @@ fn dumpOptionalDateTimeField(writer: *Writer, indent: usize, key: []const u8, va fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { try writer.writeAll("document:\n"); try dumpVersion(writer, indent_step, doc.version); - try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); + try dumpOptionalStringField(writer, indent_step, "lang", doc.lang.text); try dumpOptionalStringField(writer, indent_step, "title", doc.title); try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); diff --git a/src/testsuite.zig b/src/testsuite.zig index bfa3d17..f1fbabd 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -554,26 +554,26 @@ test "Time.parse accepts ISO times with zones" { try std.testing.expectEqual(@as(u6, 30), utc.minute); try std.testing.expectEqual(@as(u6, 46), utc.second); try std.testing.expectEqual(@as(u20, 0), utc.microsecond); - try std.testing.expectEqual(@as(i32, 0), utc.zone_offset); + try std.testing.expectEqual(.utc, utc.timezone); - const utc_hint = try hdoc.Time.parse("22:30:46", "Z"); + const utc_hint = try hdoc.Time.parse("22:30:46", .utc); try std.testing.expectEqual(@as(u5, 22), utc_hint.hour); try std.testing.expectEqual(@as(u6, 30), utc_hint.minute); try std.testing.expectEqual(@as(u6, 46), utc_hint.second); try std.testing.expectEqual(@as(u20, 0), 
utc_hint.microsecond); - try std.testing.expectEqual(@as(i32, 0), utc_hint.zone_offset); + try std.testing.expectEqual(.utc, utc_hint.timezone); - const fractional = try hdoc.Time.parse("22:30:46.136+01:00", null); + const fractional = try hdoc.Time.parse("22:30:46.136-01:00", null); try std.testing.expectEqual(@as(u20, 136_000), fractional.microsecond); - try std.testing.expectEqual(@as(i32, 60), fractional.zone_offset); + try std.testing.expectEqual(try hdoc.TimeZoneOffset.from_hhmm(-1, 0), fractional.timezone); - const fractional_hint = try hdoc.Time.parse("22:30:46.136", "+01:30"); + const fractional_hint = try hdoc.Time.parse("22:30:46.136", try .parse("+01:30")); try std.testing.expectEqual(@as(u20, 136_000), fractional_hint.microsecond); - try std.testing.expectEqual(@as(i32, 90), fractional_hint.zone_offset); + try std.testing.expectEqual(@as(hdoc.TimeZoneOffset, @enumFromInt(90)), fractional_hint.timezone); const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30", null); try std.testing.expectEqual(@as(u20, 136_797), nanos.microsecond); - try std.testing.expectEqual(@as(i32, -330), nanos.zone_offset); + try std.testing.expectEqual(@as(hdoc.TimeZoneOffset, @enumFromInt(-330)), nanos.timezone); try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("21:30:46,1Z", null)); try std.testing.expectError(error.MissingTimezone, hdoc.Time.parse("22:30:46", null)); @@ -592,7 +592,7 @@ test "DateTime.parse accepts ISO date-time" { try std.testing.expectEqual(@as(u6, 31), datetime.time.minute); try std.testing.expectEqual(@as(u6, 50), datetime.time.second); try std.testing.expectEqual(@as(u20, 130_000), datetime.time.microsecond); - try std.testing.expectEqual(@as(i32, 60), datetime.time.zone_offset); + try std.testing.expectEqual(@as(hdoc.TimeZoneOffset, @enumFromInt(60)), datetime.time.timezone); try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z", null)); } From 075821cbf9665fd16012b076eda79477220e1379 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 21:59:16 +0100 Subject: [PATCH 047/116] Vibecoded: Resolves all diagnostic-related TODOs --- src/hyperdoc.zig | 271 +++++++++++++++++++++++++++++++++++---- src/testsuite.zig | 42 +++--- test/accept/workset.hdoc | 2 +- 3 files changed, 271 insertions(+), 44 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f6d4386..f7d810e 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -513,6 +513,8 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; const content_ids = try sema.ids.toOwnedSlice(arena.allocator()); + const id_locations = sema.id_locations.items; + std.debug.assert(id_locations.len == content_ids.len); var id_map: std.StringArrayHashMapUnmanaged(usize) = .empty; errdefer id_map.deinit(arena.allocator()); @@ -521,24 +523,22 @@ pub fn parse( for (content_ids, 0..) |id_or_null, index| { const id = id_or_null orelse continue; + const id_location = id_locations[index] orelse Parser.Location{ .offset = 0, .length = 0 }; const gop = id_map.getOrPutAssumeCapacity(id.text); if (gop.found_existing) { try sema.emit_diagnostic( .{ .duplicate_id = .{ .ref = id.text } }, - .{ .offset = 0, .length = 0 }, // TODO: Figure out proper node location + id_location, ); continue; } gop.value_ptr.* = index; } - // TODO: Validate document-level semantic constraints (ref resolution). + try sema.validate_references(&id_map); - const doc_lang = header.lang orelse blk: { - // TODO: Emit diagnostic warning for missing document language. 
- break :blk LanguageTag.inherit; - }; + const doc_lang = header.lang orelse LanguageTag.inherit; return .{ .arena = arena, @@ -577,16 +577,69 @@ pub fn clean_utf8_input(diagnostics: ?*Diagnostics, raw_plain_text: []const u8) } const source_head = iter.i; + var line: u32 = 1; + var column: u32 = 1; + var saw_invalid = false; + + var prev_was_cr = false; + var prev_cr_location: Diagnostic.Location = undefined; + while (iter.nextCodepointSlice()) |slice| { const codepoint = std.unicode.utf8Decode(slice) catch unreachable; - // TODO: Write codepoint validation which rejects the file if invalid codepoints are detected and - // emits warnings for TAB characters. - // Bare CR is forbidden, just CR LF or LF is allowed. + const location: Diagnostic.Location = .{ .line = line, .column = column }; + + if (prev_was_cr) { + if (codepoint != '\n') { + if (diagnostics) |diag| { + try diag.add(.bare_carriage_return, prev_cr_location); + } + saw_invalid = true; + } + prev_was_cr = false; + if (codepoint == '\n') { + continue; + } + } + + if (codepoint == '\r') { + prev_was_cr = true; + prev_cr_location = location; + line += 1; + column = 1; + continue; + } + + if (codepoint == '\n') { + line += 1; + column = 1; + continue; + } - _ = codepoint; + if (codepoint == '\t') { + if (diagnostics) |diag| { + try diag.add(.tab_character, location); + } + } else if (SemanticAnalyzer.is_illegal_character(codepoint)) { + if (diagnostics) |diag| { + try diag.add(.{ .illegal_character = .{ .codepoint = codepoint } }, location); + } + saw_invalid = true; + } + + column += 1; } + if (prev_was_cr) { + if (diagnostics) |diag| { + try diag.add(.bare_carriage_return, prev_cr_location); + } + saw_invalid = true; + } + + if (saw_invalid) + return error.InvalidUtf8; + return raw_plain_text[source_head..]; } @@ -602,6 +655,11 @@ pub const SemanticAnalyzer = struct { date: ?DateTime, }; + const RefUse = struct { + ref: Reference, + location: Parser.Location, + }; + arena: std.mem.Allocator, diagnostics: 
?*Diagnostics, code: []const u8, @@ -609,6 +667,8 @@ pub const SemanticAnalyzer = struct { header: ?Header = null, blocks: std.ArrayList(Block) = .empty, ids: std.ArrayList(?Reference) = .empty, + id_locations: std.ArrayList(?Parser.Location) = .empty, + pending_refs: std.ArrayList(RefUse) = .empty, fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { switch (node.type) { @@ -654,8 +714,14 @@ pub const SemanticAnalyzer = struct { }, }; + const id_location = if (id != null) + get_attribute_location(node, "id", .value) orelse get_attribute_location(node, "id", .name) orelse node.location + else + null; + try sema.blocks.append(sema.arena, block); try sema.ids.append(sema.arena, id); + try sema.id_locations.append(sema.arena, id_location); }, } } @@ -672,6 +738,11 @@ pub const SemanticAnalyzer = struct { tz: ?TimeZoneOffset = null, }); + const lang_location = get_attribute_location(node, "lang", .name); + if (lang_location == null) { + try sema.emit_diagnostic(.missing_document_language, node.location); + } + if (attrs.version.major != 2) return error.UnsupportedVersion; if (attrs.version.minor != 0) @@ -679,7 +750,7 @@ pub const SemanticAnalyzer = struct { return .{ .version = attrs.version, - .lang = attrs.lang, + .lang = if (lang_location != null) attrs.lang else null, .title = attrs.title, .author = attrs.author, .date = attrs.date, @@ -810,8 +881,10 @@ pub const SemanticAnalyzer = struct { var children: std.ArrayList(Block.ListItem) = .empty; defer children.deinit(sema.arena); + var saw_list_body = false; switch (node.body) { .list => |child_nodes| { + saw_list_body = true; try children.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { const list_item = sema.translate_list_item_node(child_node) catch |err| switch (err) { @@ -830,7 +903,9 @@ pub const SemanticAnalyzer = struct { }, } - // TODO: Validate `children.items.len >= 1` + if (saw_list_body and children.items.len 
== 0) { + try sema.emit_diagnostic(.list_body_required, node.location); + } const list: Block.List = .{ .first = attrs.first orelse if (node.type == .ol) 1 else null, @@ -858,15 +933,13 @@ pub const SemanticAnalyzer = struct { if (path.len == 0) { // The path must be non-empty. - // TODO: Implement better diagnostic message - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "path" } }, get_attribute_location(node, "path", .value).?); + try sema.emit_diagnostic(.{ .empty_attribute = .{ .type = .img, .name = "path" } }, get_attribute_location(node, "path", .value).?); } if (attrs.alt != null and alt.len == 0) { // If alt is present, it must be non-empty, and not fully whitespace. - // TODO: Implement better diagnostic message - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "alt" } }, get_attribute_location(node, "alt", .value).?); + try sema.emit_diagnostic(.{ .empty_attribute = .{ .type = .img, .name = "alt" } }, get_attribute_location(node, "alt", .value).?); } const image: Block.Image = .{ @@ -1039,8 +1112,10 @@ pub const SemanticAnalyzer = struct { var cells: std.ArrayList(Block.TableCell) = .empty; defer cells.deinit(sema.arena); + var saw_list_body = false; switch (node.body) { .list => |child_nodes| { + saw_list_body = true; try cells.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { const cell = sema.translate_table_cell_node(child_node) catch |err| switch (err) { @@ -1058,7 +1133,9 @@ pub const SemanticAnalyzer = struct { }, } - // TODO: Validate `children.items.len >= 1` + if (saw_list_body and cells.items.len == 0) { + try sema.emit_diagnostic(.list_body_required, node.location); + } return try cells.toOwnedSlice(sema.arena); } @@ -1126,7 +1203,7 @@ pub const SemanticAnalyzer = struct { .empty, .string, .verbatim, .text_span => switch (upgrade) { .no_upgrade => { - try sema.emit_diagnostic(.list_body_required, node.location); // TODO: Use better diagnostic + try 
sema.emit_diagnostic(.{ .block_list_required = .{ .type = node.type } }, node.location); return &.{}; }, .text_to_p => { @@ -1406,6 +1483,13 @@ pub const SemanticAnalyzer = struct { break :blk .none; }; + if (props.ref) |ref| { + if (props.uri == null) { + const ref_location = get_attribute_location(node, "ref", .value) orelse node.location; + try sema.pending_refs.append(sema.arena, .{ .ref = ref, .location = ref_location }); + } + } + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .link = link, @@ -1523,16 +1607,12 @@ pub const SemanticAnalyzer = struct { const value: DTValue = if (value_or_err) |value| value else |err| blk: { - std.log.warn("failed to parse {t}: \"{s}\"", .{ body, value_str }); switch (err) { error.InvalidValue => { try sema.emit_diagnostic(.invalid_date_time, node.location); }, error.MissingTimezone => { - std.log.err("emit missing timezone for {}", .{node.location}); - // TODO: Use (timezone_hint != null) to emit diagnostic for hint with - // adding `tz` attribute when all date/time values share a common base. 
- try sema.emit_diagnostic(.invalid_date_time, node.location); + try sema.emit_diagnostic(.missing_timezone, node.location); }, } break :blk std.mem.zeroes(DTValue); @@ -1543,8 +1623,7 @@ pub const SemanticAnalyzer = struct { else if (std.meta.stringToEnum(Format, format_str)) |format| format else blk: { - // TODO: Report error about invalid format - try sema.emit_diagnostic(.invalid_date_time_fmt, get_attribute_location(node, "fmt", .value) orelse node.location); + try sema.emit_diagnostic(.{ .invalid_date_time_fmt = .{ .fmt = format_str } }, get_attribute_location(node, "fmt", .value) orelse node.location); break :blk .default; }; @@ -1767,6 +1846,14 @@ pub const SemanticAnalyzer = struct { }; } + fn validate_references(sema: *SemanticAnalyzer, id_map: *const std.StringArrayHashMapUnmanaged(usize)) !void { + for (sema.pending_refs.items) |ref_use| { + if (!id_map.contains(ref_use.ref.text)) { + try sema.emit_diagnostic(.{ .unknown_id = .{ .ref = ref_use.ref.text } }, ref_use.location); + } + } + } + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { try diag.add(code, sema.make_location(location.offset)); @@ -1967,7 +2054,6 @@ pub const SemanticAnalyzer = struct { return view.bytes; } - // TODO: Also validate the whole document against this rules. fn is_illegal_character(codepoint: u21) bool { // Surrogate codepoints are illegal, we're only ever using UTF-8 which doesn't need them. 
if (std.unicode.isSurrogateCodepoint(codepoint)) @@ -2694,11 +2780,13 @@ pub const Diagnostic = struct { pub const InvalidIdentifierStart = struct { char: u8 }; pub const DuplicateAttribute = struct { name: []const u8 }; pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; + pub const NodeBodyError = struct { type: Parser.NodeType }; pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; pub const InvalidBlockError = struct { name: []const u8 }; pub const InlineUsageError = struct { attribute: InlineAttribute }; pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; + pub const DateTimeFormatError = struct { fmt: []const u8 }; pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; @@ -2716,17 +2804,21 @@ pub const Diagnostic = struct { duplicate_hdoc_header: DuplicateHdocHeader, missing_attribute: NodeAttributeError, invalid_attribute: NodeAttributeError, + empty_attribute: NodeAttributeError, unknown_block_type: InvalidBlockError, invalid_block_type: InvalidBlockError, + block_list_required: NodeBodyError, invalid_inline_combination: InlineCombinationError, link_not_nestable, invalid_link, invalid_date_time, nested_date_time, - invalid_date_time_fmt, + invalid_date_time_fmt: DateTimeFormatError, + missing_timezone, invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, illegal_character: ForbiddenControlCharacter, + bare_carriage_return, illegal_child_item, list_body_required, illegal_id_attribute, @@ -2736,6 +2828,7 @@ pub const Diagnostic = struct { // warnings: document_starts_with_bom, + missing_document_language, unknown_attribute: NodeAttributeError, duplicate_attribute: DuplicateAttribute, empty_verbatim_block, @@ -2745,6 +2838,7 @@ pub const Diagnostic = struct { empty_inline_body, redundant_inline: 
InlineUsageError, attribute_leading_trailing_whitespace, + tab_character, pub fn severity(code: Code) Severity { return switch (code) { @@ -2758,15 +2852,19 @@ pub const Diagnostic = struct { .duplicate_hdoc_header, .invalid_attribute, .missing_attribute, + .empty_attribute, .unknown_block_type, .invalid_block_type, + .block_list_required, .invalid_inline_combination, .link_not_nestable, .invalid_link, .invalid_date_time, .invalid_date_time_fmt, + .missing_timezone, .invalid_string_escape, .illegal_character, + .bare_carriage_return, .invalid_unicode_string_escape, .illegal_child_item, .list_body_required, @@ -2777,6 +2875,7 @@ pub const Diagnostic = struct { .unknown_id, => .@"error", + .missing_document_language, .unknown_attribute, .duplicate_attribute, .empty_verbatim_block, @@ -2786,6 +2885,7 @@ pub const Diagnostic = struct { .empty_inline_body, .redundant_inline, .attribute_leading_trailing_whitespace, + .tab_character, .document_starts_with_bom, => .warning, }; @@ -2817,9 +2917,11 @@ pub const Diagnostic = struct { .missing_attribute => |ctx| try w.print("Missing required attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), .invalid_attribute => |ctx| try w.print("Invalid value for attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .empty_attribute => |ctx| try w.print("Attribute '{s}' for node type '{t}' must be non-empty.", .{ ctx.name, ctx.type }), .unknown_attribute => |ctx| try w.print("Unknown attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), .unknown_block_type => |ctx| try w.print("Unknown block type '{s}'.", .{ctx.name}), .invalid_block_type => |ctx| try w.print("Invalid block type '{s}' in this context.", .{ctx.name}), + .block_list_required => |ctx| try w.print("Node type '{t}' requires a block list body.", .{ctx.type}), .empty_inline_body => try w.writeAll("Inline body is empty."), @@ -2832,7 +2934,9 @@ pub const Diagnostic = struct { .invalid_date_time => try w.writeAll("Invalid date/time value."), - 
.invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' for date/time value."), + .missing_timezone => try w.writeAll("Missing timezone offset; add a 'tz' header attribute or include a timezone in the value."), + + .invalid_date_time_fmt => |ctx| try w.print("Invalid 'fmt' value '{s}' for date/time.", .{ctx.fmt}), .invalid_string_escape => |ctx| if (ctx.codepoint > 0x20 and ctx.codepoint <= 0x7F) try w.print("\\{u} is not a valid escape sequence.", .{ctx.codepoint}) @@ -2842,6 +2946,7 @@ pub const Diagnostic = struct { .invalid_unicode_string_escape => try w.writeAll("Invalid unicode escape sequence"), .illegal_character => |ctx| try w.print("Forbidden control character U+{X:0>4}.", .{ctx.codepoint}), + .bare_carriage_return => try w.writeAll("Bare carriage return (CR) is not allowed; use LF or CRLF."), .list_body_required => try w.writeAll("Node requires list body."), .illegal_child_item => try w.writeAll("Node not allowed here."), @@ -2854,6 +2959,9 @@ pub const Diagnostic = struct { .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), + + .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), + .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), } } }; @@ -2911,6 +3019,113 @@ pub const InlineAttribute = enum { syntax, }; +test "diagnostics for missing language and empty image attributes" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\img(path="", alt=""); + ; + + var doc = try parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_missing_lang = false; + var saw_empty_path = false; + var saw_empty_alt = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_document_language => 
saw_missing_lang = true, + .empty_attribute => |ctx| { + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "path")) { + saw_empty_path = true; + } + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "alt")) { + saw_empty_alt = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_lang); + try std.testing.expect(saw_empty_path); + try std.testing.expect(saw_empty_alt); +} + +test "diagnostics for missing timezone and unknown id" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\p{ \time"12:00:00" \link(ref="missing"){missing} } + ; + + var doc = try parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_missing_timezone = false; + var saw_unknown_id = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_timezone => saw_missing_timezone = true, + .unknown_id => |ctx| { + if (std.mem.eql(u8, ctx.ref, "missing")) { + saw_unknown_id = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_timezone); + try std.testing.expect(saw_unknown_id); +} + +test "diagnostics for tab characters" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\n\tp{ ok }"; + + var doc = try parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_tab = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .tab_character => saw_tab = true, + else => {}, + } + } + + try std.testing.expect(saw_tab); +} + +test "diagnostics for bare carriage return" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\r"; + + try std.testing.expectError(error.InvalidUtf8, parse(std.testing.allocator, source, &diagnostics)); + + var saw_bare_cr = false; + for (diagnostics.items.items) |item| { + switch 
(item.code) { + .bare_carriage_return => saw_bare_cr = true, + else => {}, + } + } + + try std.testing.expect(saw_bare_cr); +} + test "fuzz parser" { const Impl = struct { fn testOne(impl: @This(), data: []const u8) !void { diff --git a/src/testsuite.zig b/src/testsuite.zig index f1fbabd..cf048f0 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -441,10 +441,22 @@ fn expectParseOk(opts: LogDiagOptions, code: []const u8) !void { var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); defer doc.deinit(); - if (diagnostics.has_error() or diagnostics.has_warning()) { + if (diagnostics.has_error()) { logDiagnostics(&diagnostics, opts); return error.TestExpectedNoDiagnostics; } + + for (diagnostics.items.items) |item| { + if (item.code.severity() != .warning) + continue; + switch (item.code) { + .missing_document_language => {}, + else => { + logDiagnostics(&diagnostics, opts); + return error.TestExpectedNoDiagnostics; + }, + } + } } fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { @@ -467,29 +479,29 @@ fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { } test "parsing valid document yields empty diagnostics" { - try expectParseOk(.{}, "hdoc(version=\"2.0\");"); + try expectParseOk(.{}, "hdoc(version=\"2.0\",lang=\"en\");"); } test "diagnostic codes are emitted for expected samples" { - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); *abc", &.{.{ .invalid_identifier_start = .{ .char = '*' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = 
null } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"unterminated", &.{.unterminated_string}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); *abc", &.{.{ .invalid_identifier_start = .{ .char = '*' } }}); try validateDiagnostics(.{}, "hdoc{h1 \"x\"", &.{.unterminated_block_list}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); p {hello", &.{.unterminated_inline_list}); try validateDiagnostics( .{}, - "hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");", + "hdoc(version=\"2.0\",lang=\"en\"); h1(lang=\"a\",lang=\"b\");", &.{ .{ .duplicate_attribute = .{ .name = "lang" } }, .empty_inline_body }, ); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n", &.{.empty_verbatim_block}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n| trailing \n", &.{.trailing_whitespace}); try validateDiagnostics(.{}, "h1 \"Title\"", &.{.missing_hdoc_header}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); - try validateDiagnostics(.{}, 
"hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } test "parser reports unterminated inline lists" { diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index cf91f7b..efb6bf4 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0",); +hdoc(version="2.0", lang="en"); p { In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. 
They listed From 1a61967a3079e2934ecd8f937433f2ac5905bfe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 22:35:03 +0100 Subject: [PATCH 048/116] Moves some tests from hyperdoc.zig to testsuite.zig --- src/hyperdoc.zig | 107 ---------------------------------------------- src/testsuite.zig | 107 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 107 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f7d810e..2bf170c 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -3019,113 +3019,6 @@ pub const InlineAttribute = enum { syntax, }; -test "diagnostics for missing language and empty image attributes" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = - \\hdoc(version="2.0"); - \\img(path="", alt=""); - ; - - var doc = try parse(std.testing.allocator, source, &diagnostics); - defer doc.deinit(); - - var saw_missing_lang = false; - var saw_empty_path = false; - var saw_empty_alt = false; - - for (diagnostics.items.items) |item| { - switch (item.code) { - .missing_document_language => saw_missing_lang = true, - .empty_attribute => |ctx| { - if (ctx.type == .img and std.mem.eql(u8, ctx.name, "path")) { - saw_empty_path = true; - } - if (ctx.type == .img and std.mem.eql(u8, ctx.name, "alt")) { - saw_empty_alt = true; - } - }, - else => {}, - } - } - - try std.testing.expect(saw_missing_lang); - try std.testing.expect(saw_empty_path); - try std.testing.expect(saw_empty_alt); -} - -test "diagnostics for missing timezone and unknown id" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = - \\hdoc(version="2.0"); - \\p{ \time"12:00:00" \link(ref="missing"){missing} } - ; - - var doc = try parse(std.testing.allocator, source, &diagnostics); - defer doc.deinit(); - - var saw_missing_timezone = false; - var saw_unknown_id = false; - - for 
(diagnostics.items.items) |item| { - switch (item.code) { - .missing_timezone => saw_missing_timezone = true, - .unknown_id => |ctx| { - if (std.mem.eql(u8, ctx.ref, "missing")) { - saw_unknown_id = true; - } - }, - else => {}, - } - } - - try std.testing.expect(saw_missing_timezone); - try std.testing.expect(saw_unknown_id); -} - -test "diagnostics for tab characters" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = "hdoc(version=\"2.0\");\n\tp{ ok }"; - - var doc = try parse(std.testing.allocator, source, &diagnostics); - defer doc.deinit(); - - var saw_tab = false; - - for (diagnostics.items.items) |item| { - switch (item.code) { - .tab_character => saw_tab = true, - else => {}, - } - } - - try std.testing.expect(saw_tab); -} - -test "diagnostics for bare carriage return" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = "hdoc(version=\"2.0\");\r"; - - try std.testing.expectError(error.InvalidUtf8, parse(std.testing.allocator, source, &diagnostics)); - - var saw_bare_cr = false; - for (diagnostics.items.items) |item| { - switch (item.code) { - .bare_carriage_return => saw_bare_cr = true, - else => {}, - } - } - - try std.testing.expect(saw_bare_cr); -} - test "fuzz parser" { const Impl = struct { fn testOne(impl: @This(), data: []const u8) !void { diff --git a/src/testsuite.zig b/src/testsuite.zig index cf048f0..aa26072 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -608,3 +608,110 @@ test "DateTime.parse accepts ISO date-time" { try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z", null)); } + +test "diagnostics for missing language and empty image attributes" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\img(path="", alt=""); + ; + + var doc = try hdoc.parse(std.testing.allocator, 
source, &diagnostics); + defer doc.deinit(); + + var saw_missing_lang = false; + var saw_empty_path = false; + var saw_empty_alt = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_document_language => saw_missing_lang = true, + .empty_attribute => |ctx| { + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "path")) { + saw_empty_path = true; + } + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "alt")) { + saw_empty_alt = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_lang); + try std.testing.expect(saw_empty_path); + try std.testing.expect(saw_empty_alt); +} + +test "diagnostics for missing timezone and unknown id" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\p{ \time"12:00:00" \link(ref="missing"){missing} } + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_missing_timezone = false; + var saw_unknown_id = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_timezone => saw_missing_timezone = true, + .unknown_id => |ctx| { + if (std.mem.eql(u8, ctx.ref, "missing")) { + saw_unknown_id = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_timezone); + try std.testing.expect(saw_unknown_id); +} + +test "diagnostics for tab characters" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\n\tp{ ok }"; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_tab = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .tab_character => saw_tab = true, + else => {}, + } + } + + try std.testing.expect(saw_tab); +} + +test "diagnostics for bare carriage return" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); 
+ defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\r"; + + try std.testing.expectError(error.InvalidUtf8, hdoc.parse(std.testing.allocator, source, &diagnostics)); + + var saw_bare_cr = false; + for (diagnostics.items.items) |item| { + switch (item.code) { + .bare_carriage_return => saw_bare_cr = true, + else => {}, + } + } + + try std.testing.expect(saw_bare_cr); +} From 274263709314fe6ecf8603120cf80a23aaabab27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 21:43:01 +0100 Subject: [PATCH 049/116] Improves specification and fixes some minor mismatches in implementation --- docs/specification.md | 58 +++++++++++++++++++++++++++---------------- src/hyperdoc.zig | 22 +++++++++++++--- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 50c255d..3b6420b 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -34,7 +34,7 @@ This section defines the required byte-level encoding and line structure of Hype **Byte Order Mark (BOM):** -- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as U+FEFF at the beginning of the document. +- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as whitespace at the beginning of the document. ### Line endings @@ -466,24 +466,40 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. | -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. 
| -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | A date-time value using the format specified below | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | Defines how the date/time value shall be displayed. | -| `tz` | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | +| Attribute | Type | Required | Allowed Values | Description | +| --------- | --------------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Version | Yes | `2.0` | Describes the version of this HyperDoc document. | +| `lang` | Language Tag | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. 
| +| `title` | String | No | *Any* | Sets the title of the document or the table row. | +| `author` | String | No | *Any* | Sets the author of the document. | +| `date` | Date | No | A date-time value using the format specified below | Sets the authoring date of the document. | +| `id` | Reference | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | Integer | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | String | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | String | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | +| `syntax` | String | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | Integer | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | Integer | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | Reference | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | URI | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | Enum | No | *See element documentation* | Defines how the date/time value shall be displayed. | +| `tz` | Timezone Offset | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | + +NOTE: All attribute values allow leading and trailing whitespace, but it's heavily discouraged and should yield a non-fatal diagnostic or hint in implementations. 
+ +## Attribute Types + +| Type | Example | Syntax | Notes | +| ----------------- | ----------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| `Date` | `2025-12-31` | `\d+-{00..12}-{00..31}` | A date value as specified below. | +| `Enum` | `auto` | `\w+` | | +| `Integer` | `10` | `\d+` | Leading zeroes are allowed, but discouraged. | +| `Language Tag` | `de-DE` | *See [RFC 5646](https://datatracker.ietf.org/doc/html/rfc5646)* | | +| `Reference` | `attribute-types` | *No control characters or whitespace* | | +| `String` | `This image shows a cat and a dog.` | *Any Value* | Any textual value. | +| `Timezone Offset` | `+13:30` | `Z\|[+-]{00..23}:{00..59}` | Expresses the UTC timezone with `Z` or a relative offset in hours + minutes | +| `URI` | `www://example.com` | *See [RFC 3987](https://datatracker.ietf.org/doc/html/rfc3987) | The type actually allows IRIs (unicode-enabled URIs), but is called URI to use the core common term. | +| `Version` | `2.0` | `\d+\.\d+` | Has no semantic meaning yet, and is forced to be `2.0`. All other values are reserved for future use. | ## Semantic Structure @@ -589,7 +605,7 @@ These elements wrap a sequence of blocks that will be rendered for this list ite It also allows a string to be used as it's content directly, this will be equivalent to having a nested paragraph with that strings content: -``` +```hdoc ul { li { p { This is a normal item. } } li "This is a normal item." 
@@ -688,7 +704,7 @@ Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to ### Linking: `link` -**Nesting:** Yes +**Nesting:** No | Attribute | Function | | --------- | -------------------------------------------------------------------------------------------------------- | diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2bf170c..a22376c 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -430,7 +430,20 @@ pub const Reference = struct { text: []const u8, pub fn parse(text: []const u8) !Reference { - // TODO: Add correctness validation here (non-empty, allowed characters). + if (text.len == 0) + return error.InvalidValue; + + var view: std.unicode.Utf8View = try .init(text); + var iter = view.iterator(); + while (iter.nextCodepoint()) |codepoint| { + if (SemanticAnalyzer.is_illegal_character(codepoint)) + return error.InvalidValue; + switch (codepoint) { + '\t', '\r', '\n', ' ' => return error.InvalidValue, + else => {}, + } + } + return .{ .text = text }; } @@ -733,7 +746,7 @@ pub const SemanticAnalyzer = struct { version: Version, title: ?[]const u8 = null, author: ?[]const u8 = null, - date: ?DateTime = null, + date: ?Date = null, lang: LanguageTag = .inherit, tz: ?TimeZoneOffset = null, }); @@ -1321,7 +1334,10 @@ pub const SemanticAnalyzer = struct { const raw_string = try merger.current_span.toOwnedSlice(merger.arena); const string = switch (mode) { - .strip => std.mem.trimRight(u8, raw_string, whitespace_chars), + .strip => switch (merger.whitespace) { + .one_space => std.mem.trimRight(u8, raw_string, whitespace_chars), + .keep_space => raw_string, + }, .keep => raw_string, }; From 734c49969500c0b16e4ce07d9494fd87ccce6da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 22:05:30 +0100 Subject: [PATCH 050/116] Updates spec to have hdoc(date=...) be a DateTime value, so the author can include the timestamp. 
--- docs/specification.md | 4 ++-- src/hyperdoc.zig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 3b6420b..a4ecd99 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -472,7 +472,7 @@ Notes: | `lang` | Language Tag | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | | `title` | String | No | *Any* | Sets the title of the document or the table row. | | `author` | String | No | *Any* | Sets the author of the document. | -| `date` | Date | No | A date-time value using the format specified below | Sets the authoring date of the document. | +| `date` | DateTime | No | A date-time value using the format specified below | Sets the authoring date of the document. | | `id` | Reference | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | | `first` | Integer | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | | `alt` | String | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | @@ -491,7 +491,7 @@ NOTE: All attribute values allow leading and trailing whitespace, but it's heavi | Type | Example | Syntax | Notes | | ----------------- | ----------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | -| `Date` | `2025-12-31` | `\d+-{00..12}-{00..31}` | A date value as specified below. | +| `Date` | `2025-12-31T13:37:42` | *See below* | A date value as specified below. | | `Enum` | `auto` | `\w+` | | | `Integer` | `10` | `\d+` | Leading zeroes are allowed, but discouraged. 
| | `Language Tag` | `de-DE` | *See [RFC 5646](https://datatracker.ietf.org/doc/html/rfc5646)* | | diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index a22376c..1f5225f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -746,7 +746,7 @@ pub const SemanticAnalyzer = struct { version: Version, title: ?[]const u8 = null, author: ?[]const u8 = null, - date: ?Date = null, + date: ?DateTime = null, // TODO: Allow skipping TZ value! lang: LanguageTag = .inherit, tz: ?TimeZoneOffset = null, }); From 949ec597fc03f631d5b48d7aadf7fffaff3c80a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 22:42:15 +0100 Subject: [PATCH 051/116] Prepares HTML5 rendering task. --- src/hyperdoc.zig | 1 + src/main.zig | 4 +- src/render/html5.zig | 101 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/render/html5.zig diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 1f5225f..8afc0fe 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -2,6 +2,7 @@ const std = @import("std"); pub const render = struct { pub const yaml = @import("render/dump.zig").render; + pub const html5 = @import("render/html5.zig").render; }; /// A HyperDoc document. Contains both memory and diff --git a/src/main.zig b/src/main.zig index 5b6cd6f..d7807f3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -70,5 +70,7 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic return error.InvalidFile; } - try hdoc.render.yaml(parsed, output_stream); + // TODO: Make render format selectable via CLI: + // try hdoc.render.yaml(parsed, output_stream); + try hdoc.render.html5(parsed, output_stream); } diff --git a/src/render/html5.zig b/src/render/html5.zig new file mode 100644 index 0000000..f46a3b8 --- /dev/null +++ b/src/render/html5.zig @@ -0,0 +1,101 @@ +//! +//! This file implements a HTML content renderer for HyperDoc. +//! 
+const std = @import("std");
+const hdoc = @import("../hyperdoc.zig");
+
+const Writer = std.Io.Writer;
+const indent_step: usize = 2;
+
+// TODO: Implementation hints:
+// - Use writeStartTag, writeEndTag to construct the document
+// - Use and expand writeEscapedHtml to suit the needs of HyperDoc.
+// - Implement a custom formatter for string attribute values so they have proper escaping applied.
+// - Use semantic HTML. Never use `div` or `span`. If necessary, ask back when you encounter the need for a "custom tag".
+// - For the different paragraph types, use a class="hdoc-${kind}", so for example class="hdoc-warning" to distinguish the special paragraphs from regular

ones. +// - The TOC element must be unrolled manually and should auto-link to the h1,h2,h3 elements. + +/// This function emits the body-only part of a HyperDoc document as +/// valid HTML5. +pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { + _ = doc; + + // TODO: Implement this proper + + try writeStartTag(writer, "p", .regular, .{ + .style = "font-weight: bold", + }); + try writeEscapedHtml(writer, "Hello, World!"); + try writeEndTag(writer, "p"); + try writer.writeAll("\n"); +} + +fn writeEscapedHtml(writer: *Writer, text: []const u8) !void { + var view = std.unicode.Utf8View.init(text) catch @panic("invalid utf-8 passed"); + var iter = view.iterator(); + while (iter.nextCodepointSlice()) |slice| { + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + switch (codepoint) { + '<' => try writer.writeAll("<"), + '>' => try writer.writeAll(">"), + '&' => try writer.writeAll("&"), + '"' => try writer.writeAll("""), + '\'' => try writer.writeAll("'"), + + 0xA0 => try writer.writeAll(" "), + + // TODO: Fill out other required codes. 
+ + else => try writer.writeAll(slice), + } + } +} + +fn writeStartTag(writer: *Writer, tag: []const u8, style: enum { regular, auto_close }, attribs: anytype) !void { + try writer.print("<{s}", .{tag}); + + const Attribs = @TypeOf(attribs); + inline for (@typeInfo(Attribs).@"struct".fields) |fld| { + const value = @field(attribs, fld.name); + + if (fld.type == bool) { + if (value) { + try writer.print(" {s}", .{fld.name}); + } + } else { + try writer.print(" {s}=", .{fld.name}); + + switch (@typeInfo(fld.type)) { + .int, .comptime_int => try writer.print("\"{}\"", .{value}), + .float, .comptime_float => try writer.print("\"{d}\"", .{value}), + + .pointer => |info| if (info.size == .one) { + const child = @typeInfo(info.child); + + if (child != .array) + @compileError("unsupported pointer type " ++ @typeName(fld.type)); + if (child.array.child != u8) + @compileError("unsupported pointer type " ++ @typeName(fld.type)); + + try writer.print("\"{s}\"", .{value}); // TODO: Implement proper HTML escaping! + }, + + else => switch (fld.type) { + bool => unreachable, + + []u8, []const u8 => try writer.print("\"{s}\"", .{value}), // TODO: Implement proper HTML escaping! 
+ + else => @compileError("unsupported tag type " ++ @typeName(fld.type) ++ ", implement support above."), + }, + } + } + } + switch (style) { + .auto_close => try writer.writeAll("/>"), + .regular => try writer.writeAll(">"), + } +} + +fn writeEndTag(writer: *Writer, tag: []const u8) !void { + try writer.print("", .{tag}); +} From 27b658369d3ebf771f546a6e2ac256883b9d1b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 22:46:13 +0100 Subject: [PATCH 052/116] Adds new spec draft --- docs/specification-proper-draft.md | 538 +++++++++++++++++++++++++++++ 1 file changed, 538 insertions(+) create mode 100644 docs/specification-proper-draft.md diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md new file mode 100644 index 0000000..e331d96 --- /dev/null +++ b/docs/specification-proper-draft.md @@ -0,0 +1,538 @@ +# HyperDoc 2.0 + +**Status:** Cleaned-up draft. + +--- + +## 1. Introduction + +HyperDoc 2.0 ("HyperDoc") is a plain-text markup language for hypertext documents. + +Design goals: + +- Deterministic, unambiguous parsing. +- Convenient authoring in plain text. +- Round-trippable formatting (tooling can rewrite without losing information). + +## 2. Conformance and terminology + +The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are to be interpreted as described in RFC 2119. + +A document can be: + +- **Syntactically valid**: conforms to the grammar and additional syntax rules. +- **Semantically valid**: syntactically valid **and** conforms to semantic rules (elements, attributes, escape decoding, IDs/refs, etc.). + +Unless explicitly stated, rules in chapters 3–5 are **syntax** rules; rules in chapters 6–9 are **semantic** rules. + +## 3. Document encoding (byte- and line-level) + +### 3.1 Character encoding + +- A HyperDoc document **MUST** be encoded as UTF-8. +- A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences. 
+ +**UTF-8 BOM** + +- A UTF-8 BOM (`EF BB BF`) **SHOULD NOT** be used. +- Tooling **MAY** accept a BOM and treat it as whitespace at the beginning of the document. + +### 3.2 Line endings + +- Lines **MUST** be terminated by either: + - `` (U+000A), or + - `` (U+000D U+000A). +- A bare `` **MUST NOT** appear except as part of ``. + +A document **MAY** mix `` and `` line endings, but tooling **SHOULD** normalize to a single convention when rewriting documents. + +The canonical line ending emitted by tooling **SHOULD** be ``. + +### 3.3 Control characters in source text + +- A syntactically valid document **MAY** contain `` (U+0009). +- Other Unicode control characters (General Category `Cc`) **MUST NOT** appear in source text, except: + - U+000A (LF) and + - U+000D (CR) as part of a valid line ending. + +A semantic validator **MAY** reject TABs in source text (see §6.2). + +### 3.4 Unicode text + +Apart from the restrictions above, arbitrary Unicode scalar values are allowed. + +### 3.5 Recommendations for directionality (non-normative) + +HyperDoc does not define special handling for right-to-left scripts or bidirectional layout. + +Authors **SHOULD** keep each paragraph primarily in a single writing system/directionality where practical. Tooling **MAY** warn when paragraphs contain bidi override/formatting characters. + +## 4. Syntactic model + +A HyperDoc document is a sequence of **nodes**. + +Each node has: + +- a **name** (identifier), +- an optional **attribute list** `(key="value", ...)`, +- and a mandatory **body**. + +### 4.1 Bodies + +A body is one of: + +- `;` — empty body +- `"..."` — string literal body +- `:` — verbatim body (one or more `|` lines) +- `{ ... }` — list body + +### 4.2 List bodies and modes + +A list body `{ ... }` is parsed in one of two modes: + +- **Block-list mode**: contains nested nodes. +- **Inline-list mode**: contains an inline token stream of text items and inline nodes. 
+ +The grammar is intentionally ambiguous; a deterministic external rule selects a mode (see §5.2). + +### 4.3 Attributes (syntax) + +- Attribute lists are comma-separated `(key="value", ...)`. +- Trailing commas are allowed. +- Attribute values are **string literals** (see §5.5). +- Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §9.1). + +## 5. Grammar and additional syntax rules + +### 5.1 Grammar (EBNF) + +The grammar below is syntax-only. + +```ebnf +document ::= ws , { node , ws } , EOF ; + +node ::= node_name , ws , [ attribute_list , ws ] , body ; + +body ::= ";" | string_literal | verbatim_body | list_body ; + +list_body ::= "{" , list_content , "}" ; +list_content ::= inline_content | block_content ; + +attribute_list ::= "(" , ws , + [ attribute , { ws , "," , ws , attribute } , [ ws , "," ] ] , + ws , ")" ; +attribute ::= attr_key , ws , "=" , ws , string_literal ; + +block_content ::= ws , { node , ws } ; + +inline_content ::= ws , { inline_item , ws } ; +inline_item ::= word | escape_text | inline_node | inline_group ; +inline_group ::= "{" , inline_content , "}" ; + +escape_text ::= "\\" , ( "\\" | "{" | "}" ) ; +inline_node ::= inline_name , ws , [ attribute_list , ws ] , body ; + +(* Identifiers *) +node_name ::= [ "\\" ] , ident_char , { ident_char } ; +inline_name ::= "\\" , ident_char , { ident_char } ; +attr_key ::= key_seg , { "-" , key_seg } ; + +ident_char ::= "A".."Z" | "a".."z" | "0".."9" | "_" ; +key_seg ::= ident_char , { ident_char } ; + +string_literal ::= '"' , { string_unit } , '"' ; + +(* verbatim_body and ws productions match the source spec. *) +``` + +### 5.2 Deterministic list-mode disambiguation + +Before parsing the contents of any `{ ... }` list body, the parser **MUST** choose exactly one list mode. + +The mode is determined solely from the **node name token**: + +1. If the node name begins with `\`, the parser **MUST** choose **Inline-list mode**. +2. 
Else, if the node name is a recognized built-in with a specified list mode, the parser **MUST** choose that mode. +3. Otherwise (unknown node name), the parser **MUST** choose **Inline-list mode**. + +Built-in elements and their list modes are defined in §8.1. + + +### 5.3 Maximal munch + +When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the longest possible sequence of allowed identifier characters. + +### 5.4 Inline-list brace balancing and backslash dispatch + +In Inline-list mode: + +- Literal braces are structural (`inline_group`) and therefore **must be balanced**. +- If braces cannot be balanced, they **must** be written as escape-text tokens `\\{` and `\\}`. +- A backslash in inline content is interpreted as: + - one of the three escape-text tokens `\\\\`, `\\{`, `\\}`, or + - the start of an inline node otherwise. + +### 5.5 String literals (syntax) + +String literals are delimiter-based and do **not** validate escape *meaning*. + +Syntactically invalid inside `"..."`: + +- raw LF or CR +- a backslash immediately followed by a control character (Unicode `Cc`) — **note:** this includes TAB. + +## 6. Escape processing (semantic) + +### 6.1 Scope + +Escape sequences are recognized only in: + +1. String literals (node bodies of the `"..."` form and attribute values). +2. Inline escape-text tokens emitted by the parser: `\\\\`, `\\{`, `\\}`. + +No other syntax performs escape decoding. + +### 6.2 Control character policy (semantic) + +- A semantic validator **MAY** reject TAB (U+0009) in source text. +- Regardless of whether TAB is accepted in source text, TAB **MUST** be rejected in the **resolved value of any string literal** (quoted node bodies and attribute values). This includes TAB that appears literally between quotes and TAB produced via `\u{...}`. 
+ +Apart from LF/CR line terminators and TAB (U+0009) in source text, a semantically valid document **MUST NOT** contain other Unicode control characters (General Category `Cc`). Resolved string-literal values are restricted by the rules above (TAB is always forbidden there). + +### 6.3 Supported escapes in string literals + +A semantic validator/decoder **MUST** accept exactly: + +| Escape | Decodes to | +| ----------- | --------------------------- | +| `\\\\` | U+005C (`\\`) | +| `\\"` | U+0022 (`"`) | +| `\\n` | U+000A (LF) | +| `\\r` | U+000D (CR) | +| `\\u{H...}` | Unicode scalar value U+H... | + +#### 6.3.1 Unicode escape `\\u{H...}` + +- 1–6 hex digits +- value in `0x0..0x10FFFF` +- not in `0xD800..0xDFFF` (surrogates) +- must not decode to a forbidden control character (§6.2) + +### 6.4 Invalid escapes + +A semantic validator/decoder **MUST** reject a string literal that contains: + +- any other escape (`\\t`, `\\xHH`, `\\0`, etc.) +- an unterminated escape (string ends after `\\`) +- malformed `\\u{...}` (missing braces, empty, non-hex, >6 digits) +- out-of-range or surrogate code points +- forbidden control characters produced by `\\u{...}` + +### 6.5 Inline escape-text tokens + +In inline-list bodies, the parser emits three special text tokens: + +- `\\\\` +- `\\{` +- `\\}` + +During semantic text construction, implementations **MAY** decode these to literal `\\`, `{`, `}`. + +Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens. + +## 7. Semantic document model + +### 7.1 Document structure + +- A semantically valid document **MUST** contain exactly one `hdoc` header node. +- The `hdoc` node **MUST** be the first node in the document. +- The `hdoc` node **MUST NOT** appear anywhere else. +- The `hdoc` node **MUST** have an empty body (`;`). + +### 7.2 Inline text construction and normalization + +Many elements (e.g. 
`p`, headings, and inline elements) produce **inline text** for rendering. Inline text is constructed from one of: + +- a string body (`"..."`), +- a verbatim body (`:`), or +- an inline-list body (`{ ... }` in Inline-list mode). + +Semantic processing **MUST** construct inline text as a sequence of **spans**, where each span has: + +- a Unicode string, and +- an attribute set (e.g. emphasis/monospace/link, language overrides, etc.). + +Processing rules: + +1. **Parse → tree:** Parsing preserves `ws` and yields an inline tree (text items, inline nodes, and inline groups). +2. **Tree → spans:** Convert the inline tree into a sequence of spans. +3. **Span merging:** Adjacent spans with identical attribute sets **MUST** be merged. +4. **Whitespace normalization (non-`pre` only):** For elements other than `pre`, the resulting text (across all spans) **MUST** be normalized so that: + - any run of whitespace is collapsed to a single U+0020 SPACE, and + - leading and trailing whitespace is removed. + +The renderer **MUST** see the post-normalization result. + +**String and verbatim bodies:** When a string body or verbatim body is converted into spans, it is treated as a single text source (no nested inline nodes) and then processed using the same rules above, including whitespace normalization for non-`pre` elements. + +### 7.3 Attribute uniqueness + +- Within a node, attribute keys **MUST** be unique (case-sensitive). + +### 7.4 Attribute validity + +- Attributes **MUST** be allowed on the element they appear on. +- Required attributes **MUST** be present. +- Attributes not defined for an element **MUST** be rejected. + +### 7.5 IDs and references + +- `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). +- `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. +- `\link(ref="...")` **MUST** reference an existing `id`. 
+ +### 7.6 Built-in element recognition + +- Built-in element names are defined in §8. +- Unknown elements are syntactically valid (parseable), but semantically invalid. + +## 8. Elements and attributes + +### 8.1 Built-in elements and list mode + +#### 8.1.1 Inline vs block + +- Any element name starting with `\` is an **inline element**. +- Any element name not starting with `\` is a **block element**. + +#### 8.1.2 List-body mode per built-in element + +When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: + +- **Inline-list mode:** `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\date`, `\time`, `\datetime`, ...). +- **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`. + +- Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. +- Text blocks (`p`, headings, etc.) contain inline text streams. +- `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. 
+ +### 8.2 Element catalog (normative) + +#### 8.2.1 `hdoc` (header) + +- **Role:** document header +- **Body:** `;` (empty) +- **Attributes:** + - `version` (required): must be `"2.0"` + - `lang` (optional) + - `title` (optional) + - `author` (optional) + - `date` (optional): datetime lexical format (§9.2.3) + - `tz` (optional): default timezone for time/datetime values (§9.2) + +#### 8.2.2 Headings: `h1`, `h2`, `h3` + +- **Role:** block heading levels 1–3 +- **Body:** inline text (string body or inline-list body) +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +#### 8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` + +- **Role:** paragraph-like block with semantic hint +- **Body:** inline text (string body or inline-list body) +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +#### 8.2.4 Lists: `ul`, `ol` + +- **Body:** block-list containing `li` (at least one) +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +`ol` additional attribute: + +- `first` (optional Integer ≥ 0; default 1): number of the first list item + +#### 8.2.5 List item: `li` + +- **Body:** either + - a block-list of block elements, or + - a single string body, or + - a verbatim body +- **Attributes:** `lang` (optional) + +#### 8.2.6 Figure: `img` + +- **Body:** inline text caption/description (may be empty) +- **Attributes:** + - `path` (required, non-empty) + - `alt` (optional, non-empty recommended) + - `lang` (optional) + - `id` (optional; top-level only) + +#### 8.2.7 Preformatted: `pre` + +- **Body:** either + - verbatim body (`:`) for literal lines (**recommended**), or + - inline text body (string or inline-list); whitespace is preserved (no trimming/collapse) +- **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) + +#### 8.2.8 Table of contents: `toc` + +- **Body:** `;` (empty) +- **Attributes:** `depth` (optional Integer in {1,2,3}; default 3), 
`lang` (optional), `id` (optional; top-level only) + +#### 8.2.9 Tables: `table` + +- **Body:** block-list containing: + - optional `columns`, then + - zero or more `row` and `group` nodes +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +Table layout rules: + +- `columns` defines header labels and the column count. +- Each `row` defines a data row. +- Each `group` acts as a section heading for subsequent rows. +- After applying `td.colspan`, all `row` and `columns` entries **MUST** resolve to the same effective column count. +- If any `row` has a `title` attribute **or** any `group` is present, renderers **MUST** reserve a leading title column. + - In that case, `columns` **SHOULD** include an empty leading header cell. + +#### 8.2.10 `columns` (table header row) + +- **Body:** block-list containing `td` (at least one) +- **Attributes:** `lang` (optional) + +#### 8.2.11 `row` (table data row) + +- **Body:** block-list containing `td` (at least one) +- **Attributes:** `title` (optional string), `lang` (optional) + +#### 8.2.12 `group` (table row group) + +- **Body:** inline text +- **Attributes:** `lang` (optional) + +#### 8.2.13 `td` (table cell) + +- **Body:** either + - a block-list of block elements, or + - a single string body, or + - a verbatim body +- **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) + +### 8.3 Inline elements + +Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). 
+ +#### 8.3.1 `\\em` + +- **Role:** emphasis +- **Body:** inline text +- **Attributes:** `lang` (optional) + +#### 8.3.2 `\\mono` + +- **Role:** monospaced span +- **Body:** inline text +- **Attributes:** `syntax` (optional), `lang` (optional) + +#### 8.3.3 `\\strike`, `\\sub`, `\\sup` + +- **Role:** strike-through / subscript / superscript +- **Body:** inline text +- **Attributes:** `lang` (optional) + +#### 8.3.4 `\\link` + +- **Role:** hyperlink +- **Body:** inline text +- **Attributes:** + - `ref` or `uri` (**exactly one required**) + - `lang` (optional) + +#### 8.3.5 `\\date`, `\\time`, `\\datetime` + +- **Role:** localized date/time rendering +- **Body:** must be plain text, a single string, or verbatim (no nested inline elements) +- **Attributes:** `fmt` (optional; per element), `lang` (optional) + +## 9. Attribute types and date/time formats + +### 9.1 Common attribute types + +- **Version:** must be `2.0`. +- **Integer:** ASCII decimal digits; leading zeros allowed but discouraged. +- **Reference:** non-empty; must not contain whitespace or control characters. +- **Language tag:** BCP 47 (RFC 5646). +- **Timezone offset:** `Z` or `±HH:MM`. +- **URI/IRI:** per RFC 3987. + +### 9.2 Date / time lexical formats (normative) + +These formats are a conservative intersection of RFC 3339 and ISO 8601. + +#### 9.2.1 Date + +`YYYY-MM-DD` + +- `YYYY`: one or more digits +- `MM`: `01`–`12` +- `DD`: `01`–`31` + +#### 9.2.2 Time + +`hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`. + +- `hh`: `00`–`23` +- `mm`: `00`–`59` +- `ss`: `00`–`59` +- optional fraction: `.` followed by 1,2,3,6, or 9 digits +- zone: + - `Z`, or + - `+hh:mm` / `-hh:mm` (two-digit hour/minute) + +If `hdoc(tz="...")` is present, a time value **MAY** omit the zone. + +#### 9.2.3 Datetime + +`YYYY-MM-DD` `T` `hh:mm:ss` (with optional fraction and required zone, unless `hdoc.tz` is present) + +If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. 
This is permitted specifically for `hdoc(date="...")` and for `\datetime` bodies. + +### 9.3 `fmt` values + + +- `\\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` +- `\\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` +- `\\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` + +Defaults when omitted: + +- `\date(fmt=...)`: default `short` +- `\time(fmt=...)`: default `long` +- `\datetime(fmt=...)`: default `short` + +## 10. Non-normative guidance for tooling + +- Formatters should normalize line endings to LF. +- Provide diagnostics for discouraged patterns (leading/trailing whitespace in attribute values, leading zeros, mixed directionality, etc.). +- For typo recovery, treat unknown nodes as inline-list mode (§5.2). + +--- + +## Appendix A. Example + +```hdoc +hdoc(version="2.0", title="Example", lang="en"); + +h1 "Introduction" + +p { This is my first HyperDoc 2.0 document! } + +pre(syntax="c"): +| #include +| int main(int argc, char *argv[]) { +| printf("Hello, World!"); +| return 0; +| } +``` + From 0891fcf80a55a1d891d411806d7e84c265df58f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 23:51:04 +0100 Subject: [PATCH 053/116] Implement automatic table of contents generation --- src/hyperdoc.zig | 130 +++++++++++++++++++++++++++++++++++++++++++- src/render/dump.zig | 69 +++++++++++++++++++++-- src/testsuite.zig | 44 +++++++++++++++ 3 files changed, 238 insertions(+), 5 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 8afc0fe..dd2aa16 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -16,6 +16,7 @@ pub const Document = struct { contents: []Block, content_ids: []?Reference, id_map: std.StringArrayHashMapUnmanaged(usize), // id -> index + toc: TableOfContents, // header information lang: LanguageTag = .inherit, // inherit here means "unset" @@ -24,6 +25,12 @@ pub const Document = struct { date: ?DateTime, timezone: ?TimeZoneOffset, + pub const 
TableOfContents = struct { + level: Block.HeadingLevel, + headings: []usize, + children: []TableOfContents, + }; + pub fn deinit(doc: *Document) void { doc.arena.deinit(); doc.* = undefined; @@ -553,12 +560,16 @@ pub fn parse( try sema.validate_references(&id_map); const doc_lang = header.lang orelse LanguageTag.inherit; + const contents = try sema.blocks.toOwnedSlice(arena.allocator()); + const block_locations = try sema.block_locations.toOwnedSlice(arena.allocator()); + const toc = try sema.build_toc(contents, block_locations); return .{ .arena = arena, - .contents = try sema.blocks.toOwnedSlice(arena.allocator()), + .contents = contents, .content_ids = content_ids, .id_map = id_map, + .toc = toc, .lang = doc_lang, .title = header.title, @@ -674,12 +685,27 @@ pub const SemanticAnalyzer = struct { location: Parser.Location, }; + const TocBuilder = struct { + level: Block.HeadingLevel, + headings: std.ArrayList(usize), + children: std.ArrayList(*TocBuilder), + + fn init(level: Block.HeadingLevel) @This() { + return .{ + .level = level, + .headings = .empty, + .children = .empty, + }; + } + }; + arena: std.mem.Allocator, diagnostics: ?*Diagnostics, code: []const u8, header: ?Header = null, blocks: std.ArrayList(Block) = .empty, + block_locations: std.ArrayList(Parser.Location) = .empty, ids: std.ArrayList(?Reference) = .empty, id_locations: std.ArrayList(?Parser.Location) = .empty, pending_refs: std.ArrayList(RefUse) = .empty, @@ -734,6 +760,7 @@ pub const SemanticAnalyzer = struct { null; try sema.blocks.append(sema.arena, block); + try sema.block_locations.append(sema.arena, node.location); try sema.ids.append(sema.arena, id); try sema.id_locations.append(sema.arena, id_location); }, @@ -1871,6 +1898,102 @@ pub const SemanticAnalyzer = struct { } } + fn build_toc(sema: *SemanticAnalyzer, contents: []const Block, block_locations: []const Parser.Location) !Document.TableOfContents { + std.debug.assert(contents.len == block_locations.len); + + var root_builder = 
TocBuilder.init(.h1); + defer root_builder.headings.deinit(sema.arena); + defer root_builder.children.deinit(sema.arena); + + var stack: std.ArrayList(*TocBuilder) = .empty; + defer stack.deinit(sema.arena); + + try stack.append(sema.arena, &root_builder); + + for (contents, 0..) |block, block_index| { + const heading = switch (block) { + .heading => |value| value, + else => continue, + }; + + const target_depth = heading_level_index(heading.level); + + while (stack.items.len > target_depth) { + _ = stack.pop(); + } + + while (stack.items.len < target_depth) { + const parent = stack.items[stack.items.len - 1]; + try sema.append_toc_entry(&stack, parent, block_index, block_locations, .automatic); + } + + const parent = stack.items[stack.items.len - 1]; + try sema.append_toc_entry(&stack, parent, block_index, block_locations, .real); + } + + return sema.materialize_toc(&root_builder); + } + + fn append_toc_entry( + sema: *SemanticAnalyzer, + stack: *std.ArrayList(*TocBuilder), + parent: *TocBuilder, + heading_index: usize, + block_locations: []const Parser.Location, + kind: enum { automatic, real }, + ) !void { + if (kind == .automatic) { + const heading_location = block_locations[heading_index]; + try sema.emit_diagnostic( + .{ .automatic_heading_insertion = .{ .level = parent.level } }, + heading_location, + ); + } + + try parent.headings.append(sema.arena, heading_index); + + const child_level = next_heading_level(parent.level); + if (child_level == parent.level) { + return; + } + + const child = try sema.arena.create(TocBuilder); + child.* = TocBuilder.init(child_level); + + try parent.children.append(sema.arena, child); + try stack.append(sema.arena, child); + } + + fn materialize_toc(sema: *SemanticAnalyzer, builder: *TocBuilder) !Document.TableOfContents { + var node: Document.TableOfContents = .{ + .level = builder.level, + .headings = try builder.headings.toOwnedSlice(sema.arena), + .children = try sema.arena.alloc(Document.TableOfContents, 
builder.children.items.len), + }; + + for (builder.children.items, 0..) |child_builder, index| { + node.children[index] = try sema.materialize_toc(child_builder); + } + + return node; + } + + fn heading_level_index(level: Block.HeadingLevel) usize { + return switch (level) { + .h1 => 1, + .h2 => 2, + .h3 => 3, + }; + } + + fn next_heading_level(level: Block.HeadingLevel) Block.HeadingLevel { + return switch (level) { + .h1 => .h2, + .h2 => .h3, + .h3 => .h3, + }; + } + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { try diag.add(code, sema.make_location(location.offset)); @@ -2808,6 +2931,7 @@ pub const Diagnostic = struct { pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; pub const ReferenceError = struct { ref: []const u8 }; + pub const AutomaticHeading = struct { level: Block.HeadingLevel }; pub const Code = union(enum) { // errors: @@ -2856,6 +2980,7 @@ pub const Diagnostic = struct { redundant_inline: InlineUsageError, attribute_leading_trailing_whitespace, tab_character, + automatic_heading_insertion: AutomaticHeading, pub fn severity(code: Code) Severity { return switch (code) { @@ -2904,6 +3029,7 @@ pub const Diagnostic = struct { .attribute_leading_trailing_whitespace, .tab_character, .document_starts_with_bom, + .automatic_heading_insertion, => .warning, }; } @@ -2979,6 +3105,8 @@ pub const Diagnostic = struct { .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), + + .automatic_heading_insertion => |ctx| try w.print("Inserted automatic {t} to fill heading level gap.", .{ctx.level}), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index 94e25da..1635df9 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -259,6 +259,19 @@ 
fn dumpBlockListField(writer: *Writer, indent: usize, key: []const u8, blocks: [ } } +fn dumpNumberListField(writer: *Writer, indent: usize, key: []const u8, values: []const usize) Writer.Error!void { + try writeIndent(writer, indent); + if (values.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (values) |value| { + try writeIndent(writer, indent + indent_step); + try writer.print("- {}\n", .{value}); + } +} + fn dumpOptionalStringListField(writer: *Writer, indent: usize, key: []const u8, values: []?hdoc.Reference) Writer.Error!void { try writeIndent(writer, indent); if (values.len == 0) { @@ -360,6 +373,32 @@ fn dumpTableRowsField(writer: *Writer, indent: usize, key: []const u8, rows: []c } } +fn dumpTableOfContentsChildren(writer: *Writer, indent: usize, children: []const hdoc.Document.TableOfContents) Writer.Error!void { + try writeIndent(writer, indent); + if (children.len == 0) { + try writer.writeAll("children: []\n"); + return; + } + try writer.writeAll("children:\n"); + for (children) |child| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpTableOfContentsNode(writer, indent + 2 * indent_step, child); + } +} + +fn dumpTableOfContentsNode(writer: *Writer, indent: usize, toc: hdoc.Document.TableOfContents) Writer.Error!void { + try dumpEnumField(writer, indent, "level", toc.level); + try dumpNumberListField(writer, indent, "headings", toc.headings); + try dumpTableOfContentsChildren(writer, indent, toc.children); +} + +fn dumpTableOfContents(writer: *Writer, indent: usize, toc: hdoc.Document.TableOfContents) Writer.Error!void { + try writeIndent(writer, indent); + try writer.writeAll("toc:\n"); + try dumpTableOfContentsNode(writer, indent + indent_step, toc); +} + fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Error!void { switch (block) { .heading => |heading| { @@ -423,6 +462,7 @@ fn dumpDocument(writer: *Writer, doc: *const 
hdoc.Document) Writer.Error!void { try dumpOptionalStringField(writer, indent_step, "title", doc.title); try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); + try dumpTableOfContents(writer, indent_step, doc.toc); try dumpBlockListField(writer, indent_step, "contents", doc.contents); try dumpOptionalStringListField(writer, indent_step, "ids", doc.content_ids); // TODO: Dump ID map @@ -442,8 +482,10 @@ test "render escapes string values" { .arena = std.heap.ArenaAllocator.init(std.testing.allocator), .version = .{ .major = 1, .minor = 2 }, .contents = &.{}, - .ids = &.{}, - .lang = null, + .content_ids = &.{}, + .id_map = .{}, + .toc = undefined, + .lang = .inherit, .title = title, .author = null, .date = null, @@ -452,6 +494,13 @@ test "render escapes string values" { defer doc.deinit(); const arena_alloc = doc.arena.allocator(); + doc.contents = try arena_alloc.alloc(hdoc.Block, 0); + doc.content_ids = try arena_alloc.alloc(?hdoc.Reference, 0); + doc.toc = .{ + .level = .h1, + .headings = try arena_alloc.alloc(usize, 0), + .children = try arena_alloc.alloc(hdoc.Document.TableOfContents, 0), + }; const spans = try arena_alloc.alloc(hdoc.Span, 1); spans[0] = .{ @@ -463,7 +512,7 @@ test "render escapes string values" { blocks[0] = .{ .heading = .{ .level = .h1, - .lang = null, + .lang = .inherit, .content = spans, }, }; @@ -471,7 +520,19 @@ test "render escapes string values" { const ids = try arena_alloc.alloc(?hdoc.Reference, 1); ids[0] = id_value; - doc.ids = ids; + doc.content_ids = ids; + + const headings = try arena_alloc.alloc(usize, 1); + headings[0] = 0; + + const children = try arena_alloc.alloc(hdoc.Document.TableOfContents, 1); + children[0] = .{ .level = .h2, .headings = &.{}, .children = &.{} }; + + doc.toc = .{ + .level = .h1, + .headings = headings, + .children = children, + }; var buffer = Writer.Allocating.init(std.testing.allocator); defer buffer.deinit(); 
diff --git a/src/testsuite.zig b/src/testsuite.zig index aa26072..dd5ffd1 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -357,6 +357,50 @@ test "parser handles unknown node types" { } } +test "table of contents inserts automatic headings when skipping levels" { + const source = + \\hdoc(version="2.0"); + \\h3{Third} + \\h2{Second} + \\h1{First} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expectEqual(@as(usize, 3), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .missing_document_language)); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[1].code, .{ .automatic_heading_insertion = .{ .level = .h1 } })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); + + const toc = doc.toc; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h1, toc.level); + try std.testing.expectEqualSlices(usize, &.{ 0, 2 }, toc.headings); + try std.testing.expectEqual(@as(usize, 2), toc.children.len); + + const auto_h1 = toc.children[0]; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, auto_h1.level); + try std.testing.expectEqualSlices(usize, &.{ 0, 1 }, auto_h1.headings); + try std.testing.expectEqual(@as(usize, 2), auto_h1.children.len); + + const auto_h2 = auto_h1.children[0]; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, auto_h2.level); + try std.testing.expectEqualSlices(usize, &.{0}, auto_h2.headings); + + const h2_child = auto_h1.children[1]; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, h2_child.level); + try std.testing.expectEqual(@as(usize, 0), h2_child.headings.len); + try std.testing.expectEqual(@as(usize, 0), h2_child.children.len); + + const trailing_h1_child = toc.children[1]; + try 
std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, trailing_h1_child.level); + try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.headings.len); + try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.children.len); +} + fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bool { if (std.meta.activeTag(lhs) != std.meta.activeTag(rhs)) return false; From 6bd87f8eb0eacda91e235c28b399a2eaddc5e01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 14:58:25 +0100 Subject: [PATCH 054/116] Adjust golden fixtures and table rendering semantics --- src/render/html5.zig | 793 ++++++++++++++++++++++++++-- test/html5/AGENTS.md | 7 + test/html5/media_and_toc.hdoc | 21 + test/html5/media_and_toc.html | 23 + test/html5/nesting_and_inlines.hdoc | 21 + test/html5/nesting_and_inlines.html | 23 + test/html5/paragraph_styles.hdoc | 17 + test/html5/paragraph_styles.html | 8 + test/html5/tables.hdoc | 28 + test/html5/tables.html | 58 ++ 10 files changed, 951 insertions(+), 48 deletions(-) create mode 100644 test/html5/AGENTS.md create mode 100644 test/html5/media_and_toc.hdoc create mode 100644 test/html5/media_and_toc.html create mode 100644 test/html5/nesting_and_inlines.hdoc create mode 100644 test/html5/nesting_and_inlines.html create mode 100644 test/html5/paragraph_styles.hdoc create mode 100644 test/html5/paragraph_styles.html create mode 100644 test/html5/tables.hdoc create mode 100644 test/html5/tables.html diff --git a/src/render/html5.zig b/src/render/html5.zig index f46a3b8..275453a 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -5,32 +5,511 @@ const std = @import("std"); const hdoc = @import("../hyperdoc.zig"); const Writer = std.Io.Writer; +const RenderError = Writer.Error || error{NoSpaceLeft}; const indent_step: usize = 2; -// TODO: Implementation hints: -// - Use writeStartTag, writeEndTag to construct the document -// - Use and expand writeEscapedHtml to suite the needs of 
HyperDoc. -// - Implement a custom formatter for string attribute values so they have proper escaping applied. -// - Use semantic HTML. Never use `div` or `span`. If necessary, ask back when you encounter the need for a "custom tag". -// - For the different paragraph types, use a class="hdoc-${kind}", so for example class="hdoc-warning" to distinguish the special paragraphs from regular

ones. -// - The TOC element must be unrolled manually and should auto-link to the h1,h2,h3 elements. +pub fn render(doc: hdoc.Document, writer: *Writer) RenderError!void { + var ctx: RenderContext = .{ .doc = &doc, .writer = writer }; -/// This function emits the body-only part of a HyperDoc document as -/// valid HTML5. -pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { - _ = doc; + for (doc.contents, 0..) |block, index| { + try ctx.renderBlock(block, index, 0); + } +} + +const RenderContext = struct { + doc: *const hdoc.Document, + writer: *Writer, + + fn renderBlock(ctx: *RenderContext, block: hdoc.Block, block_index: ?usize, indent: usize) RenderError!void { + switch (block) { + .heading => |heading| try ctx.renderHeading(heading, block_index, indent), + .paragraph => |paragraph| try ctx.renderParagraph(paragraph, block_index, indent), + .list => |list| try ctx.renderList(list, block_index, indent), + .image => |image| try ctx.renderImage(image, block_index, indent), + .preformatted => |preformatted| try ctx.renderPreformatted(preformatted, block_index, indent), + .toc => |toc| try ctx.renderTableOfContents(toc, block_index, indent), + .table => |table| try ctx.renderTable(table, block_index, indent), + } + } + + fn renderBlocks(ctx: *RenderContext, blocks: []const hdoc.Block, indent: usize) RenderError!void { + for (blocks) |block| { + try ctx.renderBlock(block, null, indent); + } + } + + fn renderHeading(ctx: *RenderContext, heading: hdoc.Block.Heading, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(heading.lang); + + var id_buffer: [32]u8 = undefined; + const id_attr = if (block_index) |idx| + ctx.resolveHeadingId(idx, &id_buffer) + else + null; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, headingTag(heading.level), .regular, .{ + .id = id_attr, + .lang = lang_attr, + }); + try ctx.renderSpans(heading.content); + try writeEndTag(ctx.writer, headingTag(heading.level)); 
+ try ctx.writer.writeByte('\n'); + } + + fn renderParagraph(ctx: *RenderContext, paragraph: hdoc.Block.Paragraph, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(paragraph.lang); + const id_attr = ctx.resolveBlockId(block_index); + + var class_buffer: [32]u8 = undefined; + const class_attr: ?[]const u8 = switch (paragraph.kind) { + .p => null, + else => std.fmt.bufPrint(&class_buffer, "hdoc-{s}", .{@tagName(paragraph.kind)}) catch unreachable, + }; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "p", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .class = class_attr, + }); + try ctx.renderSpans(paragraph.content); + try writeEndTag(ctx.writer, "p"); + try ctx.writer.writeByte('\n'); + } + + fn renderList(ctx: *RenderContext, list: hdoc.Block.List, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(list.lang); + const id_attr = ctx.resolveBlockId(block_index); + + const tag = if (list.first != null) + "ol" + else + "ul"; + + try writeIndent(ctx.writer, indent); + if (std.mem.eql(u8, tag, "ol")) { + try writeStartTag(ctx.writer, tag, .regular, .{ + .id = id_attr, + .lang = lang_attr, + .start = list.first, + }); + } else { + try writeStartTag(ctx.writer, tag, .regular, .{ + .id = id_attr, + .lang = lang_attr, + }); + } + try ctx.writer.writeByte('\n'); + + for (list.items) |item| { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "li", .regular, .{ .lang = langAttribute(item.lang) }); + if (item.content.len > 0) { + try ctx.writer.writeByte('\n'); + try ctx.renderBlocks(item.content, indent + 2 * indent_step); + try writeIndent(ctx.writer, indent + indent_step); + } + try writeEndTag(ctx.writer, "li"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, tag); + try ctx.writer.writeByte('\n'); + } + + fn renderImage(ctx: *RenderContext, image: hdoc.Block.Image, 
block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(image.lang); + const id_attr = ctx.resolveBlockId(block_index); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "figure", .regular, .{ .id = id_attr, .lang = lang_attr }); + try ctx.writer.writeByte('\n'); + + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "img", .auto_close, .{ + .src = image.path, + .alt = image.alt, + }); + try ctx.writer.writeByte('\n'); + + if (image.content.len > 0) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "figcaption", .regular, .{}); + try ctx.renderSpans(image.content); + try writeEndTag(ctx.writer, "figcaption"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "figure"); + try ctx.writer.writeByte('\n'); + } + + fn renderPreformatted(ctx: *RenderContext, preformatted: hdoc.Block.Preformatted, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(preformatted.lang); + const id_attr = ctx.resolveBlockId(block_index); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "pre", .regular, .{ .id = id_attr, .lang = lang_attr }); + const class_attr = "hdoc-code"; + if (preformatted.syntax) |syntax| { + try writeStartTag(ctx.writer, "code", .regular, .{ .class = class_attr, .data_syntax = syntax }); + } else { + try writeStartTag(ctx.writer, "code", .regular, .{ .class = class_attr }); + } + try ctx.renderSpans(preformatted.content); + try writeEndTag(ctx.writer, "code"); + try writeEndTag(ctx.writer, "pre"); + try ctx.writer.writeByte('\n'); + } + + fn renderTableOfContents(ctx: *RenderContext, toc_block: hdoc.Block.TableOfContents, block_index: ?usize, indent: usize) RenderError!void { + const depth = toc_block.depth orelse 3; + const lang_attr = langAttribute(toc_block.lang); + const id_attr = ctx.resolveBlockId(block_index); + + if 
(!tocHasEntries(ctx.doc.toc)) { + return; + } + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "nav", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .aria_label = "Table of contents", + }); + try ctx.writer.writeByte('\n'); + + try ctx.renderTocList(ctx.doc.toc, indent + indent_step, depth, 1); + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "nav"); + try ctx.writer.writeByte('\n'); + } + + fn renderTocList(ctx: *RenderContext, node: hdoc.Document.TableOfContents, indent: usize, max_depth: u8, current_depth: u8) RenderError!void { + if (node.headings.len == 0) { + return; + } + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "ol", .regular, .{}); + try ctx.writer.writeByte('\n'); + + for (node.headings, 0..) |heading_index, child_index| { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "li", .regular, .{}); + + const heading_block = ctx.doc.contents[heading_index].heading; + var id_buffer: [32]u8 = undefined; + const target_id = ctx.resolveHeadingId(heading_index, &id_buffer); + + var href_buffer: [64]u8 = undefined; + const href = std.fmt.bufPrint(&href_buffer, "#{s}", .{target_id}) catch unreachable; + + try writeStartTag(ctx.writer, "a", .regular, .{ .href = href }); + try ctx.renderSpans(heading_block.content); + try writeEndTag(ctx.writer, "a"); + + const child_allowed = current_depth < max_depth and + child_index < node.children.len and + tocHasEntries(node.children[child_index]); + if (child_allowed) { + try ctx.writer.writeByte('\n'); + try ctx.renderTocList(node.children[child_index], indent + 2 * indent_step, max_depth, current_depth + 1); + try writeIndent(ctx.writer, indent + indent_step); + } + + try writeEndTag(ctx.writer, "li"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "ol"); + try ctx.writer.writeByte('\n'); + } + + fn renderTable(ctx: *RenderContext, table: 
hdoc.Block.Table, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(table.lang); + const id_attr = ctx.resolveBlockId(block_index); + + const column_count = inferColumnCount(table.rows) orelse 0; + const has_title_column = tableHasTitleColumn(table.rows); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "table", .regular, .{ .id = id_attr, .lang = lang_attr }); + try ctx.writer.writeByte('\n'); + + const header_index = findHeaderIndex(table.rows); + if (header_index) |index| { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "thead", .regular, .{}); + try ctx.writer.writeByte('\n'); + try ctx.renderHeaderRow(table.rows[index].columns, indent + 2 * indent_step, has_title_column); + try writeIndent(ctx.writer, indent + indent_step); + try writeEndTag(ctx.writer, "thead"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "tbody", .regular, .{}); + try ctx.writer.writeByte('\n'); + + for (table.rows, 0..) 
|row, index| { + if (header_index) |head_idx| { + if (index == head_idx) continue; + } + switch (row) { + .columns => |columns| try ctx.renderHeaderRow(columns, indent + 2 * indent_step, has_title_column), + .row => |data_row| try ctx.renderDataRow(data_row, indent + 2 * indent_step, has_title_column), + .group => |group| try ctx.renderGroupRow(group, indent + 2 * indent_step, column_count, has_title_column), + } + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeEndTag(ctx.writer, "tbody"); + try ctx.writer.writeByte('\n'); + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "table"); + try ctx.writer.writeByte('\n'); + } + + fn renderHeaderRow(ctx: *RenderContext, columns: hdoc.Block.TableColumns, indent: usize, has_title_column: bool) RenderError!void { + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(columns.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title_column) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "th", .regular, .{ .scope = "col" }); + try writeEndTag(ctx.writer, "th"); + try ctx.writer.writeByte('\n'); + } + + for (columns.cells) |cell| { + try ctx.renderTableCellWithScope(cell, indent + indent_step, true, "col"); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "tr"); + try ctx.writer.writeByte('\n'); + } + + fn renderDataRow(ctx: *RenderContext, row: hdoc.Block.TableDataRow, indent: usize, has_title_column: bool) RenderError!void { + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(row.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title_column) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "th", .regular, .{ .scope = "row" }); + if (row.title) |title| { + try writeEscapedHtml(ctx.writer, title); + } + try writeEndTag(ctx.writer, "th"); + try 
ctx.writer.writeByte('\n'); + } + + for (row.cells) |cell| { + try ctx.renderTableCell(cell, indent + indent_step, false); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "tr"); + try ctx.writer.writeByte('\n'); + } + + fn renderGroupRow(ctx: *RenderContext, group: hdoc.Block.TableGroup, indent: usize, column_count: usize, has_title_column: bool) RenderError!void { + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(group.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title_column) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "td", .regular, .{}); + try writeEndTag(ctx.writer, "td"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "th", .regular, .{ + .scope = "colgroup", + .colspan = @as(u32, @intCast(@max(@as(usize, 1), column_count))), + }); + try ctx.renderSpans(group.content); + try writeEndTag(ctx.writer, "th"); + try ctx.writer.writeByte('\n'); + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "tr"); + try ctx.writer.writeByte('\n'); + } + + fn renderTableCell(ctx: *RenderContext, cell: hdoc.Block.TableCell, indent: usize, is_header: bool) RenderError!void { + try ctx.renderTableCellWithScope(cell, indent, is_header, null); + } + + fn renderTableCellWithScope(ctx: *RenderContext, cell: hdoc.Block.TableCell, indent: usize, is_header: bool, scope: ?[]const u8) RenderError!void { + const tag = if (is_header) "th" else "td"; + const lang_attr = langAttribute(cell.lang); + const colspan_attr: ?u32 = if (cell.colspan > 1) cell.colspan else null; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, tag, .regular, .{ .lang = lang_attr, .colspan = colspan_attr, .scope = scope }); + if (cell.content.len > 0) { + try ctx.writer.writeByte('\n'); + try ctx.renderBlocks(cell.content, indent + indent_step); + try 
writeIndent(ctx.writer, indent); + } + try writeEndTag(ctx.writer, tag); + try ctx.writer.writeByte('\n'); + } + + fn resolveHeadingId(ctx: *RenderContext, index: usize, buffer: *[32]u8) []const u8 { + if (index < ctx.doc.content_ids.len) { + if (ctx.doc.content_ids[index]) |value| { + return value.text; + } + } + + return std.fmt.bufPrint(buffer, "hdoc-auto-{d}", .{index}) catch unreachable; + } + + fn resolveBlockId(ctx: *RenderContext, block_index: ?usize) ?[]const u8 { + if (block_index) |idx| { + if (idx < ctx.doc.content_ids.len) { + if (ctx.doc.content_ids[idx]) |value| { + return value.text; + } + } + } + return null; + } + + fn renderSpans(ctx: *RenderContext, spans: []const hdoc.Span) RenderError!void { + for (spans) |span| { + try ctx.renderSpan(span); + } + } + + fn renderSpan(ctx: *RenderContext, span: hdoc.Span) RenderError!void { + var pending_lang = langAttribute(span.attribs.lang); + + var opened: [6][]const u8 = undefined; + var opened_len: usize = 0; + + const link_tag = span.attribs.link != .none; + if (link_tag) { + const href_value = switch (span.attribs.link) { + .none => unreachable, + .ref => |reference| blk: { + var href_buffer: [128]u8 = undefined; + break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{reference.text}) catch unreachable; + }, + .uri => |uri| uri.text, + }; + + try writeStartTag(ctx.writer, "a", .regular, .{ .href = href_value, .lang = takeLang(&pending_lang) }); + opened[opened_len] = "a"; + opened_len += 1; + } + + switch (span.attribs.position) { + .baseline => {}, + .subscript => { + try writeStartTag(ctx.writer, "sub", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] = "sub"; + opened_len += 1; + }, + .superscript => { + try writeStartTag(ctx.writer, "sup", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] = "sup"; + opened_len += 1; + }, + } + + if (span.attribs.strike) { + try writeStartTag(ctx.writer, "s", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] 
= "s"; + opened_len += 1; + } + + if (span.attribs.em) { + try writeStartTag(ctx.writer, "em", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] = "em"; + opened_len += 1; + } + + if (span.attribs.mono) { + const syntax_attr = if (span.attribs.syntax.len > 0) span.attribs.syntax else null; + try writeStartTag(ctx.writer, "code", .regular, .{ .lang = takeLang(&pending_lang), .class = "hdoc-code", .data_syntax = syntax_attr }); + opened[opened_len] = "code"; + opened_len += 1; + } + + const content_lang = takeLang(&pending_lang); + switch (span.content) { + .text => |text| { + if (content_lang) |lang| { + try writeStartTag(ctx.writer, "bdi", .regular, .{ .lang = lang }); + try writeEscapedHtml(ctx.writer, text); + try writeEndTag(ctx.writer, "bdi"); + } else { + try writeEscapedHtml(ctx.writer, text); + } + }, + .date => |date| try ctx.renderDateTimeValue(.date, date, content_lang), + .time => |time| try ctx.renderDateTimeValue(.time, time, content_lang), + .datetime => |datetime| try ctx.renderDateTimeValue(.datetime, datetime, content_lang), + } + + while (opened_len > 0) { + opened_len -= 1; + try writeEndTag(ctx.writer, opened[opened_len]); + } + } + + fn renderDateTimeValue(ctx: *RenderContext, comptime kind: enum { date, time, datetime }, value: anytype, lang_attr: ?[]const u8) RenderError!void { + var datetime_buffer: [128]u8 = undefined; + const datetime_value = switch (kind) { + .date => try formatIsoDate(value.value, &datetime_buffer), + .time => try formatIsoTime(value.value, &datetime_buffer), + .datetime => try formatIsoDateTime(value.value, &datetime_buffer), + }; - // TODO: Implement this proper + var display_buffer: [128]u8 = undefined; + const display_text = switch (kind) { + .date => try formatDateValue(value, &display_buffer), + .time => try formatTimeValue(value, &display_buffer), + .datetime => try formatDateTimeValue(value, &display_buffer), + }; + + try writeStartTag(ctx.writer, "time", .regular, .{ .datetime = 
datetime_value, .lang = lang_attr }); + try ctx.writer.writeAll(display_text); + try writeEndTag(ctx.writer, "time"); + } +}; - try writeStartTag(writer, "p", .regular, .{ - .style = "font-weight: bold", - }); - try writeEscapedHtml(writer, "Hello, World!"); - try writeEndTag(writer, "p"); - try writer.writeAll("\n"); +fn writeIndent(writer: *Writer, indent: usize) RenderError!void { + var i: usize = 0; + while (i < indent) : (i += 1) { + try writer.writeByte(' '); + } } -fn writeEscapedHtml(writer: *Writer, text: []const u8) !void { +fn writeAttributeName(writer: *Writer, name: []const u8) RenderError!void { + for (name) |char| { + if (char == '_') + try writer.writeByte('-') + else + try writer.writeByte(char); + } +} + +fn writeEscapedHtml(writer: *Writer, text: []const u8) RenderError!void { var view = std.unicode.Utf8View.init(text) catch @panic("invalid utf-8 passed"); var iter = view.iterator(); while (iter.nextCodepointSlice()) |slice| { @@ -44,58 +523,276 @@ fn writeEscapedHtml(writer: *Writer, text: []const u8) !void { 0xA0 => try writer.writeAll(" "), - // TODO: Fill out other required codes. 
- else => try writer.writeAll(slice), } } } -fn writeStartTag(writer: *Writer, tag: []const u8, style: enum { regular, auto_close }, attribs: anytype) !void { +fn writeStartTag(writer: *Writer, tag: []const u8, style: enum { regular, auto_close }, attribs: anytype) RenderError!void { try writer.print("<{s}", .{tag}); const Attribs = @TypeOf(attribs); inline for (@typeInfo(Attribs).@"struct".fields) |fld| { const value = @field(attribs, fld.name); + try writeAttribute(writer, fld.name, value); + } + + switch (style) { + .auto_close => try writer.writeAll("/>"), + .regular => try writer.writeAll(">"), + } +} - if (fld.type == bool) { +fn writeAttribute(writer: *Writer, name: []const u8, value: anytype) RenderError!void { + const T = @TypeOf(value); + switch (@typeInfo(T)) { + .bool => { if (value) { - try writer.print(" {s}", .{fld.name}); + try writer.writeByte(' '); + try writeAttributeName(writer, name); } - } else { - try writer.print(" {s}=", .{fld.name}); + }, + .optional => { + if (value) |inner| { + try writeAttribute(writer, name, inner); + } + }, + .int, .comptime_int => try writeNumericAttribute(writer, name, value), + .float, .comptime_float => try writeFloatAttribute(writer, name, value), + .@"enum" => try writeStringAttribute(writer, name, @tagName(value)), + .pointer => |info| switch (info.size) { + .slice => { + if (info.child != u8) @compileError("unsupported pointer type " ++ @typeName(T)); + try writeStringAttribute(writer, name, value); + }, + .one => { + const child = @typeInfo(info.child); + if (child != .array) @compileError("unsupported pointer type " ++ @typeName(T)); + if (child.array.child != u8) @compileError("unsupported pointer type " ++ @typeName(T)); + const slice: []const u8 = value[0..child.array.len]; + try writeStringAttribute(writer, name, slice); + }, + else => @compileError("unsupported pointer type " ++ @typeName(T)), + }, + .array => |info| { + if (info.child != u8) @compileError("unsupported array type " ++ @typeName(T)); + 
const slice: []const u8 = value[0..]; + try writeStringAttribute(writer, name, slice); + }, + else => switch (T) { + []u8, []const u8 => try writeStringAttribute(writer, name, value), + else => @compileError("unsupported tag type " ++ @typeName(T) ++ ", implement support above."), + }, + } +} - switch (@typeInfo(fld.type)) { - .int, .comptime_int => try writer.print("\"{}\"", .{value}), - .float, .comptime_float => try writer.print("\"{d}\"", .{value}), +fn writeStringAttribute(writer: *Writer, name: []const u8, value: []const u8) RenderError!void { + try writer.writeByte(' '); + try writeAttributeName(writer, name); + try writer.writeByte('='); + try writer.writeByte('"'); + try writeEscapedHtml(writer, value); + try writer.writeByte('"'); +} - .pointer => |info| if (info.size == .one) { - const child = @typeInfo(info.child); +fn writeNumericAttribute(writer: *Writer, name: []const u8, value: anytype) RenderError!void { + try writer.writeByte(' '); + try writeAttributeName(writer, name); + try writer.print("=\"{}\"", .{value}); +} - if (child != .array) - @compileError("unsupported pointer type " ++ @typeName(fld.type)); - if (child.array.child != u8) - @compileError("unsupported pointer type " ++ @typeName(fld.type)); +fn writeFloatAttribute(writer: *Writer, name: []const u8, value: anytype) RenderError!void { + try writer.writeByte(' '); + try writeAttributeName(writer, name); + try writer.print("=\"{d}\"", .{value}); +} - try writer.print("\"{s}\"", .{value}); // TODO: Implement proper HTML escaping! - }, +fn writeEndTag(writer: *Writer, tag: []const u8) RenderError!void { + try writer.print("", .{tag}); +} - else => switch (fld.type) { - bool => unreachable, +fn langAttribute(lang: hdoc.LanguageTag) ?[]const u8 { + if (lang.text.len == 0) + return null; + return lang.text; +} - []u8, []const u8 => try writer.print("\"{s}\"", .{value}), // TODO: Implement proper HTML escaping! 
+fn takeLang(lang: *?[]const u8) ?[]const u8 { + if (lang.*) |value| { + lang.* = null; + return value; + } + return null; +} - else => @compileError("unsupported tag type " ++ @typeName(fld.type) ++ ", implement support above."), - }, - } +fn headingTag(level: hdoc.Block.HeadingLevel) []const u8 { + return switch (level) { + .h1 => "h1", + .h2 => "h2", + .h3 => "h3", + }; +} + +fn tocHasEntries(node: hdoc.Document.TableOfContents) bool { + if (node.headings.len > 0) return true; + for (node.children) |child| { + if (tocHasEntries(child)) return true; + } + return false; +} + +fn inferColumnCount(rows: []const hdoc.Block.TableRow) ?usize { + for (rows) |row| { + switch (row) { + .columns => |columns| { + var width: usize = 0; + for (columns.cells) |cell| { + width += cell.colspan; + } + return width; + }, + .row => |data_row| { + var width: usize = 0; + for (data_row.cells) |cell| { + width += cell.colspan; + } + return width; + }, + .group => {}, } } - switch (style) { - .auto_close => try writer.writeAll("/>"), - .regular => try writer.writeAll(">"), + return null; +} + +fn tableHasTitleColumn(rows: []const hdoc.Block.TableRow) bool { + for (rows) |row| { + switch (row) { + .row => |data_row| if (data_row.title != null) return true, + .group => return true, + .columns => {}, + } } + return false; } -fn writeEndTag(writer: *Writer, tag: []const u8) !void { - try writer.print("", .{tag}); +fn findHeaderIndex(rows: []const hdoc.Block.TableRow) ?usize { + for (rows, 0..) 
|row, index| { + if (row == .columns) return index; + } + return null; +} + +fn formatIsoDate(value: hdoc.Date, buffer: []u8) RenderError![]const u8 { + return std.fmt.bufPrint(buffer, "{d:0>4}-{d:0>2}-{d:0>2}", .{ value.year, value.month, value.day }) catch unreachable; +} + +fn writeTimeZone(writer: anytype, timezone: hdoc.TimeZoneOffset) RenderError!void { + const minutes = @intFromEnum(timezone); + if (minutes == 0) { + try writer.writeByte('Z'); + return; + } + + const sign: u8 = if (minutes < 0) '-' else '+'; + const abs_minutes: u32 = @intCast(@abs(minutes)); + const hour: u32 = abs_minutes / 60; + const minute: u32 = abs_minutes % 60; + + try writer.print("{c}{d:0>2}:{d:0>2}", .{ sign, hour, minute }); +} + +fn formatIsoTime(value: hdoc.Time, buffer: []u8) RenderError![]const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.hour, value.minute, value.second }); + if (value.microsecond > 0) { + try writer.print(".{d:0>6}", .{value.microsecond}); + } + try writeTimeZone(writer, value.timezone); + + return stream.getWritten(); +} + +fn formatIsoDateTime(value: hdoc.DateTime, buffer: []u8) RenderError![]const u8 { + var date_buffer: [32]u8 = undefined; + var time_buffer: [64]u8 = undefined; + + const date_text = try formatIsoDate(value.date, &date_buffer); + const time_text = try formatIsoTime(value.time, &time_buffer); + + return std.fmt.bufPrint(buffer, "{s}T{s}", .{ date_text, time_text }) catch unreachable; +} + +fn formatDateValue(value: hdoc.FormattedDateTime(hdoc.Date), buffer: []u8) RenderError![]const u8 { + return switch (value.format) { + .year => std.fmt.bufPrint(buffer, "{d}", .{value.value.year}) catch unreachable, + .month => std.fmt.bufPrint(buffer, "{d:0>4}-{d:0>2}", .{ value.value.year, value.value.month }) catch unreachable, + .day => std.fmt.bufPrint(buffer, "{d:0>2}", .{value.value.day}) catch unreachable, + .weekday => std.fmt.bufPrint(buffer, 
"{s}", .{weekdayName(value.value)}) catch unreachable, + .short, .long, .relative, .iso => formatIsoDate(value.value, buffer), + }; +} + +fn formatTimeValue(value: hdoc.FormattedDateTime(hdoc.Time), buffer: []u8) RenderError![]const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + switch (value.format) { + .short, .rough => try writer.print("{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute }), + .long, .relative => { + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute, value.value.second }); + if (value.value.microsecond > 0) { + try writer.print(".{d:0>6}", .{value.value.microsecond}); + } + }, + .iso => try writer.writeAll(try formatIsoTime(value.value, buffer)), + } + + if (value.format != .iso) { + try writer.writeByte(' '); + try writeTimeZone(writer, value.value.timezone); + } + + return stream.getWritten(); +} + +fn formatDateTimeValue(value: hdoc.FormattedDateTime(hdoc.DateTime), buffer: []u8) RenderError![]const u8 { + var date_buffer: [32]u8 = undefined; + var time_buffer: [64]u8 = undefined; + + const date_text = try formatIsoDate(value.value.date, &date_buffer); + + return switch (value.format) { + .short => std.fmt.bufPrint(buffer, "{s} {s}", .{ + date_text, + try formatTimeValue(.{ .format = .short, .value = value.value.time }, &time_buffer), + }) catch unreachable, + .long, .relative => std.fmt.bufPrint(buffer, "{s} {s}", .{ + date_text, + try formatTimeValue(.{ .format = .long, .value = value.value.time }, &time_buffer), + }) catch unreachable, + .iso => formatIsoDateTime(value.value, buffer), + }; +} + +fn weekdayName(date: hdoc.Date) []const u8 { + const y = if (date.month < 3) date.year - 1 else date.year; + const m = if (date.month < 3) date.month + 12 else date.month; + const k: i32 = @mod(y, 100); + const j: i32 = @divTrunc(y, 100); + + const day_component: i32 = @intCast(date.day); + const z: i32 = day_component + @divTrunc(13 * (m + 1), 5) + k + 
@divTrunc(k, 4) + @divTrunc(j, 4) + 5 * j; + const h: i32 = @mod(z, 7); + return switch (h) { + 0 => "Saturday", + 1 => "Sunday", + 2 => "Monday", + 3 => "Tuesday", + 4 => "Wednesday", + 5 => "Thursday", + 6 => "Friday", + else => "", + }; } diff --git a/test/html5/AGENTS.md b/test/html5/AGENTS.md new file mode 100644 index 0000000..b79d9ed --- /dev/null +++ b/test/html5/AGENTS.md @@ -0,0 +1,7 @@ +# AGENTS + +These files are HTML5 renderer golden tests. + +- Each `.hdoc` example here is paired with a `.html` file rendered by `./zig-out/bin/hyperdoc`. +- When changing the HTML5 renderer, update the corresponding `.html` outputs to match the new behavior. +- Keep scenarios focused: each example should target specific constructs (paragraph styles, nesting, tables, media/toc, etc.). diff --git a/test/html5/media_and_toc.hdoc b/test/html5/media_and_toc.hdoc new file mode 100644 index 0000000..a4f0cf4 --- /dev/null +++ b/test/html5/media_and_toc.hdoc @@ -0,0 +1,21 @@ +hdoc(version="2.0", title="Media and TOC", lang="en", tz="+00:00"); + +h1(id="intro") "Media and TOC" + +toc(depth="3"); + +h2(id="code") "Preformatted" + +pre(syntax="python") { print("hello world") } + +h2(id="figure") "Figure" + +img(id="fig-code",path="./example.png",alt="Example figure") { Figure caption text. } + +h2(id="dates") "Dates and Times" + +p { Today is \date(fmt="iso"){2024-03-01}. } + +p { The meeting is at \time(fmt="long"){14:30:45+00:00}. } + +p { Release happens on \datetime(fmt="short"){2024-04-15T08:00:00+00:00}. } diff --git a/test/html5/media_and_toc.html b/test/html5/media_and_toc.html new file mode 100644 index 0000000..563874e --- /dev/null +++ b/test/html5/media_and_toc.html @@ -0,0 +1,23 @@ +

Media and TOC

+ +

Preformatted

+
 print("hello world") 
+

Figure

+
+ Example figure +
Figure caption text.
+
+

Dates and Times

+

Today is .

+

The meeting is at .

+

Release happens on .

diff --git a/test/html5/nesting_and_inlines.hdoc b/test/html5/nesting_and_inlines.hdoc new file mode 100644 index 0000000..f1bd8a2 --- /dev/null +++ b/test/html5/nesting_and_inlines.hdoc @@ -0,0 +1,21 @@ +hdoc(version="2.0", title="Nesting and Inlines", lang="en"); + +h1(id="top") "Nesting and Inline Styling" + +p "This document exercises inline formatting and nested lists." + +p { We can mix \em{emphasis}, \strike{strike}, \mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } + +p { Links point to \link(ref="top"){local anchors} or \link(uri="https://example.com"){external sites}. } + +ul { + li { p "Top-level item one" } + li { + p "Top-level item two with nested list" + ol(first="1") { + li "Nested ordered item A" + li "Nested ordered item B" + } + } + li { p "Top-level item three" } +} diff --git a/test/html5/nesting_and_inlines.html b/test/html5/nesting_and_inlines.html new file mode 100644 index 0000000..5db4d36 --- /dev/null +++ b/test/html5/nesting_and_inlines.html @@ -0,0 +1,23 @@ +

Nesting and Inline Styling

+

This document exercises inline formatting and nested lists.

+

We can mix emphasis, strike, monospacetext. Superscript x2and subscript x2also appear.

+

Links point to local anchorsor external sites.

+
    +
  • +

    Top-level item one

    +
  • +
  • +

    Top-level item two with nested list

    +
      +
    1. +

      Nested ordered item A

      +
    2. +
    3. +

      Nested ordered item B

      +
    4. +
    +
  • +
  • +

    Top-level item three

    +
  • +
diff --git a/test/html5/paragraph_styles.hdoc b/test/html5/paragraph_styles.hdoc new file mode 100644 index 0000000..f5b3a96 --- /dev/null +++ b/test/html5/paragraph_styles.hdoc @@ -0,0 +1,17 @@ +hdoc(version="2.0", title="Paragraph Styles", lang="en"); + +h1 "Paragraph Styles" + +p "A standard paragraph introducing the styles below." + +note "Notes provide informational context without urgency." + +warning "Warnings highlight potential issues to watch for." + +danger "Danger blocks signal critical problems." + +tip "Tips offer helpful hints for readers." + +quote "Quoted material sits in its own paragraph style." + +spoiler "This is a spoiler; renderers may hide or blur this content." diff --git a/test/html5/paragraph_styles.html b/test/html5/paragraph_styles.html new file mode 100644 index 0000000..82e8555 --- /dev/null +++ b/test/html5/paragraph_styles.html @@ -0,0 +1,8 @@ +

Paragraph Styles

+

A standard paragraph introducing the styles below.

+

Notes provide informational context without urgency.

+

Warnings highlight potential issues to watch for.

+

Danger blocks signal critical problems.

+

Tips offer helpful hints for readers.

+

Quoted material sits in its own paragraph style.

+

This is a spoiler; renderers may hide or blur this content.

diff --git a/test/html5/tables.hdoc b/test/html5/tables.hdoc new file mode 100644 index 0000000..22e728f --- /dev/null +++ b/test/html5/tables.hdoc @@ -0,0 +1,28 @@ +hdoc(version="2.0", title="Tables", lang="en"); + +h1 "Table Coverage" + +p "This file covers header rows, data rows with titles, groups, and colspans." + +table { + columns { + td { p "Column A" } + td { p "Column B" } + td { p "Column C" } + } + group { "Section One" } + row(title="Row 1") { + td { p "A1" } + td(colspan="2") { p "B1-C1" } + } + row(title="Row 2") { + td(colspan="2") { p "A2-B2" } + td { p "C2" } + } + group { "Section Two" } + row(title="Row 3") { + td { p "A3" } + td { p "B3" } + td { p "C3" } + } +} diff --git a/test/html5/tables.html b/test/html5/tables.html new file mode 100644 index 0000000..bfce614 --- /dev/null +++ b/test/html5/tables.html @@ -0,0 +1,58 @@ +

Table Coverage

+

This file covers header rows, data rows with titles, groups, and colspans.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Column A

+
+

Column B

+
+

Column C

+
"Section One"
Row 1 +

A1

+
+

B1-C1

+
Row 2 +

A2-B2

+
+

C2

+
"Section Two"
Row 3 +

A3

+
+

B3

+
+

C3

+
From 71315b170451fc61991b996aa29ec8447823d8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 14:25:51 +0100 Subject: [PATCH 055/116] Overhauls docs/specification-proper-draft.md with an additional AGENTS.md and TODO.md to keep an overview over what is still open --- docs/AGENTS.md | 17 +++ docs/TODO.md | 10 ++ docs/specification-proper-draft.md | 200 +++++++++++++++++++++++++---- 3 files changed, 199 insertions(+), 28 deletions(-) create mode 100644 docs/AGENTS.md create mode 100644 docs/TODO.md diff --git a/docs/AGENTS.md b/docs/AGENTS.md new file mode 100644 index 0000000..238ae4e --- /dev/null +++ b/docs/AGENTS.md @@ -0,0 +1,17 @@ +# Specification Editing + +## General + +- `specification.md` is the current "status quo" specification. Do not edit unless explicitly asked. +- `docs/specification-proper-draft.md` is the new "shiny" specification. This is the one you should edit if only asked about the "specification". + - This file contains a chapter `0. Chapter Status`. This chapter marks each other chapter of the file as FROZEN, DONE, DRAFT or MISSING + - If a chapter is marked FROZEN, you are not permitted to change anything in it. + - If a chapter is marked DONE, you are only permitted to perform language changes, but not semantic changes. + - If a chapter is marked DRAFT, you are permitted to change its semantic meaning. + - If a chapter is marked MISSING, the chapter does not yet exist and shall be added eventually. You are permitted to do so. + - A block quote starting with `> TODO:` notes some tasks that shall be done. These lines can be removed if, and only if the task was fully completed. + +## Formatting + +- Do not use any dashes except for `-`. Do NOT use En-Dashes (`–`) or Em-Dashes (`—`). +- Stick to ASCII text as well as possible. If you require symbols from the unicode plane, use them, but inform the user about it. 
diff --git a/docs/TODO.md b/docs/TODO.md new file mode 100644 index 0000000..e55f3c7 --- /dev/null +++ b/docs/TODO.md @@ -0,0 +1,10 @@ +# Specification TODOs + +- Introduction of `\ref` vs. `\link` + - +- Introduction of `\footnote{body}`, `\footnote(id="foo"){body}` and `\footnote(ref="");` + - `id` namespace is separate from toplevel `id` namespace. requires better naming + - Equal to `\footnote{}` introduce a `\cite{}` inline for citations or use \footnote{style="citation") {} +- Assign semantics to node types, paragraph kinds, ... +- Specify "syntax" proper +- Add links to RFCs where possible \ No newline at end of file diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index e331d96..6966c2b 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -2,6 +2,75 @@ **Status:** Cleaned-up draft. +## 0. Chapter Status + +Chapters that are marked FROZEN must not be changed by AI agents. + +FROZEN: No changes allowed. +DONE: Semantics are correct, language might need improvement. +DRAFT: Current semantics are not finalized yet. +MISSING: Chapter needs to be added still. + +- "1. Introduction": DONE +- "2. Conformance and terminology": FROZEN +- "3. Document encoding (byte- and line-level)": DONE +- "4. Syntactic model": DONE +- "5. Grammar and additional syntax rules" + - "5.1 Grammar (EBNF)": DRAFT + - "5.2 Deterministic list-mode disambiguation": DONE + - "5.3 Maximal munch": FROZEN + - "5.4 Inline-list brace balancing and backslash dispatch": DONE + - "5.5 String literals (syntax)": DRAFT +- "6. Escape processing (semantic)": DRAFT + - "6.1 Scope": DRAFT + - "6.2 Control character policy (semantic)": DRAFT + - "6.3 Supported escapes in string literals": DRAFT + - "6.3.1 Unicode escape `\\u{H...}`": DRAFT + - "6.4 Invalid escapes": DRAFT + - "6.5 Inline escape-text tokens": DRAFT +- "7. 
Semantic document model": DRAFT + - "7.1 Document structure": DONE + - "7.2 Inline text construction and normalization": DONE + - "7.3 Attribute uniqueness": DONE + - "7.4 Attribute validity": DONE + - "7.5 IDs and references": DRAFT + - "7.6 Built-in element recognition": DONE +- "8. Elements and attributes" + - "8.1 Built-in elements and list mode" + - "8.1.1 Inline vs block": DONE + - "8.1.2 List-body mode per built-in element": TODO + - "8.2 Element catalog (normative)": DRAFT + - "8.2.1 `hdoc` (header)": DONE + - "8.2.2 Headings: `h1`, `h2`, `h3`": DRAFT + - "8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT + - "8.2.4 Lists: `ul`, `ol`": DRAFT + - "8.2.5 List item: `li`": DRAFT + - "8.2.6 Figure: `img`": DRAFT + - "8.2.7 Preformatted: `pre`": DRAFT + - "8.2.8 Table of contents: `toc`": DRAFT + - "8.2.9 Tables: `table`": DRAFT + - "8.2.10 `columns` (table header row)": DRAFT + - "8.2.11 `row` (table data row)": DRAFT + - "8.2.12 `group` (table row group)": DRAFT + - "8.2.13 `td` (table cell)": DRAFT + - "8.3 Inline elements" + - "8.3.1 `\\em`": DRAFT + - "8.3.2 `\\mono`": DRAFT + - "8.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "8.3.4 `\\link`": DRAFT + - "8.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT +- "9. Attribute types and date/time formats": DRAFT + - "9.1 Common attribute types": DRAFT + - "9.2 Date / time lexical formats (normative)": DRAFT + - "9.2.1 Date": DRAFT + - "9.2.2 Time": DRAFT + - "9.2.3 Datetime": DRAFT + - "9.3 `fmt` values": DRAFT +- "10. Non-normative guidance for tooling": DRAFT +- "Appendix A. Example": DRAFT +- "Appendix B. Element Overview": MISSING +- "Appendix C. Attribute Overview": MISSING + --- ## 1. Introduction @@ -23,7 +92,7 @@ A document can be: - **Syntactically valid**: conforms to the grammar and additional syntax rules. - **Semantically valid**: syntactically valid **and** conforms to semantic rules (elements, attributes, escape decoding, IDs/refs, etc.). 
-Unless explicitly stated, rules in chapters 3–5 are **syntax** rules; rules in chapters 6–9 are **semantic** rules. +Unless explicitly stated, rules in chapters 3-5 are **syntax** rules; rules in chapters 6-9 are **semantic** rules. ## 3. Document encoding (byte- and line-level) @@ -32,7 +101,7 @@ Unless explicitly stated, rules in chapters 3–5 are **syntax** rules; rules in - A HyperDoc document **MUST** be encoded as UTF-8. - A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences. -**UTF-8 BOM** +#### UTF-8 BOM - A UTF-8 BOM (`EF BB BF`) **SHOULD NOT** be used. - Tooling **MAY** accept a BOM and treat it as whitespace at the beginning of the document. @@ -54,6 +123,7 @@ The canonical line ending emitted by tooling **SHOULD** be ``. - Other Unicode control characters (General Category `Cc`) **MUST NOT** appear in source text, except: - U+000A (LF) and - U+000D (CR) as part of a valid line ending. +- Surrogate characters (Plane "unassigned", U+D800…U+DFFF) **MUST NOT** appear in the source text. A conforming parser **MUST** reject them. A semantic validator **MAY** reject TABs in source text (see §6.2). @@ -81,10 +151,10 @@ Each node has: A body is one of: -- `;` — empty body -- `"..."` — string literal body -- `:` — verbatim body (one or more `|` lines) -- `{ ... }` — list body +- `;` - empty body +- `"..."` - string literal body +- `:` - verbatim body (one or more `|` lines) +- `{ ... }` - list body ### 4.2 List bodies and modes @@ -157,7 +227,6 @@ The mode is determined solely from the **node name token**: Built-in elements and their list modes are defined in §8.1. - ### 5.3 Maximal munch When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the longest possible sequence of allowed identifier characters. @@ -167,22 +236,46 @@ When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consum In Inline-list mode: - Literal braces are structural (`inline_group`) and therefore **must be balanced**. 
-- If braces cannot be balanced, they **must** be written as escape-text tokens `\\{` and `\\}`. +- If braces cannot be balanced, they **must** be written as escape-text tokens `\{` and `\}`. - A backslash in inline content is interpreted as: - - one of the three escape-text tokens `\\\\`, `\\{`, `\\}`, or + - one of the three escape-text tokens `\\`, `\{`, `\}`, or - the start of an inline node otherwise. ### 5.5 String literals (syntax) +> TODO: This chapter requires improved wording. String literals are basically parsed by: +> +> ```pseudo +> assert next() == '"' +> while(not eof()): +> char = next() +> if char == '\\': +> _ = next() # skip character +> elif char == '"': +> break # end of string literal +> elif is_control(char): # includes CR, LF, TAB and all other control characters +> abort() # invalid character +> ``` + String literals are delimiter-based and do **not** validate escape *meaning*. Syntactically invalid inside `"..."`: - raw LF or CR -- a backslash immediately followed by a control character (Unicode `Cc`) — **note:** this includes TAB. +- a backslash in the last position of the string (`\"` never terminates the string literal) +- a control character (Unicode `Cc`) - **note:** this includes TAB. ## 6. Escape processing (semantic) +> TODO: This chapter must be split into two chapters: +> +> - "Inline Text Escape Processing" +> - "String Literal Escape Processing" +> +> This includes renumbering all chapters and their references for the markdown spec. +> +> Chapter "6.1 Scope" will be removed then. + ### 6.1 Scope Escape sequences are recognized only in: @@ -194,6 +287,8 @@ No other syntax performs escape decoding. ### 6.2 Control character policy (semantic) +> TODO: The same rules as in §3 are applied, except that `TAB` is also additionally forbidden after escaping. + - A semantic validator **MAY** reject TAB (U+0009) in source text. 
- Regardless of whether TAB is accepted in source text, TAB **MUST** be rejected in the **resolved value of any string literal** (quoted node bodies and attribute values). This includes TAB that appears literally between quotes and TAB produced via `\u{...}`. @@ -213,7 +308,7 @@ A semantic validator/decoder **MUST** accept exactly: #### 6.3.1 Unicode escape `\\u{H...}` -- 1–6 hex digits +- 1-6 hex digits - value in `0x0..0x10FFFF` - not in `0xD800..0xDFFF` (surrogates) - must not decode to a forbidden control character (§6.2) @@ -222,21 +317,25 @@ A semantic validator/decoder **MUST** accept exactly: A semantic validator/decoder **MUST** reject a string literal that contains: -- any other escape (`\\t`, `\\xHH`, `\\0`, etc.) -- an unterminated escape (string ends after `\\`) -- malformed `\\u{...}` (missing braces, empty, non-hex, >6 digits) +- any other escape (`\t`, `\\xHH`, `\0`, etc.) +- an unterminated escape (string ends after `\`) +- malformed `\u{...}` (missing braces, empty, non-hex, >6 digits) - out-of-range or surrogate code points -- forbidden control characters produced by `\\u{...}` +- forbidden control characters produced by `\u{...}` ### 6.5 Inline escape-text tokens +> TODO: Move to chapter "Inline Text Escape Processing" + In inline-list bodies, the parser emits three special text tokens: -- `\\\\` -- `\\{` -- `\\}` +- `\\` +- `\{` +- `\}` + +During semantic text construction, implementations **MUST** decode these to literal `\`, `{`, `}`. -During semantic text construction, implementations **MAY** decode these to literal `\\`, `{`, `}`. +> TODO: The following sentence is unclear. The intent is: "When parsing, tooling should not perform ad-hoc conversion of escape sequences, so the output can be rendered again as-is. The escape sequences must always be display their escaped variant." Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens. 
@@ -287,6 +386,8 @@ The renderer **MUST** see the post-normalization result. ### 7.5 IDs and references +> TODO: References must not contain control characters or whitespace. They can be any sequence of characters that are not spaces or control characters. + - `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). - `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. - `\link(ref="...")` **MUST** reference an existing `id`. @@ -307,6 +408,9 @@ The renderer **MUST** see the post-normalization result. #### 8.1.2 List-body mode per built-in element +> TODO: `li` and `td` have an auto-upgrade rule, which performs a conversion of string/verbatim body to `{ p { } }`. +> This means they auto-upgrade their body from literal to "paragraph with literal content" + When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: - **Inline-list mode:** `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\date`, `\time`, `\datetime`, ...). @@ -318,6 +422,14 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel ### 8.2 Element catalog (normative) +> TODO: "inline text" bodies are: +> +> - inline list body +> - string body +> - verbatim body +> +> So only an empty body is not "inline text" + #### 8.2.1 `hdoc` (header) - **Role:** document header @@ -332,7 +444,7 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel #### 8.2.2 Headings: `h1`, `h2`, `h3` -- **Role:** block heading levels 1–3 +- **Role:** block heading levels 1-3 - **Body:** inline text (string body or inline-list body) - **Attributes:** `lang` (optional), `id` (optional; top-level only) @@ -344,6 +456,8 @@ When a built-in element uses a `{ ... 
}` list body, it is parsed in the mode bel #### 8.2.4 Lists: `ul`, `ol` +> TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" + - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) @@ -353,6 +467,8 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel #### 8.2.5 List item: `li` +> TODO: Include correct body upgrade rules + - **Body:** either - a block-list of block elements, or - a single string body, or @@ -364,12 +480,14 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - **Body:** inline text caption/description (may be empty) - **Attributes:** - `path` (required, non-empty) - - `alt` (optional, non-empty recommended) + - `alt` (optional, non-empty) - `lang` (optional) - `id` (optional; top-level only) #### 8.2.7 Preformatted: `pre` +> TODO: Body is always just "inline text", as verbatim bodies are also always inline text. + - **Body:** either - verbatim body (`:`) for literal lines (**recommended**), or - inline text body (string or inline-list); whitespace is preserved (no trimming/collapse) @@ -389,6 +507,15 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel Table layout rules: +> TODO: `group` is not a "row with implicit title and no cells", but basically +> `group { }` is equivalent to `columns { td(colspan="") { } }`, +> so a regular row with a single cell spanning all columns. +> `group` never implies the existence of the "leading title column" + +> TODO: The `row(title="…")` does never affect the effective column count. +> It implies an additional untitled first column, which is blank in `columns` and `group` rows. +> The `title` row is designed to form matrices with an empty top-left field. + - `columns` defines header labels and the column count. - Each `row` defines a data row. - Each `group` acts as a section heading for subsequent rows. 
@@ -413,6 +540,8 @@ Table layout rules:
 
 #### 8.2.13 `td` (table cell)
 
+> TODO: Include correct body upgrade rules
+
 - **Body:** either
   - a block-list of block elements, or
   - a single string body, or
@@ -457,6 +586,13 @@ Inline elements appear only in inline-list bodies (or inside string/verbatim, de
 
 ## 9. Attribute types and date/time formats
 
+> TODO: Attributes should be documented well and not only be mentioned in the element catalog.
+> This chapter shall document attributes and their types, including detailed descriptions for both.
+
+> TODO: Specify that leading and trailing whitespace is allowed but discouraged.
+> Non-fatal diagnostics **MUST** be emitted for that.
+> Leading and trailing whitespace must be stripped.
+
 ### 9.1 Common attribute types
 
 - **Version:** must be `2.0`.
@@ -475,16 +611,16 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601.
 
 `YYYY-MM-DD`
 
 - `YYYY`: one or more digits
-- `MM`: `01`–`12`
-- `DD`: `01`–`31`
+- `MM`: `01`-`12`
+- `DD`: `01`-`31`
 
 #### 9.2.2 Time
 
 `hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`.
 
-- `hh`: `00`–`23`
-- `mm`: `00`–`59`
-- `ss`: `00`–`59`
+- `hh`: `00`-`23`
+- `mm`: `00`-`59`
+- `ss`: `00`-`59`
 - optional fraction: `.` followed by 1,2,3,6, or 9 digits
 - zone:
   - `Z`, or
@@ -500,6 +636,15 @@ If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is
 
 ### 9.3 `fmt` values
 
+> TODO: `fmt` values need a proper description of what the expected output is.
+> The output is using the `lang` context of the \date, \time, \datetime element and
+> we provide examples in German and English for each `fmt` option. 
+ +> TODO: This chapter shall be split into: +> +> - `fmt` for `\date` +> - `fmt` for `\time` +> - `fmt` for `\datetime` - `\\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` - `\\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` @@ -508,7 +653,7 @@ If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is Defaults when omitted: - `\date(fmt=...)`: default `short` -- `\time(fmt=...)`: default `long` +- `\time(fmt=...)`: default `short` - `\datetime(fmt=...)`: default `short` ## 10. Non-normative guidance for tooling @@ -535,4 +680,3 @@ pre(syntax="c"): | return 0; | } ``` - From d8e7388797a52191ce6314f8b7cc223b63811b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 14:37:48 +0100 Subject: [PATCH 056/116] Adds two new TODOs to the Table type --- src/hyperdoc.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index dd2aa16..2dd2ca9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -97,6 +97,8 @@ pub const Block = union(enum) { }; pub const Table = struct { + // TODO: column_count: usize, + // TODO: has_row_titles: bool, // not counted inside `Table.column_count`! 
lang: LanguageTag, rows: []TableRow, }; From 92eef4affb9727b1ec9b9e371acb0440fef66cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 15:36:21 +0100 Subject: [PATCH 057/116] Implements basic CLI parser --- src/main.zig | 74 ++++++++++++++++++++++++++++++++-------- test/accept/workset.hdoc | 16 +-------- 2 files changed, 60 insertions(+), 30 deletions(-) diff --git a/src/main.zig b/src/main.zig index d7807f3..693a2f1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -22,13 +22,7 @@ pub fn main() !u8 { const args = try std.process.argsAlloc(allocator); defer std.process.argsFree(allocator, args); - if (args.len < 2) { - try stderr.interface.print("usage: {s} \n", .{args[0]}); - try stderr.interface.flush(); - return 1; - } - - const path = args[1]; + const options = try parse_options(&stderr.interface, args); var diagnostics: hdoc.Diagnostics = .init(allocator); defer diagnostics.deinit(); @@ -37,12 +31,12 @@ pub fn main() !u8 { allocator, &diagnostics, &stdout.interface, - path, + options, ); for (diagnostics.items.items) |diag| { try stderr.interface.print("{s}:{f}: {f}\n", .{ - path, + options.file_path, diag.location, diag.code, }); @@ -50,7 +44,7 @@ pub fn main() !u8 { try stderr.interface.flush(); parse_result catch |err| { - std.log.err("failed to parse \"{s}\": {t}", .{ path, err }); + std.log.err("failed to parse \"{s}\": {t}", .{ options.file_path, err }); return 1; }; @@ -59,8 +53,8 @@ pub fn main() !u8 { return 0; } -fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostics, output_stream: *std.Io.Writer, path: []const u8) !void { - const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); +fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostics, output_stream: *std.Io.Writer, options: CliOptions) !void { + const document = try std.fs.cwd().readFileAlloc(allocator, options.file_path, 1024 * 1024 * 10); defer 
allocator.free(document); var parsed = try hdoc.parse(allocator, document, diagnostics); @@ -70,7 +64,57 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic return error.InvalidFile; } - // TODO: Make render format selectable via CLI: - // try hdoc.render.yaml(parsed, output_stream); - try hdoc.render.html5(parsed, output_stream); + switch (options.format) { + .dump => try hdoc.render.yaml(parsed, output_stream), + .html => try hdoc.render.html5(parsed, output_stream), + } +} + +const CliOptions = struct { + format: RenderFormat = .html, + file_path: []const u8, +}; + +const RenderFormat = enum { + dump, + html, +}; + +fn parse_options(stderr: *std.Io.Writer, argv: []const []const u8) !CliOptions { + var options: CliOptions = .{ + .file_path = "", + }; + + const app_name = argv[0]; + + { + var i: usize = 1; + while (i < argv.len) { + const value = argv[i]; + if (std.mem.startsWith(u8, value, "--")) { + if (std.mem.eql(u8, value, "--format")) { + i += 1; + options.format = std.meta.stringToEnum(RenderFormat, argv[i]) orelse return error.InvalidCli; + i += 1; + continue; + } + return error.InvalidCli; + } + + if (options.file_path.len > 0) { + return error.InvalidCli; + } + options.file_path = value; + + i += 1; + } + } + + if (options.file_path.len == 0) { + try stderr.print("usage: {s} \n", .{app_name}); + try stderr.flush(); + return error.InvalidCli; + } + + return options; } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index efb6bf4..77cabaf 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,18 +1,4 @@ hdoc(version="2.0", lang="en"); -p { - In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed - steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained - parentheses): "Use the staging key (not the production key (seriously))". 
A different person pasted a pseudo-path, - /var/tmp/builds/\date(fmt="iso"){2025-12-23}/, and then warned, "If you see \mono{NULL} in the output, don't 'fix' it by replacing it with - '0'—that's how we broke reporting last time." -} +p { We can mix \em{emphasis}, \strike{strike}, \mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } -pre(syntax="zig") { - pub fn FormattedDateTime(comptime DT: type) type { - return struct { - value: DT, - format: DT.Format = .default, - }; - } -} From 988520edcb8b8f253cd5e18d657fdd018d13580c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 15:59:11 +0100 Subject: [PATCH 058/116] Fixes bug in span merger that would prevent leading whitespace of elements be trimmed even if text was already emitted. --- src/hyperdoc.zig | 4 ++-- src/testsuite.zig | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2dd2ca9..ee7ef20 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1344,8 +1344,8 @@ pub const SemanticAnalyzer = struct { else .{ text_content, false }; - // check if we already have text, and if not, if we should keep the whitespace - if (merger.current_span.items.len > 0 or !skip_head) { + // check if we already have any text collected, and if not, if we should keep the whitespace + if (merger.output.items.len > 0 or merger.current_span.items.len > 0 or !skip_head) { try merger.current_span.appendSlice(merger.arena, append_text); } }, diff --git a/src/testsuite.zig b/src/testsuite.zig index dd5ffd1..a4133fb 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -189,6 +189,41 @@ test "semantic analyzer forbids raw control characters" { try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .illegal_character = .{ .codepoint = 0x9 } })); } +test "span merger preserves whitespace after inline mono" { + var diagnostics: hdoc.Diagnostics = 
.init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\p{ \mono{monospace} text. } + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + switch (doc.contents[0]) { + .paragraph => |para| { + try std.testing.expectEqual(@as(usize, 2), para.content.len); + try std.testing.expect(para.content[0].attribs.mono); + try std.testing.expect(!para.content[1].attribs.mono); + + switch (para.content[0].content) { + .text => |text| try std.testing.expectEqualStrings("monospace", text), + else => return error.TestExpectedEqual, + } + + switch (para.content[1].content) { + .text => |text| try std.testing.expectEqualStrings(" text.", text), + else => return error.TestExpectedEqual, + } + }, + else => return error.TestExpectedEqual, + } +} + test "parser reports unterminated string literals" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); From 8a6e302ca8f9251b1707be67670e2968faabc533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 16:20:00 +0100 Subject: [PATCH 059/116] Updates spec to include \footnote, footnotes{}, \ref and updated \link. --- docs/TODO.md | 3 +- docs/specification-proper-draft.md | 153 +++++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 10 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index e55f3c7..3bfde40 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -7,4 +7,5 @@ - Equal to `\footnote{}` introduce a `\cite{}` inline for citations or use \footnote{style="citation") {} - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper -- Add links to RFCs where possible \ No newline at end of file +- Add links to RFCs where possible +- Document `lang` inheritance. 
No `lang` attribute means that parent language is used. diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 6966c2b..2d94b41 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -348,6 +348,26 @@ Tooling that aims to preserve author intent **SHOULD** preserve whether braces w - The `hdoc` node **MUST NOT** appear anywhere else. - The `hdoc` node **MUST** have an empty body (`;`). +#### Document title + +- A document **MAY** contain one `title` node (document-level title). +- If present, `title` **MUST** be the second node in the document (i.e., the first node after `hdoc`). +- `title` **MUST** be a top-level block element (direct child of the document). +- `title` **MUST NOT** have an `id` attribute. + +`hdoc(title="...")` and `title { ... }` interact as follows: + +- If exactly one of `hdoc(title="...")` or `title { ... }` is present, implementations **SHOULD** treat the single value as both: + - the document metadata title, and + - the document display title. + If the single value is `title { ... }`, tooling **SHOULD** derive a plaintext title (via inline-text construction) for use as metadata where needed. + +- If both are present, tooling **SHOULD** compare their plaintext forms: + - If they match, tooling **SHOULD** emit a diagnostic hint that `hdoc(title)` is redundant. + +- If neither is present, tooling **MAY** emit a diagnostic hint that the document has no title. + + ### 7.2 Inline text construction and normalization Many elements (e.g. `p`, headings, and inline elements) produce **inline text** for rendering. Inline text is constructed from one of: @@ -390,7 +410,20 @@ The renderer **MUST** see the post-normalization result. - `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). - `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. 
-- `\link(ref="...")` **MUST** reference an existing `id`. + +#### Interior references (`ref`) + +- A `ref` attribute value **MUST** be a valid Reference value (§9.1). +- `\ref(ref="...")` **MUST** reference an existing top-level `id`. + +#### Footnote references (`key` / `ref`) + +Footnotes define a separate reference namespace from top-level `id`: + +- `\footnote(key="..."){...}` defines a footnote key in the **footnote namespace**. +- Footnote keys **MUST** be unique (case-sensitive) within the footnote namespace. +- `\footnote(ref="...");` **MUST** reference an existing footnote key. + ### 7.6 Built-in element recognition @@ -413,11 +446,11 @@ The renderer **MUST** see the post-normalization result. When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: -- **Inline-list mode:** `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\date`, `\time`, `\datetime`, ...). +- **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). - **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`. - Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. -- Text blocks (`p`, headings, etc.) contain inline text streams. +- Text blocks (`title`, `p`, headings, etc.) contain inline text streams. - `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. 
### 8.2 Element catalog (normative) @@ -548,6 +581,37 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) +#### 8.2.X `title` (document title) + +- **Role:** document-level display title +- **Body:** inline text (string body or inline-list body) +- **Attributes:** `lang` (optional) + +Semantic constraints: + +- `title` **MUST** be a top-level block element. +- `title` **MUST** appear at most once. +- If present, `title` **MUST** be the second node in the document (after `hdoc`). +- `title` **MUST NOT** have an `id` attribute. + +#### 8.2.X Footnote dump: `footnotes` + +- **Role:** collect and render accumulated footnotes +- **Body:** `;` (empty) +- **Attributes:** + - `kind` (optional; one of `footnote`, `citation`) + - `lang` (optional) + +Semantics: + +- `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). +- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. +- `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. +- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). +- `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. +- Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. + + ### 8.3 Inline elements Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). 
@@ -570,20 +634,86 @@ Inline elements appear only in inline-list bodies (or inside string/verbatim, de - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.3.4 `\\link` +#### 8.3.4 `\link` -- **Role:** hyperlink +- **Role:** foreign hyperlink (external or non-validated target) - **Body:** inline text - **Attributes:** - - `ref` or `uri` (**exactly one required**) + - `uri` (**required**) - `lang` (optional) +Notes: + +- `\link` is used for hyperlinks that are not validated as interior document references. +- Interior references use `\ref(ref="...")`. + + #### 8.3.5 `\\date`, `\\time`, `\\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) +#### 8.3.X `\ref` + +- **Role:** validated interior reference (to a top-level `id`) +- **Body:** inline text (optional; may be empty) +- **Attributes:** + - `ref` (**required**; must reference an existing `id`) + - `fmt` (optional; one of `full`, `name`, `index`; default `full`) + - `lang` (optional) + +Semantics: + +- `\ref(ref="X")` **MUST** resolve to a top-level element with `id="X"`, otherwise it is semantically invalid. +- If `\ref` has a non-empty body, the body **MUST** be used as the rendered link text. +- If `\ref` has an empty body (`;`), the renderer **MUST** synthesize link text from the referenced target and `fmt`: + + - `fmt="full"`: renders `" "` (default) + - `fmt="name"`: renders `""` + - `fmt="index"`: renders `""` + +Target-derived values: + +- For heading targets (`h1`, `h2`, `h3`), `` is the heading’s constructed plaintext inline text. +- For heading targets, `` is the heading’s hierarchical number within the document (e.g. `3.` / `3.2.` / `3.2.1.`). + +If the referenced target is not a heading: + +- `\ref(ref="X");` (implicit body) is semantically invalid and **MUST** be rejected. +- `\ref(ref="X"){...}` remains valid. 
+ +When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). + +#### 8.3.X `\footnote` + +- **Role:** footnote/citation marker and definition +- **Body:** inline text (required for defining form; empty for reference form) +- **Attributes:** + - `key` (optional; defines a named footnote) + - `ref` (optional; references a previously defined named footnote) + - `kind` (optional; one of `footnote`, `citation`; default `footnote`) + - `lang` (optional) + +Attribute rules: + +- `key` and `ref` are mutually exclusive. +- `kind` is only valid on the defining form (a `\footnote` with a non-empty body). A `\footnote(ref="...");` **MUST NOT** specify `kind`. + +Semantics: + +- `\footnote{...}` defines an anonymous footnote entry at the marker position. +- `\footnote(key="X"){...}` defines a named footnote entry in the footnote namespace and emits its marker at the marker position. +- `\footnote(ref="X");` emits a marker for the previously defined named footnote `X`. +- Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. +- A renderer **MAY** hyperlink markers and dumped entries back-and-forth. + +Marker rendering (normative): + +- A renderer **SHALL** render a regular footnote marker as `\sup{\link{\d+}}`. +- A renderer **SHALL** render a citation marker as `\sup{\link{[\d+]}}`. + + ## 9. Attribute types and date/time formats > TODO: Attributes should be documented well and not only be mentioned in the element catalog. @@ -646,21 +776,26 @@ If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. 
This is > - `fmt` for `\time` > - `fmt` for `\datetime` -- `\\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` -- `\\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` -- `\\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` +- `\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` +- `\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` +- `\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` +- `\ref(fmt=...)`: `full`, `name`, `index` Defaults when omitted: - `\date(fmt=...)`: default `short` - `\time(fmt=...)`: default `short` - `\datetime(fmt=...)`: default `short` +- `\ref(fmt=...)`: default `full` ## 10. Non-normative guidance for tooling - Formatters should normalize line endings to LF. - Provide diagnostics for discouraged patterns (leading/trailing whitespace in attribute values, leading zeros, mixed directionality, etc.). - For typo recovery, treat unknown nodes as inline-list mode (§5.2). +- Emit a warning when `\footnote(...)` occurs in a document but no `footnotes(...)` node appears. +- Emit a diagnostic hint when neither `hdoc(title="...")` nor `title { ... }` is present. +- Emit a diagnostic when both `hdoc(title="...")` and `title { ... }` are present but their plaintext forms differ. --- From effbb391b29176aab2e7cd8e4980d9d679d55105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 18:55:50 +0100 Subject: [PATCH 060/116] Vibecoded: Cleans up specification and fixes internal consistency issues. --- docs/specification-proper-draft.md | 120 +++++++++++++++++++---------- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 2d94b41..57e6b3e 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -11,13 +11,15 @@ DONE: Semantics are correct, language might need improvement. 
DRAFT: Current semantics are not finalized yet. MISSING: Chapter needs to be added still. +If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chapters unless a sub-chapter is explicitly listed with a different status. + - "1. Introduction": DONE - "2. Conformance and terminology": FROZEN - "3. Document encoding (byte- and line-level)": DONE - "4. Syntactic model": DONE - "5. Grammar and additional syntax rules" - "5.1 Grammar (EBNF)": DRAFT - - "5.2 Deterministic list-mode disambiguation: DONE + - "5.2 Deterministic list-mode disambiguation": DONE - "5.3 Maximal munch": FROZEN - "5.4 Inline-list brace balancing and backslash dispatch": DONE - "5.5 String literals (syntax)": DRAFT @@ -38,7 +40,7 @@ MISSING: Chapter needs to be added still. - "8. Elements and attributes" - "8.1 Built-in elements and list mode" - "8.1.1 Inline vs block": DONE - - "8.1.2 List-body mode per built-in element": TODO + - "8.1.2 List-body mode per built-in element": DRAFT - "8.2 Element catalog (normative)": DRAFT - "8.2.1 `hdoc` (header)": DONE - "8.2.2 Headings: `h1`, `h2`, `h3`": DRAFT @@ -53,12 +55,16 @@ MISSING: Chapter needs to be added still. - "8.2.11 `row` (table data row)": DRAFT - "8.2.12 `group` (table row group)": DRAFT - "8.2.13 `td` (table cell)": DRAFT + - "8.2.14 `title` (document title)": DRAFT + - "8.2.15 Footnote dump: `footnotes`": DRAFT - "8.3 Inline elements" - "8.3.1 `\\em`": DRAFT - "8.3.2 `\\mono`": DRAFT - "8.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT - - "8.3.4 `\\link`": DRAFT + - "8.3.4 `\link`": DRAFT - "8.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "8.3.6 `\ref`": DRAFT + - "8.3.7 `\footnote`": DRAFT - "9. 
Attribute types and date/time formats": DRAFT - "9.1 Common attribute types": DRAFT - "9.2 Date / time lexical formats (normative)": DRAFT @@ -153,7 +159,7 @@ A body is one of: - `;` - empty body - `"..."` - string literal body -- `:` - verbatim body (one or more `|` lines) +- `:` - verbatim body (zero or more `|` lines; empty verbatim bodies **MUST** emit a diagnostic) - `{ ... }` - list body ### 4.2 List bodies and modes @@ -212,7 +218,33 @@ key_seg ::= ident_char , { ident_char } ; string_literal ::= '"' , { string_unit } , '"' ; -(* verbatim_body and ws productions match the source spec. *) +(* Words *) +word ::= word_char , { word_char } ; + +(* word_char matches any Unicode scalar value except: + - whitespace + - '{' or '}' + - '\\' (because '\\' begins escape_text or inline_node) +*) +word_char ::= ? any scalar value except WS, "{", "}", "\\" ? ; + +(* String literals (syntax only; no escape validation here) *) +string_unit ::= string_char | "\\" , escaped_char ; +string_char ::= ? any scalar value except '"', "\\", control characters (Unicode category Cc) ? ; +escaped_char ::= ? any scalar value except control characters (Unicode category Cc) ? ; + +(* Verbatim lines *) +verbatim_body ::= ":" , { ws , piped_line } ; +(* An empty verbatim body (no piped_line) is syntactically valid, but tooling MUST emit a diagnostic. *) +piped_line ::= "|" , { not_line_end } , line_terminator ; +not_line_end ::= ? any scalar value except CR and LF ? ; +line_terminator ::= LF | ( CR , LF ) | EOF ; + +(* Whitespace *) +ws ::= { WS } ; +WS ::= " " | "\t" | LF | ( CR , LF ) ; +CR ::= "\r" ; +LF ::= "\n" ; ``` ### 5.2 Deterministic list-mode disambiguation @@ -243,27 +275,33 @@ In Inline-list mode: ### 5.5 String literals (syntax) -> TODO: This chapter requires improved wording. 
String literals are basically parsed by: -> -> ```pseudo -> assert next() == '"' -> while(not eof()): -> char = next() -> if char == '\\': -> _ = next() # skip character -> elif char == '"': -> break # end of string literal -> elif is_control(char): # includes CR, LF, TAB and all other control characters -> abort() # invalid character -> ``` - -String literals are delimiter-based and do **not** validate escape *meaning*. - -Syntactically invalid inside `"..."`: - -- raw LF or CR -- a backslash in the last position of the string (`\"` never terminates the string literal) -- a control character (Unicode `Cc`) - **note:** this includes TAB. +String literals are delimited by `"` and are parsed without interpreting escape *meaning*. + +Syntactic rules: + +- The literal starts with `"` and ends at the next `"` that is not consumed as the escaped character after a backslash. +- A string literal **MUST NOT** contain any Unicode control characters (General Category `Cc`), including TAB, LF, and CR. +- A backslash (`\`) **MUST NOT** be the last character before the closing `"` (unterminated escape). +- The closing `"` **MUST** appear before end-of-file. + +The following reference algorithm is authoritative: + +```pseudo +assert next() == '"' +while(not eof()): + char = next() + if char == '\\': + if eof(): abort() # backslash in last position + esc = next() # escaped character (meaning is not interpreted here) + if is_control(esc): abort() # includes CR, LF, TAB and all other control characters + elif char == '"': + return # end of string literal + elif is_control(char): # includes CR, LF, TAB and all other control characters + abort() # invalid character +abort() # eof before closing '"' +``` + +Semantic escape decoding and validation is specified in §6. ## 6. Escape processing (semantic) @@ -455,13 +493,13 @@ When a built-in element uses a `{ ... 
}` list body, it is parsed in the mode bel ### 8.2 Element catalog (normative) -> TODO: "inline text" bodies are: -> -> - inline list body -> - string body -> - verbatim body -> -> So only an empty body is not "inline text" +In this chapter, an "inline text" body is one of: + +- a string body (`"..."`) +- a verbatim body (`:`) +- an inline-list body (`{ ... }` parsed in Inline-list mode) + +Only an empty body (`;`) is not "inline text". #### 8.2.1 `hdoc` (header) @@ -478,13 +516,13 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel #### 8.2.2 Headings: `h1`, `h2`, `h3` - **Role:** block heading levels 1-3 -- **Body:** inline text (string body or inline-list body) +- **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) #### 8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** paragraph-like block with semantic hint -- **Body:** inline text (string body or inline-list body) +- **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) #### 8.2.4 Lists: `ul`, `ol` @@ -512,7 +550,7 @@ When a built-in element uses a `{ ... 
}` list body, it is parsed in the mode bel - **Body:** inline text caption/description (may be empty) - **Attributes:** - - `path` (required, non-empty) + - `path` (required, non-empty; relative to the current file location) - `alt` (optional, non-empty) - `lang` (optional) - `id` (optional; top-level only) @@ -581,10 +619,10 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) -#### 8.2.X `title` (document title) +#### 8.2.14 `title` (document title) - **Role:** document-level display title -- **Body:** inline text (string body or inline-list body) +- **Body:** inline text - **Attributes:** `lang` (optional) Semantic constraints: @@ -594,7 +632,7 @@ Semantic constraints: - If present, `title` **MUST** be the second node in the document (after `hdoc`). - `title` **MUST NOT** have an `id` attribute. -#### 8.2.X Footnote dump: `footnotes` +#### 8.2.15 Footnote dump: `footnotes` - **Role:** collect and render accumulated footnotes - **Body:** `;` (empty) @@ -654,7 +692,7 @@ Notes: - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) -#### 8.3.X `\ref` +#### 8.3.6 `\ref` - **Role:** validated interior reference (to a top-level `id`) - **Body:** inline text (optional; may be empty) @@ -685,7 +723,7 @@ If the referenced target is not a heading: When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). 
-#### 8.3.X `\footnote` +#### 8.3.7 `\footnote` - **Role:** footnote/citation marker and definition - **Body:** inline text (required for defining form; empty for reference form) From f57acfb751f0159e0656d50fe819eb62aa5e316d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 20:41:11 +0100 Subject: [PATCH 061/116] Add spec compliance TODOs --- SPEC_TODO.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 SPEC_TODO.md diff --git a/SPEC_TODO.md b/SPEC_TODO.md new file mode 100644 index 0000000..d0d5ef7 --- /dev/null +++ b/SPEC_TODO.md @@ -0,0 +1,9 @@ +# Spec compliance TODOs + +- Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 +- Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 +- Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 
【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 +- Preserve verbatim whitespace for `pre` content: verbatim bodies should not strip trailing spaces when used as inline text for `pre`, but `translate_inline_body` currently trims the right side of each verbatim line. 【F:docs/specification-proper-draft.md†L558-L565】【F:src/hyperdoc.zig†L1710-L1768】 +- Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 +- Implement table layout semantics: reserve a leading title column when any row has `title` or any `group` appears, and treat `group` as spanning all columns rather than leaving column accounting unaffected. The current translator only checks column-count consistency. 【F:docs/specification-proper-draft.md†L572-L596】【F:src/hyperdoc.zig†L1050-L1149】 +- Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 From 342439278aa5a287dc9ad79aae0a1103ea905e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 20:48:26 +0100 Subject: [PATCH 062/116] Update spec TODOs --- SPEC_TODO.md | 1 - 1 file changed, 1 deletion(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index d0d5ef7..06140f5 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -5,5 +5,4 @@ - Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. 
Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 - Preserve verbatim whitespace for `pre` content: verbatim bodies should not strip trailing spaces when used as inline text for `pre`, but `translate_inline_body` currently trims the right side of each verbatim line. 【F:docs/specification-proper-draft.md†L558-L565】【F:src/hyperdoc.zig†L1710-L1768】 - Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 -- Implement table layout semantics: reserve a leading title column when any row has `title` or any `group` appears, and treat `group` as spanning all columns rather than leaving column accounting unaffected. The current translator only checks column-count consistency. 【F:docs/specification-proper-draft.md†L572-L596】【F:src/hyperdoc.zig†L1050-L1149】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 
【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 From 8af92dbd4ab0ce4d9acdf78d8c7c78d28c51cdaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 23:42:40 +0100 Subject: [PATCH 063/116] Revert inline-node clarification for string/verbatim bodies --- docs/specification-proper-draft.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 57e6b3e..d3eee13 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -490,6 +490,7 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. - Text blocks (`title`, `p`, headings, etc.) contain inline text streams. - `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. +- Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). ### 8.2 Element catalog (normative) @@ -591,8 +592,8 @@ Table layout rules: - Each `row` defines a data row. - Each `group` acts as a section heading for subsequent rows. - After applying `td.colspan`, all `row` and `columns` entries **MUST** resolve to the same effective column count. -- If any `row` has a `title` attribute **or** any `group` is present, renderers **MUST** reserve a leading title column. - - In that case, `columns` **SHOULD** include an empty leading header cell. +- If any `row` has a `title` attribute, renderers **MUST** reserve a leading title column. + - The leading column’s header cell is implicit (empty/invisible) and **MUST NOT** be authored inside `columns`. 
#### 8.2.10 `columns` (table header row) From ee8504b5ceaef411b299846a5879ee55248e8dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 12:10:11 +0100 Subject: [PATCH 064/116] Stop trimming verbatim lines --- SPEC_TODO.md | 1 - src/hyperdoc.zig | 4 +--- src/testsuite.zig | 23 +++++++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 06140f5..cf3d56f 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -3,6 +3,5 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 - Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 -- Preserve verbatim whitespace for `pre` content: verbatim bodies should not strip trailing spaces when used as inline text for `pre`, but `translate_inline_body` currently trims the right side of each verbatim line. 
【F:docs/specification-proper-draft.md†L558-L565】【F:src/hyperdoc.zig†L1710-L1768】 - Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index ee7ef20..cfccc88 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1746,9 +1746,7 @@ pub const SemanticAnalyzer = struct { else line.text[1..]; - const stripped = std.mem.trimRight(u8, text, whitespace_chars); - - text_buffer.appendSliceAssumeCapacity(stripped); + text_buffer.appendSliceAssumeCapacity(text); } const location: Parser.Location = if (verbatim_lines.len > 0) blk: { diff --git a/src/testsuite.zig b/src/testsuite.zig index a4133fb..8b72758 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -224,6 +224,29 @@ test "span merger preserves whitespace after inline mono" { } } +test "pre verbatim preserves trailing whitespace" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + "hdoc(version=\"2.0\",lang=\"en\");\n" ++ "pre:\n" ++ "| line with trailing spaces \n" ++ "| indented line \n"; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + const preformatted = doc.contents[0].preformatted; + try std.testing.expectEqual(@as(usize, 1), preformatted.content.len); + + const 
expected = "line with trailing spaces \n indented line "; + switch (preformatted.content[0].content) { + .text => |text| try std.testing.expectEqualStrings(expected, text), + else => return error.TestExpectedEqual, + } +} + test "parser reports unterminated string literals" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); From f4cde949dbd2ff1a33cb0aaccecc4321036a8804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 16:46:33 +0100 Subject: [PATCH 065/116] Implements hdoc(date) respecting hdoc(tz) default. --- src/hyperdoc.zig | 12 ++++++++++-- src/testsuite.zig | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index cfccc88..59a58ba 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -776,7 +776,7 @@ pub const SemanticAnalyzer = struct { version: Version, title: ?[]const u8 = null, author: ?[]const u8 = null, - date: ?DateTime = null, // TODO: Allow skipping TZ value! 
+ date: ?[]const u8 = null, lang: LanguageTag = .inherit, tz: ?TimeZoneOffset = null, }); @@ -791,12 +791,20 @@ pub const SemanticAnalyzer = struct { if (attrs.version.minor != 0) return error.UnsupportedVersion; + const date = if (attrs.date) |date_str| + DateTime.parse(date_str, attrs.tz) catch blk: { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "date" } }, get_attribute_location(node, "date", .value).?); + break :blk null; + } + else + null; + return .{ .version = attrs.version, .lang = if (lang_location != null) attrs.lang else null, .title = attrs.title, .author = attrs.author, - .date = attrs.date, + .date = date, .timezone = attrs.tz, }; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 8b72758..6e141ed 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -817,3 +817,23 @@ test "diagnostics for bare carriage return" { try std.testing.expect(saw_bare_cr); } + +test "hdoc header date uses timezone hint for missing zone" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\",lang=\"en\",tz=\"-01:30\",date=\"2026-01-01T12:00:00\");"; + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + const parsed = doc.date orelse return error.TestExpectedEqual; + try std.testing.expectEqual(@as(i32, 2026), parsed.date.year); + try std.testing.expectEqual(@as(u4, 1), parsed.date.month); + try std.testing.expectEqual(@as(u5, 1), parsed.date.day); + try std.testing.expectEqual(@as(u5, 12), parsed.time.hour); + try std.testing.expectEqual(@as(u6, 0), parsed.time.minute); + try std.testing.expectEqual(@as(u6, 0), parsed.time.second); + try std.testing.expectEqual(@as(u20, 0), parsed.time.microsecond); + try std.testing.expectEqual(try hdoc.TimeZoneOffset.parse("-01:30"), parsed.time.timezone); +} From d1af43fe1db1c3c7b60678935f4768294c9b3b5d Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 16:52:41 +0100 Subject: [PATCH 066/116] Implements \date, \time and \datetime rejecting anything except bare text bodies. --- SPEC_TODO.md | 1 - src/hyperdoc.zig | 35 ++++++++++++++++++----------------- src/testsuite.zig | 9 +++++++++ 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index cf3d56f..128cd1b 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -2,6 +2,5 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 -- Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 - Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null.
【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 59a58ba..a42ae8b 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1571,22 +1571,23 @@ pub const SemanticAnalyzer = struct { fmt: []const u8 = "", }); - const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); - - // Enforce that date/time bodies only contain plain text/string/verbatim. - // HyperDoc cannot format date/time values on it's own so we can't render - // \date, \time and \datetime into a string. It also doesn't make any sense - // to nest them. - for (content_spans) |span| { - switch (span.content) { - .text => {}, - .date, .time, .datetime => { - try sema.emit_diagnostic(.nested_date_time, span.location); - break :blk; - }, - } + // Enforce the body is only plain text. + const ok = switch (node.body) { + .empty => false, + .string, .verbatim, .text_span => true, // always ok + .list => |list| for (list) |item| { + if (item.type != .text) { + break false; + } + } else true, + }; + if (!ok) { + try sema.emit_diagnostic(.invalid_date_time_body, node.location); + break :blk; } + const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + // Convert the content_spans into a "rendered string". 
const content_text = sema.render_spans_to_plaintext(content_spans) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, @@ -2961,7 +2962,7 @@ pub const Diagnostic = struct { link_not_nestable, invalid_link, invalid_date_time, - nested_date_time, + invalid_date_time_body, invalid_date_time_fmt: DateTimeFormatError, missing_timezone, invalid_unicode_string_escape, @@ -3019,7 +3020,7 @@ .illegal_child_item, .list_body_required, .illegal_id_attribute, - .nested_date_time, + .invalid_date_time_body, .column_count_mismatch, .duplicate_id, .unknown_id, @@ -3104,7 +3105,7 @@ .illegal_id_attribute => try w.writeAll("Attribute 'id' not allowed here."), - .nested_date_time => try w.writeAll("Nesting \\date, \\time and \\datetime is not allowed."), + .invalid_date_time_body => try w.writeAll("\\date, \\time and \\datetime do not allow any inlines inside their body."), .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), diff --git a/src/testsuite.zig b/src/testsuite.zig index 6e141ed..5949fe3 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -837,3 +837,12 @@ try std.testing.expectEqual(@as(u20, 0), parsed.time.microsecond); try std.testing.expectEqual(try hdoc.TimeZoneOffset.parse("-01:30"), parsed.time.timezone); } + +test "\\date rejects bad body" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); p { \\date; }", &.{ .invalid_date_time_body, }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); p { \\date{start \\em{inner}} }", &.{ .invalid_date_time_body, }); } From 1da78c3612b20dff60b0c7c235f338fb693f027d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 17:02:39 +0100 Subject: [PATCH 067/116] Refactors Block.TableOfContents.depth to non-optional value between 1 and
3. --- SPEC_TODO.md | 1 - src/hyperdoc.zig | 18 +++++++++--------- src/render/dump.zig | 2 +- src/render/html5.zig | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 128cd1b..4a92f37 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -2,5 +2,4 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 -- Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 
【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index a42ae8b..61e37e6 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -93,7 +93,7 @@ pub const Block = union(enum) { pub const TableOfContents = struct { lang: LanguageTag, - depth: ?u8, + depth: u8, }; pub const Table = struct { @@ -1023,16 +1023,15 @@ pub const SemanticAnalyzer = struct { const attrs = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, id: ?Reference = null, - depth: ?u32 = null, + depth: ?u8 = null, }); - var depth: ?u8 = null; - if (attrs.depth) |depth_value| { - if (depth_value < 1 or depth_value > 3) { - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "depth" } }, get_attribute_location(node, "depth", .value) orelse node.location); - } else { - depth = @intCast(depth_value); - } + const max_depth: comptime_int = @typeInfo(Block.HeadingLevel).@"enum".fields.len; + + var depth = attrs.depth orelse max_depth; + if (depth < 1 or depth > max_depth) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "depth" } }, get_attribute_location(node, "depth", .value) orelse node.location); + depth = @max(1, @min(max_depth, depth)); } switch (node.body) { @@ -1882,6 +1881,7 @@ pub const SemanticAnalyzer = struct { return switch (T) { []const u8 => value, + u8 => std.fmt.parseInt(u8, value, 10) catch return error.InvalidValue, u32 => std.fmt.parseInt(u32, value, 10) catch return error.InvalidValue, Reference => Reference.parse(value) catch return error.InvalidValue, diff --git a/src/render/dump.zig b/src/render/dump.zig index 1635df9..e731a96 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -435,7 +435,7 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err .toc => |toc| { try writeTypeTag(writer, "toc"); try 
dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang.text); - try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); + try dumpOptionalNumberField(writer, indent + indent_step, "depth", @as(?u8, toc.depth)); }, .table => |table| { try writeTypeTag(writer, "table"); diff --git a/src/render/html5.zig b/src/render/html5.zig index 275453a..1eb76bc 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -166,7 +166,7 @@ const RenderContext = struct { } fn renderTableOfContents(ctx: *RenderContext, toc_block: hdoc.Block.TableOfContents, block_index: ?usize, indent: usize) RenderError!void { - const depth = toc_block.depth orelse 3; + const depth = toc_block.depth; const lang_attr = langAttribute(toc_block.lang); const id_attr = ctx.resolveBlockId(block_index); From 59cbae272eac1ca04d3431557f9397dcbee76e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 22:51:35 +0100 Subject: [PATCH 068/116] =?UTF-8?q?Rewrites=20=C2=A77.5=20Identifiers=20an?= =?UTF-8?q?d=20References?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/TODO.md | 23 +++++++++++++++----- docs/specification-proper-draft.md | 35 +++++++++++++++++------------- justfile | 10 +++++++++ 3 files changed, 48 insertions(+), 20 deletions(-) create mode 100644 justfile diff --git a/docs/TODO.md b/docs/TODO.md index 3bfde40..f703537 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -1,11 +1,24 @@ # Specification TODOs -- Introduction of `\ref` vs. `\link` - - -- Introduction of `\footnote{body}`, `\footnote(id="foo"){body}` and `\footnote(ref="");` - - `id` namespace is separate from toplevel `id` namespace. requires better naming - - Equal to `\footnote{}` introduce a `\cite{}` inline for citations or use \footnote{style="citation") {} - Assign semantics to node types, paragraph kinds, ... 
- Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. + + +- Special-style blocks become block containers + - The “special paragraph” family (e.g. note, info, warning, danger, tip, spoiler, quote, …) are block containers. + - Their { ... } list body is always Block-list mode (i.e., they contain blocks like p, ul, ol, pre, etc.). + - They do not accept inline-list bodies directly. Inline markup requires an explicit paragraph: + note { p { text with \link(...) { inline } nodes } } +- General implicit-paragraph shorthand (removes special cases) + - You’re removing the element-specific special casing (like the old quote/li/td convenience rules) and replacing it with one general semantic rule: + - Rule: If a block element’s list body would allow “regular top-level blocks” (e.g. p, pre, ol, ul, …), then that element’s body MAY be written as a string or verbatim literal. + - Equivalence: A string/verbatim body is equivalent to a block-list body containing a single paragraph with the same content as plain text. + Concretely: + X "TEXT" ≡ X { p "TEXT" } + X: | TEXT ≡ X { p: | TEXT } + - Notes: + - This shorthand produces plain text and therefore follows your normal inline text construction rules (including whitespace normalization). + - This shorthand should apply to “flow containers” like quote, note, and also fixes li / td ergonomics cleanly. + - It should not be used for structural containers where a string would be misleading (e.g. ul/ol/table/columns/row), because those don’t “allow regular blocks” as direct children in the first place. diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index d3eee13..c13936b 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -442,26 +442,33 @@ The renderer **MUST** see the post-normalization result. - Required attributes **MUST** be present. 
- Attributes not defined for an element **MUST** be rejected. -### 7.5 IDs and references +### 7.5 Identifiers and References -> TODO: References must not contain control characters or whitespace. They can be any sequence of characters that are not spaces or control characters. +HyperDoc defines two separate namespaces for identifiers to allow cross-referencing within a document: the **Block Namespace** and the **Footnote Namespace**. -- `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). -- `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. +Identifiers in both namespaces are case-sensitive and share the same syntax: they **MUST** be a non-empty sequence of one or more characters, and **MUST NOT** contain any whitespace or Unicode control characters (General Category `Cc`). -#### Interior references (`ref`) +#### 7.5.1 Block Namespace (`id` and `\ref(ref)`) -- A `ref` attribute value **MUST** be a valid Reference value (§9.1). -- `\ref(ref="...")` **MUST** reference an existing top-level `id`. +The Block Namespace is used for referencing top-level block elements like headings, figures, or tables. -#### Footnote references (`key` / `ref`) +- **Definition**: An identifier is added to the Block Namespace using the `id` attribute. + - The `id` attribute is only allowed on **top-level block elements** (direct children of the document, not nested inside another node). + - `id` values **MUST** be unique across the document's Block Namespace. -Footnotes define a separate reference namespace from top-level `id`: +- **Reference**: An identifier in the Block Namespace is referenced using the `\ref` inline element. + - `\ref(ref="...")` **MUST** reference an `id` that exists in the Block Namespace. -- `\footnote(key="..."){...}` defines a footnote key in the **footnote namespace**. -- Footnote keys **MUST** be unique (case-sensitive) within the footnote namespace. 
-- `\footnote(ref="...");` **MUST** reference an existing footnote key. +#### 7.5.2 Footnote Namespace (`\footnote(key)` and `\footnote(ref)`) +The Footnote Namespace is used for defining and referencing reusable footnotes. + +- **Definition**: An identifier is added to the Footnote Namespace using the `key` attribute on a `\footnote` element that has a body. + - `\footnote(key="..."){...}` defines a footnote and associates it with an identifier. + - `key` values **MUST** be unique across the document's Footnote Namespace. + +- **Reference**: An identifier in the Footnote Namespace is referenced using a `\footnote` element that has no body. + - `\footnote(ref="...");` **MUST** reference a `key` that has been defined in the Footnote Namespace. ### 7.6 Built-in element recognition @@ -560,9 +567,7 @@ Only an empty body (`;`) is not "inline text". > TODO: Body is always just "inline text", as verbatim bodies are also always inline text. -- **Body:** either - - verbatim body (`:`) for literal lines (**recommended**), or - - inline text body (string or inline-list); whitespace is preserved (no trimming/collapse) +- **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) #### 8.2.8 Table of contents: `toc` diff --git a/justfile b/justfile new file mode 100644 index 0000000..3c1c2ee --- /dev/null +++ b/justfile @@ -0,0 +1,10 @@ +default: build test + +build: + zig-0.15.2 build -freference-trace=11 --prominent-compile-errors + +test: + zig-0.15.2 build -freference-trace=11 --prominent-compile-errors test + +dump: build + ./zig-out/bin/hyperdoc --format dump "test/accept/workset.hdoc" From fe9218b11b097da26fc8c824a5b5ee7a09c6c6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:13:59 +0100 Subject: [PATCH 069/116] Restructures chapter 8 (Elements and attributes) into a more well-formed structure, and refines rules for block promotion rules --- docs/TODO.md | 11 -- 
docs/specification-proper-draft.md | 170 ++++++++++++++++------------- 2 files changed, 94 insertions(+), 87 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index f703537..f6e284e 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -11,14 +11,3 @@ - Their { ... } list body is always Block-list mode (i.e., they contain blocks like p, ul, ol, pre, etc.). - They do not accept inline-list bodies directly. Inline markup requires an explicit paragraph: note { p { text with \link(...) { inline } nodes } } -- General implicit-paragraph shorthand (removes special cases) - - You’re removing the element-specific special casing (like the old quote/li/td convenience rules) and replacing it with one general semantic rule: - - Rule: If a block element’s list body would allow “regular top-level blocks” (e.g. p, pre, ol, ul, …), then that element’s body MAY be written as a string or verbatim literal. - - Equivalence: A string/verbatim body is equivalent to a block-list body containing a single paragraph with the same content as plain text. - Concretely: - X "TEXT" ≡ X { p "TEXT" } - X: | TEXT ≡ X { p: | TEXT } - - Notes: - - This shorthand produces plain text and therefore follows your normal inline text construction rules (including whitespace normalization). - - This shorthand should apply to “flow containers” like quote, note, and also fixes li / td ergonomics cleanly. - - It should not be used for structural containers where a string would be misleading (e.g. ul/ol/table/columns/row), because those don’t “allow regular blocks” as direct children in the first place. 
diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index c13936b..09413ca 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -35,7 +35,7 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "7.2 Inline text construction and normalization": DONE - "7.3 Attribute uniqueness": DONE - "7.4 Attribute validity": DONE - - "7.5 IDs and references": DRAFT + - "7.5 Identifiers and References": DONE - "7.6 Built-in element recognition": DONE - "8. Elements and attributes" - "8.1 Built-in elements and list mode" @@ -486,9 +486,6 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. #### 8.1.2 List-body mode per built-in element -> TODO: `li` and `td` have an auto-upgrade rule, which performs a conversion of string/verbatim body to `{ p { } }`. -> This means they auto-upgrade their body from literal to "paragraph with literal content" - When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: - **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). @@ -499,15 +496,25 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. - Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). 
-### 8.2 Element catalog (normative) +#### 8.1.3 Shorthand Body Promotion -In this chapter, an "inline text" body is one of: +If a block element's list body can contain general text block elements (such as `p`, `pre`, `ol`, `ul`, etc.), its body **MAY** instead be written as a shorthand string or verbatim literal. -- a string body (`"..."`) -- a verbatim body (`:`) -- an inline-list body (`{ ... }` parsed in Inline-list mode) +When a shorthand body is used, it is semantically equivalent to a block-list body containing a single `p` (paragraph) node whose own body is the original string or verbatim content. -Only an empty body (`;`) is not "inline text". +For example, `li "some text"` is semantically identical to: + +```hdoc +li { + p "some text" +} +``` + +This promotion is a feature for convenience and applies only to the following elements: +- `li` +- `td` + +### 8.2 Top-Level Block Elements #### 8.2.1 `hdoc` (header) @@ -521,19 +528,68 @@ Only an empty body (`;`) is not "inline text". - `date` (optional): datetime lexical format (§9.2.3) - `tz` (optional): default timezone for time/datetime values (§9.2) -#### 8.2.2 Headings: `h1`, `h2`, `h3` +#### 8.2.2 `title` (document title) + +- **Role:** document-level display title +- **Body:** inline text +- **Attributes:** `lang` (optional) + +Semantic constraints: + +- `title` **MUST** be a top-level block element. +- `title` **MUST** appear at most once. +- If present, `title` **MUST** be the second node in the document (after `hdoc`). +- `title` **MUST NOT** have an `id` attribute. + +#### 8.2.3 Table of contents: `toc` + +- **Role:** Generates a table of contents. +- **Body:** `;` (empty) +- **Attributes:** `depth` (optional Integer in {1,2,3}; default 3), `lang` (optional), `id` (optional) + +Semantic constraints: +- `toc` **MUST** be a top-level block element (a direct child of the document). 
+ +#### 8.2.4 Footnote dump: `footnotes` + +- **Role:** collect and render accumulated footnotes +- **Body:** `;` (empty) +- **Attributes:** + - `kind` (optional; one of `footnote`, `citation`) + - `lang` (optional) + +Semantics: + +- `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). +- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. +- `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. +- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). +- `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. +- Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. + +### 8.3 General Text Block Elements + +In this chapter, an "inline text" body is one of: + +- a string body (`"..."`) +- a verbatim body (`:`) +- an inline-list body (`{ ... }` parsed in Inline-list mode) + +Only an empty body (`;`) is not "inline text". 
+ +#### 8.3.1 Headings: `h1`, `h2`, `h3` - **Role:** block heading levels 1-3 - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 8.3.2 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** paragraph-like block with semantic hint - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.2.4 Lists: `ul`, `ol` +#### 8.3.3 Lists: `ul`, `ol` > TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" @@ -544,17 +600,7 @@ Only an empty body (`;`) is not "inline text". - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.2.5 List item: `li` - -> TODO: Include correct body upgrade rules - -- **Body:** either - - a block-list of block elements, or - - a single string body, or - - a verbatim body -- **Attributes:** `lang` (optional) - -#### 8.2.6 Figure: `img` +#### 8.3.4 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -563,19 +609,14 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 8.2.7 Preformatted: `pre` +#### 8.3.5 Preformatted: `pre` > TODO: Body is always just "inline text", as verbatim bodies are also always inline text. - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.2.8 Table of contents: `toc` - -- **Body:** `;` (empty) -- **Attributes:** `depth` (optional Integer in {1,2,3}; default 3), `lang` (optional), `id` (optional; top-level only) - -#### 8.2.9 Tables: `table` +#### 8.3.6 Tables: `table` - **Body:** block-list containing: - optional `columns`, then @@ -600,24 +641,32 @@ Table layout rules: - If any `row` has a `title` attribute, renderers **MUST** reserve a leading title column. 
- The leading column’s header cell is implicit (empty/invisible) and **MUST NOT** be authored inside `columns`. -#### 8.2.10 `columns` (table header row) +### 8.4 Structural Elements + +#### 8.4.1 List item: `li` + +- **Body:** either + - a block-list of block elements, or + - a single string body, or + - a verbatim body +- **Attributes:** `lang` (optional) + +#### 8.4.2 `columns` (table header row) - **Body:** block-list containing `td` (at least one) - **Attributes:** `lang` (optional) -#### 8.2.11 `row` (table data row) +#### 8.4.3 `row` (table data row) - **Body:** block-list containing `td` (at least one) - **Attributes:** `title` (optional string), `lang` (optional) -#### 8.2.12 `group` (table row group) +#### 8.4.4 `group` (table row group) - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.2.13 `td` (table cell) - -> TODO: Include correct body upgrade rules +#### 8.4.5 `td` (table cell) - **Body:** either - a block-list of block elements, or @@ -625,60 +674,29 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) -#### 8.2.14 `title` (document title) - -- **Role:** document-level display title -- **Body:** inline text -- **Attributes:** `lang` (optional) - -Semantic constraints: - -- `title` **MUST** be a top-level block element. -- `title` **MUST** appear at most once. -- If present, `title` **MUST** be the second node in the document (after `hdoc`). -- `title` **MUST NOT** have an `id` attribute. - -#### 8.2.15 Footnote dump: `footnotes` - -- **Role:** collect and render accumulated footnotes -- **Body:** `;` (empty) -- **Attributes:** - - `kind` (optional; one of `footnote`, `citation`) - - `lang` (optional) - -Semantics: - -- `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). 
-- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. -- `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. -- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). -- `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. -- Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. - - -### 8.3 Inline elements +### 8.5 Inline elements Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). -#### 8.3.1 `\\em` +#### 8.5.1 `\\em` - **Role:** emphasis - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.3.2 `\\mono` +#### 8.5.2 `\\mono` - **Role:** monospaced span - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional) -#### 8.3.3 `\\strike`, `\\sub`, `\\sup` +#### 8.5.3 `\\strike`, `\\sub`, `\\sup` - **Role:** strike-through / subscript / superscript - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.3.4 `\link` +#### 8.5.4 `\link` - **Role:** foreign hyperlink (external or non-validated target) - **Body:** inline text @@ -692,13 +710,13 @@ Notes: - Interior references use `\ref(ref="...")`. 
-#### 8.3.5 `\\date`, `\\time`, `\\datetime` +#### 8.5.5 `\\date`, `\\time`, `\\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) -#### 8.3.6 `\ref` +#### 8.5.6 `\ref` - **Role:** validated interior reference (to a top-level `id`) - **Body:** inline text (optional; may be empty) @@ -729,7 +747,7 @@ If the referenced target is not a heading: When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). -#### 8.3.7 `\footnote` +#### 8.5.7 `\footnote` - **Role:** footnote/citation marker and definition - **Body:** inline text (required for defining form; empty for reference form) From eadc76e21eef902ba4189ad96a99a473d5cf5180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:30:07 +0100 Subject: [PATCH 070/116] Changes note/warning/... from paragraphs into admonition blocks, which wrap other blocks. --- docs/specification-proper-draft.md | 30 +- docs/specification.md | 764 ----------------------------- 2 files changed, 21 insertions(+), 773 deletions(-) delete mode 100644 docs/specification.md diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 09413ca..2a0f168 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -488,12 +488,12 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: -- **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). -- **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`. 
+- **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). +- **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`. - Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. - Text blocks (`title`, `p`, headings, etc.) contain inline text streams. -- `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. +- `li`, `td`, and admonition blocks contain either blocks or a single string/verbatim body; representing blocks implies block-list mode. - Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). #### 8.1.3 Shorthand Body Promotion @@ -513,6 +513,12 @@ li { This promotion is a feature for convenience and applies only to the following elements: - `li` - `td` +- `note` +- `warning` +- `danger` +- `tip` +- `quote` +- `spoiler` ### 8.2 Top-Level Block Elements @@ -583,13 +589,19 @@ Only an empty body (`;`) is not "inline text". - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.2 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 8.3.2 Paragraph: `p` -- **Role:** paragraph-like block with semantic hint +- **Role:** A standard paragraph of text. - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.3 Lists: `ul`, `ol` +#### 8.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` + +- **Role:** A block that renders with a distinct style to draw the reader's attention. +- **Body:** A block-list containing zero or more General Text Block Elements. 
Per the Shorthand Body Promotion rule (§ 8.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +#### 8.3.4 Lists: `ul`, `ol` > TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" @@ -600,7 +612,7 @@ Only an empty body (`;`) is not "inline text". - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.3.4 Figure: `img` +#### 8.3.5 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -609,14 +621,14 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 8.3.5 Preformatted: `pre` +#### 8.3.6 Preformatted: `pre` > TODO: Body is always just "inline text", as verbatim bodies are also always inline text. - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.3.6 Tables: `table` +#### 8.3.7 Tables: `table` - **Body:** block-list containing: - optional `columns`, then diff --git a/docs/specification.md b/docs/specification.md deleted file mode 100644 index a4ecd99..0000000 --- a/docs/specification.md +++ /dev/null @@ -1,764 +0,0 @@ -# HyperDoc 2.0 - -This specification describes the document markup language "HyperDoc 2.0", that tries to be a simple-to-parse, easy-to-write markup language for hypertext documents. - -It sits in a space where it's unambiguous to parse, but still relatively convenient to write. - -## Syntax Overview - -```hdoc -hdoc(version="2.0"); - -h1 "Introduction" - -p { - This is my first HyperDoc 2.0 document! -} - -pre(syntax="c"): -| #include -| int main(int argc, char *argv[]) { -| printf("Hello, World!"); -| return 0; -| } -``` - -## Document encoding - -This section defines the required byte-level encoding and line structure of HyperDoc documents.
- -### Character encoding - -- A HyperDoc document **MUST** be encoded as **UTF-8**. -- A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences. - -**Byte Order Mark (BOM):** - -- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as whitespace at the beginning of the document. - -### Line endings - -- Lines **MUST** be terminated by either: - - `` (U+000A), or - - `` (U+000D U+000A). -- A bare `` **MUST NOT** appear except as part of a `` sequence. - -A document **MAY** mix `` and `` line endings, but tooling **SHOULD** normalize to a single convention when rewriting documents. - -The canonical line ending emitted by tooling **SHOULD** be ``. - -### Control characters - -- The only permitted control character **within a line** is: - - `` (U+0009). -- Apart from line terminators (`` and `` only as part of ``), all other Unicode control characters (General Category `Cc`) **MUST NOT** appear anywhere in a HyperDoc document. - -### Unicode text - -- Apart from the restrictions above, arbitrary Unicode text is allowed. - -### Recommendations for writing systems and directionality (non-normative) - -HyperDoc does not define special handling for right-to-left scripts, bidirectional layout, or writing system segmentation. For readability and to reduce ambiguity across renderers and editors: - -- Authors **SHOULD** keep each paragraph primarily in a **single writing system/directionality** where practical. -- Tooling **MAY** warn when a paragraph mixes strongly different directional scripts or contains invisible bidirectional formatting characters (e.g., bidi overrides/isolates), since these can be confusing in editors and reviews. - -## Syntax - -This chapter defines the **syntactic structure** of HyperDoc documents: how characters form tokens, how tokens form **nodes**, and how nodes nest. 
It intentionally does **not** define meaning (required elements, allowed attributes per node type, ID/refs, allowed escape sequences, etc.). Those are handled in later chapters as **semantic validity** rules. - -A HyperDoc document is a sequence of **nodes**. Each node has: - -- a **node name** (identifier), -- an optional **attribute list** `(key="value", ...)`, -- and a mandatory **body**, which is one of: - - `;` empty body, - - `"..."` string literal body, - - `:` verbatim body (one or more `|` lines), - - `{ ... }` list body. - -A list body `{ ... }` is parsed in one of two modes: - -- **Block-list mode**: the list contains nested nodes. -- **Inline-list mode**: the list contains a token stream of text items, escape tokens, inline nodes, and balanced brace groups. - -The grammar below is syntax-only and intentionally leaves the choice between block-list and inline-list content to an **external disambiguation rule**. - -### Grammar (EBNF) - -```ebnf -(* ---------- Top level ---------- *) - -document ::= ws , { node , ws } , EOF ; - -(* ---------- Nodes ---------- *) - -node ::= node_name , ws , [ attribute_list , ws ] , body ; - -body ::= empty_body - | string_body - | verbatim_body - | list_body ; - -empty_body ::= ";" ; - -string_body ::= string_literal ; - -verbatim_body ::= ":" , { ws , piped_line } ; - -list_body ::= "{" , list_content , "}" ; - -(* - IMPORTANT: list_content is intentionally ambiguous. - A conforming parser chooses either inline_content or block_content by an - EXTERNAL rule (see “Disambiguation for list bodies”). -*) -list_content ::= inline_content | block_content ; - - -(* ---------- Attributes ---------- *) - -attribute_list ::= "(" , ws , - [ attribute , - { ws , "," , ws , attribute } , - [ ws , "," ] (* trailing comma allowed *) - ] , - ws , ")" ; - -attribute ::= attr_key , ws , "=" , ws , string_literal ; - -(* - Attribute keys may include '-' in addition to node-name characters. 
-*) -attr_key ::= attr_key_char , { attr_key_char } ; - -attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; - - -(* ---------- Block-list content ---------- *) - -block_content ::= ws , { node , ws } ; - - -(* ---------- Inline-list content ---------- *) - -inline_content ::= ws , { inline_item , ws } ; - -inline_item ::= word - | escape_text - | inline_node - | inline_group ; - -(* - Balanced braces in inline content are represented as inline_group. - If braces cannot be balanced, they must be written as \{ and \}. -*) -inline_group ::= "{" , inline_content , "}" ; - -(* - Backslash dispatch inside inline content: - - If next char is one of '\', '{', '}', emit escape_text. - - Otherwise begin an inline_node. -*) -escape_text ::= "\" , ( "\" | "{" | "}" ) ; - -inline_node ::= inline_name , ws , [ attribute_list , ws ] , body ; - -(* - Inline node names start with '\' and then continue with node-name characters. -*) -inline_name ::= "\" , node_name_char_no_backslash , { node_name_char } ; - - -(* ---------- Words / node names ---------- *) - -(* - Node names intentionally do NOT include ':' because ':' is also a body marker - (e.g. 'p:' for verbatim body) and adjacency is allowed. -*) -node_name ::= node_name_char , { node_name_char } ; - -node_name_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; - -node_name_char_no_backslash - ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" ; - -word ::= word_char , { word_char } ; - -(* - word_char matches any Unicode scalar value except: - - whitespace - - '{' or '}' - - '\' (because '\' begins escape_text or inline_node) -*) -word_char ::= ? any scalar value except WS, "{", "}", "\" ? ; - - -(* ---------- String literals (syntax only; no escape validation here) ---------- *) - -string_literal ::= "\"" , { string_unit } , "\"" ; - -(* - string_unit is permissive enough that malformed escapes remain parsable, - BUT forbids escaping control characters (including LF/CR/TAB). 
- Raw TAB is allowed as a normal string_char. -*) -string_unit ::= string_char | "\" , escaped_noncontrol ; - -string_char ::= ? any scalar value except '"', '\', LF, CR ? ; - -escaped_noncontrol - ::= ? any scalar value except control chars (Unicode category Cc) ? ; - - -(* ---------- Verbatim lines ---------- *) - -piped_line ::= "|" , { not_line_end } , line_terminator ; - -not_line_end ::= ? any scalar value except CR and LF ? ; - -line_terminator ::= LF | CR , LF | EOF ; - - -(* ---------- Whitespace ---------- *) - -ws ::= { WS } ; - -WS ::= " " | "\t" | CR | LF ; - -CR ::= "\r" ; -LF ::= "\n" ; -``` - -### Additional syntax rules and notes (normative) - -#### 1) Maximal-munch for identifiers - -When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the **longest possible** sequence of allowed identifier characters (maximal munch). This is required because `\` is a legal identifier character and must not be arbitrarily split. - -#### 2) Disambiguation for list bodies (external chooser) - -The production `list_content ::= inline_content | block_content` is resolved by a deterministic, non-backtracking rule: - -1. Before parsing the content of a `{ ... }` list body, the parser **MUST** choose exactly one list mode: **Inline-list mode** or **Block-list mode**. -2. The mode is determined solely from the syntactic **node name token** (not attributes, not body contents, not document state). -3. Required behavior (recovery-friendly): - - If the node name begins with `\`, the parser **MUST** choose **Inline-list mode**. - - If the node name is recognized as a built-in name with a specified list mode, the parser **MUST** choose that mode. - - Otherwise (unknown / misspelled / unsupported node name), the parser **MUST** choose **Inline-list mode**. - -This rule ensures unknown nodes accept rich inline content for typo recovery (e.g. `prre { ... }`). 
- -#### 3) Inline-list mode: brace balancing and escape-text tokens - -In **Inline-list mode**: - -- `{` and `}` that appear as literal characters in the inline stream are represented structurally as `inline_group` and therefore **must be balanced**. -- If braces cannot be balanced, they **must** be written using the escape-text tokens `\{` and `\}`. -- A backslash in inline content is interpreted as: - - one of the three **escape-text tokens** `\\`, `\{`, `\}`, or - - the start of an `inline_node` otherwise. - -The escape-text tokens exist primarily so the three characters `\`, `{`, `}` can be represented literally within inline content without always starting an inline node. - -#### 4) String literals are syntax-only at this stage - -String literals are delimited by `"` and parsed without interpreting escape meanings. This is intentional: documents with malformed or unknown escape sequences remain **syntactically valid**, allowing formatters and other tooling to round-trip source reliably. - -However, the following are **syntactically invalid** inside string literals: - -- raw LF or CR characters (line breaks are not allowed within `"..."`), -- a backslash immediately followed by a **control character** (Unicode General Category `Cc`), which includes TAB. - -(Separately: which escape sequences are *semantically* valid is defined later.) - -#### 5) Verbatim bodies are line-oriented - -In a verbatim body (`:`): - -- The body consists of zero or more `piped_line` entries. -- Each `piped_line` starts with `|` after optional whitespace skipping. -- The content of a verbatim line is everything up to the line terminator; it is not tokenized into nodes. - -A file ending without a final newline is syntactically allowed (`EOF` as a line terminator), though tooling may warn. 
- -#### 6) Syntactic validity vs semantic validity - -A document is **syntactically valid** if it matches the grammar and the additional syntax rules above (maximal munch, list-mode disambiguation, inline brace balancing, and the string-literal constraints). - -A syntactically valid document may still be **semantically invalid**. Semantic validation is defined later and may include rules such as required header nodes, attribute constraints, reference resolution, allowed escape sequences, encoding policy, and disallowed control characters in source text. - -## Escape encoding - -This chapter defines how **escape sequences are interpreted** to produce decoded Unicode text. Escape processing is part of **semantic validation**: a document may be syntactically valid even if it contains unknown or malformed escapes, but it is not semantically valid unless all escapes decode successfully under the rules below. - -HyperDoc documents are UTF-8 text. Unless explicitly stated otherwise, all “characters” in this chapter refer to Unicode scalar values. - -### Scope - -Escape sequences are recognized in two places: - -1. **STRING literals** (the `"..."` body form, and attribute values which are also STRING literals). -2. **Inline escape-text tokens** inside inline-list bodies: `\\`, `\{`, `\}` (these are emitted as text spans by the parser and can be decoded to literal characters during semantic processing). - -No other part of the syntax performs escape decoding (not node names, not verbatim bodies, not block-list structure). - -## Control character policy - -HyperDoc forbids control characters except **LF** and **CR**. - -- A semantically valid document **MUST NOT** contain any Unicode control characters (General Category `Cc`) anywhere **except**: - - U+000A LINE FEED (LF) - - U+000D CARRIAGE RETURN (CR) - -This rule applies both to: - -- the raw document text (source), and -- any decoded text produced from escapes. 
- -Implications: - -- TAB (U+0009) is forbidden, including if introduced via `\u{9}`. -- NUL (U+0000) is forbidden, including if introduced via `\u{0}`. - -(Structural line breaks in the file may be LF or CRLF or CR as allowed by the syntax rules; decoded strings may contain LF/CR only via escapes.) - -### String literal escape sequences - -#### Overview - -Within a STRING literal, a backslash (`\`) begins an escape sequence. The set of valid escapes is deliberately small. - -A semantic validator/decoder **MUST** accept exactly the escape forms listed below and **MUST** reject all others. - -#### Supported escapes (STRING literals) - -The following escapes are valid inside STRING literals: - -| Escape | Decodes to | -| ---------- | ---------------------------- | -| `\\` | U+005C REVERSE SOLIDUS (`\`) | -| `\"` | U+0022 QUOTATION MARK (`"`) | -| `\n` | U+000A LINE FEED (LF) | -| `\r` | U+000D CARRIAGE RETURN (CR) | -| `\u{H...}` | Unicode scalar value U+H... | - -No other escapes exist. In particular, `\0`, `\xHH`, `\e`, and similar are not part of HyperDoc. - -#### Unicode escape `\u{H...}` - -`H...` is a non-empty sequence of hexadecimal digits (`0–9`, `A–F`, `a–f`) representing a Unicode code point in hexadecimal. - -Rules: - -- The hex sequence **MUST** contain **1 to 6** hex digits. -- The value **MUST** be within `0x0 .. 0x10FFFF` inclusive. -- The value **MUST NOT** be in the surrogate range `0xD800 .. 0xDFFF`. -- The value **MUST NOT** decode to a forbidden control character (see Control character policy). The only allowed controls are LF and CR. - -Notes: - -- Leading zeros are allowed (`\u{000041}` is `A`). -- `\u{20}` is ASCII space. (`\u{032}` is U+0032, the digit `"2"`, because the digits are hexadecimal.) - -#### Invalid escapes (STRING literals) - -A semantic validator/decoder **MUST** reject a document (or at least reject that literal) if any STRING literal contains: - -- an unknown escape (e.g. 
`\q`, `\uFFFF`, `\x20`, `\t`, `\b`, …), -- an unterminated escape (string ends immediately after `\`), -- a malformed Unicode escape (`\u{}`, missing `{`/`}`, non-hex digits, more than 6 hex digits), -- a Unicode escape outside the valid scalar range or within the surrogate range, -- a Unicode escape that produces a forbidden control character. - -#### Canonical encoding recommendations (non-normative) - -For authors and formatters: - -- Prefer `\\` and `\"` for literal backslash and quote. -- Prefer `\n` and `\r` for LF/CR instead of `\u{A}` / `\u{D}`. -- Prefer the shortest hex form for `\u{...}` without leading zeros unless alignment/readability benefits. - -### Inline escape-text tokens in inline-list bodies - -Inside **inline-list bodies**, the syntax defines three special two-character text tokens: - -- `\\` -- `\{` -- `\}` - -These exist so that inline content can contain literal `\`, `{`, and `}` without always starting an inline node (`\name{...}`) or requiring brace balancing. - -#### Decoding rule - -During semantic text construction, an implementation **MAY** decode these tokens as: - -- `\\` → `\` -- `\{` → `{` -- `\}` → `}` - -This decoding is independent of STRING literal escapes: these tokens occur in inline text streams, not inside `"..."` literals. - -#### Round-tripping note (normative intent) - -A formatter or tooling that aims to preserve the author’s intent **SHOULD** preserve the distinction between: - -- a literal `{`/`}` that is part of a balanced inline group, and -- an escaped brace token `\{`/`\}` that was used to avoid imbalance. - -This distinction matters for reliable reconstruction and for edits that may reflow or restructure inline content. - -### Interaction with syntax - -- Escape decoding is performed **after** syntactic parsing. -- Syntactic parsing of STRING literals is delimiter-based and does not validate escape *meaning*. -- Semantic validation determines whether escapes are valid and produces the decoded Unicode text. 
- -This separation is intentional: it allows autoformatters to parse and rewrite documents that may contain malformed escapes without losing information, while still allowing strict validators to enforce the escape rules above. - -## Semantic Validity - -> TO BE DONE. -> -> - Attribute uniqueness -> - Attribute must be defined on a node -> - Non-optional attributes must be present -> - id is only valid on top-level nodes -> - id must be unique -> - id is case sensitive -> - ref must point to an existing id - -## Element Overview - -| Element | Element Type | Allowed Children | Attributes | -| ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------------- | -| *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date`, `tz` | -| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | -| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | -| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | -| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | -| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | -| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | -| `toc` | Block | - | `lang`, \[`id`\], `depth` | -| `table` | Block | Table Rows | `lang`, \[`id`\] | -| `li` | List Item | Blocks, String, Verbatim | `lang` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `columns` | Table Row | `td` ≥ 1 | `lang` | -| `group` | Table Row | Text Body | `lang`, | -| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `\em` | Text Body | Text Body | `lang` | -| `\mono` | Text Body | Text Body | `lang`, `syntax` | -| `\strike` | Text Body | Text Body | `lang` | -| `\sub`, `\sup` | Text Body | Text Body | `lang` | -| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | -| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim 
| `lang`, `fmt` | -| *Plain Text* | Text Body | - | | -| *String* | Text Body | - | | -| *Verbatim* | Text Body | - | | - -Notes: - -- The attribute `id` is only allowed when the element is a top-level element (direct child of the document) -- The attributes `ref` and `uri` on a `\link` are mutually exclusive -- `\date`, `\time` and `\datetime` cannot contain other text body items except for plain text, string or verbatim content. - -## Attribute Overview - -| Attribute | Type | Required | Allowed Values | Description | -| --------- | --------------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Version | Yes | `2.0` | Describes the version of this HyperDoc document. | -| `lang` | Language Tag | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | String | No | *Any* | Sets the title of the document or the table row. | -| `author` | String | No | *Any* | Sets the author of the document. | -| `date` | DateTime | No | A date-time value using the format specified below | Sets the authoring date of the document. | -| `id` | Reference | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | Integer | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | String | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | String | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | String | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | Integer | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. 
| -| `colspan` | Integer | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | Reference | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | URI | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | Enum | No | *See element documentation* | Defines how the date/time value shall be displayed. | -| `tz` | Timezone Offset | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | - -NOTE: All attribute values allow leading and trailing whitespace, but it's heavily discouraged and should yield a non-fatal diagnostic or hint in implementations. - -## Attribute Types - -| Type | Example | Syntax | Notes | -| ----------------- | ----------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | -| `Date` | `2025-12-31T13:37:42` | *See below* | A date value as specified below. | -| `Enum` | `auto` | `\w+` | | -| `Integer` | `10` | `\d+` | Leading zeroes are allowed, but discouraged. | -| `Language Tag` | `de-DE` | *See [RFC 5646](https://datatracker.ietf.org/doc/html/rfc5646)* | | -| `Reference` | `attribute-types` | *No control characters or whitespace* | | -| `String` | `This image shows a cat and a dog.` | *Any Value* | Any textual value. | -| `Timezone Offset` | `+13:30` | `Z\|[+-]{00..23}:{00..59}` | Expresses the UTC timezone with `Z` or a relative offset in hours + minutes | -| `URI` | `www://example.com` | *See [RFC 3987](https://datatracker.ietf.org/doc/html/rfc3987) | The type actually allows IRIs (unicode-enabled URIs), but is called URI to use the core common term. | -| `Version` | `2.0` | `\d+\.\d+` | Has no semantic meaning yet, and is forced to be `2.0`. 
All other values are reserved for future use. | - -## Semantic Structure - -All elements have these attributes: - -| Attribute | Function | -| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `lang` | Marks the (human) language of the contents of that element. This must be an [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag). | - -## Top-Level / Block Elements - -All top-level elements have these attributes: - -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------- | -| `id` | Marks a target for a `\link(ref="...")`. Must be unique throughout the document. | - -### Headings: `h1`, `h2`, `h3` - -**Allowed Items:** Inline Text - -These elements are all rendered as headings of different levels. - -- `h1` is the top-level heading. -- `h2` is the level below `h1`. -- `h3` is the level below `h2`. - -### Paragraphs: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - -**Allowed Items:** Inline Text - -These elements are all rendered as paragraphs. - -The type of the paragraph includes a semantic hint: - -- `p`: A normal paragraph. -- `note`: A paragraph that informs the reader. This is typically rendered with a blue/white color hint. The associated icon is a white i in a blue box/circle. -- `warning`: A paragraph that warns the reader. This is typically rendered with a yellow/black color hint. The associated icon is a yellow triangle with a black exclamation mark. -- `danger`: A paragraph that warns the reader of danger. This is typically rendered with a red/white color hint. The associated icon is a red octagon with a white exclamation mark. -- `tip`: A paragraph that gives the reader a tip. The associated icon is a lightbulb. -- `quote`: A paragraph that quotes a foreign source. This is typically rendered with a small indentation and a distinct font.
-- `spoiler`: A paragraph that contains information about things the reader might not want to know. This is typically visually hidden/blurred so it's unreadable until a reader action is performed. - -### Lists: `ul`, `ol` - -**Allowed Items:** `li` - -- `ul` is an unordered list rendered with typically either dashes or dots as list enumerators. -- `ol` is an ordered list rendered with typically either roman or arabic numerals as list enumerators. - -#### Ordered List `ol` - -| Attribute | Function | -| --------- | --------------------------------------------------------------------------------------------------------------------------- | -| `first` | An integer string that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | - -### Figures: `img` - -**Allowed Items:** Inline Text - -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | -| `alt` | A textual description of the image contents for vision-impaired users. Similar to the [HTML alt tag](https://en.wikipedia.org/wiki/Alt_attribute). | -| `path` | A path relative to the current file that points to an image file that should be shown. | - -This element shows a full-width image or figure. Its contents are the figure description. - -If the contents are empty, the figure may be rendered in a simpler form. - -### Preformatted: `pre` - -**Allowed Items:** Inline Text - -| Attribute | Function | -| --------- | ------------------------------------------------------------------------------------------------------- | -| `syntax` | If present, hints a syntax highlighter that this preformatted block contains programming language code. | - -In contrast to all other block types, a `pre` block retains whitespace and line-break information and lays out the text as-is. - -It does not allow automatic line break insertion or word-wrapping.
- -If a pre contains inline elements, these will still be parsed and apply their styles to the text spans. - -### Table Of Contents: `toc` - -**Allowed Items:** *none* - -| Attribute | Function | -| --------- | ------------------------------------------------------------------------------ | -| `depth` | String `1`, `2` or `3`. Defines how many levels of headings shall be included. | - -Renders a table of contents for the current document. - -This element allows no child items. - -## Lists - -### List Items `li` - -**Allowed Items:** Block Elements *or* String Content. - -These elements wrap a sequence of blocks that will be rendered for this list item. - -It also allows a string to be used as its content directly; this will be equivalent to having a nested paragraph with that string's content: - -```hdoc -ul { -    li { p { This is a normal item. } } -    li "This is a normal item." -} -``` - -will have two identical list items. - -### Tables: `table` - -**Allowed Items:** `columns`, `row`, `group` - -Tables are made up of an optional header row (`columns`) followed by a sequence of `row` and `group` elements. - -- `columns` defines the header labels and the column count. -- `row` defines a data row. -- `group` provides a section heading that applies to subsequent rows until the next group or the end of the table. - -All `row` and `columns` elements must resolve to the same number of columns after applying `colspan`. -If a `row` uses the `title` attribute or a `group` is present, renderers must reserve a leading title column. -In that case, the header row should have an empty leading cell before the column headers. - -## Table Elements - -### Column Headers: `columns` - -**Allowed Items:** `td` - -This element contains the header cells for each column. - -### Rows: `row` - -**Allowed Items:** `td` - -| Attribute | Function | -| --------- | ---------------------------------------------------------------------------- | -| `title` | A title caption for this row.
If present, will be shown left of all columns. | - -### Row Groups: `group` - -**Allowed Items:** Inline Text - -A *row group* is a row that contains a single heading-style cell that labels the rows below. - -### Cells: `td` - -**Allowed Items:** Block Elements *or* String Content. - -| Attribute | Function | -| --------- | --------------------------------------------------------- | -| `colspan` | Integer string defining how many columns this cell spans. | - -This element contains the contents of a table cell. - -Like `li`, a `td` can either contain a single string or a nested block sequence. - -## Inline Text - -These elements are all allowed inside a paragraph-like content and can typically be nested. - -*Inline Text* can either be a string literal, a literal block or a list. - -If the text is a list, it allows the use of inline elements like `\em` or `\mono`. - -### Plain Text - -This is normal plain text and has no special meaning. - -### Emphasis: `em` - -**Nesting:** Yes - -Formats the text as emphasised. This is typically bold or italic rendering. - -### Monospaced: `mono` - -**Nesting:** Yes - -| Attribute | Function | -| --------- | ----------------------------------------------------------------------------------------- | -| `syntax` | If present, hints a syntax highlighter that this span contains programming language code. | - -Formats the text in a monospaced font. This is useful for code-like structures. - -### Strike-through: `strike` - -**Nesting:** Yes - -Renders the text with a horizontal line through the text, striking it out. - -### Sub/Superscript: `sub`, `sup` - -**Nesting:** Yes - -Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to allow sub- or superscript rendering. 
- -### Linking: `link` - -**Nesting:** No - -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------------------------------- | -| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `uri`. | -| `uri` | Points the link to the resource inside the `uri`. Mutually exclusive to `ref`. | - -Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. - -### Localized Date/Time: `date`, `time`, `datetime` - -**Nesting:** No - -| Element | Attribute | Function | -| ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | -| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` (raw ISO 8601). | -| `time` | `fmt` | `short`, `long`, `rough`, `relative`, `iso` (raw ISO 8601). | -| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | - -Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. - -## Date/Time Formatting - -All date/time values MUST use the formats defined in this section. This is a conservative, interoperable intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), so values that conform here are valid under both specifications. Digits are ASCII decimal unless stated otherwise. - -### Date Format - -Date strings MUST follow `YYYY-MM-DD`. - -- `YYYY` is a year with one or more digits. -- `MM` is a two-digit month in the range `01` to `12`. -- `DD` is a two-digit day in the range `01` to `31`. -- The `-` separators are mandatory. - -Examples: `2025-12-25`, `1-01-01`. - -### Time Format - -Time strings MUST follow `hh:mm:ss` with a required time zone. 
- -- `hh`, `mm`, `ss` are two-digit hour, minute, second fields. -- Hour MUST be in `00` to `23`, minute and second MUST be in `00` to `59`. -- An optional fractional seconds component MAY follow the seconds field as `.` plus - 1, 2, 3, 6, or 9 digits. -- The fractional separator MUST be `.`. Comma is not allowed. -- A time zone is required when no `tz` attribute is present on the header node and - MUST be either `Z` (UTC) or a numeric offset in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. -- Offset hours MUST be in `00` to `23`, offset minutes MUST be in `00` to `59`. - -Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`, `22:30:46` (only with `tz` attribute). - -### Date/Time Format - -Date/time strings MUST combine a date and time with a literal `T`. - -- Format: `YYYY-MM-DD` + `T` + `hh:mm:ss` (with optional fraction and required zone). - -Examples: `2025-12-25T22:31:50.13+01:00`, `2025-12-25T21:31:43Z`. From 59afce3bfd2c1b66f76fe37a80d34f83e2f5252d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:34:59 +0100 Subject: [PATCH 071/116] Splits Lists chapter into two. --- docs/TODO.md | 7 ------- docs/specification-proper-draft.md | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index f6e284e..ae7b9a7 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -4,10 +4,3 @@ - Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. - - -- Special-style blocks become block containers - - The “special paragraph” family (e.g. note, info, warning, danger, tip, spoiler, quote, …) are block containers. - - Their { ... } list body is always Block-list mode (i.e., they contain blocks like p, ul, ol, pre, etc.). - - They do not accept inline-list bodies directly. 
Inline markup requires an explicit paragraph: - note { p { text with \link(...) { inline } nodes } } diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 2a0f168..7c8fe71 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -601,18 +601,20 @@ Only an empty body (`;`) is not "inline text". - **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§ 8.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.4 Lists: `ul`, `ol` - -> TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" +#### 8.3.4 Unordered List: `ul` - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) -`ol` additional attribute: +#### 8.3.5 Ordered List: `ol` -- `first` (optional Integer ≥ 0; default 1): number of the first list item +- **Body:** block-list containing `li` (at least one) +- **Attributes:** + - `lang` (optional) + - `id` (optional; top-level only) + - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.3.5 Figure: `img` +#### 8.3.6 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -621,14 +623,12 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 8.3.6 Preformatted: `pre` - -> TODO: Body is always just "inline text", as verbatim bodies are also always inline text. 
+#### 8.3.7 Preformatted: `pre` - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.3.7 Tables: `table` +#### 8.3.8 Tables: `table` - **Body:** block-list containing: - optional `columns`, then From a6681b5b72394d7d1890d75a08f416c6a27b1a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:42:16 +0100 Subject: [PATCH 072/116] Clarifies table semantics. --- docs/specification-proper-draft.md | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 7c8fe71..37e7a9b 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -637,21 +637,11 @@ Only an empty body (`;`) is not "inline text". Table layout rules: -> TODO: `group` is not a "row with implicit title and no cells", but basically -> `group { }` is equivalent to `columns { td(colspan="") { } }`, -> so a regular row with a single cell spanning all columns. -> `group` never implies the existence of the "leading title column" - -> TODO: The `row(title="…")` does never affect the effective column count. -> It implies an additional untitled first column, which is blank in `columns` and `group` rows. -> The `title` row is designed to form matrices with an empty top-left field. - -- `columns` defines header labels and the column count. -- Each `row` defines a data row. -- Each `group` acts as a section heading for subsequent rows. -- After applying `td.colspan`, all `row` and `columns` entries **MUST** resolve to the same effective column count. -- If any `row` has a `title` attribute, renderers **MUST** reserve a leading title column. - - The leading column’s header cell is implicit (empty/invisible) and **MUST NOT** be authored inside `columns`. +- **Column Count:** The number of columns in a table is determined by the `columns` element. 
It is the sum of the `colspan` values of the `td` cells within the `columns` row. If `columns` is absent, the column count is determined by the first `row` element in the same way. All `columns` and `row` elements in a table **MUST** resolve to the same effective column count. + +- **Row Headers (`row(title)`):** A `row` element may have a `title` attribute, which creates a *row header*. This header is rendered as an implicit, additional first column for that row. This "row header column" does **not** contribute to the table's main column count. If any `row` in the table has a `title`, renderers **MUST** reserve space for a leading row header column throughout the table. This leading column will be blank for `columns`, `group`, and any `row` without a `title`. + +- **Group Headers (`group`):** A `group` element acts as a heading that spans all columns of the table. Semantically, `group { ... }` is equivalent to a `row` containing a single `td` with a `colspan` attribute equal to the table's column count. A `group` does not have a `title` and does not render a cell in the row header column. ### 8.4 Structural Elements @@ -665,21 +655,27 @@ Table layout rules: #### 8.4.2 `columns` (table header row) +- **Role:** Defines the labels for the columns of a table. The number of cells in this element (taking `colspan` into account) defines the table's column count. - **Body:** block-list containing `td` (at least one) - **Attributes:** `lang` (optional) #### 8.4.3 `row` (table data row) +- **Role:** Defines a row of data in a table. - **Body:** block-list containing `td` (at least one) -- **Attributes:** `title` (optional string), `lang` (optional) +- **Attributes:** + - `title` (optional string): If present, creates a header cell for the row in an implicit leading column. + - `lang` (optional) #### 8.4.4 `group` (table row group) +- **Role:** A heading row that spans all table columns. 
- **Body:** inline text - **Attributes:** `lang` (optional) #### 8.4.5 `td` (table cell) +- **Role:** A single cell within a table row. - **Body:** either - a block-list of block elements, or - a single string body, or From 1ab645820d671f85a9a736445656ef90f4a0a6d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 13:09:33 +0100 Subject: [PATCH 073/116] Renames docs/specification-proper-draft.md -> docs/specification.md --- docs/TODO.md | 3 +++ docs/{specification-proper-draft.md => specification.md} | 0 2 files changed, 3 insertions(+) rename docs/{specification-proper-draft.md => specification.md} (100%) diff --git a/docs/TODO.md b/docs/TODO.md index ae7b9a7..c2aa0ee 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -4,3 +4,6 @@ - Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. + + +> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328) \ No newline at end of file diff --git a/docs/specification-proper-draft.md b/docs/specification.md similarity index 100% rename from docs/specification-proper-draft.md rename to docs/specification.md From 061a7f9faa1744d5bb6a6755f6f1396bfb69d85f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 14:06:52 +0100 Subject: [PATCH 074/116] Refactory escapes semantics chapter 6 and splits it into 6 and 7 --- docs/TODO.md | 38 +++++- docs/specification.md | 293 ++++++++++++++++++++---------------------- 2 files changed, 176 insertions(+), 155 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index c2aa0ee..986c7e0 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -4,6 +4,42 @@ - Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. 
+- Clarify that page layout is static and won't change except for context resize. +- \abbrev and \term might be good ideas. +> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328) -> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328) \ No newline at end of file + + +> §5.5 - String Literal Control Character Inconsistency + +§5.5 forbids "any Unicode control characters" in string literals +§6.3 allows \n (LF) and \r (CR) escape sequences +Problem: These decode to control characters (Cc), contradicting §6.2 which says "resolved string-literal values" must not contain control characters except line terminators. Need explicit carve-out. + +> Problem: How does this interact with inline \time and \datetime elements? Do they inherit it? §9.2.2 says "If hdoc(tz="...") is present, a time value MAY omit the zone," but doesn't specify how the default is applied during rendering. + +> Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. + +> Problem: What should synthesized text be for valid non-heading targets like table, img, pre? Spec says headings get but doesn't define fallback for figures ("Figure 3"), tables ("Table 2"), etc. + +States "A renderer SHALL render a regular footnote marker as \sup{\link{\d+}}" +Problem: This seems like implementation guidance, not semantic requirement. Different renderers (HTML, PDF, terminal) may render markers differently. Should be in §10 (non-normative) or relaxed to "SHOULD". + + +> Recommendation 3: Add Formal Whitespace Processing Algorithm + + +Recommendation 5: Add Appendix with Formal Schema +Rationale: Current spec requires reading entire document to understand element relationships. Machine-readable schema would enable automatic validation and tooling. 
+Provide RelaxNG Compact syntax schema defining: + + +Rationale: Technical documentation needs to emphasize specific code lines (tutorials, diffs, explanations). +pre(syntax="python", highlight="2,4-6"): +| def factorial(n): +| if n == 0: # Base case +| return 1 +| else: +| return n * factorial(n-1) # Recursive case +also: enable line numbers diff --git a/docs/specification.md b/docs/specification.md index 37e7a9b..96693eb 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -23,56 +23,56 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "5.3 Maximal munch": FROZEN - "5.4 Inline-list brace balancing and backslash dispatch": DONE - "5.5 String literals (syntax)": DRAFT -- "6. Escape processing (semantic)": DRAFT - - "6.1 Scope": DRAFT - - "6.2 Control character policy (semantic)": DRAFT - - "6.3 Supported escapes in string literals": DRAFT - - "6.3.1 Unicode escape `\\u{H...}`": DRAFT - - "6.4 Invalid escapes": DRAFT - - "6.5 Inline escape-text tokens": DRAFT -- "7. Semantic document model": DRAFT - - "7.1 Document structure": DONE - - "7.2 Inline text construction and normalization": DONE - - "7.3 Attribute uniqueness": DONE - - "7.4 Attribute validity": DONE - - "7.5 Identifiers and References": DONE - - "7.6 Built-in element recognition": DONE -- "8. 
Elements and attributes" - - "8.1 Built-in elements and list mode" - - "8.1.1 Inline vs block": DONE - - "8.1.2 List-body mode per built-in element": DRAFT - - "8.2 Element catalog (normative)": DRAFT - - "8.2.1 `hdoc` (header)": DONE - - "8.2.2 Headings: `h1`, `h2`, `h3`": DRAFT - - "8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT - - "8.2.4 Lists: `ul`, `ol`": DRAFT - - "8.2.5 List item: `li`": DRAFT - - "8.2.6 Figure: `img`": DRAFT - - "8.2.7 Preformatted: `pre`": DRAFT - - "8.2.8 Table of contents: `toc`": DRAFT - - "8.2.9 Tables: `table`": DRAFT - - "8.2.10 `columns` (table header row)": DRAFT - - "8.2.11 `row` (table data row)": DRAFT - - "8.2.12 `group` (table row group)": DRAFT - - "8.2.13 `td` (table cell)": DRAFT - - "8.2.14 `title` (document title)": DRAFT - - "8.2.15 Footnote dump: `footnotes`": DRAFT - - "8.3 Inline elements" - - "8.3.1 `\\em`": DRAFT - - "8.3.2 `\\mono`": DRAFT - - "8.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT - - "8.3.4 `\link`": DRAFT - - "8.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT - - "8.3.6 `\ref`": DRAFT - - "8.3.7 `\footnote`": DRAFT -- "9. Attribute types and date/time formats": DRAFT - - "9.1 Common attribute types": DRAFT - - "9.2 Date / time lexical formats (normative)": DRAFT - - "9.2.1 Date": DRAFT - - "9.2.2 Time": DRAFT - - "9.2.3 Datetime": DRAFT - - "9.3 `fmt` values": DRAFT -- "10. Non-normative guidance for tooling": DRAFT +- "6. Inline Text Escape Processing (semantic)": DRAFT + - "6.1 Inline escape-text tokens": DRAFT +- "7. String Literal Escape Processing (semantic)": DRAFT + - "7.1 Control character policy (semantic)": DRAFT + - "7.2 Supported escapes in string literals": DRAFT + - "7.2.1 Unicode escape `\\u{H...}`": DRAFT + - "7.3 Invalid escapes": DRAFT +- "8. 
Semantic document model": DRAFT + - "8.1 Document structure": DONE + - "8.2 Inline text construction and normalization": DONE + - "8.3 Attribute uniqueness": DONE + - "8.4 Attribute validity": DONE + - "8.5 Identifiers and References": DONE + - "8.6 Built-in element recognition": DONE +- "9. Elements and attributes" + - "9.1 Built-in elements and list mode" + - "9.1.1 Inline vs block": DONE + - "9.1.2 List-body mode per built-in element": DRAFT + - "9.2 Element catalog (normative)": DRAFT + - "9.2.1 `hdoc` (header)": DONE + - "9.2.2 Headings: `h1`, `h2`, `h3`": DRAFT + - "9.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT + - "9.2.4 Lists: `ul`, `ol`": DRAFT + - "9.2.5 List item: `li`": DRAFT + - "9.2.6 Figure: `img`": DRAFT + - "9.2.7 Preformatted: `pre`": DRAFT + - "9.2.8 Table of contents: `toc`": DRAFT + - "9.2.9 Tables: `table`": DRAFT + - "9.2.10 `columns` (table header row)": DRAFT + - "9.2.11 `row` (table data row)": DRAFT + - "9.2.12 `group` (table row group)": DRAFT + - "9.2.13 `td` (table cell)": DRAFT + - "9.2.14 `title` (document title)": DRAFT + - "9.2.15 Footnote dump: `footnotes`": DRAFT + - "9.3 Inline elements" + - "9.3.1 `\\em`": DRAFT + - "9.3.2 `\\mono`": DRAFT + - "9.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "9.3.4 `\link`": DRAFT + - "9.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "9.3.6 `\ref`": DRAFT + - "9.3.7 `\footnote`": DRAFT +- "10. Attribute types and date/time formats": DRAFT + - "10.1 Common attribute types": DRAFT + - "10.2 Date / time lexical formats (normative)": DRAFT + - "10.2.1 Date": DRAFT + - "10.2.2 Time": DRAFT + - "10.2.3 Datetime": DRAFT + - "10.3 `fmt` values": DRAFT +- "11. Non-normative guidance for tooling": DRAFT - "Appendix A. Example": DRAFT - "Appendix B. Element Overview": MISSING - "Appendix C. Attribute Overview": MISSING @@ -98,7 +98,7 @@ A document can be: - **Syntactically valid**: conforms to the grammar and additional syntax rules. 
- **Semantically valid**: syntactically valid **and** conforms to semantic rules (elements, attributes, escape decoding, IDs/refs, etc.). -Unless explicitly stated, rules in chapters 3-5 are **syntax** rules; rules in chapters 6-9 are **semantic** rules. +Unless explicitly stated, rules in chapters 3-5 are **syntax** rules; rules in chapters 6-10 are **semantic** rules. ## 3. Document encoding (byte- and line-level) @@ -131,7 +131,7 @@ The canonical line ending emitted by tooling **SHOULD** be ``. - U+000D (CR) as part of a valid line ending. - Surrogate characters (Plane "unassigned", U+D800…U+DFFF) **MUST NOT** appear in the source text. A conforming parser **MUST** reject them. -A semantic validator **MAY** reject TABs in source text (see §6.2). +A semantic validator **MAY** reject TABs in source text (see §7.1). ### 3.4 Unicode text @@ -176,7 +176,7 @@ The grammar is intentionally ambiguous; a deterministic external rule selects a - Attribute lists are comma-separated `(key="value", ...)`. - Trailing commas are allowed. - Attribute values are **string literals** (see §5.5). -- Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §9.1). +- Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §10.1). ## 5. Grammar and additional syntax rules @@ -257,7 +257,7 @@ The mode is determined solely from the **node name token**: 2. Else, if the node name is a recognized built-in with a specified list mode, the parser **MUST** choose that mode. 3. Otherwise (unknown node name), the parser **MUST** choose **Inline-list mode**. -Built-in elements and their list modes are defined in §8.1. +Built-in elements and their list modes are defined in §9.1. ### 5.3 Maximal munch @@ -301,57 +301,58 @@ while(not eof()): abort() # eof before closing '"' ``` -Semantic escape decoding and validation is specified in §6. +Semantic escape decoding and validation is specified in §7. -## 6. Escape processing (semantic) +## 6. 
Inline Text Escape Processing (semantic) -> TODO: This chapter must be split into two chapters: -> -> - "Inline Text Escape Processing" -> - "String Literal Escape Processing" -> -> This includes renumbering all chapters and their references for the markdown spec. -> -> Chapter "6.1 Scope" will be removed then. +Escape decoding in inline-list bodies applies only to the three escape-text tokens produced by the parser (§5.4). + +### 6.1 Inline escape-text tokens + +In inline-list bodies, the parser emits three special text tokens: -### 6.1 Scope +- `\\` +- `\{` +- `\}` -Escape sequences are recognized only in: +During semantic inline-text construction (§8.2), implementations **MUST** decode these to literal `\`, `{`, `}`. -1. String literals (node bodies of the `"..."` form and attribute values). -2. Inline escape-text tokens emitted by the parser: `\\\\`, `\\{`, `\\}`. +Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens, because these spellings are not semantically equivalent in the inline parse tree. -No other syntax performs escape decoding. +## 7. String Literal Escape Processing (semantic) -### 6.2 Control character policy (semantic) +Escape sequences are recognized only in string literals (node bodies of the `"..."` form and attribute values). No other syntax performs string-literal escape decoding. -> TODO: The same rules as in §3 are applied, except that `TAB` is also additionally forbidden after escaping. +### 7.1 Control character policy (semantic) - A semantic validator **MAY** reject TAB (U+0009) in source text. -- Regardless of whether TAB is accepted in source text, TAB **MUST** be rejected in the **resolved value of any string literal** (quoted node bodies and attribute values). This includes TAB that appears literally between quotes and TAB produced via `\u{...}`. 
+- After decoding escapes in any string literal, the resolved value **MUST NOT** contain any Unicode control character (General Category `Cc`) except: + - LF (U+000A), and + - CR (U+000D) only when immediately followed by LF (U+000A) (i.e. as part of a CRLF sequence U+000D U+000A). +- TAB (U+0009) is always forbidden in resolved string-literal values, including when produced via `\u{...}`. -Apart from LF/CR line terminators and TAB (U+0009) in source text, a semantically valid document **MUST NOT** contain other Unicode control characters (General Category `Cc`). Resolved string-literal values are restricted by the rules above (TAB is always forbidden there). +String literals are syntactically forbidden from containing literal control characters (§5.5); therefore LF/CRLF can only appear in resolved values via `\n`, `\r`, or `\u{...}`. -### 6.3 Supported escapes in string literals +### 7.2 Supported escapes in string literals A semantic validator/decoder **MUST** accept exactly: -| Escape | Decodes to | -| ----------- | --------------------------- | -| `\\\\` | U+005C (`\\`) | -| `\\"` | U+0022 (`"`) | -| `\\n` | U+000A (LF) | -| `\\r` | U+000D (CR) | -| `\\u{H...}` | Unicode scalar value U+H... | +| Escape | Decodes to | +| ---------- | --------------------------- | +| `\\` | U+005C (`\`) | +| `\"` | U+0022 (`"`) | +| `\n` | U+000A (LF) | +| `\r` | U+000D (CR) | +| `\u{H...}` | Unicode scalar value U+H... 
| -#### 6.3.1 Unicode escape `\\u{H...}` +#### 7.2.1 Unicode escape `\u{H...}` - 1-6 hex digits - value in `0x0..0x10FFFF` - not in `0xD800..0xDFFF` (surrogates) -- must not decode to a forbidden control character (§6.2) +- must not decode to a forbidden control character (§7.1) -### 6.4 Invalid escapes +### 7.3 Invalid escapes A semantic validator/decoder **MUST** reject a string literal that contains: @@ -361,25 +362,9 @@ A semantic validator/decoder **MUST** reject a string literal that contains: - out-of-range or surrogate code points - forbidden control characters produced by `\u{...}` -### 6.5 Inline escape-text tokens - -> TODO: Move to chapter "Inline Text Escape Processing" - -In inline-list bodies, the parser emits three special text tokens: - -- `\\` -- `\{` -- `\}` - -During semantic text construction, implementations **MUST** decode these to literal `\`, `{`, `}`. - -> TODO: The following sentence is unclear. The intent is: "When parsing, tooling should not perform ad-hoc conversion of escape sequences, so the output can be rendered again as-is. The escape sequences must always be display their escaped variant." - -Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens. - -## 7. Semantic document model +## 8. Semantic document model -### 7.1 Document structure +### 8.1 Document structure - A semantically valid document **MUST** contain exactly one `hdoc` header node. - The `hdoc` node **MUST** be the first node in the document. @@ -406,7 +391,7 @@ Tooling that aims to preserve author intent **SHOULD** preserve whether braces w - If neither is present, tooling **MAY** emit a diagnostic hint that the document has no title. -### 7.2 Inline text construction and normalization +### 8.2 Inline text construction and normalization Many elements (e.g. `p`, headings, and inline elements) produce **inline text** for rendering. 
Inline text is constructed from one of: @@ -432,23 +417,23 @@ The renderer **MUST** see the post-normalization result. **String and verbatim bodies:** When a string body or verbatim body is converted into spans, it is treated as a single text source (no nested inline nodes) and then processed using the same rules above, including whitespace normalization for non-`pre` elements. -### 7.3 Attribute uniqueness +### 8.3 Attribute uniqueness - Within a node, attribute keys **MUST** be unique (case-sensitive). -### 7.4 Attribute validity +### 8.4 Attribute validity - Attributes **MUST** be allowed on the element they appear on. - Required attributes **MUST** be present. - Attributes not defined for an element **MUST** be rejected. -### 7.5 Identifiers and References +### 8.5 Identifiers and References HyperDoc defines two separate namespaces for identifiers to allow cross-referencing within a document: the **Block Namespace** and the **Footnote Namespace**. Identifiers in both namespaces are case-sensitive and share the same syntax: they **MUST** be a non-empty sequence of one or more characters, and **MUST NOT** contain any whitespace or Unicode control characters (General Category `Cc`). -#### 7.5.1 Block Namespace (`id` and `\ref(ref)`) +#### 8.5.1 Block Namespace (`id` and `\ref(ref)`) The Block Namespace is used for referencing top-level block elements like headings, figures, or tables. @@ -459,7 +444,7 @@ The Block Namespace is used for referencing top-level block elements like headin - **Reference**: An identifier in the Block Namespace is referenced using the `\ref` inline element. - `\ref(ref="...")` **MUST** reference an `id` that exists in the Block Namespace. -#### 7.5.2 Footnote Namespace (`\footnote(key)` and `\footnote(ref)`) +#### 8.5.2 Footnote Namespace (`\footnote(key)` and `\footnote(ref)`) The Footnote Namespace is used for defining and referencing reusable footnotes. 
@@ -470,21 +455,21 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. - **Reference**: An identifier in the Footnote Namespace is referenced using a `\footnote` element that has no body. - `\footnote(ref="...");` **MUST** reference a `key` that has been defined in the Footnote Namespace. -### 7.6 Built-in element recognition +### 8.6 Built-in element recognition -- Built-in element names are defined in §8. +- Built-in element names are defined in §9. - Unknown elements are syntactically valid (parseable), but semantically invalid. -## 8. Elements and attributes +## 9. Elements and attributes -### 8.1 Built-in elements and list mode +### 9.1 Built-in elements and list mode -#### 8.1.1 Inline vs block +#### 9.1.1 Inline vs block - Any element name starting with `\` is an **inline element**. - Any element name not starting with `\` is a **block element**. -#### 8.1.2 List-body mode per built-in element +#### 9.1.2 List-body mode per built-in element When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: @@ -496,7 +481,7 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - `li`, `td`, and admonition blocks contain either blocks or a single string/verbatim body; representing blocks implies block-list mode. - Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). -#### 8.1.3 Shorthand Body Promotion +#### 9.1.3 Shorthand Body Promotion If a block element's list body can contain general text block elements (such as `p`, `pre`, `ol`, `ul`, etc.), its body **MAY** instead be written as a shorthand string or verbatim literal. 
@@ -520,9 +505,9 @@ This promotion is a feature for convenience and applies only to the following el - `quote` - `spoiler` -### 8.2 Top-Level Block Elements +### 9.2 Top-Level Block Elements -#### 8.2.1 `hdoc` (header) +#### 9.2.1 `hdoc` (header) - **Role:** document header - **Body:** `;` (empty) @@ -531,10 +516,10 @@ This promotion is a feature for convenience and applies only to the following el - `lang` (optional) - `title` (optional) - `author` (optional) - - `date` (optional): datetime lexical format (§9.2.3) - - `tz` (optional): default timezone for time/datetime values (§9.2) + - `date` (optional): datetime lexical format (§10.2.3) + - `tz` (optional): default timezone for time/datetime values (§10.2) -#### 8.2.2 `title` (document title) +#### 9.2.2 `title` (document title) - **Role:** document-level display title - **Body:** inline text @@ -547,7 +532,7 @@ Semantic constraints: - If present, `title` **MUST** be the second node in the document (after `hdoc`). - `title` **MUST NOT** have an `id` attribute. -#### 8.2.3 Table of contents: `toc` +#### 9.2.3 Table of contents: `toc` - **Role:** Generates a table of contents. - **Body:** `;` (empty) @@ -556,7 +541,7 @@ Semantic constraints: Semantic constraints: - `toc` **MUST** be a top-level block element (a direct child of the document). -#### 8.2.4 Footnote dump: `footnotes` +#### 9.2.4 Footnote dump: `footnotes` - **Role:** collect and render accumulated footnotes - **Body:** `;` (empty) @@ -573,7 +558,7 @@ Semantics: - `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. - Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. -### 8.3 General Text Block Elements +### 9.3 General Text Block Elements In this chapter, an "inline text" body is one of: @@ -583,30 +568,30 @@ In this chapter, an "inline text" body is one of: Only an empty body (`;`) is not "inline text". 
-#### 8.3.1 Headings: `h1`, `h2`, `h3` +#### 9.3.1 Headings: `h1`, `h2`, `h3` - **Role:** block heading levels 1-3 - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.2 Paragraph: `p` +#### 9.3.2 Paragraph: `p` - **Role:** A standard paragraph of text. - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 9.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** A block that renders with a distinct style to draw the reader's attention. -- **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§ 8.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. +- **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§9.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.4 Unordered List: `ul` +#### 9.3.4 Unordered List: `ul` - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.5 Ordered List: `ol` +#### 9.3.5 Ordered List: `ol` - **Body:** block-list containing `li` (at least one) - **Attributes:** @@ -614,7 +599,7 @@ Only an empty body (`;`) is not "inline text". - `id` (optional; top-level only) - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.3.6 Figure: `img` +#### 9.3.6 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -623,12 +608,12 @@ Only an empty body (`;`) is not "inline text". 
- `lang` (optional) - `id` (optional; top-level only) -#### 8.3.7 Preformatted: `pre` +#### 9.3.7 Preformatted: `pre` - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.3.8 Tables: `table` +#### 9.3.8 Tables: `table` - **Body:** block-list containing: - optional `columns`, then @@ -643,9 +628,9 @@ Table layout rules: - **Group Headers (`group`):** A `group` element acts as a heading that spans all columns of the table. Semantically, `group { ... }` is equivalent to a `row` containing a single `td` with a `colspan` attribute equal to the table's column count. A `group` does not have a `title` and does not render a cell in the row header column. -### 8.4 Structural Elements +### 9.4 Structural Elements -#### 8.4.1 List item: `li` +#### 9.4.1 List item: `li` - **Body:** either - a block-list of block elements, or @@ -653,13 +638,13 @@ Table layout rules: - a verbatim body - **Attributes:** `lang` (optional) -#### 8.4.2 `columns` (table header row) +#### 9.4.2 `columns` (table header row) - **Role:** Defines the labels for the columns of a table. The number of cells in this element (taking `colspan` into account) defines the table's column count. - **Body:** block-list containing `td` (at least one) - **Attributes:** `lang` (optional) -#### 8.4.3 `row` (table data row) +#### 9.4.3 `row` (table data row) - **Role:** Defines a row of data in a table. - **Body:** block-list containing `td` (at least one) @@ -667,13 +652,13 @@ Table layout rules: - `title` (optional string): If present, creates a header cell for the row in an implicit leading column. - `lang` (optional) -#### 8.4.4 `group` (table row group) +#### 9.4.4 `group` (table row group) - **Role:** A heading row that spans all table columns. - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.4.5 `td` (table cell) +#### 9.4.5 `td` (table cell) - **Role:** A single cell within a table row. 
- **Body:** either @@ -682,29 +667,29 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) -### 8.5 Inline elements +### 9.5 Inline elements Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). -#### 8.5.1 `\\em` +#### 9.5.1 `\\em` - **Role:** emphasis - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.5.2 `\\mono` +#### 9.5.2 `\\mono` - **Role:** monospaced span - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional) -#### 8.5.3 `\\strike`, `\\sub`, `\\sup` +#### 9.5.3 `\\strike`, `\\sub`, `\\sup` - **Role:** strike-through / subscript / superscript - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.5.4 `\link` +#### 9.5.4 `\link` - **Role:** foreign hyperlink (external or non-validated target) - **Body:** inline text @@ -718,13 +703,13 @@ Notes: - Interior references use `\ref(ref="...")`. -#### 8.5.5 `\\date`, `\\time`, `\\datetime` +#### 9.5.5 `\\date`, `\\time`, `\\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) -#### 8.5.6 `\ref` +#### 9.5.6 `\ref` - **Role:** validated interior reference (to a top-level `id`) - **Body:** inline text (optional; may be empty) @@ -755,7 +740,7 @@ If the referenced target is not a heading: When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). -#### 8.5.7 `\footnote` +#### 9.5.7 `\footnote` - **Role:** footnote/citation marker and definition - **Body:** inline text (required for defining form; empty for reference form) @@ -784,7 +769,7 @@ Marker rendering (normative): - A renderer **SHALL** render a citation marker as `\sup{\link{[\d+]}}`. -## 9. Attribute types and date/time formats +## 10. 
Attribute types and date/time formats > TODO: Attributes should be documented well and not only be mentioned in the element catalog. > This chapter shall document attributes and their types, including detailled descriptions for both. @@ -793,7 +778,7 @@ Marker rendering (normative): > Non-fatal diagnostics **MUST** be emitted for that. > Leading and trailing whitespace must be stripped. -### 9.1 Common attribute types +### 10.1 Common attribute types - **Version:** must be `2.0`. - **Integer:** ASCII decimal digits; leading zeros allowed but discouraged. @@ -802,11 +787,11 @@ Marker rendering (normative): - **Timezone offset:** `Z` or `±HH:MM`. - **URI/IRI:** per RFC 3987. -### 9.2 Date / time lexical formats (normative) +### 10.2 Date / time lexical formats (normative) These formats are a conservative intersection of RFC 3339 and ISO 8601. -#### 9.2.1 Date +#### 10.2.1 Date `YYYY-MM-DD` @@ -814,7 +799,7 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601. - `MM`: `01`-`12` - `DD`: `01`-`31` -#### 9.2.2 Time +#### 10.2.2 Time `hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`. @@ -828,13 +813,13 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601. If `hdoc(tz="...")` is present, a time value **MAY** omit the zone. -#### 9.2.3 Datetime +#### 10.2.3 Datetime `YYYY-MM-DD` `T` `hh:mm:ss` (with optional fraction and required zone, unless `hdoc.tz` is present) If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is permitted specifically for `hdoc(date="...")` and for `\datetime` bodies. -### 9.3 `fmt` values +### 10.3 `fmt` values > TODO: `fmt` values need a proper description of what the expected output is. > The output is using the `lang` context of the \date, \time, \datetime element and @@ -858,7 +843,7 @@ Defaults when omitted: - `\datetime(fmt=...)`: default `short` - `\ref(fmt=...)`: default `full` -## 10. Non-normative guidance for tooling +## 11. 
Non-normative guidance for tooling
 
 - Formatters should normalize line endings to LF.
 - Provide diagnostics for discouraged patterns (leading/trailing whitespace in attribute values, leading zeros, mixed directionality, etc.).

From 62f0f7643228486e118463890fb7d384af2a7539 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?=
Date: Fri, 2 Jan 2026 14:19:13 +0100
Subject: [PATCH 075/116] Improves \time and \datetime tz handling.

---
 docs/TODO.md          | 18 ++++++------------
 docs/specification.md | 18 ++++++++++++++----
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/docs/TODO.md b/docs/TODO.md
index 986c7e0..68ad3ab 100644
--- a/docs/TODO.md
+++ b/docs/TODO.md
@@ -6,18 +6,12 @@
 - Document `lang` inheritance. No `lang` attribute means that parent language is used.
 - Clarify that page layout is static and won't change except for context resize.
 - \abbrev and \term might be good ideas.
-
-> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328)
-
-
-
-> §5.5 - String Literal Control Character Inconsistency
-
-§5.5 forbids "any Unicode control characters" in string literals
-§6.3 allows \n (LF) and \r (CR) escape sequences
-Problem: These decode to control characters (Cc), contradicting §6.2 which says "resolved string-literal values" must not contain control characters except line terminators. Need explicit carve-out.
-
-> Problem: How does this interact with inline \time and \datetime elements? Do they inherit it? §9.2.2 says "If hdoc(tz="...") is present, a time value MAY omit the zone," but doesn't specify how the default is applied during rendering.
+- Add more text to the introduction and underlying ideas of the format:
+  - Orthogonality: Semantic structure is not dependent on syntax. Verbatim lines are not preformatted, but `pre` blocks are.
+  - Strictness for ecosystem health: Prevent HTML uncontrolled growth disaster
+  - Allow tooling to work with semantically invalid documents
+  - Static layout: No surprises. Layout once, yield consistent rendering
+  - Accessibility: Everything is semantic, nothing is presentation-only.
 
 > Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns.
 
diff --git a/docs/specification.md b/docs/specification.md
index 96693eb..6437148 100644
--- a/docs/specification.md
+++ b/docs/specification.md
@@ -801,7 +801,7 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601.
 
 #### 10.2.2 Time
 
-`hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`.
+`hh:mm:ss` with an optional fraction and an optional zone.
 
 - `hh`: `00`-`23`
 - `mm`: `00`-`59`
@@ -811,13 +811,23 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601.
   - `Z`, or
   - `+hh:mm` / `-hh:mm` (two-digit hour/minute)
 
-If `hdoc(tz="...")` is present, a time value **MAY** omit the zone.
+Normative rules:
+
+- If `hdoc(tz="...")` is present, a time value **MAY** omit the zone; if omitted, the effective zone **MUST** be `hdoc.tz`.
+- If `hdoc(tz="...")` is not present, a time value **MUST** specify a zone.
+- If a time value specifies a zone, that zone **MUST** be used regardless of `hdoc.tz`.
 
 #### 10.2.3 Datetime
 
-`YYYY-MM-DD` `T` `hh:mm:ss` (with optional fraction and required zone, unless `hdoc.tz` is present)
+`YYYY-MM-DD` `T` `hh:mm:ss` with an optional fraction and an optional zone.
+
+The time component (including fraction and zone syntax) uses the same rules as §10.2.2.
+
+Normative rules:
 
-If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is permitted specifically for `hdoc(date="...")` and for `\datetime` bodies.
+- If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone; if omitted, the effective zone **MUST** be `hdoc.tz`.
+- If `hdoc(tz="...")` is not present, a datetime value **MUST** specify a zone. +- If a datetime value specifies a zone, that zone **MUST** be used regardless of `hdoc.tz`. ### 10.3 `fmt` values From b853dfb16214ffc59a9a742c6511ed9d193e6ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 14:56:57 +0100 Subject: [PATCH 076/116] Implements automatic header number generation --- src/hyperdoc.zig | 69 ++++++++++++++++++++++++++++++++-------- src/render/dump.zig | 2 +- src/render/html5.zig | 23 ++++++++++++-- src/testsuite.zig | 10 +++--- test/accept/workset.hdoc | 18 +++++++++++ 5 files changed, 100 insertions(+), 22 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 61e37e6..423d258 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -26,7 +26,7 @@ pub const Document = struct { timezone: ?TimeZoneOffset, pub const TableOfContents = struct { - level: Block.HeadingLevel, + level: Block.Heading.Level, // TODO: Refactor to use `index` here as well. headings: []usize, children: []TableOfContents, }; @@ -52,12 +52,28 @@ pub const Block = union(enum) { table: Table, pub const Heading = struct { - level: HeadingLevel, + index: Index, lang: LanguageTag, content: []Span, - }; - pub const HeadingLevel = enum { h1, h2, h3 }; + pub const Level = enum(u2) { + pub const count: comptime_int = @typeInfo(@This()).@"enum".fields.len; + + h1 = 0, + h2 = 1, + h3 = 2, + }; + + /// Stores both heading level and the index number of that heading. 
+ /// h1 is §[0] + /// h2 is §[0].[1] + /// h3 is §[0].[1].[2] + pub const Index = union(Level) { + h1: [1]u16, + h2: [2]u16, + h3: [3]u16, + }; + }; pub const Paragraph = struct { kind: ParagraphKind, @@ -688,11 +704,11 @@ pub const SemanticAnalyzer = struct { }; const TocBuilder = struct { - level: Block.HeadingLevel, + level: Block.Heading.Level, headings: std.ArrayList(usize), children: std.ArrayList(*TocBuilder), - fn init(level: Block.HeadingLevel) @This() { + fn init(level: Block.Heading.Level) @This() { return .{ .level = level, .headings = .empty, @@ -712,6 +728,9 @@ pub const SemanticAnalyzer = struct { id_locations: std.ArrayList(?Parser.Location) = .empty, pending_refs: std.ArrayList(RefUse) = .empty, + current_heading_level: usize = 0, + heading_counters: [Block.Heading.Level.count]u16 = @splat(0), + fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { switch (node.type) { .hdoc => { @@ -881,12 +900,12 @@ pub const SemanticAnalyzer = struct { }); const heading: Block.Heading = .{ - .level = switch (node.type) { + .index = try sema.compute_next_heading(node, switch (node.type) { .h1 => .h1, .h2 => .h2, .h3 => .h3, else => unreachable, - }, + }), .lang = attrs.lang, .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), }; @@ -1026,7 +1045,7 @@ pub const SemanticAnalyzer = struct { depth: ?u8 = null, }); - const max_depth: comptime_int = @typeInfo(Block.HeadingLevel).@"enum".fields.len; + const max_depth = Block.Heading.Level.count; var depth = attrs.depth orelse max_depth; if (depth < 1 or depth > max_depth) { @@ -1925,7 +1944,7 @@ pub const SemanticAnalyzer = struct { else => continue, }; - const target_depth = heading_level_index(heading.level); + const target_depth = heading_level_index(heading.index); while (stack.items.len > target_depth) { _ = stack.pop(); @@ -1987,7 +2006,7 @@ pub const SemanticAnalyzer = struct { return node; } - fn heading_level_index(level: 
Block.HeadingLevel) usize { + fn heading_level_index(level: Block.Heading.Level) usize { return switch (level) { .h1 => 1, .h2 => 2, @@ -1995,7 +2014,7 @@ pub const SemanticAnalyzer = struct { }; } - fn next_heading_level(level: Block.HeadingLevel) Block.HeadingLevel { + fn next_heading_level(level: Block.Heading.Level) Block.Heading.Level { return switch (level) { .h1 => .h2, .h2 => .h3, @@ -2003,6 +2022,30 @@ pub const SemanticAnalyzer = struct { }; } + /// Computes the next index number for a heading of the given level: + fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { + const index = @intFromEnum(level); + + sema.heading_counters[index] += 1; + + if (index > sema.current_heading_level + 1) { + // TODO: Emit fatal diagnostic for invalid heading sequencing: "h3 after h1 is not legal" + } + sema.current_heading_level = index; + + // Reset all higher levels to 1: + for (sema.heading_counters[index + 1 ..]) |*val| { + val.* = 0; + } + _ = node; + + return switch (level) { + .h1 => .{ .h1 = sema.heading_counters[0..1].* }, + .h2 => .{ .h2 = sema.heading_counters[0..2].* }, + .h3 => .{ .h3 = sema.heading_counters[0..3].* }, + }; + } + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { try diag.add(code, sema.make_location(location.offset)); @@ -2940,7 +2983,7 @@ pub const Diagnostic = struct { pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; pub const ReferenceError = struct { ref: []const u8 }; - pub const AutomaticHeading = struct { level: Block.HeadingLevel }; + pub const AutomaticHeading = struct { level: Block.Heading.Level }; pub const Code = union(enum) { // errors: diff --git a/src/render/dump.zig b/src/render/dump.zig index e731a96..b617f0d 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -403,7 +403,7 @@ fn 
dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err switch (block) { .heading => |heading| { try writeTypeTag(writer, "heading"); - try dumpEnumField(writer, indent + indent_step, "level", heading.level); + try dumpEnumField(writer, indent + indent_step, "level", heading.index); // TODO: Also print the indices here try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", heading.content); }, diff --git a/src/render/html5.zig b/src/render/html5.zig index 1eb76bc..3bbc4e9 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -48,12 +48,29 @@ const RenderContext = struct { null; try writeIndent(ctx.writer, indent); - try writeStartTag(ctx.writer, headingTag(heading.level), .regular, .{ + try writeStartTag(ctx.writer, headingTag(heading.index), .regular, .{ .id = id_attr, .lang = lang_attr, }); + + // TODO: Make stylable: + if (true) { + var buffer: [32]u8 = undefined; + try ctx.renderSpan(.{ + .content = .{ + .text = switch (heading.index) { + .h1 => |level| std.fmt.bufPrint(&buffer, "§{} ", .{level[0]}) catch unreachable, + .h2 => |level| std.fmt.bufPrint(&buffer, "§{}.{} ", .{ level[0], level[1] }) catch unreachable, + .h3 => |level| std.fmt.bufPrint(&buffer, "§{}.{}.{} ", .{ level[0], level[1], level[2] }) catch unreachable, + }, + }, + .attribs = .{}, + .location = undefined, + }); + } + try ctx.renderSpans(heading.content); - try writeEndTag(ctx.writer, headingTag(heading.level)); + try writeEndTag(ctx.writer, headingTag(heading.index)); try ctx.writer.writeByte('\n'); } @@ -625,7 +642,7 @@ fn takeLang(lang: *?[]const u8) ?[]const u8 { return null; } -fn headingTag(level: hdoc.Block.HeadingLevel) []const u8 { +fn headingTag(level: hdoc.Block.Heading.Level) []const u8 { return switch (level) { .h1 => "h1", .h2 => "h2", diff --git a/src/testsuite.zig b/src/testsuite.zig index 5949fe3..682aaae 100644 --- a/src/testsuite.zig +++ 
b/src/testsuite.zig @@ -435,26 +435,26 @@ test "table of contents inserts automatic headings when skipping levels" { try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); const toc = doc.toc; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h1, toc.level); + try std.testing.expectEqual(.h1, toc.level); try std.testing.expectEqualSlices(usize, &.{ 0, 2 }, toc.headings); try std.testing.expectEqual(@as(usize, 2), toc.children.len); const auto_h1 = toc.children[0]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, auto_h1.level); + try std.testing.expectEqual(.h2, auto_h1.level); try std.testing.expectEqualSlices(usize, &.{ 0, 1 }, auto_h1.headings); try std.testing.expectEqual(@as(usize, 2), auto_h1.children.len); const auto_h2 = auto_h1.children[0]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, auto_h2.level); + try std.testing.expectEqual(.h3, auto_h2.level); try std.testing.expectEqualSlices(usize, &.{0}, auto_h2.headings); const h2_child = auto_h1.children[1]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, h2_child.level); + try std.testing.expectEqual(.h3, h2_child.level); try std.testing.expectEqual(@as(usize, 0), h2_child.headings.len); try std.testing.expectEqual(@as(usize, 0), h2_child.children.len); const trailing_h1_child = toc.children[1]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, trailing_h1_child.level); + try std.testing.expectEqual(.h2, trailing_h1_child.level); try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.headings.len); try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.children.len); } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 77cabaf..72d4f44 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,4 +1,22 @@ hdoc(version="2.0", lang="en"); +h1 "First" + +h2 "First.1" +h2 "First.2" +h2 "First.3" + p { We can mix \em{emphasis}, \strike{strike}, 
\mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } +h1 "Second" +h2 "Second.2" +h3 "Second.2.first" +h3 "Second.2.second" +h3 "Second.2.third" +h2 "Second.3" + +h1 "Third" +h2 "Third.1" +h3 "Third.1.first" +h3 "Third.1.second" +h3 "Third.1.third" From 89b290919162b771b286410f2d2d13695a503400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 15:46:40 +0100 Subject: [PATCH 077/116] Improves the specification around 'fmt' attribute. --- docs/TODO.md | 1 + docs/specification.md | 114 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 68ad3ab..48d62c0 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -12,6 +12,7 @@ - Allow tooling to work with semanticall yinvalid documents - Static layout: No surprises. Layout once, yield consistent rendering - Accessiblity: Everything is semantic, nothing is presentation-only. +- h3 after h1 is not legal > Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. diff --git a/docs/specification.md b/docs/specification.md index 6437148..b340351 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -831,27 +831,99 @@ Normative rules: ### 10.3 `fmt` values -> TODO: `fmt` values need a proper description of what the expected output is. -> The output is using the `lang` context of the \date, \time, \datetime element and -> we provide examples in german and english for each `fmt` option. 
- -> TODO: This chapter shall be split into: -> -> - `fmt` for `\date` -> - `fmt` for `\time` -> - `fmt` for `\datetime` - -- `\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` -- `\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` -- `\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` -- `\ref(fmt=...)`: `full`, `name`, `index` - -Defaults when omitted: - -- `\date(fmt=...)`: default `short` -- `\time(fmt=...)`: default `short` -- `\datetime(fmt=...)`: default `short` -- `\ref(fmt=...)`: default `full` +Some inline elements accept a `fmt` attribute that controls localized formatting of their value. + +The `fmt` value **MUST** be one of the values explicitly listed for the element; any other value **MUST** be rejected as semantically invalid. + +#### 10.3.1 Language context + +Formatting uses the element’s **language context**. + +The base language context is determined as follows: + +1. If the element has a `lang` attribute, that language tag **SHALL** be used. +2. Otherwise, if the document header has `hdoc(lang="...")`, that language tag **SHALL** be used. +3. Otherwise, there is no language context. + +Tooling **MAY** allow users to override the language context and/or localized formatting preferences (e.g. force 24-hour time, force a preferred date ordering). If such an override is active, it **SHALL** replace the base language context for the purpose of all formatting in this section. + +If there is no language context after applying user overrides, or if the implementation has no matching localized formatting data for the selected language context, then implementations **MUST** fall back to locale-independent formatting as follows: + +- For `\date`: + - `fmt="iso"` and `fmt="year"` proceed normally. + - `fmt="day"` **MUST** render the day-of-month as decimal digits (`DD`), without an ordinal suffix. + - `fmt="month"` **MUST** render the month as decimal digits (`MM`). 
+ - `fmt="weekday"` **MUST** render the ISO weekday number (`1`=Monday … `7`=Sunday). + - `fmt="short"`, `fmt="long"`, and `fmt="relative"` **MUST** behave as if `fmt="iso"` was specified. +- For `\time` and `\datetime`: + - if `fmt="iso"`, formatting proceeds normally, and + - otherwise, the implementation **MUST** behave as if `fmt="iso"` was specified. + +The examples below use `en-US` and `de-DE` language tags, but the exact output of localized formats (punctuation, capitalization, abbreviations, and choice of words) is implementation-defined. + +#### 10.3.2 Time zone context + +For `\time` and `\datetime`, formatting uses the value’s **effective zone**: + +- If the value explicitly specifies a zone, that zone **MUST** be the effective zone. +- Otherwise, the effective zone **MUST** be `hdoc.tz` (see §10.2.2 and §10.2.3). + +#### 10.3.3 `fmt` values for `\date` + +The body of `\date` **MUST** be a date in the lexical format of §10.2.1. + +Supported values: + +| Value | Meaning (normative) | Example output (`en-US`) | Example output (`de-DE`) | +| ----------------- | ---------------------------------------------------------------------------------------- | ------------------------ | ------------------------ | +| `iso` | Render the date in the lexical format of §10.2.1. | `2026-09-13` | `2026-09-13` | +| `short` (default) | Render the date in a numeric, locale-appropriate short form. | `9/13/2026` | `13.09.2026` | +| `long` | Render the date in a locale-appropriate long form (month name, full year). | `September 13, 2026` | `13. September 2026` | +| `relative` | Render a relative description of the date compared to “today”. | `in 3 days` | `in 3 Tagen` | +| `year` | Render only the year component. | `2026` | `2026` | +| `month` | Render only the month component in a locale-appropriate form (typically a month name). | `September` | `September` | +| `day` | Render only the day-of-month component in a locale-appropriate form (may be an ordinal). 
| `13th` | `13.` | +| `weekday` | Render the weekday name for that date. | `Saturday` | `Samstag` | + +The `relative` examples are non-normative and assume “today” is `2026-09-10` in the renderer’s date context. + +#### 10.3.4 `fmt` values for `\time` + +The body of `\time` **MUST** be a time in the lexical format of §10.2.2. + +Supported values: + +| Value | Meaning (normative) | Example output (`en-US`) | Example output (`de-DE`) | +| ----------------- | ------------------------------------------------------------------------------- | ------------------------ | ------------------------ | +| `iso` | Render the time in the lexical format of §10.2.2, including the effective zone. | `13:36:00+02:00` | `13:36:00+02:00` | +| `short` (default) | Render the time with minute precision in a locale-appropriate form. | `1:36 PM` | `13:36` | +| `long` | Render the time with second precision; include the fractional part if present. | `1:36:00 PM` | `13:36:00` | +| `rough` | Render a coarse day-period description (e.g. morning/afternoon/evening). | `afternoon` | `Nachmittag` | + +#### 10.3.5 `fmt` values for `\datetime` + +The body of `\datetime` **MUST** be a datetime in the lexical format of §10.2.3. The time component uses the same formatting rules as §10.3.4. + +Supported values: + +| Value | Meaning (normative) | Example output (`en-US`) | Example output (`de-DE`) | +| ----------------- | ----------------------------------------------------------------------------------- | -------------------------------- | ------------------------------ | +| `iso` | Render the datetime in the lexical format of §10.2.3, including the effective zone. | `2026-09-13T13:36:00+02:00` | `2026-09-13T13:36:00+02:00` | +| `short` (default) | Render date and time with minute precision in a locale-appropriate short form. | `9/13/2026, 1:36 PM` | `13.09.2026, 13:36` | +| `long` | Render date and time with second precision; include the fractional part if present. 
| `September 13, 2026, 1:36:00 PM` | `13. September 2026, 13:36:00` | +| `relative` | Render a relative description compared to the current datetime. | `20 minutes ago` | `vor 20 Minuten` | + +The `relative` examples are non-normative and assume the effective zone is `+02:00`, the value is `2026-09-13T13:36:00+02:00`, and “now” is `2026-09-13T13:56:00+02:00`. + +#### 10.3.6 `fmt` values for `\ref` + +The `fmt` attribute on `\ref` controls how synthesized link text is produced when the `\ref` body is empty (§9.5.6). It does not affect `\ref` nodes with a non-empty body. + +| Value | Meaning (normative) | Example | +| ---------------- | -------------------------- | ----------------------------- | +| `full` (default) | Render `" "`. | `§10.3.6 fmt values for \ref` | +| `name` | Render `""`. | `fmt values for \ref` | +| `index` | Render `""`. | `§10.3.6` | ## 11. Non-normative guidance for tooling From 29847449d54ceadb50b2db9793817424ef8e174f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 15:51:03 +0100 Subject: [PATCH 078/116] Removes rendering requirement for \footnote, removes resolved TODOs --- docs/TODO.md | 7 +------ docs/specification.md | 8 +++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 48d62c0..7e92bc5 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -13,15 +13,10 @@ - Static layout: No surprises. Layout once, yield consistent rendering - Accessiblity: Everything is semantic, nothing is presentation-only. - h3 after h1 is not legal +- Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3." > Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. -> Problem: What should synthesized text be for valid non-heading targets like table, img, pre? 
Spec says headings get but doesn't define fallback for figures ("Figure 3"), tables ("Table 2"), etc. - -States "A renderer SHALL render a regular footnote marker as \sup{\link{\d+}}" -Problem: This seems like implementation guidance, not semantic requirement. Different renderers (HTML, PDF, terminal) may render markers differently. Should be in §10 (non-normative) or relaxed to "SHOULD". - - > Recommendation 3: Add Formal Whitespace Processing Algorithm diff --git a/docs/specification.md b/docs/specification.md index b340351..3f3ca35 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -735,6 +735,9 @@ Target-derived values: If the referenced target is not a heading: +> TODO: Also add semantics for `ref(ref);` with `img` (Figure X.) and `table` (Table X.). +> This requires the introduction of counters for these tags, and allow auto-numbering. + - `\ref(ref="X");` (implicit body) is semantically invalid and **MUST** be rejected. - `\ref(ref="X"){...}` remains valid. @@ -763,11 +766,6 @@ Semantics: - Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. - A renderer **MAY** hyperlink markers and dumped entries back-and-forth. -Marker rendering (normative): - -- A renderer **SHALL** render a regular footnote marker as `\sup{\link{\d+}}`. -- A renderer **SHALL** render a citation marker as `\sup{\link{[\d+]}}`. - ## 10. Attribute types and date/time formats From f7e84ff786f403c03aa047590eda7d793e95429c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 16:07:54 +0100 Subject: [PATCH 079/116] Adds note about inheritance of lang attribute --- docs/TODO.md | 1 - docs/specification.md | 21 ++++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 7e92bc5..84159ff 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -3,7 +3,6 @@ - Assign semantics to node types, paragraph kinds, ... 
- Specify "syntax" proper - Add links to RFCs where possible -- Document `lang` inheritance. No `lang` attribute means that parent language is used. - Clarify that page layout is static and won't change except for context resize. - \abbrev and \term might be good ideas. - Add more text to the introduction and underlying ideas of the format: diff --git a/docs/specification.md b/docs/specification.md index 3f3ca35..95bbe1b 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -837,11 +837,7 @@ The `fmt` value **MUST** be one of the values explicitly listed for the element; Formatting uses the element’s **language context**. -The base language context is determined as follows: - -1. If the element has a `lang` attribute, that language tag **SHALL** be used. -2. Otherwise, if the document header has `hdoc(lang="...")`, that language tag **SHALL** be used. -3. Otherwise, there is no language context. +The base language context is the element’s **effective language tag** (§10.4.1). This means `lang` is inherited from parent elements, and top-level elements inherit their language tag from `hdoc(lang="...")`. Tooling **MAY** allow users to override the language context and/or localized formatting preferences (e.g. force 24-hour time, force a preferred date ordering). If such an override is active, it **SHALL** replace the base language context for the purpose of all formatting in this section. @@ -923,6 +919,21 @@ The `fmt` attribute on `\ref` controls how synthesized link text is produced whe | `name` | Render `""`. | `fmt values for \ref` | | `index` | Render `""`. | `§10.3.6` | +### 10.4 `lang` attribute + +The `lang` attribute assigns a BCP 47 language tag (§10.1) to an element. + +#### 10.4.1 Effective language tag + +Each element has an **effective language tag**, computed as follows: + +1. If the element has a `lang` attribute, its value **SHALL** be the effective language tag. +2. 
Otherwise, if the element has a parent element, the effective language tag **SHALL** be inherited from the parent element. +3. Otherwise (for top-level elements), if the document header has `hdoc(lang="...")`, that language tag **SHALL** be the effective language tag. +4. Otherwise, the element has no effective language tag. + +This inheritance allows documents to mix language contexts across nested elements (e.g. an English document that contains a German `quote` with an Italian paragraph inside), and keeps localized date/time values in their local context. + ## 11. Non-normative guidance for tooling - Formatters should normalize line endings to LF. From 9467f1f20f7e58eb3c7f81c341ccdf51c75376c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 17:02:08 +0100 Subject: [PATCH 080/116] Clarifies tables more --- docs/TODO.md | 3 --- docs/specification.md | 23 ++++++++++++++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 84159ff..4c8a047 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -3,7 +3,6 @@ - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible -- Clarify that page layout is static and won't change except for context resize. - \abbrev and \term might be good ideas. - Add more text to the introduction and underlying ideas of the format: - Orthogonality: Semantic structure is not dependend on syntax. Verbatim lines are not preformatted, but `pre` blocks are. @@ -14,8 +13,6 @@ - h3 after h1 is not legal - Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3." -> Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. 
- > Recommendation 3: Add Formal Whitespace Processing Algorithm diff --git a/docs/specification.md b/docs/specification.md index 95bbe1b..2d67edf 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -622,11 +622,24 @@ Only an empty body (`;`) is not "inline text". Table layout rules: -- **Column Count:** The number of columns in a table is determined by the `columns` element. It is the sum of the `colspan` values of the `td` cells within the `columns` row. If `columns` is absent, the column count is determined by the first `row` element in the same way. All `columns` and `row` elements in a table **MUST** resolve to the same effective column count. - -- **Row Headers (`row(title)`):** A `row` element may have a `title` attribute, which creates a *row header*. This header is rendered as an implicit, additional first column for that row. This "row header column" does **not** contribute to the table's main column count. If any `row` in the table has a `title`, renderers **MUST** reserve space for a leading row header column throughout the table. This leading column will be blank for `columns`, `group`, and any `row` without a `title`. - -- **Group Headers (`group`):** A `group` element acts as a heading that spans all columns of the table. Semantically, `group { ... }` is equivalent to a `row` containing a single `td` with a `colspan` attribute equal to the table's column count. A `group` does not have a `title` and does not render a cell in the row header column. +- **Column Count:** + - The **effective column count** in a table is determined by the `columns` element and is the sum of the `colspan` values of the `td` cells within the `columns` row. + - If `columns` is absent, the column count is determined by the first `row` element in the same way. + - A table with an effective column count of `0` **MUST** be rejected as semantically invalid. + - All `columns` and `row` elements in a table **MUST** resolve to the same effective column count. 
+- **Row Headers (`row(title)`):** + - A `row` element may have a `title` attribute, which creates a *row header*. + - If any *row header* is created, an *implicit*, additional first "row header column" is created. + - This header is rendered in that column. + - This "row header column" does **not** contribute to the table's main column count. + - If any `row` in the table has a `title`, renderers **MUST** reserve space for a leading row header column throughout the table. + - This leading column is blank for `columns`, `group`, and any `row` without a `title`. +- **Group Headers (`group`):** + - A `group` element starts a new group of rows with a shared semantic topic. + - The `group` body contains the caption for the topic of the following rows, until the next `group` element appears (or until the end of the table). + - Rows before the first `group` have no defined topic. + - A `group` element acts as a heading that spans all columns of the table. + - A `group` does not have a `title` and does not render a cell in the row header column. ### 9.4 Structural Elements From ef62482958c78a7baadb58bb8a1899e0294944f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 19:36:25 +0100 Subject: [PATCH 081/116] Add tasks from specification review --- SPEC_TODO.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 4a92f37..0695989 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -3,3 +3,7 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 
【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 +- Enforce `hdoc` placement and body rules by rejecting headers that are not the first node or that carry any non-empty body, instead of merely warning on the first non-header block and accepting later headers. 【F:docs/specification.md†L369-L373】【F:src/hyperdoc.zig†L734-L788】 +- Treat admonition blocks (`note`, `warning`, `danger`, `tip`, `quote`, `spoiler`) as block-list containers with shorthand promotion for string/verbatim bodies rather than forcing them into a single inline paragraph payload. 【F:docs/specification.md†L585-L588】【F:src/hyperdoc.zig†L916-L935】 +- Enforce table column structure: allow at most one optional leading `columns` row, derive a non-zero effective column count even when `columns` is absent, and reject tables where no row or column establishes width. The current implementation accepts multiple `columns` nodes anywhere and never validates missing/zero column counts. 
【F:docs/specification.md†L618-L629】【F:src/hyperdoc.zig†L1076-L1147】 +- Restrict `toc` to top-level usage as required by the specification; the current translator permits `toc` blocks inside nested block lists. 【F:docs/specification.md†L535-L543】【F:src/hyperdoc.zig†L1041-L1073】【F:src/hyperdoc.zig†L1254-L1270】 From b5634df6a0dab9fb73f21de7dea0d8aec2ce8c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 21:36:35 +0100 Subject: [PATCH 082/116] Clarify top-level scope for chapter 9.2 elements --- docs/specification.md | 71 +++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 2d67edf..9af418e 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -507,6 +507,8 @@ This promotion is a feature for convenience and applies only to the following el ### 9.2 Top-Level Block Elements +The elements in this chapter **MUST** appear only as top-level block elements (direct children of the document). They **MUST NOT** appear inside nested structures. + #### 9.2.1 `hdoc` (header) - **Role:** document header @@ -532,7 +534,24 @@ Semantic constraints: - If present, `title` **MUST** be the second node in the document (after `hdoc`). - `title` **MUST NOT** have an `id` attribute. -#### 9.2.3 Table of contents: `toc` +#### 9.2.3 Headings: `h1`, `h2`, `h3` + +- **Role:** block heading levels 1-3 +- **Body:** inline text +- **Attributes:** `lang` (optional), `id` (optional) + +Heading structure and numbering: + +- `h1`, `h2`, and `h3` **MUST** appear only as top-level block elements. +- `h1` **MAY** appear anywhere in the document order. +- `h2` **MUST** be preceded by an `h1`, and that `h1` is the parent section for the `h2`. +- `h3` **MUST** be preceded by an `h2`, and there **MUST NOT** be any intervening `h1` between that `h2` and the `h3`; the most recent `h2` is the parent section for the `h3`. 
+- Heading indices are assigned as follows: + - Each `h1` receives a one-part index `[i1]` that starts at `1` and increments by `1` after assignment. + - Each `h2` receives a two-part index `[i1, i2]`; `i2` resets to `1` when a new `h1` is assigned and increments by `1` after assignment. + - Each `h3` receives a three-part index `[i1, i2, i3]`; `i3` resets to `1` when a new `h1` or `h2` is assigned and increments by `1` after assignment. + +#### 9.2.4 Table of contents: `toc` - **Role:** Generates a table of contents. - **Body:** `;` (empty) @@ -541,7 +560,7 @@ Semantic constraints: Semantic constraints: - `toc` **MUST** be a top-level block element (a direct child of the document). -#### 9.2.4 Footnote dump: `footnotes` +#### 9.2.5 Footnote dump: `footnotes` - **Role:** collect and render accumulated footnotes - **Body:** `;` (empty) @@ -568,30 +587,24 @@ In this chapter, an "inline text" body is one of: Only an empty body (`;`) is not "inline text". -#### 9.3.1 Headings: `h1`, `h2`, `h3` - -- **Role:** block heading levels 1-3 -- **Body:** inline text -- **Attributes:** `lang` (optional), `id` (optional; top-level only) - -#### 9.3.2 Paragraph: `p` +#### 9.3.1 Paragraph: `p` - **Role:** A standard paragraph of text. - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 9.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 9.3.2 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** A block that renders with a distinct style to draw the reader's attention. - **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§9.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. 
- **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 9.3.4 Unordered List: `ul` +#### 9.3.3 Unordered List: `ul` - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 9.3.5 Ordered List: `ol` +#### 9.3.4 Ordered List: `ol` - **Body:** block-list containing `li` (at least one) - **Attributes:** @@ -599,7 +612,7 @@ Only an empty body (`;`) is not "inline text". - `id` (optional; top-level only) - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 9.3.6 Figure: `img` +#### 9.3.5 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -608,12 +621,12 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 9.3.7 Preformatted: `pre` +#### 9.3.6 Preformatted: `pre` - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 9.3.8 Tables: `table` +#### 9.3.7 Tables: `table` - **Body:** block-list containing: - optional `columns`, then @@ -735,24 +748,16 @@ Semantics: - `\ref(ref="X")` **MUST** resolve to a top-level element with `id="X"`, otherwise it is semantically invalid. - If `\ref` has a non-empty body, the body **MUST** be used as the rendered link text. -- If `\ref` has an empty body (`;`), the renderer **MUST** synthesize link text from the referenced target and `fmt`: - - - `fmt="full"`: renders `" "` (default) - - `fmt="name"`: renders `""` - - `fmt="index"`: renders `""` - -Target-derived values: - -- For heading targets (`h1`, `h2`, `h3`), `` is the heading’s constructed plaintext inline text. -- For heading targets, `` is the heading’s hierarchical number within the document (e.g. `3.` / `3.2.` / `3.2.1.`). - -If the referenced target is not a heading: - -> TODO: Also add semantics for `ref(ref);` with `img` (Figure X.) and `table` (Table X.). 
-> This requires the introduction of counters for these tags, and allow auto-numbering.
-
-- `\ref(ref="X");` (implicit body) is semantically invalid and **MUST** be rejected.
-- `\ref(ref="X"){...}` remains valid.
+- If `\ref` has an empty body (`;`), the following rules apply:
+  - If the referenced target is a heading (`h1`, `h2`, `h3`), the renderer **MUST** synthesize link text from the target and `fmt`:
+    - `fmt="full"`: renders `"<index> <name>"` (default)
+    - `fmt="name"`: renders `"<name>"`
+    - `fmt="index"`: renders `"<index>"`
+  - `<name>` is the heading’s constructed plaintext inline text.
+  - `<index>` is the heading’s hierarchical number within the document (e.g. `3.` / `3.2.` / `3.2.1.`).
+  - > TODO: Also add semantics for `ref(ref);` with `img` (Figure X.) and `table` (Table X.).
+    > This requires the introduction of counters for these tags, and allow auto-numbering.
+  - In all other cases, `\ref(ref="X");` (implicit body) **MUST** be rejected with a diagnostic explaining that empty-body references are only supported for headings until this TODO is resolved.
From ff19d99e14a923c00abbd8be8837f29478953bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 21:55:03 +0100 Subject: [PATCH 083/116] Update chapter status for reorganized elements --- docs/specification.md | 50 +++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 9af418e..b977ea9 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -41,30 +41,34 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "9.1 Built-in elements and list mode" - "9.1.1 Inline vs block": DONE - "9.1.2 List-body mode per built-in element": DRAFT - - "9.2 Element catalog (normative)": DRAFT + - "9.2 Top-Level Block Elements": DRAFT - "9.2.1 `hdoc` (header)": DONE - - "9.2.2 Headings: `h1`, `h2`, `h3`": DRAFT - - "9.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT - - "9.2.4 Lists: `ul`, `ol`": DRAFT - - "9.2.5 List item: `li`": DRAFT - - "9.2.6 Figure: `img`": DRAFT - - "9.2.7 Preformatted: `pre`": DRAFT - - "9.2.8 Table of contents: `toc`": DRAFT - - "9.2.9 Tables: `table`": DRAFT - - "9.2.10 `columns` (table header row)": DRAFT - - "9.2.11 `row` (table data row)": DRAFT - - "9.2.12 `group` (table row group)": DRAFT - - "9.2.13 `td` (table cell)": DRAFT - - "9.2.14 `title` (document title)": DRAFT - - "9.2.15 Footnote dump: `footnotes`": DRAFT - - "9.3 Inline elements" - - "9.3.1 `\\em`": DRAFT - - "9.3.2 `\\mono`": DRAFT - - "9.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT - - "9.3.4 `\link`": DRAFT - - "9.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT - - "9.3.6 `\ref`": DRAFT - - "9.3.7 `\footnote`": DRAFT + - "9.2.2 `title` (document title)": DRAFT + - "9.2.3 Headings: `h1`, `h2`, `h3`": DRAFT + - "9.2.4 Table of contents: `toc`": DRAFT + - "9.2.5 Footnote dump: `footnotes`": DRAFT + - "9.3 General Text Block Elements": DRAFT + - "9.3.1 Paragraph: `p`": DRAFT + - 
"9.3.2 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT + - "9.3.3 Unordered List: `ul`": DRAFT + - "9.3.4 Ordered List: `ol`": DRAFT + - "9.3.5 Figure: `img`": DRAFT + - "9.3.6 Preformatted: `pre`": DRAFT + - "9.3.7 Tables: `table`": DRAFT + - "9.4 Structural Elements": DRAFT + - "9.4.1 List item: `li`": DRAFT + - "9.4.2 `columns` (table header row)": DRAFT + - "9.4.3 `row` (table data row)": DRAFT + - "9.4.4 `group` (table row group)": DRAFT + - "9.4.5 `td` (table cell)": DRAFT + - "9.5 Inline elements" + - "9.5.1 `\\em`": DRAFT + - "9.5.2 `\\mono`": DRAFT + - "9.5.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "9.5.4 `\link`": DRAFT + - "9.5.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "9.5.6 `\ref`": DRAFT + - "9.5.7 `\footnote`": DRAFT - "10. Attribute types and date/time formats": DRAFT - "10.1 Common attribute types": DRAFT - "10.2 Date / time lexical formats (normative)": DRAFT From abc4070833f1e102d5e8e06516058d273d8eb590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 22:26:12 +0100 Subject: [PATCH 084/116] Adds better introduction adding motivation, usage scenarios and design philosophy --- docs/TODO.md | 39 ++++++++++++++++++++++++++++++--------- docs/specification.md | 35 +++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 4c8a047..109d830 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -1,25 +1,46 @@ # Specification TODOs +## Tasks + - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible -- \abbrev and \term might be good ideas. -- Add more text to the introduction and underlying ideas of the format: - - Orthogonality: Semantic structure is not dependend on syntax. Verbatim lines are not preformatted, but `pre` blocks are. 
- - Strictness for ecosystem health: Prevent HTML uncontrolled growth desaster - - Allow tooling to work with semanticall yinvalid documents - - Static layout: No surprises. Layout once, yield consistent rendering - - Accessiblity: Everything is semantic, nothing is presentation-only. -- h3 after h1 is not legal +
+## Potential Future Features
+
+### `appendix` element
+
- Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3."
-> Recommendation 3: Add Formal Whitespace Processing Algorithm
+### Abbreviations
+
+- \abbrev(title="Antiblockiersystem"){ABS} defines a new abbreviation
+- \abbrev{ABS} references an existing abbreviation
+- \abbrev(title) can only be set once.
+- glossary; emits a glossary/definition list of all abbreviations
+
+### Definition Lists
+
+- deflist {structural} is a definition list
+- term {inline} defines a new term, must be followed by a `def`
+- def { blocks } definition for the term
+
+### Index
+
+- \indexed{Word} adds a new entry to the index.
+- index; emits an index with refs to all `\index`ed words.
+
+### Formal Whitespace Processing Algorithm
+
+Write a formal definition of the whitespace processing algorithm so it can be easily replicated.
+### Formal Language Schema
 Recommendation 5: Add Appendix with Formal Schema Rationale: Current spec requires reading entire document to understand element relationships. Machine-readable schema would enable automatic validation and tooling. Provide RelaxNG Compact syntax schema defining:
+### Highlighted Lines and Line Numbering
 Rationale: Technical documentation needs to emphasize specific code lines (tutorials, diffs, explanations). 
pre(syntax="python", highlight="2,4-6"): diff --git a/docs/specification.md b/docs/specification.md index b977ea9..a030fa0 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -87,12 +87,36 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap HyperDoc 2.0 ("HyperDoc") is a plain-text markup language for hypertext documents. -Design goals: +It was created out of frustration with the practical reality of Markdown: while its permissiveness and brevity are useful, its underspecified semantics have led to many dialects where correct authoring and correct rendering often require trial-and-error. In most ecosystems, Markdown also ends up being a convenient frontend for HTML, and HTML is the definition of unbounded growth: implementing a full HTML renderer is a large, ongoing effort, and the surface area keeps expanding. + +HyperDoc aims to be a middle ground between "just text" formats such as Gemini’s Gemtext and the flexibility of HTML: it has a strict, proper semantic definition and supports rich documents, but it is intentionally far more restrictive (and therefore more implementable, testable, and interoperable) than HTML. + +### 1.1 Design goals (non-normative) - Deterministic, unambiguous parsing. - Convenient authoring in plain text. - Round-trippable formatting (tooling can rewrite without losing information). +### 1.2 Underlying ideas (non-normative) + +- **Orthogonality:** Syntax is an encoding of a document tree, not its meaning. In particular, verbatim (`:`) bodies are not inherently "preformatted"; only elements such as `pre` assign preformatted rendering semantics to their content. +- **Strictness for ecosystem health:** A small, precisely specified core prevents uncontrolled growth into renderer-specific quirks and accidental "standard library" behavior, which is how many HTML-adjacent formats fragment over time. 
+- **Tooling-friendly invalidity:** Tooling is allowed to operate on documents that are syntactically valid but semantically invalid, so that formatters, editors, refactoring tools, and diagnostics can work with incomplete or broken drafts. +- **Static layout:** Documents have no runtime behavior; rendering can be decided from the semantic document tree without hidden state or incremental "best effort" heuristics. +- **Accessibility:** Everything is semantic; the format aims to avoid presentation-only constructs so renderers can provide accessible output (screen readers, reflow, alternative presentations) without guessing author intent. + +### 1.3 Designated authoring area (non-normative) + +HyperDoc is designed for writing and publishing informational documents where structure, linking, and predictable rendering matter: + +- Informational content (e.g. websites) +- Technical documentation +- Blogs +- News posts +- Code documentation +- Personal notes +- Public wiki content + ## 2. Conformance and terminology The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are to be interpreted as described in RFC 2119. @@ -577,7 +601,7 @@ Semantics: - `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). - `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. - `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. -- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). 
+- Each invocation of `footnotes(...)` **MUST** advance the "collection cursor" for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). - `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. - Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. @@ -788,7 +812,6 @@ Semantics: - Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. - A renderer **MAY** hyperlink markers and dumped entries back-and-forth. - ## 10. Attribute types and date/time formats > TODO: Attributes should be documented well and not only be mentioned in the element catalog. @@ -895,13 +918,13 @@ Supported values: | `iso` | Render the date in the lexical format of §10.2.1. | `2026-09-13` | `2026-09-13` | | `short` (default) | Render the date in a numeric, locale-appropriate short form. | `9/13/2026` | `13.09.2026` | | `long` | Render the date in a locale-appropriate long form (month name, full year). | `September 13, 2026` | `13. September 2026` | -| `relative` | Render a relative description of the date compared to “today”. | `in 3 days` | `in 3 Tagen` | +| `relative` | Render a relative description of the date compared to "today". | `in 3 days` | `in 3 Tagen` | | `year` | Render only the year component. | `2026` | `2026` | | `month` | Render only the month component in a locale-appropriate form (typically a month name). | `September` | `September` | | `day` | Render only the day-of-month component in a locale-appropriate form (may be an ordinal). | `13th` | `13.` | | `weekday` | Render the weekday name for that date. | `Saturday` | `Samstag` | -The `relative` examples are non-normative and assume “today” is `2026-09-10` in the renderer’s date context. +The `relative` examples are non-normative and assume "today" is `2026-09-10` in the renderer’s date context. 
#### 10.3.4 `fmt` values for `\time` @@ -929,7 +952,7 @@ Supported values: | `long` | Render date and time with second precision; include the fractional part if present. | `September 13, 2026, 1:36:00 PM` | `13. September 2026, 13:36:00` | | `relative` | Render a relative description compared to the current datetime. | `20 minutes ago` | `vor 20 Minuten` | -The `relative` examples are non-normative and assume the effective zone is `+02:00`, the value is `2026-09-13T13:36:00+02:00`, and “now” is `2026-09-13T13:56:00+02:00`. +The `relative` examples are non-normative and assume the effective zone is `+02:00`, the value is `2026-09-13T13:36:00+02:00`, and "now" is `2026-09-13T13:56:00+02:00`. #### 10.3.6 `fmt` values for `\ref` From 27748e80dab211ee8e4f3c479756fb2f76e70818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 23:52:57 +0100 Subject: [PATCH 085/116] Add HTML5 document header test --- src/hyperdoc.zig | 255 +++++++++++++++++++++++++--- src/render/dump.zig | 44 ++++- src/render/html5.zig | 55 +++++- src/testsuite.zig | 44 ++++- test/html5/document_header.hdoc | 5 + test/html5/document_header.html | 5 + test/html5/media_and_toc.html | 11 +- test/html5/nesting_and_inlines.html | 9 +- test/html5/paragraph_styles.html | 5 +- test/html5/tables.html | 5 +- 10 files changed, 392 insertions(+), 46 deletions(-) create mode 100644 test/html5/document_header.hdoc create mode 100644 test/html5/document_header.html diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 423d258..f9f88a6 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -20,11 +20,16 @@ pub const Document = struct { // header information lang: LanguageTag = .inherit, // inherit here means "unset" - title: ?[]const u8, + title: ?Title = null, author: ?[]const u8, date: ?DateTime, timezone: ?TimeZoneOffset, + pub const Title = struct { + full: Block.Title, + simple: []const u8, + }; + pub const TableOfContents = struct { level: Block.Heading.Level, // TODO: Refactor 
to use `index` here as well. headings: []usize, @@ -146,6 +151,11 @@ pub const Block = union(enum) { colspan: u32, content: []Block, }; + + pub const Title = struct { + lang: LanguageTag, + content: []Span, + }; }; pub fn FormattedDateTime(comptime DT: type) type { @@ -578,6 +588,7 @@ pub fn parse( try sema.validate_references(&id_map); const doc_lang = header.lang orelse LanguageTag.inherit; + const title = try sema.finalize_title(header, doc_lang); const contents = try sema.blocks.toOwnedSlice(arena.allocator()); const block_locations = try sema.block_locations.toOwnedSlice(arena.allocator()); const toc = try sema.build_toc(contents, block_locations); @@ -590,7 +601,7 @@ pub fn parse( .toc = toc, .lang = doc_lang, - .title = header.title, + .title = title, .version = header.version, .author = header.author, .date = header.date, @@ -722,6 +733,9 @@ pub const SemanticAnalyzer = struct { code: []const u8, header: ?Header = null, + title_block: ?Block.Title = null, + title_location: ?Parser.Location = null, + top_level_index: usize = 0, blocks: std.ArrayList(Block) = .empty, block_locations: std.ArrayList(Parser.Location) = .empty, ids: std.ArrayList(?Reference) = .empty, @@ -732,8 +746,18 @@ pub const SemanticAnalyzer = struct { heading_counters: [Block.Heading.Level.count]u16 = @splat(0), fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { + const node_index = sema.top_level_index; + sema.top_level_index += 1; + switch (node.type) { .hdoc => { + if (node_index != 0) { + try sema.emit_diagnostic(.misplaced_hdoc_header, node.location); + } + if (node.body != .empty) { + try sema.emit_diagnostic(.non_empty_hdoc_body, node.location); + } + const header = sema.translate_header_node(node) catch |err| switch (err) { error.OutOfMemory, error.UnsupportedVersion => |e| return e, error.BadAttributes => null, @@ -753,15 +777,32 @@ pub const SemanticAnalyzer = struct { std.debug.assert(sema.header != null); }, + .title => { 
+ if (sema.header == null and node_index == 0) { + try sema.emit_diagnostic(.missing_hdoc_header, node.location); + } + if (node_index != 1) { + try sema.emit_diagnostic(.misplaced_title_block, node.location); + } + if (sema.title_block != null) { + try sema.emit_diagnostic(.duplicate_title_block, node.location); + return; + } + + const title_block = sema.translate_title_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.BadAttributes => { + return; + }, + }; + + sema.title_block = title_block; + sema.title_location = node.location; + }, + else => { - if (sema.header == null) { - if (sema.blocks.items.len == 0) { - // Emit error for the first encountered block. - // This can only happen exactly once, as we either: - // - have already set a header block when the first non-header nodes arrives. - // - we have processed another block already, so the previous block would've emitted the warning already. - try sema.emit_diagnostic(.missing_hdoc_header, node.location); - } + if (sema.header == null and node_index == 0) { + try sema.emit_diagnostic(.missing_hdoc_header, node.location); } const block, const id = sema.translate_block_node(node) catch |err| switch (err) { @@ -851,6 +892,10 @@ pub const SemanticAnalyzer = struct { const image, const id = try sema.translate_image_node(node); return .{ .{ .image = image }, id }; }, + .title => { + try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. 
node.location.offset + node.location.length] } }, node.location); + return error.InvalidNodeType; + }, .pre => { const preformatted, const id = try sema.translate_preformatted_node(node); return .{ .{ .preformatted = preformatted }, id }; @@ -913,6 +958,17 @@ pub const SemanticAnalyzer = struct { return .{ heading, attrs.id }; } + fn translate_title_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.Title { + const attrs = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + }); + + return .{ + .lang = attrs.lang, + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + }; + } + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { const attrs = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, @@ -1607,10 +1663,10 @@ pub const SemanticAnalyzer = struct { const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); // Convert the content_spans into a "rendered string". 
- const content_text = sema.render_spans_to_plaintext(content_spans) catch |err| switch (err) { + const content_text = (sema.render_spans_to_plaintext(content_spans, .reject_date_time) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, else => |e| return e, - }; + }).text; const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1632,6 +1688,7 @@ pub const SemanticAnalyzer = struct { .h1, .h2, .h3, + .title, .p, .note, .warning, @@ -1706,28 +1763,156 @@ pub const SemanticAnalyzer = struct { }); } - fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span) error{ OutOfMemory, DateTimeRenderingUnsupported }![]const u8 { - var len: usize = 0; - for (source_spans) |span| { - len += switch (span.content) { - .text => |str| str.len, - .date, .time, .datetime => return error.DateTimeRenderingUnsupported, - }; - } + const TitlePlainText = struct { + text: []const u8, + contains_date_time: bool, + }; - var output_str: std.ArrayList(u8) = .empty; - defer output_str.deinit(sema.arena); + const PlaintextMode = enum { + reject_date_time, + iso_date_time, + }; - try output_str.ensureTotalCapacityPrecise(sema.arena, len); + fn render_spans_to_plaintext( + sema: *SemanticAnalyzer, + source_spans: []const Span, + mode: PlaintextMode, + ) error{ OutOfMemory, DateTimeRenderingUnsupported }!TitlePlainText { + var output: std.ArrayList(u8) = .empty; + defer output.deinit(sema.arena); + + var contains_date_time = false; for (source_spans) |span| { switch (span.content) { - .text => |str| output_str.appendSliceAssumeCapacity(str), - .date, .time, .datetime => unreachable, + .text => |str| try output.appendSlice(sema.arena, str), + .date => |value| switch (mode) { + .reject_date_time => return error.DateTimeRenderingUnsupported, + .iso_date_time => { + contains_date_time = true; + var buffer: [64]u8 = undefined; + const text = format_iso_date(value.value, 
&buffer); + try output.appendSlice(sema.arena, text); + }, + }, + .time => |value| switch (mode) { + .reject_date_time => return error.DateTimeRenderingUnsupported, + .iso_date_time => { + contains_date_time = true; + var buffer: [64]u8 = undefined; + const text = format_iso_time(value.value, &buffer); + try output.appendSlice(sema.arena, text); + }, + }, + .datetime => |value| switch (mode) { + .reject_date_time => return error.DateTimeRenderingUnsupported, + .iso_date_time => { + contains_date_time = true; + var buffer: [96]u8 = undefined; + const text = format_iso_datetime(value.value, &buffer); + try output.appendSlice(sema.arena, text); + }, + }, } } - return try output_str.toOwnedSlice(sema.arena); + return .{ + .text = try output.toOwnedSlice(sema.arena), + .contains_date_time = contains_date_time, + }; + } + + fn format_iso_date(value: Date, buffer: []u8) []const u8 { + const formatted = std.fmt.bufPrint(buffer, "{d:0>4}-{d:0>2}-{d:0>2}", .{ + @as(u32, @intCast(value.year)), + value.month, + value.day, + }) catch unreachable; + + return if (formatted.len > 0 and formatted[0] == '+') + formatted[1..] 
+ else + formatted; + } + + fn format_iso_time(value: Time, buffer: []u8) []const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.hour, value.minute, value.second }) catch unreachable; + if (value.microsecond > 0) { + writer.print(".{d:0>6}", .{value.microsecond}) catch unreachable; + } + const minutes = @intFromEnum(value.timezone); + if (minutes == 0) { + writer.writeByte('Z') catch unreachable; + } else { + const sign: u8 = if (minutes < 0) '-' else '+'; + const abs_minutes: u32 = @intCast(@abs(minutes)); + const hour: u32 = abs_minutes / 60; + const minute: u32 = abs_minutes % 60; + writer.print("{c}{d:0>2}:{d:0>2}", .{ sign, hour, minute }) catch unreachable; + } + + return stream.getWritten(); + } + + fn format_iso_datetime(value: DateTime, buffer: []u8) []const u8 { + const date_text = format_iso_date(value.date, buffer); + const sep_index = date_text.len; + buffer[sep_index] = 'T'; + + const time_text = format_iso_time(value.time, buffer[sep_index + 1 ..]); + + return buffer[0 .. 
sep_index + 1 + time_text.len]; + } + + fn synthesize_title_from_plaintext(sema: *SemanticAnalyzer, text: []const u8, doc_lang: LanguageTag) !Block.Title { + const spans = try sema.arena.alloc(Span, 1); + spans[0] = .{ + .content = .{ .text = text }, + .attribs = .{ .lang = .inherit }, + .location = .{ .offset = 0, .length = text.len }, + }; + + return .{ + .lang = doc_lang, + .content = spans, + }; + } + + fn finalize_title(sema: *SemanticAnalyzer, header: Header, doc_lang: LanguageTag) !?Document.Title { + const header_title = header.title; + const block_title = sema.title_block; + + if (header_title == null and block_title == null) + return null; + + if (block_title) |title_block| { + const rendered = sema.render_spans_to_plaintext(title_block.content, .iso_date_time) catch |err| switch (err) { + error.DateTimeRenderingUnsupported => unreachable, + else => |e| return e, + }; + + if (header_title == null and rendered.contains_date_time) { + if (sema.title_location) |location| { + try sema.emit_diagnostic(.title_inline_date_time_without_header, location); + } + } + + return .{ + .full = title_block, + .simple = rendered.text, + }; + } + + const simple_text = header_title.?; + const synthesized_full = try sema.synthesize_title_from_plaintext(simple_text, doc_lang); + + return .{ + .full = synthesized_full, + .simple = simple_text, + }; } const EmptyHandling = enum { @@ -2809,6 +2994,7 @@ pub const Parser = struct { h1, h2, h3, + title, p, note, warning, @@ -2861,6 +3047,7 @@ pub const Parser = struct { .h1, .h2, .h3, + .title, .p, .note, .warning, @@ -2890,6 +3077,7 @@ pub const Parser = struct { .h2, .h3, + .title, .p, .note, .warning, @@ -2995,6 +3183,8 @@ pub const Diagnostic = struct { unterminated_block_list, missing_hdoc_header: MissingHdocHeader, duplicate_hdoc_header: DuplicateHdocHeader, + misplaced_hdoc_header, + non_empty_hdoc_body, missing_attribute: NodeAttributeError, invalid_attribute: NodeAttributeError, empty_attribute: NodeAttributeError, @@ 
-3015,6 +3205,8 @@ pub const Diagnostic = struct { illegal_child_item, list_body_required, illegal_id_attribute, + misplaced_title_block, + duplicate_title_block, column_count_mismatch: TableShapeError, duplicate_id: ReferenceError, unknown_id: ReferenceError, @@ -3033,6 +3225,7 @@ pub const Diagnostic = struct { attribute_leading_trailing_whitespace, tab_character, automatic_heading_insertion: AutomaticHeading, + title_inline_date_time_without_header, pub fn severity(code: Code) Severity { return switch (code) { @@ -3044,6 +3237,8 @@ pub const Diagnostic = struct { .unterminated_block_list, .missing_hdoc_header, .duplicate_hdoc_header, + .misplaced_hdoc_header, + .non_empty_hdoc_body, .invalid_attribute, .missing_attribute, .empty_attribute, @@ -3064,6 +3259,8 @@ pub const Diagnostic = struct { .list_body_required, .illegal_id_attribute, .invalid_date_time_body, + .misplaced_title_block, + .duplicate_title_block, .column_count_mismatch, .duplicate_id, .unknown_id, @@ -3082,6 +3279,7 @@ pub const Diagnostic = struct { .tab_character, .document_starts_with_bom, .automatic_heading_insertion, + .title_inline_date_time_without_header, => .warning, }; } @@ -3104,6 +3302,8 @@ pub const Diagnostic = struct { .unterminated_block_list => try w.writeAll("Block list body is unterminated (missing '}' before end of file)."), .missing_hdoc_header => try w.writeAll("Document must start with an 'hdoc' header."), .duplicate_hdoc_header => try w.writeAll("Only one 'hdoc' header is allowed; additional header found."), + .misplaced_hdoc_header => try w.writeAll("The 'hdoc' header must be the first node in the document."), + .non_empty_hdoc_body => try w.writeAll("The 'hdoc' header must have an empty body (';')."), .duplicate_attribute => |ctx| try w.print("Duplicate attribute '{s}' will overwrite the earlier value.", .{ctx.name}), .empty_verbatim_block => try w.writeAll("Verbatim block has no lines."), .verbatim_missing_trailing_newline => try w.writeAll("Verbatim line should end with 
a newline."), @@ -3147,6 +3347,8 @@ pub const Diagnostic = struct { .illegal_child_item => try w.writeAll("Node not allowed here."), .illegal_id_attribute => try w.writeAll("Attribute 'id' not allowed here."), + .misplaced_title_block => try w.writeAll("Document title must be the second node (directly after 'hdoc')."), + .duplicate_title_block => try w.writeAll("Only one 'title' block is allowed."), .invalid_date_time_body => try w.writeAll("\\date, \\time and \\datetime do not allow any inlines inside their body."), @@ -3159,6 +3361,7 @@ pub const Diagnostic = struct { .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), .automatic_heading_insertion => |ctx| try w.print("Inserted automatic {t} to fill heading level gap.", .{ctx.level}), + .title_inline_date_time_without_header => try w.writeAll("Title block contains \\date/\\time/\\datetime but hdoc(title=\"...\") is missing; metadata title cannot be derived reliably."), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index b617f0d..3188c56 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -455,11 +455,25 @@ fn dumpOptionalDateTimeField(writer: *Writer, indent: usize, key: []const u8, va } } +fn dumpOptionalTitleField(writer: *Writer, indent: usize, key: []const u8, value: ?hdoc.Document.Title) Writer.Error!void { + try writeIndent(writer, indent); + if (value) |title| { + try writer.print("{s}:\n", .{key}); + try dumpOptionalStringField(writer, indent + indent_step, "simple", title.simple); + try writeIndent(writer, indent + indent_step); + try writer.writeAll("full:\n"); + try dumpOptionalStringField(writer, indent + 2 * indent_step, "lang", title.full.lang.text); + try dumpSpanListField(writer, indent + 2 * indent_step, "content", title.full.content); + } else { + try writer.print("{s}: null\n", .{key}); + } +} + fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { try writer.writeAll("document:\n"); try dumpVersion(writer, 
indent_step, doc.version); try dumpOptionalStringField(writer, indent_step, "lang", doc.lang.text); - try dumpOptionalStringField(writer, indent_step, "title", doc.title); + try dumpOptionalTitleField(writer, indent_step, "title", doc.title); try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); try dumpTableOfContents(writer, indent_step, doc.toc); @@ -475,8 +489,8 @@ pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { test "render escapes string values" { const title = "Doc \"Title\"\n"; const span_text = "Hello \"world\"\n"; - const link_ref: hdoc.Reference = .init("section \"A\""); - const id_value: hdoc.Reference = .init("id:1\n"); + const link_ref: hdoc.Reference = .{ .text = "section \"A\"" }; + const id_value: hdoc.Reference = .{ .text = "id:1\n" }; var doc: hdoc.Document = .{ .arena = std.heap.ArenaAllocator.init(std.testing.allocator), @@ -486,7 +500,7 @@ test "render escapes string values" { .id_map = .{}, .toc = undefined, .lang = .inherit, - .title = title, + .title = null, .author = null, .date = null, .timezone = null, @@ -494,6 +508,21 @@ test "render escapes string values" { defer doc.deinit(); const arena_alloc = doc.arena.allocator(); + + const title_spans = try arena_alloc.alloc(hdoc.Span, 1); + title_spans[0] = .{ + .content = .{ .text = title }, + .attribs = .{}, + .location = .{ .offset = 0, .length = title.len }, + }; + doc.title = .{ + .full = .{ + .lang = .inherit, + .content = title_spans, + }, + .simple = title, + }; + doc.contents = try arena_alloc.alloc(hdoc.Block, 0); doc.content_ids = try arena_alloc.alloc(?hdoc.Reference, 0); doc.toc = .{ @@ -506,6 +535,7 @@ test "render escapes string values" { spans[0] = .{ .content = .{ .text = span_text }, .attribs = .{ .link = .{ .ref = link_ref } }, + .location = .{ .offset = 0, .length = span_text.len }, }; const blocks = try arena_alloc.alloc(hdoc.Block, 1); @@ -541,9 +571,9 @@ test 
"render escapes string values" { try buffer.writer.flush(); const output = buffer.writer.buffered(); - const expected_title = try std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); - defer std.testing.allocator.free(expected_title); - try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); + const expected_title_simple = try std.fmt.allocPrint(std.testing.allocator, " simple: \"{f}\"\n", .{std.zig.fmtString(title)}); + defer std.testing.allocator.free(expected_title_simple); + try std.testing.expect(std.mem.indexOf(u8, output, expected_title_simple) != null); const expected_span = try std.fmt.allocPrint( std.testing.allocator, diff --git a/src/render/html5.zig b/src/render/html5.zig index 3bbc4e9..364255e 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -11,6 +11,8 @@ const indent_step: usize = 2; pub fn render(doc: hdoc.Document, writer: *Writer) RenderError!void { var ctx: RenderContext = .{ .doc = &doc, .writer = writer }; + try ctx.renderDocumentHeader(); + for (doc.contents, 0..) 
|block, index| { try ctx.renderBlock(block, index, 0); } @@ -32,6 +34,53 @@ const RenderContext = struct { } } + fn renderDocumentHeader(ctx: *RenderContext) RenderError!void { + const has_title = ctx.doc.title != null; + const has_author = ctx.doc.author != null; + const has_date = ctx.doc.date != null; + + if (!has_title and !has_author and !has_date) return; + + try writeStartTag(ctx.writer, "header", .regular, .{ .lang = langAttribute(ctx.doc.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title) { + const title = ctx.doc.title.?; + try writeIndent(ctx.writer, indent_step); + try writeStartTag(ctx.writer, "h1", .regular, .{ .lang = langAttribute(title.full.lang) }); + try ctx.renderSpans(title.full.content); + try writeEndTag(ctx.writer, "h1"); + try ctx.writer.writeByte('\n'); + } + + if (has_author or has_date) { + try writeIndent(ctx.writer, indent_step); + try writeStartTag(ctx.writer, "p", .regular, .{ .class = "hdoc-doc-meta" }); + + var wrote_any = false; + if (has_author) { + try ctx.writer.writeAll("By "); + try writeEscapedHtml(ctx.writer, ctx.doc.author.?); + wrote_any = true; + } + if (has_date) { + if (wrote_any) { + try ctx.writer.writeAll(" - "); + } + + var date_buffer: [128]u8 = undefined; + const date_text = try formatIsoDateTime(ctx.doc.date.?, &date_buffer); + try writeEscapedHtml(ctx.writer, date_text); + } + + try writeEndTag(ctx.writer, "p"); + try ctx.writer.writeByte('\n'); + } + + try writeEndTag(ctx.writer, "header"); + try ctx.writer.writeByte('\n'); + } + fn renderBlocks(ctx: *RenderContext, blocks: []const hdoc.Block, indent: usize) RenderError!void { for (blocks) |block| { try ctx.renderBlock(block, null, indent); @@ -644,9 +693,9 @@ fn takeLang(lang: *?[]const u8) ?[]const u8 { fn headingTag(level: hdoc.Block.Heading.Level) []const u8 { return switch (level) { - .h1 => "h1", - .h2 => "h2", - .h3 => "h3", + .h1 => "h2", + .h2 => "h3", + .h3 => "h4", }; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 
682aaae..9a0a9dc 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -601,11 +601,53 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n| trailing \n", &.{.trailing_whitespace}); try validateDiagnostics(.{}, "h1 \"Title\"", &.{.missing_hdoc_header}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{ .misplaced_hdoc_header, .duplicate_hdoc_header }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } +test "title block populates metadata and warns on inline date" { + const code = "hdoc(version=\"2.0\",lang=\"en\");\ntitle { Hello \\date{2020-01-02} }\nh1 \"Body\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnostics.items.items[0].code == .title_inline_date_time_without_header); + + const title = doc.title orelse return error.TestExpectedEqual; + const full = title.full; + try std.testing.expectEqualStrings("Hello 2020-01-02", title.simple); + try std.testing.expectEqual(@as(usize, 3), full.content.len); +} + +test "header title synthesizes full title representation" { + const code = "hdoc(version=\"2.0\",title=\"Metadata\",lang=\"en\");\nh1 \"Body\""; + 
+ var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 0), diagnostics.items.items.len); + + const title = doc.title orelse return error.TestExpectedEqual; + try std.testing.expectEqualStrings("Metadata", title.simple); + + const full = title.full; + try std.testing.expectEqual(@as(usize, 1), full.content.len); + switch (full.content[0].content) { + .text => |text| try std.testing.expectEqualStrings("Metadata", text), + else => return error.TestExpectedEqual, + } +} + test "parser reports unterminated inline lists" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); diff --git a/test/html5/document_header.hdoc b/test/html5/document_header.hdoc new file mode 100644 index 0000000..3366121 --- /dev/null +++ b/test/html5/document_header.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", title="Metadata Title", date="2024-08-16T09:30:00", lang="en", tz="+02:00"); + +title { Metadata Title From Block } + +p { This document uses header metadata and a title block without any explicit headings. } diff --git a/test/html5/document_header.html b/test/html5/document_header.html new file mode 100644 index 0000000..f5fdb71 --- /dev/null +++ b/test/html5/document_header.html @@ -0,0 +1,5 @@ +
+

Metadata Title From Block

+

+2024-08-16T09:30:00+02:00

+
+

This document uses header metadata and a title block without any explicit headings.

diff --git a/test/html5/media_and_toc.html b/test/html5/media_and_toc.html index 563874e..5cf5739 100644 --- a/test/html5/media_and_toc.html +++ b/test/html5/media_and_toc.html @@ -1,4 +1,7 @@ -

Media and TOC

+
+

Media and TOC

+
+

§1 Media and TOC

-

Preformatted

+

§1.1 Preformatted

 print("hello world") 
-

Figure

+

§1.2 Figure

Example figure
Figure caption text.
-

Dates and Times

+

§1.3 Dates and Times

Today is .

The meeting is at .

Release happens on .

diff --git a/test/html5/nesting_and_inlines.html b/test/html5/nesting_and_inlines.html index 5db4d36..1b5be1a 100644 --- a/test/html5/nesting_and_inlines.html +++ b/test/html5/nesting_and_inlines.html @@ -1,7 +1,10 @@ -

Nesting and Inline Styling

+
+

Nesting and Inlines

+
+

§1 Nesting and Inline Styling

This document exercises inline formatting and nested lists.

-

We can mix emphasis, strike, monospacetext. Superscript x2and subscript x2also appear.

-

Links point to local anchorsor external sites.

+

We can mix emphasis, strike, monospace text. Superscript x2 and subscript x2 also appear.

+

Links point to local anchors or external sites.

  • Top-level item one

    diff --git a/test/html5/paragraph_styles.html b/test/html5/paragraph_styles.html index 82e8555..575b583 100644 --- a/test/html5/paragraph_styles.html +++ b/test/html5/paragraph_styles.html @@ -1,4 +1,7 @@ -

    Paragraph Styles

    +
    +

    Paragraph Styles

    +
    +

    §1 Paragraph Styles

    A standard paragraph introducing the styles below.

    Notes provide informational context without urgency.

    Warnings highlight potential issues to watch for.

    diff --git a/test/html5/tables.html b/test/html5/tables.html index bfce614..9384ec5 100644 --- a/test/html5/tables.html +++ b/test/html5/tables.html @@ -1,4 +1,7 @@ -

    Table Coverage

    +
    +

    Tables

    +
    +

    §1 Table Coverage

    This file covers header rows, data rows with titles, groups, and colspans.

    From 16a6af796bef4850c24e169fb5d259d65a35a481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 23:03:38 +0100 Subject: [PATCH 086/116] Cleans up specification text. Disallows arbitrary number of year digits, must be 4 now. We won't live till 9999 anyways. --- docs/TODO.md | 10 ++++++++++ docs/specification.md | 38 +++++++++++++++++++------------------- src/hyperdoc.zig | 4 ++-- src/testsuite.zig | 6 +----- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 109d830..89df194 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -6,6 +6,11 @@ - Specify "syntax" proper - Add links to RFCs where possible +- Verbatim-body to text conversion is under-specified. You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). Different implementations may diverge. +- Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. +- Span attribute semantics are referenced but not fully defined. §8.2 introduces spans with an “attribute set (e.g. emphasis/monospace/link…)” but the spec never fully defines the canonical attribute keys, nesting behavior (e.g., \em inside \mono), or how lang overrides interact at span level. That’s a major interoperability risk because renderers may differ even if parsers agree. 
+- + ## Potential Future Features ### `appendix` element @@ -50,3 +55,8 @@ pre(syntax="python", highlight="2,4-6"): | else: | return n * factorial(n-1) # Recursive case also: enable line numbers + +## Rejected Features + +- `\kbd{…}` is just `\mono(syntax="kbd"){…}` +- `include(path="...")` is rejected for unbounded document content growth diff --git a/docs/specification.md b/docs/specification.md index a030fa0..a3ffe77 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -28,7 +28,7 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "7. String Literal Escape Processing (semantic)": DRAFT - "7.1 Control character policy (semantic)": DRAFT - "7.2 Supported escapes in string literals": DRAFT - - "7.2.1 Unicode escape `\\u{H...}`": DRAFT + - "7.2.1 Unicode escape `\u{H...}`": DRAFT - "7.3 Invalid escapes": DRAFT - "8. Semantic document model": DRAFT - "8.1 Document structure": DONE @@ -62,11 +62,11 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "9.4.4 `group` (table row group)": DRAFT - "9.4.5 `td` (table cell)": DRAFT - "9.5 Inline elements" - - "9.5.1 `\\em`": DRAFT - - "9.5.2 `\\mono`": DRAFT - - "9.5.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "9.5.1 `\em`": DRAFT + - "9.5.2 `\mono`": DRAFT + - "9.5.3 `\strike`, `\sub`, `\sup`": DRAFT - "9.5.4 `\link`": DRAFT - - "9.5.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "9.5.5 `\date`, `\time`, `\datetime`": DRAFT - "9.5.6 `\ref`": DRAFT - "9.5.7 `\footnote`": DRAFT - "10. Attribute types and date/time formats": DRAFT @@ -384,7 +384,7 @@ A semantic validator/decoder **MUST** accept exactly: A semantic validator/decoder **MUST** reject a string literal that contains: -- any other escape (`\t`, `\\xHH`, `\0`, etc.) +- any other escape (`\t`, `\xHH`, `\0`, etc.) 
- an unterminated escape (string ends after `\`) - malformed `\u{...}` (missing braces, empty, non-hex, >6 digits) - out-of-range or surrogate code points @@ -723,21 +723,21 @@ Table layout rules: ### 9.5 Inline elements -Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). +Inline elements appear only in inline-list bodies. -#### 9.5.1 `\\em` +#### 9.5.1 `\em` - **Role:** emphasis - **Body:** inline text - **Attributes:** `lang` (optional) -#### 9.5.2 `\\mono` +#### 9.5.2 `\mono` - **Role:** monospaced span - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional) -#### 9.5.3 `\\strike`, `\\sub`, `\\sup` +#### 9.5.3 `\strike`, `\sub`, `\sup` - **Role:** strike-through / subscript / superscript - **Body:** inline text @@ -757,7 +757,7 @@ Notes: - Interior references use `\ref(ref="...")`. -#### 9.5.5 `\\date`, `\\time`, `\\datetime` +#### 9.5.5 `\date`, `\time`, `\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) @@ -795,7 +795,7 @@ When computing `` for headings, inline footnote/citation markers **SHOULD - **Body:** inline text (required for defining form; empty for reference form) - **Attributes:** - `key` (optional; defines a named footnote) - - `ref` (optional; references a previously defined named footnote) + - `ref` (optional; references a defined named footnote) - `kind` (optional; one of `footnote`, `citation`; default `footnote`) - `lang` (optional) @@ -808,7 +808,7 @@ Semantics: - `\footnote{...}` defines an anonymous footnote entry at the marker position. - `\footnote(key="X"){...}` defines a named footnote entry in the footnote namespace and emits its marker at the marker position. -- `\footnote(ref="X");` emits a marker for the previously defined named footnote `X`. +- `\footnote(ref="X");` emits a marker for the defined named footnote `X`. 
- Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. - A renderer **MAY** hyperlink markers and dumped entries back-and-forth. @@ -838,17 +838,17 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601. `YYYY-MM-DD` -- `YYYY`: one or more digits -- `MM`: `01`-`12` -- `DD`: `01`-`31` +- `YYYY`: exactly four digits +- `MM`: `01`-`12`: exactly two digits +- `DD`: `01`-`31`: exactly two digits #### 10.2.2 Time `hh:mm:ss` with an optional fraction and an optional zone. -- `hh`: `00`-`23` -- `mm`: `00`-`59` -- `ss`: `00`-`59` +- `hh`: `00`-`23`: exactly two digits +- `mm`: `00`-`59`: exactly two digits +- `ss`: `00`-`59`: exactly two digits - optional fraction: `.` followed by 1,2,3,6, or 9 digits - zone: - `Z`, or diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f9f88a6..c22cc27 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -294,7 +294,7 @@ pub const Date = struct { day: u5, // 1-31 pub fn parse(text: []const u8) !Date { - if (text.len < 7) // "Y-MM-DD" + if (text.len != "YYYY-MM-DD".len) return error.InvalidValue; const first_dash = std.mem.indexOfScalar(u8, text, '-') orelse return error.InvalidValue; const tail = text[first_dash + 1 ..]; @@ -305,7 +305,7 @@ pub const Date = struct { const month_text = text[first_dash + 1 .. 
second_dash]; const day_text = text[second_dash + 1 ..]; - if (year_text.len == 0 or month_text.len != 2 or day_text.len != 2) return error.InvalidValue; + if (year_text.len != 4 or month_text.len != 2 or day_text.len != 2) return error.InvalidValue; const year_value = std.fmt.parseInt(u32, year_text, 10) catch return error.InvalidValue; if (year_value > std.math.maxInt(i32)) return error.InvalidValue; diff --git a/src/testsuite.zig b/src/testsuite.zig index 9a0a9dc..b6e7e1b 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -694,14 +694,10 @@ test "Date.parse accepts ISO dates" { try std.testing.expectEqual(@as(u4, 12), date.month); try std.testing.expectEqual(@as(u5, 25), date.day); - const short_year = try hdoc.Date.parse("1-01-01"); - try std.testing.expectEqual(@as(i32, 1), short_year.year); - try std.testing.expectEqual(@as(u4, 1), short_year.month); - try std.testing.expectEqual(@as(u5, 1), short_year.day); - try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-1-01")); try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-13-01")); try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-12-32")); + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("1-01-01")); } test "Time.parse accepts ISO times with zones" { From effddce7be29e226fb717037959e4cdc6cf26808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 00:09:53 +0100 Subject: [PATCH 087/116] Adds a huge load of stuff to docs/TODO.md, improves docs/specification.md by adding two sub-chapters for unknown node semantics and verbatim body joining --- SPEC_TODO.md | 1 - docs/TODO.md | 65 +++++++++++++++++++++++++++++++++++++++++-- docs/specification.md | 41 +++++++++++++++++++++++++-- 3 files changed, 102 insertions(+), 5 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 0695989..1cfadaf 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,6 +1,5 @@ # Spec compliance TODOs -- Add support 
for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 - Enforce `hdoc` placement and body rules by rejecting headers that are not the first node or that carry any non-empty body, instead of merely warning on the first non-header block and accepting later headers. 【F:docs/specification.md†L369-L373】【F:src/hyperdoc.zig†L734-L788】 diff --git a/docs/TODO.md b/docs/TODO.md index 89df194..556c7dc 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -5,14 +5,63 @@ - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible - - Verbatim-body to text conversion is under-specified. 
You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). Different implementations may diverge. - Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. - Span attribute semantics are referenced but not fully defined. §8.2 introduces spans with an “attribute set (e.g. emphasis/monospace/link…)” but the spec never fully defines the canonical attribute keys, nesting behavior (e.g., \em inside \mono), or how lang overrides interact at span level. That’s a major interoperability risk because renderers may differ even if parsers agree. -- +- Refine that `hdoc(title)` is metadata while `title{}` is rendered rich text +- Refine `img(path)` only using forward slash. + - Proposal: Add to §9.3.5: + - "path MUST use forward slashes (/) as path separators, regardless of host OS." + - "path MUST be relative; absolute paths and URI schemes (e.g., http://) MUST be rejected." + - "Path resolution is relative to the directory containing the HyperDoc source file." + - "Path traversal outside the source directory (e.g., ../../etc/passwd) SHOULD be rejected or restricted by implementations." +- Proposal: Add to §9.2.4: + - "Multiple toc elements MAY appear in a document; each MUST render the same heading structure but MAY appear at different locations." + - "If depth differs between instances, each TOC renders independently according to its own depth attribute." 
+- Add to §9.2.5: + - "Multiple footnotes elements partition footnote rendering; each instance collects only footnotes/citations accumulated since the previous dump (or document start)." +- Proposal: Add to §4: + - "Implementations MUST support nesting depths of at least 32 levels." + - "Implementations MAY reject documents exceeding this depth with a diagnostic." + - "Nesting depth is measured as the maximum distance from the document root to any leaf node." +- Ambiguity of Inline Unicode: + - Finding: String literals ("...") support \u{...} escapes (§7.2.1). Inline text streams (bodies of p, h1) do not (§6.1 only lists \\, \{, \}). + - Issue: Authors cannot enter invisible characters (like Non-Breaking Space U+00A0 or Zero Width Space U+200B) into a paragraph without pasting the raw invisible character, which is brittle and invisible in editors. +- Recommendation: Add explicit sequencing in §7 stating: "Escape decoding MUST occur during semantic validation, before inline text construction (§8.2) for inline-list bodies, and before attribute validation for attribute values." +- Recommendation: Add to §9.2.1: "If the document contains any \date, \time, or \datetime elements with fmt values other than iso, and hdoc(lang) is not specified, implementations SHOULD emit a diagnostic." +- Issue: "Lexical" implies only regex-level matching. It does not strictly forbid 2023-02-31. For a strict format, "Semantic" validity (Gregorian correctness) should be enforced to prevent invalid metadata. ## Potential Future Features +### `hr;` or `break;` + +Purpose: Explicit scene/topic breaks within prose (equivalent to HTML
    ). + +Attributes: + id (optional; top-level only) +Body: + ; (empty) +Constraints: + - MUST be top-level or inside block containers that allow general text blocks + - MUST NOT appear inside inline contexts + +Rationale: + Common typographic convention for section breaks that are less formal than headings. Currently missing; authors might abuse pre: or empty paragraphs as workarounds. + +### `\plain` + +Finding: Attributes like lang are supported on \em, \mono, etc. However, if an author needs to mark a plain-text span as a different language (e.g., "The word Angst (German) means...") without applying italics or monospace, there is no element to hold the lang attribute. + +### `table{title{}}` or `table(caption="")` + `img(caption="")` + +x(caption) composes well with `\ref(ref)`. + +table title is good for accessibility. + +### `\br;` inline + +Introduce \br for Hard Line Breaks: Since whitespace normalization collapses \n to space, there is currently no way to force a line break within a paragraph (e.g., for postal addresses or poetry) without using pre. Adding a \br inline element would resolve this semantic gap. + ### `appendix` element - Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3." @@ -56,7 +105,19 @@ pre(syntax="python", highlight="2,4-6"): | return n * factorial(n-1) # Recursive case also: enable line numbers +### Attribution + +```hdoc +quote { + p "Premature optimization is the root of all evil." + attribution "Donald Knuth" +} +``` + ## Rejected Features - `\kbd{…}` is just `\mono(syntax="kbd"){…}` - `include(path="...")` is rejected for unbounded document content growth +- `code` is just `\mono(syntax="…")` +- `details/summary` is just HTML with a dynamically changing page layout, ever tried printing this? +- `\math`, `equation{…}` have too high implementation complexity and have high requirements on fonts, font renderers and layout engines. 
\ No newline at end of file diff --git a/docs/specification.md b/docs/specification.md index a3ffe77..50f184c 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -445,6 +445,30 @@ The renderer **MUST** see the post-normalization result. **String and verbatim bodies:** When a string body or verbatim body is converted into spans, it is treated as a single text source (no nested inline nodes) and then processed using the same rules above, including whitespace normalization for non-`pre` elements. +#### 8.2.1 Verbatim body decoding (normative) + +A verbatim body is converted to a Unicode string as follows. + +Let `LINESEP` be U+000A (LF). + +For each `piped_line` in source order: + +1. Let `raw` be the sequence of characters after the leading `|` up to (but not including) the line terminator. +2. If `raw` begins with a single U+0020 SPACE, remove **exactly one** such leading SPACE from `raw`. + (This optional SPACE is a visual separator between `|` and the content and is not part of the verbatim value.) +3. Append the resulting string to a list `lines`. + +The verbatim value is: + +- the empty string if `lines` is empty, otherwise +- `join(LINESEP, lines)` (i.e., insert `LINESEP` between adjacent entries, but not after the last entry). + +Notes: + +- The concrete source line ending used for `piped_line` termination (LF vs CRLF vs EOF) does not affect the verbatim value. +- The resulting verbatim value is then processed as a single text source under §8.2 (including span merging and whitespace normalization for non-`pre` elements). + + ### 8.3 Attribute uniqueness - Within a node, attribute keys **MUST** be unique (case-sensitive). @@ -488,6 +512,19 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. - Built-in element names are defined in §9. - Unknown elements are syntactically valid (parseable), but semantically invalid. 
+### 8.6.1 Closed-world semantics and compatibility policy (normative) + +HyperDoc 2.0 defines a **closed** set of built-in element names and attributes. + +- A semantic validator **MUST** treat any node whose name is not a built-in element name (§9) as **semantically invalid**. +- A semantic validator **MUST** treat any attribute key that is not defined for the given element (§9) as **semantically invalid** (see also §8.4). +- Renderers **MUST NOT** assign renderer-specific meaning to unknown element names or unknown attributes. + +#### Tooling guidance (non-normative) + +- Tools that operate on syntactically valid documents (formatters, editors, refactoring tools) **SHOULD** preserve unknown nodes and unknown attributes when round-tripping, while emitting diagnostics, to support drafts and forward-compatibility experiments. +- Conformance tests for “HyperDoc 2.0 renderers” should assume closed-world semantics: unknown names are errors, not extension points. + ## 9. Elements and attributes ### 9.1 Built-in elements and list mode @@ -687,7 +724,7 @@ Table layout rules: #### 9.4.1 List item: `li` - **Body:** either - - a block-list of block elements, or + - a block-list of general text block elements, or - a single string body, or - a verbatim body - **Attributes:** `lang` (optional) @@ -716,7 +753,7 @@ Table layout rules: - **Role:** A single cell within a table row. 
- **Body:** either - - a block-list of block elements, or + - a block-list of general text block elements, or - a single string body, or - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) From 893074e6eb7fc86b666e822b825c00781582d396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 00:33:26 +0100 Subject: [PATCH 088/116] Enforce top-level toc --- src/hyperdoc.zig | 5 +++++ src/testsuite.zig | 1 + 2 files changed, 6 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..2203f88 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1316,6 +1316,11 @@ pub const SemanticAnalyzer = struct { try blocks.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { + if (child_node.type == .toc) { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + continue; + } + const block, const id = try sema.translate_block_node(child_node); if (id != null) { try sema.emit_diagnostic(.illegal_id_attribute, get_attribute_location(child_node, "id", .name).?); diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..f2e51b4 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -604,6 +604,7 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{ .misplaced_hdoc_header, .duplicate_hdoc_header }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); ul{ li{ toc; } }", &.{.illegal_child_item}); } test "title block populates metadata and warns on inline date" { From fb71accca58ff61d5ce29fc44f117f35ed5dab9c Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 08:57:59 +0100 Subject: [PATCH 089/116] Track row title column usage --- src/hyperdoc.zig | 106 ++++++++++++++++++++++++++++++------------- src/render/dump.zig | 2 + src/render/html5.zig | 42 ++--------------- src/testsuite.zig | 83 +++++++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 70 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..d5dfbf9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -118,8 +118,8 @@ pub const Block = union(enum) { }; pub const Table = struct { - // TODO: column_count: usize, - // TODO: has_row_titles: bool, // not counted inside `Table.column_count`! + column_count: usize, + has_row_titles: bool, // not counted inside `Table.column_count`! lang: LanguageTag, rows: []TableRow, }; @@ -1138,7 +1138,10 @@ pub const SemanticAnalyzer = struct { var rows: std.ArrayList(Block.TableRow) = .empty; defer rows.deinit(sema.arena); - var column_count: ?usize = null; + var column_count: usize = 0; + var saw_header_row = false; + var saw_non_header_row = false; + var has_row_titles = false; switch (node.body) { .list => |child_nodes| { @@ -1146,39 +1149,43 @@ pub const SemanticAnalyzer = struct { for (child_nodes) |child_node| { switch (child_node.type) { .columns => { + if (saw_header_row) { + try sema.emit_diagnostic(.duplicate_columns_row, child_node.location); + } + + if (saw_non_header_row) { + try sema.emit_diagnostic(.misplaced_columns_row, child_node.location); + } + + saw_header_row = true; + const row_attrs = try sema.get_attributes(child_node, struct { lang: LanguageTag = .inherit, }); const cells = try sema.translate_table_cells(child_node); + const width = calculate_table_width(cells); + try sema.update_table_column_count(&column_count, width, child_node.location); + rows.appendAssumeCapacity(.{ .columns = .{ .lang = row_attrs.lang, .cells = cells, }, }); - - var width: usize = 0; - for (cells) |cell| { - 
std.debug.assert(cell.colspan > 0); - width += cell.colspan; - } - - column_count = column_count orelse width; - if (width != column_count) { - try sema.emit_diagnostic(.{ .column_count_mismatch = .{ - .expected = column_count.?, - .actual = width, - } }, child_node.location); - } }, .row => { + saw_non_header_row = true; + const row_attrs = try sema.get_attributes(child_node, struct { lang: LanguageTag = .inherit, title: ?[]const u8 = null, }); const cells = try sema.translate_table_cells(child_node); + const width = calculate_table_width(cells); + try sema.update_table_column_count(&column_count, width, child_node.location); + has_row_titles = has_row_titles or (row_attrs.title != null); rows.appendAssumeCapacity(.{ .row = .{ @@ -1187,22 +1194,10 @@ pub const SemanticAnalyzer = struct { .cells = cells, }, }); - - var width: usize = 0; - for (cells) |cell| { - std.debug.assert(cell.colspan > 0); - width += cell.colspan; - } - - column_count = column_count orelse width; - if (width != column_count) { - try sema.emit_diagnostic(.{ .column_count_mismatch = .{ - .expected = column_count.?, - .actual = width, - } }, child_node.location); - } }, .group => { + saw_non_header_row = true; + const row_attrs = try sema.get_attributes(child_node, struct { lang: LanguageTag = .inherit, }); @@ -1225,7 +1220,14 @@ pub const SemanticAnalyzer = struct { }, } + if (column_count == 0) { + try sema.emit_diagnostic(.missing_table_column_count, node.location); + column_count = 1; + } + const table: Block.Table = .{ + .column_count = column_count, + .has_row_titles = has_row_titles, .lang = attrs.lang, .rows = try rows.toOwnedSlice(sema.arena), }; @@ -1233,6 +1235,39 @@ pub const SemanticAnalyzer = struct { return .{ table, attrs.id }; } + fn calculate_table_width(cells: []const Block.TableCell) usize { + var width: usize = 0; + for (cells) |cell| { + std.debug.assert(cell.colspan > 0); + width += cell.colspan; + } + return width; + } + + fn update_table_column_count(sema: 
*SemanticAnalyzer, column_count: *usize, width: usize, location: Parser.Location) !void { + if (width == 0) { + if (column_count.* != 0) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.*, + .actual = 0, + } }, location); + } + return; + } + + if (column_count.* == 0) { + column_count.* = width; + return; + } + + if (width != column_count.*) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.*, + .actual = width, + } }, location); + } + } + fn translate_table_cells(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, InvalidNodeType, Unimplemented }![]Block.TableCell { var cells: std.ArrayList(Block.TableCell) = .empty; defer cells.deinit(sema.arena); @@ -3208,6 +3243,9 @@ pub const Diagnostic = struct { misplaced_title_block, duplicate_title_block, column_count_mismatch: TableShapeError, + missing_table_column_count, + misplaced_columns_row, + duplicate_columns_row, duplicate_id: ReferenceError, unknown_id: ReferenceError, @@ -3262,6 +3300,9 @@ pub const Diagnostic = struct { .misplaced_title_block, .duplicate_title_block, .column_count_mismatch, + .missing_table_column_count, + .misplaced_columns_row, + .duplicate_columns_row, .duplicate_id, .unknown_id, => .@"error", @@ -3353,6 +3394,9 @@ pub const Diagnostic = struct { .invalid_date_time_body => try w.writeAll("\\date, \\time and \\datetime do not allow any inlines inside their body."), .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), + .missing_table_column_count => try w.writeAll("Table must declare at least one column via a columns or row entry."), + .misplaced_columns_row => try w.writeAll("The 'columns' header row must be the first item in a table."), + .duplicate_columns_row => try w.writeAll("Only one 'columns' header row is allowed per table."), .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", 
.{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), diff --git a/src/render/dump.zig b/src/render/dump.zig index 3188c56..9e284d0 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -440,6 +440,8 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err .table => |table| { try writeTypeTag(writer, "table"); try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang.text); + try dumpOptionalNumberField(writer, indent + indent_step, "column_count", @as(?usize, table.column_count)); + try dumpBoolField(writer, indent + indent_step, "has_row_titles", table.has_row_titles); try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); }, } diff --git a/src/render/html5.zig b/src/render/html5.zig index 364255e..2357a42 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -301,8 +301,8 @@ const RenderContext = struct { const lang_attr = langAttribute(table.lang); const id_attr = ctx.resolveBlockId(block_index); - const column_count = inferColumnCount(table.rows) orelse 0; - const has_title_column = tableHasTitleColumn(table.rows); + const column_count = table.column_count; + const has_title_column = table.has_row_titles; try writeIndent(ctx.writer, indent); try writeStartTag(ctx.writer, "table", .regular, .{ .id = id_attr, .lang = lang_attr }); @@ -707,44 +707,8 @@ fn tocHasEntries(node: hdoc.Document.TableOfContents) bool { return false; } -fn inferColumnCount(rows: []const hdoc.Block.TableRow) ?usize { - for (rows) |row| { - switch (row) { - .columns => |columns| { - var width: usize = 0; - for (columns.cells) |cell| { - width += cell.colspan; - } - return width; - }, - .row => |data_row| { - var width: usize = 0; - for (data_row.cells) |cell| { - width += cell.colspan; - } - return width; - }, - .group => {}, - } - } - return null; -} - -fn tableHasTitleColumn(rows: []const hdoc.Block.TableRow) bool { - for (rows) |row| { - switch (row) { - 
.row => |data_row| if (data_row.title != null) return true, - .group => return true, - .columns => {}, - } - } - return false; -} - fn findHeaderIndex(rows: []const hdoc.Block.TableRow) ?usize { - for (rows, 0..) |row, index| { - if (row == .columns) return index; - } + if (rows.len > 0 and rows[0] == .columns) return 0; return null; } diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..e2c55b8 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -606,6 +606,89 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } +test "table derives column count from first data row" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ row(title="headered") { + \\ td { p "A" } + \\ td(colspan="2") { p "B" } + \\ } + \\} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + switch (doc.contents[0]) { + .table => |table| { + try std.testing.expectEqual(@as(usize, 3), table.column_count); + try std.testing.expect(table.has_row_titles); + }, + else => return error.TestExpectedEqual, + } +} + +test "table without header or data rows is rejected" { + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); table { group \"Topic\" }", &.{.missing_table_column_count}); +} + +test "columns row must come first" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ row { td "A" } + \\ columns { td "B" } + \\} + ; + + try validateDiagnostics(.{}, code, &.{.misplaced_columns_row}); +} + +test "table allows only one columns row" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ columns { td "A" } + \\ columns { 
td "B" } + \\} + ; + + try validateDiagnostics(.{}, code, &.{.duplicate_columns_row}); +} + +test "table tracks presence of row titles" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ row { td "A" } + \\ group { "Topic" } + \\} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + switch (doc.contents[0]) { + .table => |table| { + try std.testing.expect(!table.has_row_titles); + }, + else => return error.TestExpectedEqual, + } +} + test "title block populates metadata and warns on inline date" { const code = "hdoc(version=\"2.0\",lang=\"en\");\ntitle { Hello \\date{2020-01-02} }\nh1 \"Body\""; From 74d1dc0d81560e6329b1f4d5e50c8246e541818e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 10:01:16 +0100 Subject: [PATCH 090/116] Remove unused paragraph kind --- examples/demo.hdoc | 12 ++--- examples/guide.hdoc | 12 ++--- src/hyperdoc.zig | 48 +++++++++++++------ src/render/dump.zig | 7 ++- src/render/html5.zig | 30 +++++++++--- src/testsuite.zig | 78 +++++++++++++++++++++++++++++++ test/html5/admonition_blocks.hdoc | 17 +++++++ test/html5/admonition_blocks.html | 22 +++++++++ test/html5/paragraph_styles.html | 24 +++++++--- 9 files changed, 211 insertions(+), 39 deletions(-) create mode 100644 test/html5/admonition_blocks.hdoc create mode 100644 test/html5/admonition_blocks.html diff --git a/examples/demo.hdoc b/examples/demo.hdoc index a092e91..7b18c00 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -21,12 +21,12 @@ p { h2{Special Paragraphs} -note { HyperDoc 2.0 also supports different types of paragraphs. } -warning { These should affect rendering, and have well-defined semantics attached to them. 
} -danger { You shall not assume any specific formatting of these elements though. } -tip { They typically have a standardized style though. } -quote { You shall not pass! } -spoiler { Nobody expects the Spanish Inquisition! } +note "HyperDoc 2.0 also supports different types of paragraphs." +warning "These should affect rendering, and have well-defined semantics attached to them." +danger "You shall not assume any specific formatting of these elements though." +tip "They typically have a standardized style though." +quote "You shall not pass!" +spoiler "Nobody expects the Spanish Inquisition!" h2{Verbatim and Preformatted Text} diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 3f939f4..d7d4ecd 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -24,12 +24,12 @@ p { Links can target \link(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } -note { Notes highlight supportive information. } -warning { Warnings call out risky behavior. } -danger { Danger paragraphs emphasize critical hazards. } -tip { Tips provide actionable hints. } -quote { Quotes include sourced or emphasized wording. } -spoiler { Spoilers hide key story information until revealed. } +note "Notes highlight supportive information." +warning "Warnings call out risky behavior." +danger "Danger paragraphs emphasize critical hazards." +tip "Tips provide actionable hints." +quote "Quotes include sourced or emphasized wording." +spoiler "Spoilers hide key story information until revealed." 
h2(id="literals") { Literal and Preformatted Blocks } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..be548ba 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -50,6 +50,7 @@ pub const Document = struct { pub const Block = union(enum) { heading: Heading, paragraph: Paragraph, + admonition: Admonition, list: List, image: Image, preformatted: Preformatted, @@ -81,12 +82,17 @@ pub const Block = union(enum) { }; pub const Paragraph = struct { - kind: ParagraphKind, lang: LanguageTag, content: []Span, }; - pub const ParagraphKind = enum { p, note, warning, danger, tip, quote, spoiler }; + pub const Admonition = struct { + kind: AdmonitionKind, + lang: LanguageTag, + content: []Block, + }; + + pub const AdmonitionKind = enum { note, warning, danger, tip, quote, spoiler }; pub const List = struct { lang: LanguageTag, @@ -880,10 +886,14 @@ pub const SemanticAnalyzer = struct { const heading, const id = try sema.translate_heading_node(node); return .{ .{ .heading = heading }, id }; }, - .p, .note, .warning, .danger, .tip, .quote, .spoiler => { + .p => { const paragraph, const id = try sema.translate_paragraph_node(node); return .{ .{ .paragraph = paragraph }, id }; }, + .note, .warning, .danger, .tip, .quote, .spoiler => { + const admonition, const id = try sema.translate_admonition_node(node); + return .{ .{ .admonition = admonition }, id }; + }, .ul, .ol => { const list, const id = try sema.translate_list_node(node); return .{ .{ .list = list }, id }; @@ -976,8 +986,21 @@ pub const SemanticAnalyzer = struct { }); const heading: Block.Paragraph = .{ + .lang = attrs.lang, + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + }; + + return .{ heading, attrs.id }; + } + + fn translate_admonition_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Admonition, ?Reference } { + const attrs = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + id: ?Reference = null, + }); + + const admonition: 
Block.Admonition = .{ .kind = switch (node.type) { - .p => .p, .note => .note, .warning => .warning, .danger => .danger, @@ -987,10 +1010,10 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_block_list(node, .text_to_p), }; - return .{ heading, attrs.id }; + return .{ admonition, attrs.id }; } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { @@ -1337,7 +1360,6 @@ pub const SemanticAnalyzer = struct { const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ .paragraph = .{ - .kind = .p, .lang = .inherit, .content = spans, }, @@ -3079,12 +3101,6 @@ pub const Parser = struct { .title, .p, - .note, - .warning, - .danger, - .tip, - .quote, - .spoiler, .img, .pre, @@ -3106,6 +3122,12 @@ pub const Parser = struct { => true, .hdoc, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, .ul, .ol, .table, diff --git a/src/render/dump.zig b/src/render/dump.zig index 3188c56..39f2904 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -409,10 +409,15 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err }, .paragraph => |paragraph| { try writeTypeTag(writer, "paragraph"); - try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); }, + .admonition => |admonition| { + try writeTypeTag(writer, "admonition"); + try dumpEnumField(writer, indent + indent_step, "kind", admonition.kind); + try dumpOptionalStringField(writer, indent + indent_step, "lang", admonition.lang.text); + try dumpBlockListField(writer, indent + indent_step, "content", admonition.content); + }, .list => |list| { try writeTypeTag(writer, "list"); try dumpOptionalStringField(writer, 
indent + indent_step, "lang", list.lang.text); diff --git a/src/render/html5.zig b/src/render/html5.zig index 364255e..a3661d3 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -26,6 +26,7 @@ const RenderContext = struct { switch (block) { .heading => |heading| try ctx.renderHeading(heading, block_index, indent), .paragraph => |paragraph| try ctx.renderParagraph(paragraph, block_index, indent), + .admonition => |admonition| try ctx.renderAdmonition(admonition, block_index, indent), .list => |list| try ctx.renderList(list, block_index, indent), .image => |image| try ctx.renderImage(image, block_index, indent), .preformatted => |preformatted| try ctx.renderPreformatted(preformatted, block_index, indent), @@ -127,23 +128,38 @@ const RenderContext = struct { const lang_attr = langAttribute(paragraph.lang); const id_attr = ctx.resolveBlockId(block_index); - var class_buffer: [32]u8 = undefined; - const class_attr: ?[]const u8 = switch (paragraph.kind) { - .p => null, - else => std.fmt.bufPrint(&class_buffer, "hdoc-{s}", .{@tagName(paragraph.kind)}) catch unreachable, - }; - try writeIndent(ctx.writer, indent); try writeStartTag(ctx.writer, "p", .regular, .{ .id = id_attr, .lang = lang_attr, - .class = class_attr, }); try ctx.renderSpans(paragraph.content); try writeEndTag(ctx.writer, "p"); try ctx.writer.writeByte('\n'); } + fn renderAdmonition(ctx: *RenderContext, admonition: hdoc.Block.Admonition, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(admonition.lang); + const id_attr = ctx.resolveBlockId(block_index); + + var class_buffer: [32]u8 = undefined; + const class_attr = std.fmt.bufPrint(&class_buffer, "hdoc-{s}", .{@tagName(admonition.kind)}) catch unreachable; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "div", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .class = class_attr, + }); + if (admonition.content.len > 0) { + try ctx.writer.writeByte('\n'); + try 
ctx.renderBlocks(admonition.content, indent + indent_step); + try writeIndent(ctx.writer, indent); + } + try writeEndTag(ctx.writer, "div"); + try ctx.writer.writeByte('\n'); + } + fn renderList(ctx: *RenderContext, list: hdoc.Block.List, block_index: ?usize, indent: usize) RenderError!void { const lang_attr = langAttribute(list.lang); const id_attr = ctx.resolveBlockId(block_index); diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..0f358de 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -224,6 +224,84 @@ test "span merger preserves whitespace after inline mono" { } } +test "admonition supports block-list bodies" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\note{ + \\ p "Outer block text." + \\ ul{li "Nested item"} + \\} + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + const admonition = doc.contents[0].admonition; + try std.testing.expectEqual(hdoc.Block.AdmonitionKind.note, admonition.kind); + try std.testing.expectEqual(@as(usize, 2), admonition.content.len); + + switch (admonition.content[0]) { + .paragraph => |para| { + try std.testing.expectEqual(@as(usize, 1), para.content.len); + try std.testing.expectEqualStrings("Outer block text.", para.content[0].content.text); + }, + else => return error.TestExpectedEqual, + } + + switch (admonition.content[1]) { + .list => |list| { + try std.testing.expectEqual(@as(usize, 1), list.items.len); + try std.testing.expectEqual(@as(?u32, null), list.first); + try std.testing.expectEqual(@as(usize, 1), list.items[0].content.len); + }, + else => return error.TestExpectedEqual, + } +} + +test "admonition shorthand promotes inline bodies to paragraphs" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); 
+ defer diagnostics.deinit(); + + const source = + "hdoc(version=\"2.0\",lang=\"en\");\n" ++ + "warning \"Be careful.\" \n" ++ + "tip:\n" ++ + "| first line\n" ++ + "| second line\n"; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 2), doc.contents.len); + + const warning_block = doc.contents[0].admonition; + try std.testing.expectEqual(hdoc.Block.AdmonitionKind.warning, warning_block.kind); + try std.testing.expectEqual(@as(usize, 1), warning_block.content.len); + switch (warning_block.content[0]) { + .paragraph => |para| { + try std.testing.expectEqualStrings("Be careful.", para.content[0].content.text); + }, + else => return error.TestExpectedEqual, + } + + const tip_block = doc.contents[1].admonition; + try std.testing.expectEqual(hdoc.Block.AdmonitionKind.tip, tip_block.kind); + try std.testing.expectEqual(@as(usize, 1), tip_block.content.len); + switch (tip_block.content[0]) { + .paragraph => |para| { + try std.testing.expectEqualStrings("first line\nsecond line", para.content[0].content.text); + }, + else => return error.TestExpectedEqual, + } +} + test "pre verbatim preserves trailing whitespace" { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); diff --git a/test/html5/admonition_blocks.hdoc b/test/html5/admonition_blocks.hdoc new file mode 100644 index 0000000..3636623 --- /dev/null +++ b/test/html5/admonition_blocks.hdoc @@ -0,0 +1,17 @@ +hdoc(version="2.0", title="Admonition Blocks", lang="en"); + +h1 "Admonitions as Containers" + +note{ + p "A note can span multiple blocks." + ul{ + li "Lists are allowed." + li "They render inside the note container." + } +} + +danger "String bodies become paragraphs inside the container." + +spoiler: +| Hidden detail +| spans multiple lines. 
diff --git a/test/html5/admonition_blocks.html b/test/html5/admonition_blocks.html new file mode 100644 index 0000000..a298e1e --- /dev/null +++ b/test/html5/admonition_blocks.html @@ -0,0 +1,22 @@ +
    +

    Admonition Blocks

    +
    +

    §1 Admonitions as Containers

    +
    +

    A note can span multiple blocks.

    +
      +
    • +

      Lists are allowed.

      +
    • +
    • +

      They render inside the note container.

      +
    • +
    +
    +
    +

    String bodies become paragraphs inside the container.

    +
    +
    +

    Hidden detail +spans multiple lines.

    +
    diff --git a/test/html5/paragraph_styles.html b/test/html5/paragraph_styles.html index 575b583..1bdd369 100644 --- a/test/html5/paragraph_styles.html +++ b/test/html5/paragraph_styles.html @@ -3,9 +3,21 @@

    Paragraph Styles

    §1 Paragraph Styles

    A standard paragraph introducing the styles below.

    -

    Notes provide informational context without urgency.

    -

    Warnings highlight potential issues to watch for.

    -

    Danger blocks signal critical problems.

    -

    Tips offer helpful hints for readers.

    -

    Quoted material sits in its own paragraph style.

    -

    This is a spoiler; renderers may hide or blur this content.

    +
    +

    Notes provide informational context without urgency.

    +
    +
    +

    Warnings highlight potential issues to watch for.

    +
    +
    +

    Danger blocks signal critical problems.

    +
    +
    +

    Tips offer helpful hints for readers.

    +
    +
    +

    Quoted material sits in its own paragraph style.

    +
    +
    +

    This is a spoiler; renderers may hide or blur this content.

    +
    From 21f6fc68d3f90208dcd93a0c2c93398c5372abb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 12:25:28 +0100 Subject: [PATCH 091/116] Guard plaintext rendering from unexpected refs --- AGENTS.md | 1 + docs/specification.md | 1 + examples/demo.hdoc | 2 +- examples/guide.hdoc | 2 +- src/hyperdoc.zig | 204 ++++++++++++++++++++++++---- src/render/dump.zig | 17 ++- src/render/html5.zig | 89 +++++++++++- src/testsuite.zig | 58 +++++++- test/html5/nesting_and_inlines.hdoc | 6 +- test/html5/nesting_and_inlines.html | 2 + 10 files changed, 347 insertions(+), 35 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 2ab16dd..dc23294 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,6 +10,7 @@ - Run `zig build` to validate the main application still compiles - Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. - Avoid editing documentation unless the request explicitly asks for it. +- `src/hyperdoc.zig` must not contain locale- or rendering-specific parts. - Treat `docs/specification.md` as the authoritative source of behavior; examples may be outdated or incorrect. - If the spec is unclear or conflicts with code/tests, ask before changing behavior. - Do not implement "just make it work" fallbacks that alter semantics to satisfy examples. diff --git a/docs/specification.md b/docs/specification.md index 50f184c..fcee7cd 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -812,6 +812,7 @@ Notes: Semantics: - `\ref(ref="X")` **MUST** resolve to a top-level element with `id="X"`, otherwise it is semantically invalid. +- A `\ref` inline element **MUST NOT** appear inside `h1`, `h2`, `h3`, or `title` elements. - If `\ref` has a non-empty body, the body **MUST** be used as the rendered link text. 
- If `\ref` has an empty body (`;`), the following rules apply: - If the referenced target is a heading (`h1`, `h2`, `h3`), the renderer **MUST** synthesize link text from the target and `fmt`: diff --git a/examples/demo.hdoc b/examples/demo.hdoc index a092e91..5284b9a 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -15,7 +15,7 @@ p(id="foo") { p { This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). - We can also \link(ref="foo"){link to other parts of a document} or \link(uri="https://ashet.computer"){to websites}. + We can also \ref(ref="foo"){link to other parts of a document} or \link(uri="https://ashet.computer"){to websites}. With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. } diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 3f939f4..4a0bbf0 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -21,7 +21,7 @@ p { } p { - Links can target \link(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. + Links can target \ref(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } note { Notes highlight supportive information. 
} diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..2c35cfa 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -166,11 +166,20 @@ pub fn FormattedDateTime(comptime DT: type) type { } pub const Span = struct { + pub const ReferenceFormat = enum { full, name, index }; + pub const Content = union(enum) { text: []const u8, date: FormattedDateTime(Date), time: FormattedDateTime(Time), datetime: FormattedDateTime(DateTime), + reference: InlineReference, + }; + + pub const InlineReference = struct { + ref: Reference, + fmt: ReferenceFormat, + target_block: ?usize = null, }; pub const Attributes = struct { @@ -220,18 +229,27 @@ pub const ScriptPosition = enum { pub const Link = union(enum) { none, - ref: Reference, + ref: RefTarget, uri: Uri, pub fn eql(lhs: Link, rhs: Link) bool { return switch (lhs) { .none => (rhs == .none), - .ref => (rhs == .ref) and std.mem.eql(u8, lhs.ref.text, rhs.ref.text), + .ref => (rhs == .ref) and lhs.ref.eql(rhs.ref), .uri => (rhs == .uri) and std.mem.eql(u8, lhs.uri.text, rhs.uri.text), }; } }; +pub const RefTarget = struct { + ref: Reference, + block_index: ?usize = null, + + pub fn eql(lhs: RefTarget, rhs: RefTarget) bool { + return lhs.ref.eql(rhs.ref) and lhs.block_index == rhs.block_index; + } +}; + /// HyperDoc Version Number pub const Version = struct { major: u16, @@ -586,6 +604,7 @@ pub fn parse( } try sema.validate_references(&id_map); + try sema.resolve_references(&id_map); const doc_lang = header.lang orelse LanguageTag.inherit; const title = try sema.finalize_title(header, doc_lang); @@ -920,6 +939,7 @@ pub const SemanticAnalyzer = struct { .@"\\sub", .@"\\sup", .@"\\link", + .@"\\ref", .@"\\time", .@"\\date", .@"\\datetime", @@ -1415,6 +1435,12 @@ pub const SemanticAnalyzer = struct { try merger.output.append(merger.arena, span); }, + .reference => { + try merger.flush_internal(.keep); + std.debug.assert(merger.current_span.items.len == 0); + + try merger.output.append(merger.arena, span); + }, .text => 
|text_content| { std.debug.assert(span.attribs.eql(merger.attribs)); @@ -1594,34 +1620,44 @@ pub const SemanticAnalyzer = struct { .@"\\link" => { const props = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, - uri: ?Uri = null, - ref: ?Reference = null, + uri: Uri, }); - if (props.uri != null and props.ref != null) { - try sema.emit_diagnostic(.invalid_link, node.location); - } + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .link = .{ .uri = props.uri }, + }), .emit_diagnostic); + }, - const link: Link = if (props.uri) |uri| blk: { - break :blk .{ .uri = uri }; - } else if (props.ref) |ref| blk: { - break :blk .{ .ref = ref }; - } else blk: { - try sema.emit_diagnostic(.invalid_link, node.location); - break :blk .none; - }; + .@"\\ref" => { + const props = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + ref: Reference, + fmt: Span.ReferenceFormat = .full, + }); - if (props.ref) |ref| { - if (props.uri == null) { - const ref_location = get_attribute_location(node, "ref", .value) orelse node.location; - try sema.pending_refs.append(sema.arena, .{ .ref = ref, .location = ref_location }); - } - } + const ref_location = get_attribute_location(node, "ref", .value) orelse node.location; + try sema.pending_refs.append(sema.arena, .{ .ref = props.ref, .location = ref_location }); - try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + const link_attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, - .link = link, - }), .emit_diagnostic); + .link = .{ .ref = .{ .ref = props.ref } }, + }); + + switch (node.body) { + .empty => { + try spans.append(sema.arena, .{ + .content = .{ .reference = .{ + .ref = props.ref, + .fmt = props.fmt, + .target_block = null, + } }, + .attribs = link_attribs, + .location = node.location, + }); + }, + else => try 
sema.translate_inline_body(spans, node.body, link_attribs, .emit_diagnostic), + } }, .@"\\mono" => { @@ -1665,6 +1701,7 @@ pub const SemanticAnalyzer = struct { // Convert the content_spans into a "rendered string". const content_text = (sema.render_spans_to_plaintext(content_spans, .reject_date_time) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, + error.UnexpectedReference => unreachable, else => |e| return e, }).text; @@ -1777,7 +1814,7 @@ pub const SemanticAnalyzer = struct { sema: *SemanticAnalyzer, source_spans: []const Span, mode: PlaintextMode, - ) error{ OutOfMemory, DateTimeRenderingUnsupported }!TitlePlainText { + ) error{ OutOfMemory, DateTimeRenderingUnsupported, UnexpectedReference }!TitlePlainText { var output: std.ArrayList(u8) = .empty; defer output.deinit(sema.arena); @@ -1786,6 +1823,7 @@ pub const SemanticAnalyzer = struct { for (source_spans) |span| { switch (span.content) { .text => |str| try output.appendSlice(sema.arena, str), + .reference => return error.UnexpectedReference, .date => |value| switch (mode) { .reject_date_time => return error.DateTimeRenderingUnsupported, .iso_date_time => { @@ -1891,6 +1929,7 @@ pub const SemanticAnalyzer = struct { if (block_title) |title_block| { const rendered = sema.render_spans_to_plaintext(title_block.content, .iso_date_time) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, + error.UnexpectedReference => unreachable, else => |e| return e, }; @@ -2099,7 +2138,10 @@ pub const SemanticAnalyzer = struct { LanguageTag => LanguageTag.parse(value) catch return error.InvalidValue, TimeZoneOffset => TimeZoneOffset.parse(value) catch return error.InvalidValue, - else => @compileError("Unsupported attribute type: " ++ @typeName(T)), + inline else => |EnumT| switch (@typeInfo(EnumT)) { + .@"enum" => std.meta.stringToEnum(EnumT, value) orelse return error.InvalidValue, + else => @compileError("Unsupported attribute type: " ++ @typeName(EnumT)), + }, }; } 
@@ -2111,6 +2153,107 @@ pub const SemanticAnalyzer = struct { } } + fn resolve_references( + sema: *SemanticAnalyzer, + id_map: *const std.StringArrayHashMapUnmanaged(usize), + ) error{OutOfMemory}!void { + for (sema.blocks.items) |*block| { + try sema.resolve_block_references(block, id_map); + } + + if (sema.title_block) |*title_block| { + try sema.resolve_span_slice(&title_block.content, id_map); + } + } + + fn resolve_block_references( + sema: *SemanticAnalyzer, + block: *Block, + id_map: *const std.StringArrayHashMapUnmanaged(usize), + ) error{OutOfMemory}!void { + switch (block.*) { + .heading => |*heading| { + try sema.resolve_span_slice(&heading.content, id_map); + }, + .paragraph => |*paragraph| { + try sema.resolve_span_slice(¶graph.content, id_map); + }, + .list => |*list| { + for (list.items) |*item| { + for (item.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + } + }, + .image => |*image| { + try sema.resolve_span_slice(&image.content, id_map); + }, + .preformatted => |*preformatted| { + try sema.resolve_span_slice(&preformatted.content, id_map); + }, + .toc => {}, + .table => |*table| { + for (table.rows) |*row| switch (row.*) { + .columns => |*columns| { + for (columns.cells) |*cell| { + for (cell.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + } + }, + .group => |*group| { + try sema.resolve_span_slice(&group.content, id_map); + }, + .row => |*table_row| { + for (table_row.cells) |*cell| { + for (cell.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + } + }, + }; + }, + } + } + + fn resolve_span_slice( + sema: *SemanticAnalyzer, + spans: *[]Span, + id_map: *const std.StringArrayHashMapUnmanaged(usize), + ) error{OutOfMemory}!void { + for (spans.*) |*span| { + var target_index: ?usize = null; + switch (span.attribs.link) { + .ref => |ref_target| { + target_index = ref_target.block_index orelse id_map.get(ref_target.ref.text); + span.attribs.link = .{ .ref = .{ + .ref 
= ref_target.ref, + .block_index = target_index, + } }; + }, + else => {}, + } + + switch (span.content) { + .reference => |ref_content| { + const resolved_index = target_index orelse id_map.get(ref_content.ref.text) orelse continue; + const target_block = sema.blocks.items[resolved_index]; + switch (target_block) { + .heading => {}, + else => try sema.emit_diagnostic(.empty_ref_body_target, span.location), + } + span.content = .{ .reference = .{ + .ref = ref_content.ref, + .fmt = ref_content.fmt, + .target_block = resolved_index, + } }; + span.attribs.link = .{ .ref = .{ .ref = ref_content.ref, .block_index = resolved_index } }; + }, + else => {}, + } + } + } + fn build_toc(sema: *SemanticAnalyzer, contents: []const Block, block_locations: []const Parser.Location) !Document.TableOfContents { std.debug.assert(contents.len == block_locations.len); @@ -3021,6 +3164,7 @@ pub const Parser = struct { @"\\sub", @"\\sup", @"\\link", + @"\\ref", @"\\date", @"\\time", @"\\datetime", @@ -3036,6 +3180,7 @@ pub const Parser = struct { .@"\\sub", .@"\\sup", .@"\\link", + .@"\\ref", .@"\\date", .@"\\time", .@"\\datetime", @@ -3097,6 +3242,7 @@ pub const Parser = struct { .@"\\sub", .@"\\sup", .@"\\link", + .@"\\ref", .@"\\date", .@"\\time", .@"\\datetime", @@ -3193,7 +3339,6 @@ pub const Diagnostic = struct { block_list_required: NodeBodyError, invalid_inline_combination: InlineCombinationError, link_not_nestable, - invalid_link, invalid_date_time, invalid_date_time_body, invalid_date_time_fmt: DateTimeFormatError, @@ -3210,6 +3355,7 @@ pub const Diagnostic = struct { column_count_mismatch: TableShapeError, duplicate_id: ReferenceError, unknown_id: ReferenceError, + empty_ref_body_target, // warnings: document_starts_with_bom, @@ -3247,7 +3393,6 @@ pub const Diagnostic = struct { .block_list_required, .invalid_inline_combination, .link_not_nestable, - .invalid_link, .invalid_date_time, .invalid_date_time_fmt, .missing_timezone, @@ -3264,6 +3409,7 @@ pub const Diagnostic = struct 
{ .column_count_mismatch, .duplicate_id, .unknown_id, + .empty_ref_body_target, => .@"error", .missing_document_language, @@ -3323,7 +3469,6 @@ pub const Diagnostic = struct { .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), .link_not_nestable => try w.writeAll("Links are not nestable"), - .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), @@ -3356,6 +3501,7 @@ pub const Diagnostic = struct { .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), + .empty_ref_body_target => try w.writeAll("Empty-body \\ref is only supported for headings."), .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), diff --git a/src/render/dump.zig b/src/render/dump.zig index 3188c56..25ffe02 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -135,7 +135,11 @@ fn writeSpanAttributes(writer: *Writer, span: hdoc.Span) Writer.Error!void { .none => {}, .ref => |value| { try writeAttrSeparator(writer, &first); - try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); + if (value.block_index) |idx| { + try writer.print("link=\"ref:{f}#{d}\"", .{ std.zig.fmtString(value.ref.text), idx }); + } else { + try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.ref.text)}); + } }, .uri => |value| { try writeAttrSeparator(writer, &first); @@ -217,6 +221,17 @@ fn writeSpanContentInline(writer: *Writer, content: hdoc.Span.Content) Writer.Er try writeFormattedDateTimeInline(writer, 
datetime); try writer.writeByte('"'); }, + .reference => |reference| { + try writer.writeByte('"'); + try writer.writeAll("ref:"); + try writer.writeAll(reference.ref.text); + try writer.writeByte('@'); + try writer.writeAll(@tagName(reference.fmt)); + if (reference.target_block) |idx| { + try writer.print("#{d}", .{idx}); + } + try writer.writeByte('"'); + }, } } diff --git a/src/render/html5.zig b/src/render/html5.zig index 364255e..936bdba 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -472,8 +472,13 @@ const RenderContext = struct { const href_value = switch (span.attribs.link) { .none => unreachable, .ref => |reference| blk: { + if (ctx.resolveBlockId(reference.block_index)) |resolved| { + var href_buffer: [128]u8 = undefined; + break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{resolved}) catch unreachable; + } + var href_buffer: [128]u8 = undefined; - break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{reference.text}) catch unreachable; + break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{reference.ref.text}) catch unreachable; }, .uri => |uri| uri.text, }; @@ -530,6 +535,9 @@ const RenderContext = struct { .date => |date| try ctx.renderDateTimeValue(.date, date, content_lang), .time => |time| try ctx.renderDateTimeValue(.time, time, content_lang), .datetime => |datetime| try ctx.renderDateTimeValue(.datetime, datetime, content_lang), + .reference => |reference| { + try ctx.renderReference(reference, content_lang); + }, } while (opened_len > 0) { @@ -538,6 +546,66 @@ const RenderContext = struct { } } + fn renderReference(ctx: *RenderContext, reference: hdoc.Span.InlineReference, content_lang: ?[]const u8) RenderError!void { + if (reference.target_block) |target_idx| { + if (target_idx < ctx.doc.contents.len) { + switch (ctx.doc.contents[target_idx]) { + .heading => |heading| return ctx.renderHeadingReference(reference, heading, content_lang), + else => {}, + } + } + } + + try ctx.renderReferenceText(reference.ref.text, content_lang); + } 
+ + fn renderHeadingReference(ctx: *RenderContext, reference: hdoc.Span.InlineReference, heading: hdoc.Block.Heading, content_lang: ?[]const u8) RenderError!void { + var has_bdi = false; + if (content_lang) |lang| { + try writeStartTag(ctx.writer, "bdi", .regular, .{ .lang = lang }); + has_bdi = true; + } + + const print_index = reference.fmt != .name; + if (print_index) { + var index_buffer: [32]u8 = undefined; + const index_label = try formatHeadingIndexLabel(heading.index, &index_buffer); + try writeEscapedHtml(ctx.writer, index_label); + } + + if (reference.fmt == .full and heading.content.len > 0) { + try ctx.writer.writeByte(' '); + } + + switch (reference.fmt) { + .full, .name => try ctx.renderReferenceTargetSpans(heading.content), + .index => {}, + } + + if (has_bdi) { + try writeEndTag(ctx.writer, "bdi"); + } + } + + fn renderReferenceText(ctx: *RenderContext, text: []const u8, content_lang: ?[]const u8) RenderError!void { + if (content_lang) |lang| { + try writeStartTag(ctx.writer, "bdi", .regular, .{ .lang = lang }); + try writeEscapedHtml(ctx.writer, text); + try writeEndTag(ctx.writer, "bdi"); + return; + } + + try writeEscapedHtml(ctx.writer, text); + } + + fn renderReferenceTargetSpans(ctx: *RenderContext, spans: []const hdoc.Span) RenderError!void { + for (spans) |span| { + var adjusted = span; + adjusted.attribs.link = .none; + try ctx.renderSpan(adjusted); + } + } + fn renderDateTimeValue(ctx: *RenderContext, comptime kind: enum { date, time, datetime }, value: anytype, lang_attr: ?[]const u8) RenderError!void { var datetime_buffer: [128]u8 = undefined; const datetime_value = switch (kind) { @@ -790,6 +858,25 @@ fn formatIsoDateTime(value: hdoc.DateTime, buffer: []u8) RenderError![]const u8 return std.fmt.bufPrint(buffer, "{s}T{s}", .{ date_text, time_text }) catch unreachable; } +fn formatHeadingIndexLabel(index: hdoc.Block.Heading.Index, buffer: []u8) RenderError![]const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = 
stream.writer(); + + const parts = switch (index) { + .h1 => index.h1[0..1], + .h2 => index.h2[0..2], + .h3 => index.h3[0..3], + }; + + for (parts, 0..) |value, idx| { + if (idx != 0) try writer.writeByte('.'); + try writer.print("{d}", .{value}); + } + try writer.writeByte('.'); + + return stream.getWritten(); +} + fn formatDateValue(value: hdoc.FormattedDateTime(hdoc.Date), buffer: []u8) RenderError![]const u8 { return switch (value.format) { .year => std.fmt.bufPrint(buffer, "{d}", .{value.value.year}) catch unreachable, diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..464518c 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -415,6 +415,62 @@ test "parser handles unknown node types" { } } +test "\\ref synthesizes heading text for empty bodies" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\h1(id="intro"){Introduction} + \\p{See \ref(ref="intro"); and \ref(ref="intro",fmt="name"); and \ref(ref="intro",fmt="index");} + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 2), doc.contents.len); + + const paragraph = doc.contents[1].paragraph; + const expected_formats = [_]hdoc.Span.ReferenceFormat{ .full, .name, .index }; + + var seen: usize = 0; + for (paragraph.content) |span| { + if (span.content != .reference) continue; + + const reference = span.content.reference; + try std.testing.expect(seen < expected_formats.len); + try std.testing.expectEqual(expected_formats[seen], reference.fmt); + try std.testing.expectEqual(@as(?usize, 0), reference.target_block); + + switch (span.attribs.link) { + .ref => |link| try std.testing.expectEqual(@as(?usize, 0), link.block_index), + else => return error.TestExpectedEqual, + } + + seen += 1; + } + + try std.testing.expectEqual(expected_formats.len, 
seen); +} + +test "\\ref empty body rejects non-heading targets" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\p(id="p1"){Body} + \\p{\ref(ref="p1");} + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(diagnostics.has_error()); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .empty_ref_body_target)); +} + test "table of contents inserts automatic headings when skipping levels" { const source = \\hdoc(version="2.0"); @@ -791,7 +847,7 @@ test "diagnostics for missing timezone and unknown id" { const source = \\hdoc(version="2.0"); - \\p{ \time"12:00:00" \link(ref="missing"){missing} } + \\p{ \time"12:00:00" \ref(ref="missing"){missing} } ; var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); diff --git a/test/html5/nesting_and_inlines.hdoc b/test/html5/nesting_and_inlines.hdoc index f1bd8a2..fcfd8fc 100644 --- a/test/html5/nesting_and_inlines.hdoc +++ b/test/html5/nesting_and_inlines.hdoc @@ -6,7 +6,11 @@ p "This document exercises inline formatting and nested lists." p { We can mix \em{emphasis}, \strike{strike}, \mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } -p { Links point to \link(ref="top"){local anchors} or \link(uri="https://example.com"){external sites}. } +p { Links point to \ref(ref="top"){local anchors} or \link(uri="https://example.com"){external sites}. 
} + +h2(id="formatted") {Formatted \em{Heading}} + +p { Empty-body references become \ref(ref="formatted",fmt="full"); \ref(ref="formatted",fmt="name"); and \ref(ref="formatted",fmt="index"); } ul { li { p "Top-level item one" } diff --git a/test/html5/nesting_and_inlines.html b/test/html5/nesting_and_inlines.html index 1b5be1a..1ee2e94 100644 --- a/test/html5/nesting_and_inlines.html +++ b/test/html5/nesting_and_inlines.html @@ -5,6 +5,8 @@

    §1 Nesting and Inline Styling

    This document exercises inline formatting and nested lists.

    We can mix emphasis, strike, monospace text. Superscript x2 and subscript x2 also appear.

    Links point to local anchors or external sites.

    +

    §1.1 Formatted Heading

    +

    Empty-body references become 1.1. Formatted Heading Formatted Heading and 1.1.

    • Top-level item one

      From 48997cdaf3f55b7737cd7f92b12936f8397b4066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 15:15:14 +0100 Subject: [PATCH 092/116] Modify validate.yml for hdoc-2.0 and testing Updated workflow to include hdoc-2.0 branch and added test step. --- .github/workflows/validate.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 0b8538c..9f2c982 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -2,9 +2,9 @@ name: Build on: pull_request: - branches: [master] + branches: [master, hdoc-2.0] push: - branches: [master] + branches: [master, hdoc-2.0] jobs: build: @@ -20,4 +20,8 @@ jobs: - name: Build run: | - zig build + zig build install + + - name: Test + run: | + zig build test From 3627f99a47c50c3d8feee239154042cbd3cb37c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:05:16 +0100 Subject: [PATCH 093/116] Fixes simple merging bug --- src/hyperdoc.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index d8c19b6..4012b0d 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -2253,6 +2253,11 @@ pub const SemanticAnalyzer = struct { .preformatted => |*preformatted| { try sema.resolve_span_slice(&preformatted.content, id_map); }, + .admonition => |*admonition| { + for (admonition.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + }, .toc => {}, .table => |*table| { for (table.rows) |*row| switch (row.*) { From be0c069f193ec4e94f7d9d13a7faff7c9afb8c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:15:32 +0100 Subject: [PATCH 094/116] Adds yaml dumps to tests. 
Adds basic golden file verification --- build.zig | 27 ++++++ src/main.zig | 4 +- test/html5/admonition_blocks.yaml | 73 +++++++++++++++ test/html5/document_header.yaml | 33 +++++++ test/html5/media_and_toc.yaml | 101 +++++++++++++++++++++ test/html5/nesting_and_inlines.yaml | 121 +++++++++++++++++++++++++ test/html5/paragraph_styles.yaml | 89 ++++++++++++++++++ test/html5/tables.yaml | 134 ++++++++++++++++++++++++++++ 8 files changed, 580 insertions(+), 2 deletions(-) create mode 100644 test/html5/admonition_blocks.yaml create mode 100644 test/html5/document_header.yaml create mode 100644 test/html5/media_and_toc.yaml create mode 100644 test/html5/nesting_and_inlines.yaml create mode 100644 test/html5/paragraph_styles.yaml create mode 100644 test/html5/tables.yaml diff --git a/build.zig b/build.zig index 1d265f5..262e775 100644 --- a/build.zig +++ b/build.zig @@ -1,5 +1,14 @@ const std = @import("std"); +const test_files: []const []const u8 = &.{ + "test/html5/admonition_blocks.hdoc", + "test/html5/document_header.hdoc", + "test/html5/media_and_toc.hdoc", + "test/html5/nesting_and_inlines.hdoc", + "test/html5/paragraph_styles.hdoc", + "test/html5/tables.hdoc", +}; + pub fn build(b: *std.Build) void { // Options: const target = b.standardTargetOptions(.{}); @@ -35,6 +44,24 @@ pub fn build(b: *std.Build) void { run_step.dependOn(&run_cmd.step); + // Snapshot tests: + for (test_files) |path| { + std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); + const html_file = b.fmt("{s}.html", .{path[0 .. path.len - 5]}); + const yaml_file = b.fmt("{s}.yaml", .{path[0 .. path.len - 5]}); + + for (&[2][]const u8{ html_file, yaml_file }) |file| { + const test_run = b.addRunArtifact(exe); + test_run.addArgs(&.{ "--format", file[file.len - 4 ..] 
}); + test_run.addFileArg(b.path(path)); + test_run.expectStdOutEqual( + b.build_root.handle.readFileAlloc(b.allocator, file, 10 * 1024 * 1024) catch @panic("oom"), + ); + test_step.dependOn(&test_run.step); + } + } + + // Unit tests: const exe_tests = b.addTest(.{ .root_module = b.createModule(.{ .root_source_file = b.path("src/testsuite.zig"), diff --git a/src/main.zig b/src/main.zig index 693a2f1..776d241 100644 --- a/src/main.zig +++ b/src/main.zig @@ -65,7 +65,7 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic } switch (options.format) { - .dump => try hdoc.render.yaml(parsed, output_stream), + .yaml => try hdoc.render.yaml(parsed, output_stream), .html => try hdoc.render.html5(parsed, output_stream), } } @@ -76,7 +76,7 @@ const CliOptions = struct { }; const RenderFormat = enum { - dump, + yaml, html, }; diff --git a/test/html5/admonition_blocks.yaml b/test/html5/admonition_blocks.yaml new file mode 100644 index 0000000..d40b0fc --- /dev/null +++ b/test/html5/admonition_blocks.yaml @@ -0,0 +1,73 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Admonition Blocks" + full: + lang: "en" + content: + - [] "Admonition Blocks" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Admonitions as Containers" + - admonition: + kind: note + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "A note can span multiple blocks." + - list: + lang: "" + first: null + items: + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Lists are allowed." + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "They render inside the note container." + - admonition: + kind: danger + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "String bodies become paragraphs inside the container." 
+ - admonition: + kind: spoiler + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Hidden detail\nspans multiple lines." + ids: + - null + - null + - null + - null diff --git a/test/html5/document_header.yaml b/test/html5/document_header.yaml new file mode 100644 index 0000000..eae9439 --- /dev/null +++ b/test/html5/document_header.yaml @@ -0,0 +1,33 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Metadata Title From Block" + full: + lang: "" + content: + - [] "Metadata Title From Block" + author: null + date: + date: + year: 2024 + month: 8 + day: 16 + time: + hour: 9 + minute: 30 + second: 0 + microsecond: 0 + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "This document uses header metadata and a title block without any explicit headings." + ids: + - null diff --git a/test/html5/media_and_toc.yaml b/test/html5/media_and_toc.yaml new file mode 100644 index 0000000..cd0bd24 --- /dev/null +++ b/test/html5/media_and_toc.yaml @@ -0,0 +1,101 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Media and TOC" + full: + lang: "en" + content: + - [] "Media and TOC" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: + - 2 + - 4 + - 6 + children: + - + level: h3 + headings: [] + children: [] + - + level: h3 + headings: [] + children: [] + - + level: h3 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Media and TOC" + - toc: + lang: "" + depth: 3 + - heading: + level: h2 + lang: "" + content: + - [] "Preformatted" + - preformatted: + lang: "" + syntax: "python" + content: + - [] " print(\"hello world\") " + - heading: + level: h2 + lang: "" + content: + - [] "Figure" + - image: + lang: "" + alt: "Example figure" + path: "./example.png" + content: + - [] "Figure caption text." 
+ - heading: + level: h2 + lang: "" + content: + - [] "Dates and Times" + - paragraph: + lang: "" + content: + - [] "Today is " + - [] "date:+2024-03-01@iso" + - [] "." + - paragraph: + lang: "" + content: + - [] "The meeting is at " + - [] "time:14:30:45@long" + - [] "." + - paragraph: + lang: "" + content: + - [] "Release happens on " + - [] "datetime:+2024-04-15T08:00:00" + - [] "." + ids: + - "intro" + - null + - "code" + - null + - "figure" + - "fig-code" + - "dates" + - null + - null + - null diff --git a/test/html5/nesting_and_inlines.yaml b/test/html5/nesting_and_inlines.yaml new file mode 100644 index 0000000..498844e --- /dev/null +++ b/test/html5/nesting_and_inlines.yaml @@ -0,0 +1,121 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Nesting and Inlines" + full: + lang: "en" + content: + - [] "Nesting and Inlines" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: + - 4 + children: + - + level: h3 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Nesting and Inline Styling" + - paragraph: + lang: "" + content: + - [] "This document exercises inline formatting and nested lists." + - paragraph: + lang: "" + content: + - [] "We can mix " + - [em] "emphasis" + - [] ", " + - [strike] "strike" + - [] ", " + - [mono] "monospace" + - [] " text. Superscript x" + - [position="superscript"] "2" + - [] " and subscript x" + - [position="subscript"] "2" + - [] " also appear." + - paragraph: + lang: "" + content: + - [] "Links point to " + - [link="ref:top#0"] "local anchors" + - [] " or " + - [link="uri:https://example.com"] "external sites" + - [] "." 
+ - heading: + level: h2 + lang: "" + content: + - [] "Formatted " + - [em] "Heading" + - paragraph: + lang: "" + content: + - [] "Empty-body references become " + - [link="ref:formatted#4"] "ref:formatted@full#4" + - [] " " + - [link="ref:formatted#4"] "ref:formatted@name#4" + - [] " and " + - [link="ref:formatted#4"] "ref:formatted@index#4" + - [] "" + - list: + lang: "" + first: null + items: + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Top-level item one" + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Top-level item two with nested list" + - list: + lang: "" + first: 1 + items: + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Nested ordered item A" + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Nested ordered item B" + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Top-level item three" + ids: + - "top" + - null + - null + - null + - "formatted" + - null + - null diff --git a/test/html5/paragraph_styles.yaml b/test/html5/paragraph_styles.yaml new file mode 100644 index 0000000..e8d82e9 --- /dev/null +++ b/test/html5/paragraph_styles.yaml @@ -0,0 +1,89 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Paragraph Styles" + full: + lang: "en" + content: + - [] "Paragraph Styles" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Paragraph Styles" + - paragraph: + lang: "" + content: + - [] "A standard paragraph introducing the styles below." + - admonition: + kind: note + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Notes provide informational context without urgency." + - admonition: + kind: warning + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Warnings highlight potential issues to watch for." 
+ - admonition: + kind: danger + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Danger blocks signal critical problems." + - admonition: + kind: tip + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Tips offer helpful hints for readers." + - admonition: + kind: quote + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Quoted material sits in its own paragraph style." + - admonition: + kind: spoiler + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "This is a spoiler; renderers may hide or blur this content." + ids: + - null + - null + - null + - null + - null + - null + - null + - null diff --git a/test/html5/tables.yaml b/test/html5/tables.yaml new file mode 100644 index 0000000..a3e7b4f --- /dev/null +++ b/test/html5/tables.yaml @@ -0,0 +1,134 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Tables" + full: + lang: "en" + content: + - [] "Tables" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Table Coverage" + - paragraph: + lang: "" + content: + - [] "This file covers header rows, data rows with titles, groups, and colspans." 
+ - table: + lang: "" + column_count: 3 + has_row_titles: true + rows: + - columns: + lang: "" + cells: + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "Column A" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "Column B" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "Column C" + - group: + lang: "" + content: + - [] "\"Section One\"" + - row: + lang: "" + title: "Row 1" + cells: + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "A1" + - lang: "" + colspan: 2 + content: + - paragraph: + lang: "" + content: + - [] "B1-C1" + - row: + lang: "" + title: "Row 2" + cells: + - lang: "" + colspan: 2 + content: + - paragraph: + lang: "" + content: + - [] "A2-B2" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "C2" + - group: + lang: "" + content: + - [] "\"Section Two\"" + - row: + lang: "" + title: "Row 3" + cells: + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "A3" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "B3" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "C3" + ids: + - null + - null + - null From dab39970b3ddad9234378a8f94535ba677a72434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:30:05 +0100 Subject: [PATCH 095/116] Implements proper snapshot test validation with a basic differ output on failure. 
--- AGENTS.md | 7 +++ build.zig | 40 ++++++++++----- test/compare.zig | 51 +++++++++++++++++++ test/{html5 => snapshot}/AGENTS.md | 0 .../admonition_blocks.hdoc | 0 .../admonition_blocks.html | 0 .../admonition_blocks.yaml | 0 test/{html5 => snapshot}/document_header.hdoc | 0 test/{html5 => snapshot}/document_header.html | 0 test/{html5 => snapshot}/document_header.yaml | 0 test/{html5 => snapshot}/media_and_toc.hdoc | 0 test/{html5 => snapshot}/media_and_toc.html | 0 test/{html5 => snapshot}/media_and_toc.yaml | 0 .../nesting_and_inlines.hdoc | 0 .../nesting_and_inlines.html | 0 .../nesting_and_inlines.yaml | 0 .../{html5 => snapshot}/paragraph_styles.hdoc | 0 .../{html5 => snapshot}/paragraph_styles.html | 0 .../{html5 => snapshot}/paragraph_styles.yaml | 0 test/{html5 => snapshot}/tables.hdoc | 0 test/{html5 => snapshot}/tables.html | 0 test/{html5 => snapshot}/tables.yaml | 0 22 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 test/compare.zig rename test/{html5 => snapshot}/AGENTS.md (100%) rename test/{html5 => snapshot}/admonition_blocks.hdoc (100%) rename test/{html5 => snapshot}/admonition_blocks.html (100%) rename test/{html5 => snapshot}/admonition_blocks.yaml (100%) rename test/{html5 => snapshot}/document_header.hdoc (100%) rename test/{html5 => snapshot}/document_header.html (100%) rename test/{html5 => snapshot}/document_header.yaml (100%) rename test/{html5 => snapshot}/media_and_toc.hdoc (100%) rename test/{html5 => snapshot}/media_and_toc.html (100%) rename test/{html5 => snapshot}/media_and_toc.yaml (100%) rename test/{html5 => snapshot}/nesting_and_inlines.hdoc (100%) rename test/{html5 => snapshot}/nesting_and_inlines.html (100%) rename test/{html5 => snapshot}/nesting_and_inlines.yaml (100%) rename test/{html5 => snapshot}/paragraph_styles.hdoc (100%) rename test/{html5 => snapshot}/paragraph_styles.html (100%) rename test/{html5 => snapshot}/paragraph_styles.yaml (100%) rename test/{html5 => snapshot}/tables.hdoc (100%) 
rename test/{html5 => snapshot}/tables.html (100%) rename test/{html5 => snapshot}/tables.yaml (100%) diff --git a/AGENTS.md b/AGENTS.md index dc23294..5575ad0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,3 +19,10 @@ - Do not use "inline functions" like `const func = struct { fn func(…) {} }.func;` - Zig has no methods. Functions used by "method like" functions can still be placed next to them, no need to put them into global scope nor into local scope. + +## Snapshot Files + +- If you add a `hdoc` file to `test/snapshot`, also: + - Generate the corresponding html and yaml file + - Add the file inside build.zig to the snapshot_files global +- If you change behaviour, the snapshot tests will fail. Validate the failure against your expectations and see if you broke something unexpected. \ No newline at end of file diff --git a/build.zig b/build.zig index 262e775..f885e67 100644 --- a/build.zig +++ b/build.zig @@ -1,12 +1,12 @@ const std = @import("std"); -const test_files: []const []const u8 = &.{ - "test/html5/admonition_blocks.hdoc", - "test/html5/document_header.hdoc", - "test/html5/media_and_toc.hdoc", - "test/html5/nesting_and_inlines.hdoc", - "test/html5/paragraph_styles.hdoc", - "test/html5/tables.hdoc", +const snapshot_files: []const []const u8 = &.{ + "test/snapshot/admonition_blocks.hdoc", + "test/snapshot/document_header.hdoc", + "test/snapshot/media_and_toc.hdoc", + "test/snapshot/nesting_and_inlines.hdoc", + "test/snapshot/paragraph_styles.hdoc", + "test/snapshot/tables.hdoc", }; pub fn build(b: *std.Build) void { @@ -44,20 +44,32 @@ pub fn build(b: *std.Build) void { run_step.dependOn(&run_cmd.step); + const snapshot_diff = b.addExecutable(.{ + .name = "diff", + .root_module = b.createModule(.{ + .root_source_file = b.path("test/compare.zig"), + .target = b.graph.host, + .optimize = .Debug, + }), + }); + // Snapshot tests: - for (test_files) |path| { + for (snapshot_files) |path| { std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); const html_file 
= b.fmt("{s}.html", .{path[0 .. path.len - 5]}); const yaml_file = b.fmt("{s}.yaml", .{path[0 .. path.len - 5]}); - for (&[2][]const u8{ html_file, yaml_file }) |file| { + for (&[2][]const u8{ html_file, yaml_file }) |snapshot_file| { const test_run = b.addRunArtifact(exe); - test_run.addArgs(&.{ "--format", file[file.len - 4 ..] }); + test_run.addArgs(&.{ "--format", snapshot_file[snapshot_file.len - 4 ..] }); test_run.addFileArg(b.path(path)); - test_run.expectStdOutEqual( - b.build_root.handle.readFileAlloc(b.allocator, file, 10 * 1024 * 1024) catch @panic("oom"), - ); - test_step.dependOn(&test_run.step); + const generated_file = test_run.captureStdOut(); + + const compare_run = b.addRunArtifact(snapshot_diff); + compare_run.addFileArg(b.path(snapshot_file)); + compare_run.addFileArg(generated_file); + + test_step.dependOn(&compare_run.step); } } diff --git a/test/compare.zig b/test/compare.zig new file mode 100644 index 0000000..57d549b --- /dev/null +++ b/test/compare.zig @@ -0,0 +1,51 @@ +//! +//! compare +//! +const std = @import("std"); + +var arena: std.heap.ArenaAllocator = .init(std.heap.page_allocator); + +const allocator = arena.allocator(); + +pub fn main() !u8 { + defer arena.deinit(); + + const argv = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, argv); + + if (argv.len != 3) { + std.debug.print("usage: {s} \n", .{argv[0]}); + return 2; + } + + const ground_truth_path = argv[1]; + const new_input_path = argv[2]; + + const ground_truth = try readFileAlloc(allocator, ground_truth_path, 10 * 1024 * 1024); + defer allocator.free(ground_truth); + + const new_input = try readFileAlloc(allocator, new_input_path, 10 * 1024 * 1024); + defer allocator.free(new_input); + + // Compare full file contents for now. This keeps the snapshot tests simple and + // uses std.testing's string mismatch reporting. 
+ std.testing.expectEqualStrings(ground_truth, new_input) catch |err| switch (err) { + error.TestExpectedEqual => return 1, + else => return err, + }; + + return 0; +} + +fn readFileAlloc(alloc: std.mem.Allocator, path: []const u8, max_bytes: usize) ![]u8 { + const file = try openFile(path); + defer file.close(); + return file.readToEndAlloc(alloc, max_bytes); +} + +fn openFile(path: []const u8) !std.fs.File { + if (std.fs.path.isAbsolute(path)) { + return std.fs.openFileAbsolute(path, .{}); + } + return std.fs.cwd().openFile(path, .{}); +} diff --git a/test/html5/AGENTS.md b/test/snapshot/AGENTS.md similarity index 100% rename from test/html5/AGENTS.md rename to test/snapshot/AGENTS.md diff --git a/test/html5/admonition_blocks.hdoc b/test/snapshot/admonition_blocks.hdoc similarity index 100% rename from test/html5/admonition_blocks.hdoc rename to test/snapshot/admonition_blocks.hdoc diff --git a/test/html5/admonition_blocks.html b/test/snapshot/admonition_blocks.html similarity index 100% rename from test/html5/admonition_blocks.html rename to test/snapshot/admonition_blocks.html diff --git a/test/html5/admonition_blocks.yaml b/test/snapshot/admonition_blocks.yaml similarity index 100% rename from test/html5/admonition_blocks.yaml rename to test/snapshot/admonition_blocks.yaml diff --git a/test/html5/document_header.hdoc b/test/snapshot/document_header.hdoc similarity index 100% rename from test/html5/document_header.hdoc rename to test/snapshot/document_header.hdoc diff --git a/test/html5/document_header.html b/test/snapshot/document_header.html similarity index 100% rename from test/html5/document_header.html rename to test/snapshot/document_header.html diff --git a/test/html5/document_header.yaml b/test/snapshot/document_header.yaml similarity index 100% rename from test/html5/document_header.yaml rename to test/snapshot/document_header.yaml diff --git a/test/html5/media_and_toc.hdoc b/test/snapshot/media_and_toc.hdoc similarity index 100% rename from 
test/html5/media_and_toc.hdoc rename to test/snapshot/media_and_toc.hdoc diff --git a/test/html5/media_and_toc.html b/test/snapshot/media_and_toc.html similarity index 100% rename from test/html5/media_and_toc.html rename to test/snapshot/media_and_toc.html diff --git a/test/html5/media_and_toc.yaml b/test/snapshot/media_and_toc.yaml similarity index 100% rename from test/html5/media_and_toc.yaml rename to test/snapshot/media_and_toc.yaml diff --git a/test/html5/nesting_and_inlines.hdoc b/test/snapshot/nesting_and_inlines.hdoc similarity index 100% rename from test/html5/nesting_and_inlines.hdoc rename to test/snapshot/nesting_and_inlines.hdoc diff --git a/test/html5/nesting_and_inlines.html b/test/snapshot/nesting_and_inlines.html similarity index 100% rename from test/html5/nesting_and_inlines.html rename to test/snapshot/nesting_and_inlines.html diff --git a/test/html5/nesting_and_inlines.yaml b/test/snapshot/nesting_and_inlines.yaml similarity index 100% rename from test/html5/nesting_and_inlines.yaml rename to test/snapshot/nesting_and_inlines.yaml diff --git a/test/html5/paragraph_styles.hdoc b/test/snapshot/paragraph_styles.hdoc similarity index 100% rename from test/html5/paragraph_styles.hdoc rename to test/snapshot/paragraph_styles.hdoc diff --git a/test/html5/paragraph_styles.html b/test/snapshot/paragraph_styles.html similarity index 100% rename from test/html5/paragraph_styles.html rename to test/snapshot/paragraph_styles.html diff --git a/test/html5/paragraph_styles.yaml b/test/snapshot/paragraph_styles.yaml similarity index 100% rename from test/html5/paragraph_styles.yaml rename to test/snapshot/paragraph_styles.yaml diff --git a/test/html5/tables.hdoc b/test/snapshot/tables.hdoc similarity index 100% rename from test/html5/tables.hdoc rename to test/snapshot/tables.hdoc diff --git a/test/html5/tables.html b/test/snapshot/tables.html similarity index 100% rename from test/html5/tables.html rename to test/snapshot/tables.html diff --git 
a/test/html5/tables.yaml b/test/snapshot/tables.yaml similarity index 100% rename from test/html5/tables.yaml rename to test/snapshot/tables.yaml From ba41ef81183756dd3668dc566005e687e942d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 15:43:07 +0100 Subject: [PATCH 096/116] Add regression tests for pending footnotes --- src/hyperdoc.zig | 270 ++++++++++++++++++++++++++++++++++- src/render/dump.zig | 30 ++++ src/render/html5.zig | 100 +++++++++++++ src/testsuite.zig | 114 +++++++++++++++ test/snapshot/footnotes.hdoc | 7 + test/snapshot/footnotes.html | 31 ++++ 6 files changed, 551 insertions(+), 1 deletion(-) create mode 100644 test/snapshot/footnotes.hdoc create mode 100644 test/snapshot/footnotes.html diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 4012b0d..f163269 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -42,6 +42,11 @@ pub const Document = struct { } }; +pub const FootnoteKind = enum { + footnote, + citation, +}; + /// A top level layouting element of a document. /// Each block is a rectangular element on the screen with /// variable height, but a fixed width. 
@@ -56,6 +61,7 @@ pub const Block = union(enum) { preformatted: Preformatted, toc: TableOfContents, table: Table, + footnotes: Footnotes, pub const Heading = struct { index: Index, @@ -162,6 +168,18 @@ pub const Block = union(enum) { lang: LanguageTag, content: []Span, }; + + pub const Footnotes = struct { + lang: LanguageTag, + entries: []FootnoteEntry, + }; + + pub const FootnoteEntry = struct { + index: usize, + kind: FootnoteKind, + lang: LanguageTag, + content: []Span, + }; }; pub fn FormattedDateTime(comptime DT: type) type { @@ -180,6 +198,7 @@ pub const Span = struct { time: FormattedDateTime(Time), datetime: FormattedDateTime(DateTime), reference: InlineReference, + footnote: Footnote, }; pub const InlineReference = struct { @@ -227,6 +246,11 @@ pub const Span = struct { location: Parser.Location, }; +pub const Footnote = struct { + kind: FootnoteKind, + index: usize, +}; + pub const ScriptPosition = enum { baseline, superscript, @@ -618,6 +642,12 @@ pub fn parse( const block_locations = try sema.block_locations.toOwnedSlice(arena.allocator()); const toc = try sema.build_toc(contents, block_locations); + if (sema.has_pending_footnotes()) { + if (sema.first_footnote_location) |location| { + try sema.emit_diagnostic(.footnote_missing_dump, location); + } + } + return .{ .arena = arena, .contents = contents, @@ -753,6 +783,13 @@ pub const SemanticAnalyzer = struct { } }; + const FootnoteDefinition = struct { + kind: FootnoteKind, + index: usize, + lang: LanguageTag, + content: []Span, + }; + arena: std.mem.Allocator, diagnostics: ?*Diagnostics, code: []const u8, @@ -766,6 +803,10 @@ pub const SemanticAnalyzer = struct { ids: std.ArrayList(?Reference) = .empty, id_locations: std.ArrayList(?Parser.Location) = .empty, pending_refs: std.ArrayList(RefUse) = .empty, + footnote_counters: std.EnumArray(FootnoteKind, usize) = std.EnumArray(FootnoteKind, usize).initFill(0), + footnote_pending: std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)) = 
std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)).initFill(.empty), + footnote_keys: std.StringArrayHashMapUnmanaged(FootnoteDefinition) = .empty, + first_footnote_location: ?Parser.Location = null, current_heading_level: usize = 0, heading_counters: [Block.Heading.Level.count]u16 = @splat(0), @@ -933,6 +974,10 @@ pub const SemanticAnalyzer = struct { const toc, const id = try sema.translate_toc_node(node); return .{ .{ .toc = toc }, id }; }, + .footnotes => { + const footnotes = try sema.translate_footnotes_node(node); + return .{ .{ .footnotes = footnotes }, null }; + }, .table => { const table, const id = try sema.translate_table_node(node); return .{ .{ .table = table }, id }; @@ -953,6 +998,7 @@ pub const SemanticAnalyzer = struct { .@"\\time", .@"\\date", .@"\\datetime", + .@"\\footnote", .text, .columns, .group, @@ -1172,6 +1218,51 @@ pub const SemanticAnalyzer = struct { return .{ toc, attrs.id }; } + fn translate_footnotes_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.Footnotes { + const attrs = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + kind: ?FootnoteKind = null, + }); + + switch (node.body) { + .empty => {}, + .list => |child_nodes| { + for (child_nodes) |child_node| { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + } + }, + .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.illegal_child_item, node.location); + }, + } + + var entries: std.ArrayList(Block.FootnoteEntry) = .empty; + defer entries.deinit(sema.arena); + + const kinds: []const FootnoteKind = if (attrs.kind) |kind| + &[_]FootnoteKind{kind} + else + &[_]FootnoteKind{ .footnote, .citation }; + + for (kinds) |kind| { + const pending = sema.footnote_pending.getPtr(kind); + if (pending.items.len == 0) + continue; + + try entries.appendSlice(sema.arena, pending.items); + pending.clearRetainingCapacity(); + } + + if (!sema.has_pending_footnotes()) { + sema.first_footnote_location = null; + } + + return .{ + 
.lang = attrs.lang, + .entries = try entries.toOwnedSlice(sema.arena), + }; + } + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { const attrs = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, @@ -1497,7 +1588,7 @@ pub const SemanticAnalyzer = struct { try merger.output.append(merger.arena, span); }, - .reference => { + .reference, .footnote => { try merger.flush_internal(.keep); std.debug.assert(merger.current_span.items.len == 0); @@ -1782,6 +1873,85 @@ pub const SemanticAnalyzer = struct { .location = node.location, }); }, + .@"\\footnote" => { + const props = try sema.get_attributes(node, struct { + key: ?Reference = null, + ref: ?Reference = null, + kind: ?FootnoteKind = null, + lang: LanguageTag = .inherit, + }); + + const has_body = node.body != .empty; + if (props.key != null and props.ref != null) { + try sema.emit_diagnostic(.footnote_conflicting_key_ref, node.location); + return; + } + + if (has_body) { + if (props.ref != null) { + try sema.emit_diagnostic(.footnote_conflicting_key_ref, node.location); + return; + } + } else { + if (props.ref == null) { + try sema.emit_diagnostic(.footnote_missing_ref, node.location); + return; + } + if (props.kind != null) { + try sema.emit_diagnostic(.footnote_kind_on_reference, get_attribute_location(node, "kind", .name) orelse node.location); + } + + const definition = sema.footnote_keys.get(props.ref.?.text) orelse { + try sema.emit_diagnostic(.{ .unknown_footnote_key = .{ .ref = props.ref.?.text } }, get_attribute_location(node, "ref", .value) orelse node.location); + return; + }; + + try sema.enqueue_footnote(definition); + sema.note_footnote_marker(node.location); + try spans.append(sema.arena, .{ + .content = .{ .footnote = .{ + .kind = definition.kind, + .index = definition.index, + } }, + .attribs = attribs, + .location = node.location, + }); + return; + } + + if (!has_body) { + try sema.emit_diagnostic(.footnote_missing_body, 
node.location); + return; + } + + const kind = props.kind orelse FootnoteKind.footnote; + + var content_spans: std.ArrayList(Span) = .empty; + defer content_spans.deinit(sema.arena); + + const content_attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang }); + try sema.translate_inline_body(&content_spans, node.body, content_attribs, .emit_diagnostic); + + const compacted = try sema.compact_spans(content_spans.items, .one_space); + if (compacted.len == 0) { + try sema.emit_diagnostic(.footnote_missing_body, node.location); + return; + } + + const key_location = get_attribute_location(node, "key", .value); + const definition = try sema.append_footnote_definition(kind, props.lang, compacted, props.key, node.location, key_location); + try sema.enqueue_footnote(definition); + sema.note_footnote_marker(node.location); + + try spans.append(sema.arena, .{ + .content = .{ .footnote = .{ + .kind = definition.kind, + .index = definition.index, + } }, + .attribs = attribs, + .location = node.location, + }); + }, .hdoc, .h1, @@ -1800,6 +1970,7 @@ pub const SemanticAnalyzer = struct { .img, .pre, .toc, + .footnotes, .table, .columns, .group, @@ -1913,6 +2084,7 @@ pub const SemanticAnalyzer = struct { try output.appendSlice(sema.arena, text); }, }, + .footnote => {}, } } @@ -2199,6 +2371,7 @@ pub const SemanticAnalyzer = struct { DateTime => DateTime.parse(value, timezone_hint) catch return error.InvalidValue, LanguageTag => LanguageTag.parse(value) catch return error.InvalidValue, TimeZoneOffset => TimeZoneOffset.parse(value) catch return error.InvalidValue, + FootnoteKind => std.meta.stringToEnum(FootnoteKind, value) orelse return error.InvalidValue, inline else => |EnumT| switch (@typeInfo(EnumT)) { .@"enum" => std.meta.stringToEnum(EnumT, value) orelse return error.InvalidValue, @@ -2258,6 +2431,12 @@ pub const SemanticAnalyzer = struct { try sema.resolve_block_references(child, id_map); } }, + .footnotes => |*footnotes| { + for 
(footnotes.entries) |*entry| { + try sema.resolve_span_slice(&entry.content, id_map); + } + }, + .toc => {}, .table => |*table| { for (table.rows) |*row| switch (row.*) { @@ -2417,6 +2596,68 @@ pub const SemanticAnalyzer = struct { }; } + fn enqueue_footnote(sema: *SemanticAnalyzer, definition: FootnoteDefinition) !void { + const pending = sema.footnote_pending.getPtr(definition.kind); + for (pending.items) |entry| { + if (entry.index == definition.index) { + return; + } + } + + try pending.append(sema.arena, .{ + .index = definition.index, + .kind = definition.kind, + .lang = definition.lang, + .content = definition.content, + }); + } + + fn append_footnote_definition( + sema: *SemanticAnalyzer, + kind: FootnoteKind, + lang: LanguageTag, + content: []Span, + key: ?Reference, + node_location: Parser.Location, + key_location: ?Parser.Location, + ) !FootnoteDefinition { + const counter = sema.footnote_counters.getPtr(kind); + counter.* += 1; + const definition: FootnoteDefinition = .{ + .kind = kind, + .index = counter.*, + .lang = lang, + .content = content, + }; + + if (key) |reference| { + const gop = try sema.footnote_keys.getOrPut(sema.arena, reference.text); + if (gop.found_existing) { + try sema.emit_diagnostic(.{ .duplicate_footnote_key = .{ .ref = reference.text } }, key_location orelse node_location); + } else { + gop.value_ptr.* = definition; + } + } + + return definition; + } + + fn note_footnote_marker(sema: *SemanticAnalyzer, location: Parser.Location) void { + if (sema.first_footnote_location == null) { + sema.first_footnote_location = location; + } + } + + fn has_pending_footnotes(sema: *SemanticAnalyzer) bool { + for (std.meta.tags(FootnoteKind)) |kind| { + if (sema.footnote_pending.get(kind).items.len > 0) { + return true; + } + } + + return false; + } + /// Computes the next index number for a heading of the given level: fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { const 
index = @intFromEnum(level); @@ -3217,6 +3458,7 @@ pub const Parser = struct { img, pre, toc, + footnotes, table, columns, group, @@ -3235,6 +3477,7 @@ pub const Parser = struct { @"\\date", @"\\time", @"\\datetime", + @"\\footnote", unknown_block, unknown_inline, @@ -3251,6 +3494,7 @@ pub const Parser = struct { .@"\\date", .@"\\time", .@"\\datetime", + .@"\\footnote", .unknown_inline, .text, => true, @@ -3272,6 +3516,7 @@ pub const Parser = struct { .img, .pre, .toc, + .footnotes, .table, .columns, .group, @@ -3295,6 +3540,7 @@ pub const Parser = struct { .img, .pre, .toc, + .footnotes, .group, .@"\\em", @@ -3307,6 +3553,7 @@ pub const Parser = struct { .@"\\date", .@"\\time", .@"\\datetime", + .@"\\footnote", .unknown_inline, .unknown_block, // Unknown blocks must also have inline bodies to optimally retain body contents @@ -3426,6 +3673,12 @@ pub const Diagnostic = struct { duplicate_id: ReferenceError, unknown_id: ReferenceError, empty_ref_body_target, + duplicate_footnote_key: ReferenceError, + unknown_footnote_key: ReferenceError, + footnote_conflicting_key_ref, + footnote_missing_ref, + footnote_missing_body, + footnote_kind_on_reference, // warnings: document_starts_with_bom, @@ -3442,6 +3695,7 @@ pub const Diagnostic = struct { tab_character, automatic_heading_insertion: AutomaticHeading, title_inline_date_time_without_header, + footnote_missing_dump, pub fn severity(code: Code) Severity { return switch (code) { @@ -3483,6 +3737,12 @@ pub const Diagnostic = struct { .duplicate_id, .unknown_id, .empty_ref_body_target, + .duplicate_footnote_key, + .unknown_footnote_key, + .footnote_conflicting_key_ref, + .footnote_missing_ref, + .footnote_missing_body, + .footnote_kind_on_reference, => .@"error", .missing_document_language, @@ -3499,6 +3759,7 @@ pub const Diagnostic = struct { .document_starts_with_bom, .automatic_heading_insertion, .title_inline_date_time_without_header, + .footnote_missing_dump, => .warning, }; } @@ -3578,12 +3839,19 @@ pub const 
Diagnostic = struct { .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), .empty_ref_body_target => try w.writeAll("Empty-body \\ref is only supported for headings."), + .duplicate_footnote_key => |ctx| try w.print("The footnote key \"{s}\" is already defined.", .{ctx.ref}), + .unknown_footnote_key => |ctx| try w.print("The referenced footnote key \"{s}\" does not exist.", .{ctx.ref}), + .footnote_conflicting_key_ref => try w.writeAll("\\footnote attributes 'key' and 'ref' cannot be used together."), + .footnote_missing_ref => try w.writeAll("\\footnote without a body requires a ref=\"...\" attribute."), + .footnote_missing_body => try w.writeAll("\\footnote definitions require a non-empty body."), + .footnote_kind_on_reference => try w.writeAll("Attribute 'kind' is only valid on defining \\footnote entries."), .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), .automatic_heading_insertion => |ctx| try w.print("Inserted automatic {t} to fill heading level gap.", .{ctx.level}), .title_inline_date_time_without_header => try w.writeAll("Title block contains \\date/\\time/\\datetime but hdoc(title=\"...\") is missing; metadata title cannot be derived reliably."), + .footnote_missing_dump => try w.writeAll("Document contains footnotes but no footnotes(...) 
block to render them."), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index cc57876..76d92c2 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -221,6 +221,9 @@ fn writeSpanContentInline(writer: *Writer, content: hdoc.Span.Content) Writer.Er try writeFormattedDateTimeInline(writer, datetime); try writer.writeByte('"'); }, + .footnote => |footnote| { + try writer.print("\"footnote:{s}:{d}\"", .{ @tagName(footnote.kind), footnote.index }); + }, .reference => |reference| { try writer.writeByte('"'); try writer.writeAll("ref:"); @@ -321,6 +324,28 @@ fn dumpListItemsField(writer: *Writer, indent: usize, key: []const u8, items: [] } } +fn dumpFootnoteEntry(writer: *Writer, indent: usize, entry: hdoc.Block.FootnoteEntry) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("index: {}\n", .{entry.index}); + try dumpEnumField(writer, indent, "kind", entry.kind); + try dumpOptionalStringField(writer, indent, "lang", entry.lang.text); + try dumpSpanListField(writer, indent, "content", entry.content); +} + +fn dumpFootnoteEntries(writer: *Writer, indent: usize, entries: []const hdoc.Block.FootnoteEntry) Writer.Error!void { + try writeIndent(writer, indent); + if (entries.len == 0) { + try writer.writeAll("entries: []\n"); + return; + } + try writer.writeAll("entries:\n"); + for (entries) |entry| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpFootnoteEntry(writer, indent + indent_step, entry); + } +} + fn dumpTableCell(writer: *Writer, indent: usize, cell: hdoc.Block.TableCell) Writer.Error!void { try dumpOptionalStringFieldInline(writer, "lang", cell.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "colspan", @as(?u32, cell.colspan)); @@ -457,6 +482,11 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang.text); try dumpOptionalNumberField(writer, indent + 
indent_step, "depth", @as(?u8, toc.depth)); }, + .footnotes => |footnotes| { + try writeTypeTag(writer, "footnotes"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", footnotes.lang.text); + try dumpFootnoteEntries(writer, indent + indent_step, footnotes.entries); + }, .table => |table| { try writeTypeTag(writer, "table"); try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang.text); diff --git a/src/render/html5.zig b/src/render/html5.zig index 2bc361f..5aa9b97 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -32,6 +32,7 @@ const RenderContext = struct { .preformatted => |preformatted| try ctx.renderPreformatted(preformatted, block_index, indent), .toc => |toc| try ctx.renderTableOfContents(toc, block_index, indent), .table => |table| try ctx.renderTable(table, block_index, indent), + .footnotes => |footnotes| try ctx.renderFootnotes(footnotes, block_index, indent), } } @@ -359,6 +360,78 @@ const RenderContext = struct { try ctx.writer.writeByte('\n'); } + fn renderFootnotes(ctx: *RenderContext, footnotes: hdoc.Block.Footnotes, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(footnotes.lang); + const id_attr = ctx.resolveBlockId(block_index); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "div", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .class = "hdoc-footnotes", + }); + try ctx.writer.writeByte('\n'); + + const kinds = [_]hdoc.FootnoteKind{ .footnote, .citation }; + for (kinds) |kind| { + var first_index: ?usize = null; + var count: usize = 0; + + for (footnotes.entries) |entry| { + if (entry.kind != kind) + continue; + if (first_index == null) + first_index = entry.index; + count += 1; + } + + if (count == 0) + continue; + + try writeIndent(ctx.writer, indent + indent_step); + var class_buffer: [64]u8 = undefined; + const list_class = std.fmt.bufPrint(&class_buffer, "hdoc-footnote-list hdoc-{s}", .{footnoteSlug(kind)}) catch 
unreachable; + try writeStartTag(ctx.writer, "ol", .regular, .{ + .class = list_class, + .start = first_index, + }); + try ctx.writer.writeByte('\n'); + + for (footnotes.entries) |entry| { + if (entry.kind != kind) + continue; + + var id_buffer: [64]u8 = undefined; + const entry_id = ctx.footnoteId(entry.kind, entry.index, &id_buffer); + + try writeIndent(ctx.writer, indent + 2 * indent_step); + try writeStartTag(ctx.writer, "li", .regular, .{ + .id = entry_id, + .lang = langAttribute(entry.lang), + }); + if (entry.content.len > 0) { + try ctx.writer.writeByte('\n'); + try writeIndent(ctx.writer, indent + 3 * indent_step); + try writeStartTag(ctx.writer, "p", .regular, .{ .lang = langAttribute(entry.lang) }); + try ctx.renderSpans(entry.content); + try writeEndTag(ctx.writer, "p"); + try ctx.writer.writeByte('\n'); + try writeIndent(ctx.writer, indent + 2 * indent_step); + } + try writeEndTag(ctx.writer, "li"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeEndTag(ctx.writer, "ol"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "div"); + try ctx.writer.writeByte('\n'); + } + fn renderHeaderRow(ctx: *RenderContext, columns: hdoc.Block.TableColumns, indent: usize, has_title_column: bool) RenderError!void { try writeIndent(ctx.writer, indent); try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(columns.lang) }); @@ -471,6 +544,18 @@ const RenderContext = struct { return null; } + fn footnoteSlug(kind: hdoc.FootnoteKind) []const u8 { + return switch (kind) { + .footnote => "footnote", + .citation => "citation", + }; + } + + fn footnoteId(ctx: *RenderContext, kind: hdoc.FootnoteKind, index: usize, buffer: []u8) []const u8 { + _ = ctx; + return std.fmt.bufPrint(buffer, "hdoc-{s}-{d}", .{ footnoteSlug(kind), index }) catch unreachable; + } + fn renderSpans(ctx: *RenderContext, spans: []const hdoc.Span) RenderError!void { for (spans) 
|span| { try ctx.renderSpan(span); @@ -554,6 +639,21 @@ const RenderContext = struct { .reference => |reference| { try ctx.renderReference(reference, content_lang); }, + .footnote => |footnote| { + var id_buffer: [64]u8 = undefined; + const target_id = ctx.footnoteId(footnote.kind, footnote.index, &id_buffer); + var href_buffer: [64]u8 = undefined; + const href = std.fmt.bufPrint(&href_buffer, "#{s}", .{target_id}) catch unreachable; + + var class_buffer: [64]u8 = undefined; + const class_attr = std.fmt.bufPrint(&class_buffer, "hdoc-footnote-ref hdoc-{s}", .{footnoteSlug(footnote.kind)}) catch unreachable; + + try writeStartTag(ctx.writer, "sup", .regular, .{ .class = class_attr, .lang = content_lang }); + try writeStartTag(ctx.writer, "a", .regular, .{ .href = href }); + try ctx.writer.print("{d}", .{footnote.index}); + try writeEndTag(ctx.writer, "a"); + try writeEndTag(ctx.writer, "sup"); + }, } while (opened_len > 0) { diff --git a/src/testsuite.zig b/src/testsuite.zig index 41a60f9..5d79e8a 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -593,6 +593,120 @@ test "table of contents inserts automatic headings when skipping levels" { try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.children.len); } +test "footnotes collect entries per dump" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{Intro \footnote{first} \footnote(kind="citation",key="cite1"){c1}} + \\footnotes; + \\p{Again \footnote(ref="cite1"); \footnote{second}} + \\footnotes(kind="citation"); + \\footnotes; + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 5), doc.contents.len); + + const first_dump = switch (doc.contents[1]) { + .footnotes => |value| value, + else => return error.TestExpectedEqual, + }; + try 
std.testing.expectEqual(@as(usize, 2), first_dump.entries.len); + try std.testing.expectEqual(hdoc.FootnoteKind.footnote, first_dump.entries[0].kind); + try std.testing.expectEqual(@as(usize, 1), first_dump.entries[0].index); + try std.testing.expectEqual(hdoc.FootnoteKind.citation, first_dump.entries[1].kind); + try std.testing.expectEqual(@as(usize, 1), first_dump.entries[1].index); + + const second_dump = switch (doc.contents[3]) { + .footnotes => |value| value, + else => return error.TestExpectedEqual, + }; + try std.testing.expectEqual(@as(usize, 1), second_dump.entries.len); + try std.testing.expectEqual(hdoc.FootnoteKind.citation, second_dump.entries[0].kind); + try std.testing.expectEqual(@as(usize, 1), second_dump.entries[0].index); + + const final_dump = switch (doc.contents[4]) { + .footnotes => |value| value, + else => return error.TestExpectedEqual, + }; + try std.testing.expectEqual(@as(usize, 1), final_dump.entries.len); + try std.testing.expectEqual(hdoc.FootnoteKind.footnote, final_dump.entries[0].kind); + try std.testing.expectEqual(@as(usize, 2), final_dump.entries[0].index); +} + +test "warn when footnotes are missing dumps" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{Body \footnote{content}} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_warning = false; + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + saw_warning = true; + break; + } + } + try std.testing.expect(saw_warning); +} + +test "warn when footnotes remain after intermediate dump" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{First \footnote{one}} + \\footnotes{} + \\p{Second \footnote{two}} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try 
hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_warning = false; + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + saw_warning = true; + break; + } + } + try std.testing.expect(saw_warning); +} + +test "no warning when footnotes are drained after later dump" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{First \footnote{one}} + \\footnotes{} + \\p{Second \footnote{two}} + \\footnotes{} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + return error.TestExpectedEqual; + } + } +} + fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bool { if (std.meta.activeTag(lhs) != std.meta.activeTag(rhs)) return false; diff --git a/test/snapshot/footnotes.hdoc b/test/snapshot/footnotes.hdoc new file mode 100644 index 0000000..ae66d1b --- /dev/null +++ b/test/snapshot/footnotes.hdoc @@ -0,0 +1,7 @@ +hdoc(version="2.0",lang="en"); +title{Footnotes Demo} +p{Intro with footnote\footnote{Footnote text} and citation\footnote(kind="citation",key="ref1"){Citation text}.} +footnotes; +p{Next section references \footnote(ref="ref1"); and adds another\footnote{Second footnote}.} +footnotes(kind="citation"); +footnotes; diff --git a/test/snapshot/footnotes.html b/test/snapshot/footnotes.html new file mode 100644 index 0000000..8ecbc1d --- /dev/null +++ b/test/snapshot/footnotes.html @@ -0,0 +1,31 @@ +
      +

      Footnotes Demo

      +
      +

      Intro with footnote1 and citation1.

      +
      +
        +
      1. +

        Footnote text

        +
      2. +
      +
        +
      1. +

        Citation text

        +
      2. +
      +
      +

      Next section references 1 and adds another2.

      +
      +
        +
      1. +

        Citation text

        +
      2. +
      +
      +
      +
        +
      1. +

        Second footnote

        +
      2. +
      +
      From 3c6855013a23e6cbfcc4ad0a427c88a45714f195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 16:38:38 +0100 Subject: [PATCH 097/116] Fix pending footnote warning location --- src/hyperdoc.zig | 37 +++++++++++++++++++++++++++++-------- src/testsuite.zig | 27 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f163269..42919c7 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -643,7 +643,7 @@ pub fn parse( const toc = try sema.build_toc(contents, block_locations); if (sema.has_pending_footnotes()) { - if (sema.first_footnote_location) |location| { + if (sema.first_pending_footnote_location()) |location| { try sema.emit_diagnostic(.footnote_missing_dump, location); } } @@ -806,7 +806,7 @@ pub const SemanticAnalyzer = struct { footnote_counters: std.EnumArray(FootnoteKind, usize) = std.EnumArray(FootnoteKind, usize).initFill(0), footnote_pending: std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)) = std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)).initFill(.empty), footnote_keys: std.StringArrayHashMapUnmanaged(FootnoteDefinition) = .empty, - first_footnote_location: ?Parser.Location = null, + first_footnote_locations: std.EnumArray(FootnoteKind, ?Parser.Location) = std.EnumArray(FootnoteKind, ?Parser.Location).initFill(null), current_heading_level: usize = 0, heading_counters: [Block.Heading.Level.count]u16 = @splat(0), @@ -1251,10 +1251,13 @@ pub const SemanticAnalyzer = struct { try entries.appendSlice(sema.arena, pending.items); pending.clearRetainingCapacity(); + sema.first_footnote_locations.getPtr(kind).* = null; } if (!sema.has_pending_footnotes()) { - sema.first_footnote_location = null; + for (std.meta.tags(FootnoteKind)) |kind| { + sema.first_footnote_locations.getPtr(kind).* = null; + } } return .{ @@ -1907,7 +1910,7 @@ pub const SemanticAnalyzer = struct { }; try sema.enqueue_footnote(definition); - 
sema.note_footnote_marker(node.location); + sema.note_footnote_marker(definition.kind, node.location); try spans.append(sema.arena, .{ .content = .{ .footnote = .{ .kind = definition.kind, @@ -1941,7 +1944,7 @@ pub const SemanticAnalyzer = struct { const key_location = get_attribute_location(node, "key", .value); const definition = try sema.append_footnote_definition(kind, props.lang, compacted, props.key, node.location, key_location); try sema.enqueue_footnote(definition); - sema.note_footnote_marker(node.location); + sema.note_footnote_marker(definition.kind, node.location); try spans.append(sema.arena, .{ .content = .{ .footnote = .{ @@ -2642,9 +2645,10 @@ pub const SemanticAnalyzer = struct { return definition; } - fn note_footnote_marker(sema: *SemanticAnalyzer, location: Parser.Location) void { - if (sema.first_footnote_location == null) { - sema.first_footnote_location = location; + fn note_footnote_marker(sema: *SemanticAnalyzer, kind: FootnoteKind, location: Parser.Location) void { + const slot = sema.first_footnote_locations.getPtr(kind); + if (slot.* == null) { + slot.* = location; } } @@ -2658,6 +2662,23 @@ pub const SemanticAnalyzer = struct { return false; } + fn first_pending_footnote_location(sema: *SemanticAnalyzer) ?Parser.Location { + var earliest: ?Parser.Location = null; + + for (std.meta.tags(FootnoteKind)) |kind| { + if (sema.footnote_pending.get(kind).items.len == 0) + continue; + + if (sema.first_footnote_locations.get(kind)) |location| { + if (earliest == null or location.offset < earliest.?.offset) { + earliest = location; + } + } + } + + return earliest; + } + /// Computes the next index number for a heading of the given level: fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { const index = @intFromEnum(level); diff --git a/src/testsuite.zig b/src/testsuite.zig index 5d79e8a..e2003c8 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -685,6 +685,33 @@ test "warn 
when footnotes remain after intermediate dump" { try std.testing.expect(saw_warning); } +test "footnote missing dump warning points to earliest remaining kind" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{First \footnote{one}} + \\p{C \footnote(kind="citation",key="cite"){two}} + \\footnotes(kind="footnote"); + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var warning: ?hdoc.Diagnostic = null; + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + warning = item; + break; + } + } + + try std.testing.expect(warning != null); + try std.testing.expectEqual(@as(u32, 3), warning.?.location.line); + try std.testing.expectEqual(@as(u32, 5), warning.?.location.column); +} + test "no warning when footnotes are drained after later dump" { const source = \\hdoc(version="2.0",lang="en"); From 9003d2ef306fc09112a8daea90c021af80f68f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:39:50 +0100 Subject: [PATCH 098/116] Improves handling of missing files when dealing with snapshots --- build.zig | 1 + test/compare.zig | 20 +++++++++-- test/snapshot/footnotes.yaml | 69 ++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 test/snapshot/footnotes.yaml diff --git a/build.zig b/build.zig index f885e67..eb0d9ba 100644 --- a/build.zig +++ b/build.zig @@ -7,6 +7,7 @@ const snapshot_files: []const []const u8 = &.{ "test/snapshot/nesting_and_inlines.hdoc", "test/snapshot/paragraph_styles.hdoc", "test/snapshot/tables.hdoc", + "test/snapshot/footnotes.hdoc", }; pub fn build(b: *std.Build) void { diff --git a/test/compare.zig b/test/compare.zig index 57d549b..69c9b82 100644 --- a/test/compare.zig +++ b/test/compare.zig @@ -21,10 +21,23 @@ pub fn main() !u8 { const 
ground_truth_path = argv[1]; const new_input_path = argv[2]; - const ground_truth = try readFileAlloc(allocator, ground_truth_path, 10 * 1024 * 1024); + var files_ok = true; + const ground_truth = readFileAlloc(allocator, ground_truth_path, 10 * 1024 * 1024) catch |err| switch (err) { + error.FileNotFound => blk: { + files_ok = false; + break :blk ""; + }, + else => |e| return e, + }; defer allocator.free(ground_truth); - const new_input = try readFileAlloc(allocator, new_input_path, 10 * 1024 * 1024); + const new_input = readFileAlloc(allocator, new_input_path, 10 * 1024 * 1024) catch |err| switch (err) { + error.FileNotFound => blk: { + files_ok = false; + break :blk ""; + }, + else => |e| return e, + }; defer allocator.free(new_input); // Compare full file contents for now. This keeps the snapshot tests simple and @@ -34,6 +47,9 @@ pub fn main() !u8 { else => return err, }; + if (!files_ok) + return 1; + return 0; } diff --git a/test/snapshot/footnotes.yaml b/test/snapshot/footnotes.yaml new file mode 100644 index 0000000..5340dde --- /dev/null +++ b/test/snapshot/footnotes.yaml @@ -0,0 +1,69 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Footnotes Demo" + full: + lang: "" + content: + - [] "Footnotes Demo" + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "Intro with footnote" + - [] "footnote:footnote:1" + - [] " and citation" + - [] "footnote:citation:1" + - [] "." + - footnotes: + lang: "" + entries: + - index: 1 + kind: footnote + lang: "" + content: + - [] "Footnote text" + - index: 1 + kind: citation + lang: "" + content: + - [] "Citation text" + - paragraph: + lang: "" + content: + - [] "Next section references " + - [] "footnote:citation:1" + - [] " and adds another" + - [] "footnote:footnote:2" + - [] "." 
+ - footnotes: + lang: "" + entries: + - index: 1 + kind: citation + lang: "" + content: + - [] "Citation text" + - footnotes: + lang: "" + entries: + - index: 2 + kind: footnote + lang: "" + content: + - [] "Second footnote" + ids: + - null + - null + - null + - null + - null From bf902d05e628e80bd855c408e7d54fb28af32b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:44:55 +0100 Subject: [PATCH 099/116] Cleans SPEC_TODO.md --- SPEC_TODO.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 1cfadaf..c12f0be 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,8 +1,2 @@ # Spec compliance TODOs -- Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 -- Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 -- Enforce `hdoc` placement and body rules by rejecting headers that are not the first node or that carry any non-empty body, instead of merely warning on the first non-header block and accepting later headers. 
【F:docs/specification.md†L369-L373】【F:src/hyperdoc.zig†L734-L788】 -- Treat admonition blocks (`note`, `warning`, `danger`, `tip`, `quote`, `spoiler`) as block-list containers with shorthand promotion for string/verbatim bodies rather than forcing them into a single inline paragraph payload. 【F:docs/specification.md†L585-L588】【F:src/hyperdoc.zig†L916-L935】 -- Enforce table column structure: allow at most one optional leading `columns` row, derive a non-zero effective column count even when `columns` is absent, and reject tables where no row or column establishes width. The current implementation accepts multiple `columns` nodes anywhere and never validates missing/zero column counts. 【F:docs/specification.md†L618-L629】【F:src/hyperdoc.zig†L1076-L1147】 -- Restrict `toc` to top-level usage as required by the specification; the current translator permits `toc` blocks inside nested block lists. 【F:docs/specification.md†L535-L543】【F:src/hyperdoc.zig†L1041-L1073】【F:src/hyperdoc.zig†L1254-L1270】 From 8ab068029cf556dc49fd34dc37c39be0167c2549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:29:46 +0100 Subject: [PATCH 100/116] Adds specification parts about the syntax attribute. --- docs/TODO.md | 1 - docs/specification.md | 121 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) diff --git a/docs/TODO.md b/docs/TODO.md index 556c7dc..a74db6c 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -3,7 +3,6 @@ ## Tasks - Assign semantics to node types, paragraph kinds, ... -- Specify "syntax" proper - Add links to RFCs where possible - Verbatim-body to text conversion is under-specified. You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). 
Different implementations may diverge. - Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. diff --git a/docs/specification.md b/docs/specification.md index fcee7cd..b862709 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -867,6 +867,36 @@ Semantics: - **Language tag:** BCP 47 (RFC 5646). - **Timezone offset:** `Z` or `±HH:MM`. - **URI/IRI:** per RFC 3987. +- **Syntax identifier**: see §10.1.1. + +#### 10.1.1 Syntax identifier + +A **Syntax identifier** is a compact string used to label a syntax-highlighting / tokenization scheme and a language (or other syntax) within that scheme. + +Lexical rules (normative): + +- A Syntax identifier value **MUST** be non-empty. +- It **MUST NOT** contain any whitespace or Unicode control characters (General Category `Cc`). +- It **MAY** contain any other Unicode scalar values, subject to the `scheme` / `name` splitting rules below. + +Structure (normative): + +A Syntax identifier is either: + +- `name` (no scheme prefix), or +- `scheme ":" name` + +Where: + +- `scheme` is an attribute-key-like identifier: it **MUST** match the lexical form of `attr_key` (§5.1). +- `name` is an opaque identifier: it **MUST** be non-empty and **MUST NOT** contain `":"`. + +Parsing and normalization (normative): + +- If the value contains a `":"`, the first `":"` splits the identifier into `scheme` and `name`. +- Otherwise, the effective `scheme` is `"hdoc"` and the entire value is the `name`. +- Scheme matching is **ASCII case-insensitive**. The canonical scheme behavior defined by this specification uses the lowercase scheme name. 
+- The meaning and matching rules of `name` are scheme-defined, except for the `"hdoc"` scheme which is defined in Appendix D. ### 10.2 Date / time lexical formats (normative) @@ -1017,6 +1047,28 @@ Each element has an **effective language tag**, computed as follows: This inheritance allows documents to mix language contexts across nested elements (e.g. an English document that contains a German `quote` with an Italian paragraph inside), and keeps localized date/time values in their local context. +### 10.5 `syntax` attribute + +The `syntax` attribute is a rendering hint for syntax-aware presentation (e.g. syntax highlighting) on `pre` (§9.3.6) and `\mono` (§9.5.2). + +Normative rules: + +- If present, the `syntax` attribute value **MUST** be a Syntax identifier (§10.1.1). +- The `syntax` attribute **MUST NOT** affect parsing or semantic meaning of the element’s body. It is a rendering hint only. +- Renderers **SHOULD** implement the `"hdoc"` scheme defined by this specification (Appendix D). +- Renderers **MAY** implement additional schemes. +- If a renderer does not recognize the scheme or the name within that scheme, it **MUST** render the content as **plain monospaced text** (i.e. without syntax-specific styling). + +#### 10.5.1 `plain` (normative) + +Within the `"hdoc"` scheme (Appendix D), the canonical name `plain` indicates an explicit request for no syntax highlighting. + +If the effective Syntax identifier resolves to `hdoc:plain` (including any aliases mapped to `plain`): + +- A renderer **MUST NOT** apply syntax highlighting. +- A renderer **MUST NOT** attempt language autodetection. +- A renderer **MAY** still apply generic monospace/code styling (font, background, line wrapping policy, etc.), but **MUST NOT** apply token- or language-dependent styling. + ## 11. Non-normative guidance for tooling - Formatters should normalize line endings to LF. @@ -1044,3 +1096,72 @@ pre(syntax="c"): | return 0; | } ``` + +## Appendix D. 
`"hdoc"` syntax scheme (normative, non-exhaustive) + +This appendix defines the `"hdoc"` scheme used by the `syntax` attribute (§10.5). + +The `"hdoc"` scheme is intended to provide stable, interoperable canonical names for common syntaxes, while allowing unknown names without making documents semantically invalid. + +### D.1 Canonicalization and aliasing (normative) + +For the purpose of interpreting `syntax` values in the `"hdoc"` scheme: + +- Matching of `"hdoc"` `name` values is **ASCII case-insensitive**. +- If the `name` matches an alias in Table D.2, the effective canonical name **MUST** be the alias target. +- Otherwise, if the `name` matches a canonical name in Table D.1, the effective canonical name is that name. +- Otherwise, the `name` is **unrecognized** for the `"hdoc"` scheme. + +Tooling guidance (non-normative): + +- Formatters and rewriters should preserve the original `syntax` string verbatim unless they intentionally canonicalize it. +- If canonicalizing, tooling should: + - prefer omitting the `"hdoc:"` scheme prefix (since `"hdoc"` is the default scheme), and + - prefer the canonical names in Table D.1. + +### D.2 Canonical `"hdoc"` names (normative) + +Table D.1 lists canonical `"hdoc"` syntax names defined by this specification. + +| Canonical name | Intended meaning | +| -------------- | ------------------------------------------------------------- | +| `plain` | Explicitly no highlighting (§10.5.1). | +| `hdoc` | HyperDoc source text. | +| `c` | C. | +| `cpp` | C++. | +| `csharp` | C#. | +| `rust` | Rust. | +| `zig` | Zig. | +| `python` | Python. | +| `lua` | Lua. | +| `js` | JavaScript. | +| `java` | Java. | +| `xml` | XML. | +| `json` | JSON. | +| `yaml` | YAML. | +| `toml` | TOML. | +| `gfm` | GitHub Flavored Markdown. | +| `html` | HTML. | + +### D.3 Normative alias mapping + +Table D.2 defines aliases that **MUST** be mapped to the listed canonical `"hdoc"` name for interpretation. 
+ +| Alias | Canonical `"hdoc"` name | +| ------------ | ----------------------- | +| `text` | `plain` | +| `none` | `plain` | +| `hyperdoc` | `hdoc` | +| `c++` | `cpp` | +| `cxx` | `cpp` | +| `cc` | `cpp` | +| `c#` | `csharp` | +| `cs` | `csharp` | +| `c-sharp` | `csharp` | +| `py` | `python` | +| `javascript` | `js` | +| `ecmascript` | `js` | +| `yml` | `yaml` | +| `md` | `gfm` | +| `markdown` | `gfm` | +| `xhtml` | `html` | From 38e93d6d8481765eb8f62bedf50fd8e0b470aa27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:30:39 +0100 Subject: [PATCH 101/116] Use CR escape in string_cr_escape reject fixture --- SPEC_TODO.md | 44 +++++++++++++++++ build.zig | 49 +++++++++++++++++++ test/conformance/accept/inline_escape.hdoc | 3 ++ test/conformance/accept/inline_escape.yaml | 19 +++++++ .../accept/title_header_redundant.hdoc | 5 ++ .../accept/title_header_redundant.yaml | 24 +++++++++ .../reject/container_children.diag | 0 .../reject/container_children.hdoc | 11 +++++ test/conformance/reject/heading_sequence.diag | 3 ++ test/conformance/reject/heading_sequence.hdoc | 5 ++ .../reject/inline_identifier_dash.diag | 0 .../reject/inline_identifier_dash.hdoc | 3 ++ test/conformance/reject/nested_top_level.diag | 0 test/conformance/reject/nested_top_level.hdoc | 5 ++ test/conformance/reject/ref_in_heading.diag | 0 test/conformance/reject/ref_in_heading.hdoc | 5 ++ test/conformance/reject/string_cr_escape.diag | 0 test/conformance/reject/string_cr_escape.hdoc | 3 ++ .../conformance/reject/time_relative_fmt.diag | 0 .../conformance/reject/time_relative_fmt.hdoc | 3 ++ 20 files changed, 182 insertions(+) create mode 100644 test/conformance/accept/inline_escape.hdoc create mode 100644 test/conformance/accept/inline_escape.yaml create mode 100644 test/conformance/accept/title_header_redundant.hdoc create mode 100644 test/conformance/accept/title_header_redundant.yaml create mode 100644 test/conformance/reject/container_children.diag create 
mode 100644 test/conformance/reject/container_children.hdoc create mode 100644 test/conformance/reject/heading_sequence.diag create mode 100644 test/conformance/reject/heading_sequence.hdoc create mode 100644 test/conformance/reject/inline_identifier_dash.diag create mode 100644 test/conformance/reject/inline_identifier_dash.hdoc create mode 100644 test/conformance/reject/nested_top_level.diag create mode 100644 test/conformance/reject/nested_top_level.hdoc create mode 100644 test/conformance/reject/ref_in_heading.diag create mode 100644 test/conformance/reject/ref_in_heading.hdoc create mode 100644 test/conformance/reject/string_cr_escape.diag create mode 100644 test/conformance/reject/string_cr_escape.hdoc create mode 100644 test/conformance/reject/time_relative_fmt.diag create mode 100644 test/conformance/reject/time_relative_fmt.hdoc diff --git a/SPEC_TODO.md b/SPEC_TODO.md index c12f0be..77efb91 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,2 +1,46 @@ # Spec compliance TODOs +- Inline escape tokens remain undecoded in inline text construction. + - Expect: `\\`, `\{`, and `\}` tokens produced in inline bodies decode to literal `\`, `{`, and `}` during semantic processing (§6.1). + - Actual: Inline text spans keep the backslash sequences verbatim, so escapes render incorrectly. + - Proposed: Decode these three escape tokens before span merging while preserving locations. + +- String literal control character policy is incomplete. + - Expect: Resolved string values must reject control characters except LF and CR when immediately followed by LF (§7.1). + - Actual: `\r` escapes decode to lone CR codepoints without diagnostics, so invalid CR characters survive into resolved text. + - Proposed: Reject `\r` unless it participates in a CRLF sequence after escape decoding. + +- Identifier parsing permits extra characters. 
+ - Expect: Node names use identifier characters limited to letters, digits, and `_`, with inline names beginning with `\`; attribute keys are hyphen-separated segments of the same identifier characters (§5.1, §4.3). + - Actual: Identifiers allow `-` and `\` in any position, so node and attribute names outside the grammar are accepted. + - Proposed: Align identifier character checks with the grammar and treat hyphens only as separators for attribute keys. + +- Heading sequencing rules are missing. + - Expect: `h2` must follow an `h1`, and `h3` must follow an `h2` without intervening `h1` (§9.2.3). + - Actual: Heading indices increment without validating the required ordering. + - Proposed: Track the last seen heading levels and emit errors when a heading appears without its required parent level. + +- Title/header interplay lacks the required comparison. + - Expect: When both `hdoc(title=...)` and `title { ... }` are present, their plaintext forms are compared and a redundancy hint is emitted if they match (§8.1). + - Actual: The block title is used and the header title is ignored without any comparison or diagnostics. + - Proposed: Compare the plaintext values, warn when redundant, and keep emitting hints when neither title form is present. + +- Top-level-only elements are allowed to nest. + - Expect: `h1`/`h2`/`h3`, `toc`, and `footnotes` may only appear as top-level blocks (§9.2). + - Actual: Nested blocks (e.g., `note { h1 ... }`) accept these nodes, so top-level elements render within other containers. + - Proposed: Reject top-level elements when they appear in nested block lists. + +- Containers do not restrict children to general text blocks. + - Expect: `li`, `td`, and admonition blocks contain general text block elements (with shorthand promotion) and may be empty for admonitions (§9.1.3, §9.3.2, §9.4.5). + - Actual: Block lists in these containers accept any block type (including headings and footnotes) and treat empty lists as errors. 
+ - Proposed: Limit children to the allowed general text blocks and permit empty admonition bodies. + +- `\time` accepts an unsupported `fmt`. + - Expect: `\time(fmt=...)` supports only `iso`, `short`, `long`, and `rough` (§10.3.4). + - Actual: The `fmt` enum includes `relative`, so `fmt="relative"` is accepted. + - Proposed: Remove the unsupported variant and reject unknown `fmt` values. + +- `\ref` is permitted inside headings and titles. + - Expect: `\ref` must not appear inside `h1`/`h2`/`h3` or `title` bodies (§9.5.6). + - Actual: Inline translation allows references in these contexts without diagnostics. + - Proposed: Detect and reject `\ref` nodes while processing heading and title bodies. diff --git a/build.zig b/build.zig index eb0d9ba..8d8607f 100644 --- a/build.zig +++ b/build.zig @@ -10,6 +10,21 @@ const snapshot_files: []const []const u8 = &.{ "test/snapshot/footnotes.hdoc", }; +const conformance_accept_files: []const []const u8 = &.{ + "test/conformance/accept/inline_escape.hdoc", + "test/conformance/accept/title_header_redundant.hdoc", +}; + +const conformance_reject_files: []const []const u8 = &.{ + "test/conformance/reject/string_cr_escape.hdoc", + "test/conformance/reject/inline_identifier_dash.hdoc", + "test/conformance/reject/heading_sequence.hdoc", + "test/conformance/reject/nested_top_level.hdoc", + "test/conformance/reject/container_children.hdoc", + "test/conformance/reject/time_relative_fmt.hdoc", + "test/conformance/reject/ref_in_heading.hdoc", +}; + pub fn build(b: *std.Build) void { // Options: const target = b.standardTargetOptions(.{}); @@ -74,6 +89,40 @@ pub fn build(b: *std.Build) void { } } + // Conformance snapshots: accept cases (YAML only): + for (conformance_accept_files) |path| { + std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); + const yaml_file = b.fmt("{s}.yaml", .{path[0 .. 
path.len - 5]}); + + const test_run = b.addRunArtifact(exe); + test_run.addArgs(&.{ "--format", "yaml" }); + test_run.addFileArg(b.path(path)); + const generated_file = test_run.captureStdOut(); + + const compare_run = b.addRunArtifact(snapshot_diff); + compare_run.addFileArg(b.path(yaml_file)); + compare_run.addFileArg(generated_file); + + test_step.dependOn(&compare_run.step); + } + + // Conformance snapshots: reject cases (diagnostics on stderr, expect exit code 1): + for (conformance_reject_files) |path| { + std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); + const diag_file = b.fmt("{s}.diag", .{path[0 .. path.len - 5]}); + + const test_run = b.addRunArtifact(exe); + test_run.addFileArg(b.path(path)); + test_run.expectExitCode(1); + const generated_diag = test_run.captureStdErr(); + + const compare_run = b.addRunArtifact(snapshot_diff); + compare_run.addFileArg(b.path(diag_file)); + compare_run.addFileArg(generated_diag); + + test_step.dependOn(&compare_run.step); + } + // Unit tests: const exe_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/test/conformance/accept/inline_escape.hdoc b/test/conformance/accept/inline_escape.hdoc new file mode 100644 index 0000000..5988ae8 --- /dev/null +++ b/test/conformance/accept/inline_escape.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +p { backslash \\ brace-open \{ brace-close \} } diff --git a/test/conformance/accept/inline_escape.yaml b/test/conformance/accept/inline_escape.yaml new file mode 100644 index 0000000..c222dd3 --- /dev/null +++ b/test/conformance/accept/inline_escape.yaml @@ -0,0 +1,19 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: null + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "backslash \\\\ brace-open \\{ brace-close \\}" + ids: + - null diff --git a/test/conformance/accept/title_header_redundant.hdoc b/test/conformance/accept/title_header_redundant.hdoc new 
file mode 100644 index 0000000..acd0c0a --- /dev/null +++ b/test/conformance/accept/title_header_redundant.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en", title="Header Title"); + +title { Header Title } + +p "body" diff --git a/test/conformance/accept/title_header_redundant.yaml b/test/conformance/accept/title_header_redundant.yaml new file mode 100644 index 0000000..5e82b26 --- /dev/null +++ b/test/conformance/accept/title_header_redundant.yaml @@ -0,0 +1,24 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Header Title" + full: + lang: "" + content: + - [] "Header Title" + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "body" + ids: + - null diff --git a/test/conformance/reject/container_children.diag b/test/conformance/reject/container_children.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/container_children.hdoc b/test/conformance/reject/container_children.hdoc new file mode 100644 index 0000000..71ce4ad --- /dev/null +++ b/test/conformance/reject/container_children.hdoc @@ -0,0 +1,11 @@ +hdoc(version="2.0", lang="en"); + +ul { + li { + h1 "Heading child" + } +} + +note { + h1 "Inside note" +} diff --git a/test/conformance/reject/heading_sequence.diag b/test/conformance/reject/heading_sequence.diag new file mode 100644 index 0000000..31568cd --- /dev/null +++ b/test/conformance/reject/heading_sequence.diag @@ -0,0 +1,3 @@ +test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. +test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. +test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. 
diff --git a/test/conformance/reject/heading_sequence.hdoc b/test/conformance/reject/heading_sequence.hdoc new file mode 100644 index 0000000..c8c9b43 --- /dev/null +++ b/test/conformance/reject/heading_sequence.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +h3 "Third level first" +h1 "Top" +h3 "Third without second" diff --git a/test/conformance/reject/inline_identifier_dash.diag b/test/conformance/reject/inline_identifier_dash.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/inline_identifier_dash.hdoc b/test/conformance/reject/inline_identifier_dash.hdoc new file mode 100644 index 0000000..1948b61 --- /dev/null +++ b/test/conformance/reject/inline_identifier_dash.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +p { \bad-name "ok" } diff --git a/test/conformance/reject/nested_top_level.diag b/test/conformance/reject/nested_top_level.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/nested_top_level.hdoc b/test/conformance/reject/nested_top_level.hdoc new file mode 100644 index 0000000..b418705 --- /dev/null +++ b/test/conformance/reject/nested_top_level.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +note { + h1 "Nested heading" +} diff --git a/test/conformance/reject/ref_in_heading.diag b/test/conformance/reject/ref_in_heading.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/ref_in_heading.hdoc b/test/conformance/reject/ref_in_heading.hdoc new file mode 100644 index 0000000..fcd2ace --- /dev/null +++ b/test/conformance/reject/ref_in_heading.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +p(id="target") "Target" + +h1 { Heading \ref(ref="target") "see"; } diff --git a/test/conformance/reject/string_cr_escape.diag b/test/conformance/reject/string_cr_escape.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/string_cr_escape.hdoc b/test/conformance/reject/string_cr_escape.hdoc new file 
mode 100644 index 0000000..204b3de --- /dev/null +++ b/test/conformance/reject/string_cr_escape.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +p "line\rline" diff --git a/test/conformance/reject/time_relative_fmt.diag b/test/conformance/reject/time_relative_fmt.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/time_relative_fmt.hdoc b/test/conformance/reject/time_relative_fmt.hdoc new file mode 100644 index 0000000..767ed26 --- /dev/null +++ b/test/conformance/reject/time_relative_fmt.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en", tz="+00:00"); + +p { \time(fmt="relative") "12:00:00Z" } From bde5027ffae4c557a89703aad58aebec3e82f8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:37:51 +0100 Subject: [PATCH 102/116] Removes the hint about the now new specification.md file --- docs/AGENTS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 238ae4e..43026ff 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -3,7 +3,6 @@ ## General - `specification.md` is the current "status quo" specifiction. Do not edit unless explicitly asked. -- `docs/specification-proper-draft.md` is the new "shiny" specification. This is the one you should edit if only asked about the "specification". - This file contains a chapter `0. Chapter Status`. This chapter marks each other chapter of the file as FROZEN, DONE, DRAFT or MISSING - If a chapter is marked FROZEN, you are not permitted to change anything in it. - If a chapter is marked DONE, you are only permitted to perform language changes, but not semantic changes. 
From 67d4426f3fdaca524e5d09e7638ef169bbcc5e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:48:26 +0100 Subject: [PATCH 103/116] Clarify inline group brace text --- docs/TODO.md | 21 +-------------------- docs/specification.md | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 556c7dc..c9001b1 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -5,30 +5,11 @@ - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible -- Verbatim-body to text conversion is under-specified. You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). Different implementations may diverge. -- Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. - Span attribute semantics are referenced but not fully defined. §8.2 introduces spans with an “attribute set (e.g. emphasis/monospace/link…)” but the spec never fully defines the canonical attribute keys, nesting behavior (e.g., \em inside \mono), or how lang overrides interact at span level. That’s a major interoperability risk because renderers may differ even if parsers agree. - Refine that `hdoc(title)` is metadata while `title{}` is rendered rich text -- Refine `img(path)` only using forward slash. - - Proposal: Add to §9.3.5: - - "path MUST use forward slashes (/) as path separators, regardless of host OS." 
- - "path MUST be relative; absolute paths and URI schemes (e.g., http://) MUST be rejected." - - "Path resolution is relative to the directory containing the HyperDoc source file." - - "Path traversal outside the source directory (e.g., ../../etc/passwd) SHOULD be rejected or restricted by implementations." -- Proposal: Add to §9.2.4: - - "Multiple toc elements MAY appear in a document; each MUST render the same heading structure but MAY appear at different locations." - - "If depth differs between instances, each TOC renders independently according to its own depth attribute." -- Add to §9.2.5: - - "Multiple footnotes elements partition footnote rendering; each instance collects only footnotes/citations accumulated since the previous dump (or document start)." -- Proposal: Add to §4: - - "Implementations MUST support nesting depths of at least 32 levels." - - "Implementations MAY reject documents exceeding this depth with a diagnostic." - - "Nesting depth is measured as the maximum distance from the document root to any leaf node." - Ambiguity of Inline Unicode: - Finding: String literals ("...") support \u{...} escapes (§7.2.1). Inline text streams (bodies of p, h1) do not (§6.1 only lists \\, \{, \}). - Issue: Authors cannot enter invisible characters (like Non-Breaking Space U+00A0 or Zero Width Space U+200B) into a paragraph without pasting the raw invisible character, which is brittle and invisible in editors. -- Recommendation: Add explicit sequencing in §7 stating: "Escape decoding MUST occur during semantic validation, before inline text construction (§8.2) for inline-list bodies, and before attribute validation for attribute values." -- Recommendation: Add to §9.2.1: "If the document contains any \date, \time, or \datetime elements with fmt values other than iso, and hdoc(lang) is not specified, implementations SHOULD emit a diagnostic." - Issue: "Lexical" implies only regex-level matching. It does not strictly forbid 2023-02-31. 
For a strict format, "Semantic" validity (Gregorian correctness) should be enforced to prevent invalid metadata. ## Potential Future Features @@ -120,4 +101,4 @@ quote { - `include(path="...")` is rejected for unbounded document content growth - `code` is just `\mono(syntax="…")` - `details/summary` is just HTML with dynamic changing page layout, ever tried printing this? -- `\math`, `equation{…}` have too high implementation complexity and have high requirements on fonts, font renderers and layout engines. \ No newline at end of file +- `\math`, `equation{…}` have too high implementation complexity and have high requirements on fonts, font renderers and layout engines. diff --git a/docs/specification.md b/docs/specification.md index fcee7cd..7ebfe42 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -206,6 +206,12 @@ The grammar is intentionally ambiguous; a deterministic external rule selects a - Attribute values are **string literals** (see §5.5). - Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §10.1). +### 4.4 Nesting depth (syntax) + +- Implementations **MUST** support nesting depths of at least 32 levels. +- Implementations **MAY** reject documents that exceed this depth with a diagnostic. +- Nesting depth is measured as the maximum distance from the document root to any leaf node. + ## 5. Grammar and additional syntax rules ### 5.1 Grammar (EBNF) @@ -351,6 +357,8 @@ Tooling that aims to preserve author intent **SHOULD** preserve whether braces w Escape sequences are recognized only in string literals (node bodies of the `"..."` form and attribute values). No other syntax performs string-literal escape decoding. +Escape decoding **MUST** occur during semantic validation, before inline text construction (§8.2) for inline-list bodies, and before attribute validation for attribute values. + ### 7.1 Control character policy (semantic) - A semantic validator **MAY** reject TAB (U+0009) in source text. 
@@ -432,6 +440,8 @@ Semantic processing **MUST** construct inline text as a sequence of **spans**, w - a Unicode string, and - an attribute set (e.g. emphasis/monospace/link, language overrides, etc.). +Inline groups are structural only: when converting the inline tree into spans, implementations **MUST** flatten `inline_group` boundaries. An `inline_group` **MUST NOT** create a span boundary and **MUST NOT** affect whitespace normalization, but it **MUST** contribute the literal `{` and `}` characters to the inline text at its start and end. + Processing rules: 1. **Parse → tree:** Parsing preserves `ws` and yields an inline tree (text items, inline nodes, and inline groups). @@ -586,6 +596,9 @@ The elements in this chapter **MUST** appear only as top-level block elements (d - `date` (optional): datetime lexical format (§10.2.3) - `tz` (optional): default timezone for time/datetime values (§10.2) +Diagnostics: +- If the document contains any `\date`, `\time`, or `\datetime` elements with `fmt` values other than `iso`, and `hdoc(lang)` is not specified, implementations **SHOULD** emit a diagnostic. + #### 9.2.2 `title` (document title) - **Role:** document-level display title @@ -624,6 +637,8 @@ Heading structure and numbering: Semantic constraints: - `toc` **MUST** be a top-level block element (a direct child of the document). +- Multiple `toc` elements **MAY** appear in a document; each **MUST** render the same heading structure but **MAY** appear at different locations. +- If `depth` differs between instances, each `toc` **MUST** render independently according to its own `depth` attribute. #### 9.2.5 Footnote dump: `footnotes` @@ -635,6 +650,7 @@ Semantic constraints: Semantics: +- Multiple `footnotes` elements **MAY** appear in a document. - `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). 
- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. - `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. @@ -686,6 +702,13 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) +Path semantics: + +- `path` **MUST** use forward slashes (`/`) as path separators, regardless of host operating system. +- `path` **MUST** be relative; absolute paths and URI schemes **MUST** be rejected. +- Path resolution is relative to the directory containing the HyperDoc source file. +- Path traversal outside the source directory (e.g., `../../etc/passwd`) **SHOULD** be rejected or restricted by implementations. + #### 9.3.6 Preformatted: `pre` - **Body:** inline text From 3a9ece8ac671d90bf94da88d956c45f9e81c2d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 00:52:15 +0100 Subject: [PATCH 104/116] Remove llvm requirement for hyperdoc exe --- AGENTS.md | 4 +- SPEC_TODO.md | 40 --- src/hyperdoc.zig | 335 ++++++++++++++---- src/render/html5.zig | 2 +- src/testsuite.zig | 18 +- test/conformance/accept/inline_escape.yaml | 2 +- .../reject/container_children.diag | 4 + test/conformance/reject/heading_sequence.diag | 9 +- .../reject/inline_identifier_dash.diag | 2 + test/conformance/reject/nested_top_level.diag | 2 + test/conformance/reject/ref_in_heading.diag | 2 + test/conformance/reject/string_cr_escape.diag | 2 + .../conformance/reject/time_relative_fmt.diag | 2 + 13 files changed, 304 insertions(+), 120 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5575ad0..f10cdca 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,6 +14,8 @@ - Treat `docs/specification.md` as the authoritative source of behavior; examples may be outdated or incorrect. - If the spec is unclear or conflicts with code/tests, ask before changing behavior. 
- Do not implement "just make it work" fallbacks that alter semantics to satisfy examples. +- Diagnostics must not store dynamic strings (e.g., slices to parsed source). Keep diagnostic payloads POD/small and avoid holding arena-backed text. +- Do not hide crashes by removing safety checks or switching off DebugAllocator; fix the root cause instead. A signal 6 from DebugAllocator indicates memory corruption or a similar misuse. ## Zig Programming Style @@ -25,4 +27,4 @@ - If you add a `hdoc` file to `test/snapshot`, also: - Generate the corresponding html and yaml file - Add the file inside build.zig to the snapshot_files global -- If you change behaviour, the snapshot tests will fail. Validate the failure against your expectations and see if you broke something unexpected. \ No newline at end of file +- If you change behaviour, the snapshot tests will fail. Validate the failure against your expectations and see if you broke something unexpected. diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 77efb91..1e879a9 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,46 +1,6 @@ # Spec compliance TODOs -- Inline escape tokens remain undecoded in inline text construction. - - Expect: `\\`, `\{`, and `\}` tokens produced in inline bodies decode to literal `\`, `{`, and `}` during semantic processing (§6.1). - - Actual: Inline text spans keep the backslash sequences verbatim, so escapes render incorrectly. - - Proposed: Decode these three escape tokens before span merging while preserving locations. - -- String literal control character policy is incomplete. - - Expect: Resolved string values must reject control characters except LF and CR when immediately followed by LF (§7.1). - - Actual: `\r` escapes decode to lone CR codepoints without diagnostics, so invalid CR characters survive into resolved text. - - Proposed: Reject `\r` unless it participates in a CRLF sequence after escape decoding. - -- Identifier parsing permits extra characters. 
- - Expect: Node names use identifier characters limited to letters, digits, and `_`, with inline names beginning with `\`; attribute keys are hyphen-separated segments of the same identifier characters (§5.1, §4.3). - - Actual: Identifiers allow `-` and `\` in any position, so node and attribute names outside the grammar are accepted. - - Proposed: Align identifier character checks with the grammar and treat hyphens only as separators for attribute keys. - -- Heading sequencing rules are missing. - - Expect: `h2` must follow an `h1`, and `h3` must follow an `h2` without intervening `h1` (§9.2.3). - - Actual: Heading indices increment without validating the required ordering. - - Proposed: Track the last seen heading levels and emit errors when a heading appears without its required parent level. - - Title/header interplay lacks the required comparison. - Expect: When both `hdoc(title=...)` and `title { ... }` are present, their plaintext forms are compared and a redundancy hint is emitted if they match (§8.1). - Actual: The block title is used and the header title is ignored without any comparison or diagnostics. - Proposed: Compare the plaintext values, warn when redundant, and keep emitting hints when neither title form is present. - -- Top-level-only elements are allowed to nest. - - Expect: `h1`/`h2`/`h3`, `toc`, and `footnotes` may only appear as top-level blocks (§9.2). - - Actual: Nested blocks (e.g., `note { h1 ... }`) accept these nodes, so top-level elements render within other containers. - - Proposed: Reject top-level elements when they appear in nested block lists. - -- Containers do not restrict children to general text blocks. - - Expect: `li`, `td`, and admonition blocks contain general text block elements (with shorthand promotion) and may be empty for admonitions (§9.1.3, §9.3.2, §9.4.5). - - Actual: Block lists in these containers accept any block type (including headings and footnotes) and treat empty lists as errors. 
- - Proposed: Limit children to the allowed general text blocks and permit empty admonition bodies. - -- `\time` accepts an unsupported `fmt`. - - Expect: `\time(fmt=...)` supports only `iso`, `short`, `long`, and `rough` (§10.3.4). - - Actual: The `fmt` enum includes `relative`, so `fmt="relative"` is accepted. - - Proposed: Remove the unsupported variant and reject unknown `fmt` values. - -- `\ref` is permitted inside headings and titles. - - Expect: `\ref` must not appear inside `h1`/`h2`/`h3` or `title` bodies (§9.5.6). - - Actual: Inline translation allows references in these contexts without diagnostics. - - Proposed: Detect and reject `\ref` nodes while processing heading and title bodies. diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 42919c7..1396b77 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -379,7 +379,6 @@ pub const Time = struct { long, short, rough, - relative, iso, }; @@ -584,13 +583,6 @@ pub fn parse( }; while (true) { - errdefer |err| { - std.log.debug("error at examples/demo.hdoc:{f}: {t}", .{ - parser.make_diagnostic_location(parser.offset), - err, - }); - } - const node = parser.accept_node(.top_level) catch |err| switch (err) { error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), // TODO: What the fuck? Bug report! 
@@ -1028,7 +1020,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }), .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space, .heading), }; return .{ heading, attrs.id }; @@ -1041,7 +1033,7 @@ pub const SemanticAnalyzer = struct { return .{ .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space, .title), }; } @@ -1053,7 +1045,7 @@ pub const SemanticAnalyzer = struct { const heading: Block.Paragraph = .{ .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space, .normal), }; return .{ heading, attrs.id }; @@ -1076,7 +1068,11 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_block_list(node, .text_to_p), + .content = try sema.translate_block_list(node, .{ + .upgrade = .text_to_p, + .allow_empty = true, + .general_text_only = true, + }), }; return .{ admonition, attrs.id }; @@ -1161,7 +1157,7 @@ pub const SemanticAnalyzer = struct { .lang = attrs.lang, .alt = alt, .path = path, - .content = try sema.translate_inline(node, .allow_empty, .one_space), + .content = try sema.translate_inline(node, .allow_empty, .one_space, .normal), }; return .{ image, attrs.id }; @@ -1177,7 +1173,7 @@ pub const SemanticAnalyzer = struct { const preformatted: Block.Preformatted = .{ .lang = attrs.lang, .syntax = attrs.syntax, - .content = try sema.translate_inline(node, .emit_diagnostic, .keep_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .keep_space, .normal), }; return .{ preformatted, attrs.id }; @@ -1342,7 +1338,7 @@ pub const SemanticAnalyzer = struct { rows.appendAssumeCapacity(.{ .group = .{ .lang = row_attrs.lang, - .content = try 
sema.translate_inline(child_node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(child_node, .emit_diagnostic, .one_space, .normal), }, }); }, @@ -1457,7 +1453,10 @@ pub const SemanticAnalyzer = struct { return .{ .lang = attrs.lang, .colspan = colspan, - .content = try sema.translate_block_list(node, .text_to_p), + .content = try sema.translate_block_list(node, .{ + .upgrade = .text_to_p, + .general_text_only = true, + }), }; } @@ -1473,13 +1472,48 @@ pub const SemanticAnalyzer = struct { return .{ .lang = attrs.lang, - .content = try sema.translate_block_list(node, .text_to_p), + .content = try sema.translate_block_list(node, .{ + .upgrade = .text_to_p, + .general_text_only = true, + }), }; } const BlockTextUpgrade = enum { no_upgrade, text_to_p }; + const BlockListOptions = struct { + upgrade: BlockTextUpgrade, + allow_empty: bool = false, + general_text_only: bool = false, + }; + + fn is_top_level_only_block(node_type: Parser.NodeType) bool { + return switch (node_type) { + .h1, .h2, .h3, .toc, .footnotes => true, + else => false, + }; + } + + fn is_general_text_block(node_type: Parser.NodeType) bool { + return switch (node_type) { + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + .ul, + .ol, + .img, + .pre, + .table, + => true, + + else => false, + }; + } - fn translate_block_list(sema: *SemanticAnalyzer, node: Parser.Node, upgrade: BlockTextUpgrade) error{ Unimplemented, InvalidNodeType, OutOfMemory, BadAttributes }![]Block { + fn translate_block_list(sema: *SemanticAnalyzer, node: Parser.Node, options: BlockListOptions) error{ Unimplemented, InvalidNodeType, OutOfMemory, BadAttributes }![]Block { switch (node.body) { .list => |child_nodes| { var blocks: std.ArrayList(Block) = .empty; @@ -1488,7 +1522,12 @@ pub const SemanticAnalyzer = struct { try blocks.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { - if (child_node.type == .toc) { + if 
(is_top_level_only_block(child_node.type)) { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + continue; + } + + if (options.general_text_only and !is_general_text_block(child_node.type)) { try sema.emit_diagnostic(.illegal_child_item, child_node.location); continue; } @@ -1500,16 +1539,26 @@ pub const SemanticAnalyzer = struct { blocks.appendAssumeCapacity(block); } + if (blocks.items.len == 0 and !options.allow_empty) { + try sema.emit_diagnostic(.list_body_required, node.location); + } + return try blocks.toOwnedSlice(sema.arena); }, - .empty, .string, .verbatim, .text_span => switch (upgrade) { + .empty, .string, .verbatim, .text_span => switch (options.upgrade) { .no_upgrade => { + if (options.allow_empty and node.body == .empty) + return &.{}; + try sema.emit_diagnostic(.{ .block_list_required = .{ .type = node.type } }, node.location); return &.{}; }, .text_to_p => { - const spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + if (options.allow_empty and node.body == .empty) + return &.{}; + + const spans = try sema.translate_inline(node, .emit_diagnostic, .one_space, .normal); const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ @@ -1526,11 +1575,13 @@ pub const SemanticAnalyzer = struct { } /// Translates a node into a sequence of inline spans. 
- fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling, whitespace_handling: Whitespace) error{ OutOfMemory, BadAttributes }![]Span { + const InlineContext = enum { normal, heading, title }; + + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling, whitespace_handling: Whitespace, context: InlineContext) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; defer spans.deinit(sema.arena); - try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); + try sema.translate_inline_body(&spans, node.body, .{}, empty_handling, context); return try sema.compact_spans(spans.items, whitespace_handling); } @@ -1723,11 +1774,11 @@ pub const SemanticAnalyzer = struct { return new; } - fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes) !void { + fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes, context: InlineContext) !void { switch (node.type) { .unknown_inline, .text, - => try sema.translate_inline_body(spans, node.body, attribs, .emit_diagnostic), + => try sema.translate_inline_body(spans, node.body, attribs, .emit_diagnostic, context), .@"\\em" => { const props = try sema.get_attributes(node, struct { @@ -1737,7 +1788,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .em = true, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\strike" => { @@ -1748,7 +1799,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .strike = true, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\sub" => { @@ -1759,7 +1810,7 @@ pub const SemanticAnalyzer = struct { try 
sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .subscript, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\sup" => { @@ -1770,7 +1821,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .superscript, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\link" => { @@ -1782,10 +1833,15 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .link = .{ .uri = props.uri }, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\ref" => { + if (context == .heading or context == .title) { + try sema.emit_diagnostic(.{ .inline_not_allowed = .{ .node_type = node.type } }, node.location); + return; + } + const props = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, ref: Reference, @@ -1812,7 +1868,7 @@ pub const SemanticAnalyzer = struct { .location = node.location, }); }, - else => try sema.translate_inline_body(spans, node.body, link_attribs, .emit_diagnostic), + else => try sema.translate_inline_body(spans, node.body, link_attribs, .emit_diagnostic, context), } }, @@ -1825,7 +1881,7 @@ pub const SemanticAnalyzer = struct { .mono = true, .lang = props.lang, .syntax = props.syntax, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\date", @@ -1852,7 +1908,7 @@ pub const SemanticAnalyzer = struct { break :blk; } - const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space, context); // Convert the content_spans into a "rendered string". 
const content_text = (sema.render_spans_to_plaintext(content_spans, .reject_date_time) catch |err| switch (err) { @@ -1933,7 +1989,7 @@ pub const SemanticAnalyzer = struct { defer content_spans.deinit(sema.arena); const content_attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang }); - try sema.translate_inline_body(&content_spans, node.body, content_attribs, .emit_diagnostic); + try sema.translate_inline_body(&content_spans, node.body, content_attribs, .emit_diagnostic, context); const compacted = try sema.compact_spans(content_spans.items, .one_space); if (compacted.len == 0) { @@ -2026,7 +2082,7 @@ pub const SemanticAnalyzer = struct { else if (std.meta.stringToEnum(Format, format_str)) |format| format else blk: { - try sema.emit_diagnostic(.{ .invalid_date_time_fmt = .{ .fmt = format_str } }, get_attribute_location(node, "fmt", .value) orelse node.location); + try sema.emit_diagnostic(.invalid_date_time_fmt, get_attribute_location(node, "fmt", .value) orelse node.location); break :blk .default; }; @@ -2195,7 +2251,14 @@ pub const SemanticAnalyzer = struct { allow_empty, emit_diagnostic, }; - fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }!void { + fn translate_inline_body( + sema: *SemanticAnalyzer, + spans: *std.ArrayList(Span), + body: Parser.Node.Body, + attribs: Span.Attributes, + empty_handling: EmptyHandling, + context: InlineContext, + ) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| switch (empty_handling) { .allow_empty => {}, @@ -2255,13 +2318,22 @@ pub const SemanticAnalyzer = struct { .list => |list| { for (list) |child_node| { - try sema.translate_inline_node(spans, child_node, attribs); + try sema.translate_inline_node(spans, child_node, attribs, context); } }, .text_span => |text_span| { + const decoded_text = if (text_span.text.len == 2 
and text_span.text[0] == '\\') blk: { + switch (text_span.text[1]) { + '{' => break :blk "{", + '}' => break :blk "}", + '\\' => break :blk "\\", + else => break :blk text_span.text, + } + } else text_span.text; + try spans.append(sema.arena, .{ - .content = .{ .text = text_span.text }, + .content = .{ .text = decoded_text }, .attribs = attribs, .location = text_span.location, }); @@ -2683,6 +2755,15 @@ pub const SemanticAnalyzer = struct { fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { const index = @intFromEnum(level); + const missing_parent: ?Block.Heading.Level = switch (level) { + .h1 => null, + .h2 => if (sema.heading_counters[0] == 0) .h1 else null, + .h3 => if (sema.heading_counters[1] == 0) .h2 else null, + }; + if (missing_parent) |missing| { + try sema.emit_diagnostic(.{ .invalid_heading_sequence = .{ .level = level, .missing = missing } }, node.location); + } + sema.heading_counters[index] += 1; if (index > sema.current_heading_level + 1) { @@ -2694,7 +2775,6 @@ pub const SemanticAnalyzer = struct { for (sema.heading_counters[index + 1 ..]) |*val| { val.* = 0; } - _ = node; return switch (level) { .h1 => .{ .h1 = sema.heading_counters[0..1].* }, @@ -2882,6 +2962,19 @@ pub const SemanticAnalyzer = struct { var output = output_buffer.toOwnedSlice(); errdefer output.deinit(sema.arena); + const chars = output.items(.char); + for (chars, 0..) |ch, idx| { + if (ch == std.ascii.control_code.cr) { + const next_is_lf = idx + 1 < chars.len and chars[idx + 1] == std.ascii.control_code.lf; + if (!next_is_lf) { + try sema.emit_diagnostic( + .{ .illegal_character = .{ .codepoint = std.ascii.control_code.cr } }, + output.get(idx).location, + ); + } + } + } + const view = std.unicode.Utf8View.init(output.items(.char)) catch { std.log.err("invalid utf-8 input: \"{f}\"", .{std.zig.fmtString(output.items(.char))}); @panic("String unescape produced invalid UTF-8 sequence. 
This should not be possible."); @@ -2953,7 +3046,7 @@ pub const Parser = struct { return error.EndOfFile; } - const type_ident = parser.accept_identifier() catch |err| switch (err) { + const type_ident = parser.accept_identifier(.node) catch |err| switch (err) { error.UnexpectedEndOfFile => |e| switch (scope_type) { .nested => return e, .top_level => return error.EndOfFile, @@ -2978,7 +3071,7 @@ pub const Parser = struct { while (true) { if (parser.try_accept_char(')')) break; - const attr_name = try parser.accept_identifier(); + const attr_name = try parser.accept_identifier(.attribute); _ = try parser.accept_char('='); const attr_value = try parser.accept_string(); @@ -3333,7 +3426,56 @@ pub const Parser = struct { return error.UnterminatedStringLiteral; } - pub fn accept_identifier(parser: *Parser) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { + pub const IdentifierKind = enum { + node, + attribute, + }; + + fn is_identifier_char(c: u8) bool { + return switch (c) { + 'a'...'z', + 'A'...'Z', + '0'...'9', + '_', + => true, + else => false, + }; + } + + fn is_node_identifier_terminator(c: u8) bool { + return switch (c) { + ' ', + '\t', + '\n', + '\r', + '(', + ')', + '{', + '}', + ';', + ':', + '"', + ',', + => true, + else => false, + }; + } + + fn is_attribute_identifier_terminator(c: u8) bool { + return switch (c) { + ' ', + '\t', + '\n', + '\r', + ')', + '=', + ',', + => true, + else => false, + }; + } + + pub fn accept_identifier(parser: *Parser, kind: IdentifierKind) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { parser.skip_whitespace(); if (parser.at_end()) { @@ -3342,17 +3484,76 @@ pub const Parser = struct { } const start = parser.offset; - const first = parser.code[start]; - if (!is_ident_char(first)) { - emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); - return error.InvalidCharacter; - } + switch (kind) { + .node => { + const first = parser.code[start]; + if (first == 
'\\') { + parser.offset += 1; + if (parser.offset >= parser.code.len or !is_identifier_char(parser.code[parser.offset])) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); + return error.InvalidCharacter; + } + } else if (!is_identifier_char(first)) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); + return error.InvalidCharacter; + } else { + parser.offset += 1; + } - while (parser.offset < parser.code.len) { - const c = parser.code[parser.offset]; - if (!is_ident_char(c)) - break; - parser.offset += 1; + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (is_identifier_char(c)) { + parser.offset += 1; + continue; + } + + if (is_node_identifier_terminator(c)) + break; + + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = c } }, parser.make_diagnostic_location(parser.offset)); + return error.InvalidCharacter; + } + }, + .attribute => { + const first = parser.code[start]; + if (!is_identifier_char(first)) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); + return error.InvalidCharacter; + } + + parser.offset += 1; + var prev_was_hyphen = false; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (is_identifier_char(c)) { + prev_was_hyphen = false; + parser.offset += 1; + continue; + } + + if (c == '-') { + if (prev_was_hyphen) { + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = c } }, parser.make_diagnostic_location(parser.offset)); + return error.InvalidCharacter; + } + prev_was_hyphen = true; + parser.offset += 1; + continue; + } + + if (is_attribute_identifier_terminator(c)) + break; + + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = c } }, parser.make_diagnostic_location(parser.offset)); + return error.InvalidCharacter; 
+ } + + if (prev_was_hyphen) { + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = '-' } }, parser.make_diagnostic_location(parser.offset - 1)); + return error.InvalidCharacter; + } + }, } return parser.slice(start, parser.offset); @@ -3434,19 +3635,6 @@ pub const Parser = struct { }; } - pub fn is_ident_char(c: u8) bool { - return switch (c) { - 'a'...'z', - 'A'...'Z', - '0'...'9', - '_', - '-', - '\\', - => true, - else => false, - }; - } - pub const Token = struct { text: []const u8, location: Location, @@ -3639,6 +3827,7 @@ pub const Diagnostic = struct { pub const UnexpectedEof = struct { context: []const u8, expected_char: ?u8 = null }; pub const UnexpectedCharacter = struct { expected: u8, found: u8 }; pub const InvalidIdentifierStart = struct { char: u8 }; + pub const InvalidIdentifierCharacter = struct { char: u8 }; pub const DuplicateAttribute = struct { name: []const u8 }; pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; pub const NodeBodyError = struct { type: Parser.NodeType }; @@ -3647,12 +3836,13 @@ pub const Diagnostic = struct { pub const InvalidBlockError = struct { name: []const u8 }; pub const InlineUsageError = struct { attribute: InlineAttribute }; pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; - pub const DateTimeFormatError = struct { fmt: []const u8 }; pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; pub const ReferenceError = struct { ref: []const u8 }; pub const AutomaticHeading = struct { level: Block.Heading.Level }; + pub const HeadingSequenceError = struct { level: Block.Heading.Level, missing: Block.Heading.Level }; + pub const InlineContextError = struct { node_type: Parser.NodeType }; pub const Code = union(enum) { // errors: @@ -3661,6 +3851,7 @@ pub const Diagnostic = struct { 
unexpected_character: UnexpectedCharacter, unterminated_string, invalid_identifier_start: InvalidIdentifierStart, + invalid_identifier_character: InvalidIdentifierCharacter, unterminated_block_list, missing_hdoc_header: MissingHdocHeader, duplicate_hdoc_header: DuplicateHdocHeader, @@ -3673,10 +3864,11 @@ pub const Diagnostic = struct { invalid_block_type: InvalidBlockError, block_list_required: NodeBodyError, invalid_inline_combination: InlineCombinationError, + inline_not_allowed: InlineContextError, link_not_nestable, invalid_date_time, invalid_date_time_body, - invalid_date_time_fmt: DateTimeFormatError, + invalid_date_time_fmt, missing_timezone, invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, @@ -3700,6 +3892,7 @@ pub const Diagnostic = struct { footnote_missing_ref, footnote_missing_body, footnote_kind_on_reference, + invalid_heading_sequence: HeadingSequenceError, // warnings: document_starts_with_bom, @@ -3725,6 +3918,7 @@ pub const Diagnostic = struct { .unexpected_character, .unterminated_string, .invalid_identifier_start, + .invalid_identifier_character, .unterminated_block_list, .missing_hdoc_header, .duplicate_hdoc_header, @@ -3737,6 +3931,7 @@ pub const Diagnostic = struct { .invalid_block_type, .block_list_required, .invalid_inline_combination, + .inline_not_allowed, .link_not_nestable, .invalid_date_time, .invalid_date_time_fmt, @@ -3764,6 +3959,7 @@ pub const Diagnostic = struct { .footnote_missing_ref, .footnote_missing_body, .footnote_kind_on_reference, + .invalid_heading_sequence, => .@"error", .missing_document_language, @@ -3800,6 +3996,7 @@ pub const Diagnostic = struct { .unexpected_character => |ctx| try w.print("Expected '{c}' but found '{c}'.", .{ ctx.expected, ctx.found }), .unterminated_string => try w.writeAll("Unterminated string literal (missing closing \")."), .invalid_identifier_start => |ctx| try w.print("Invalid identifier start character: '{c}'.", .{ctx.char}), + .invalid_identifier_character => |ctx| 
try w.print("Invalid identifier character: '{c}'.", .{ctx.char}), .unterminated_block_list => try w.writeAll("Block list body is unterminated (missing '}' before end of file)."), .missing_hdoc_header => try w.writeAll("Document must start with an 'hdoc' header."), .duplicate_hdoc_header => try w.writeAll("Only one 'hdoc' header is allowed; additional header found."), @@ -3823,6 +4020,7 @@ pub const Diagnostic = struct { .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), + .inline_not_allowed => |ctx| try w.print("\\{t} is not allowed in this context.", .{ctx.node_type}), .link_not_nestable => try w.writeAll("Links are not nestable"), .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), @@ -3831,7 +4029,7 @@ pub const Diagnostic = struct { .missing_timezone => try w.writeAll("Missing timezone offset; add a 'tz' header attribute or include a timezone in the value."), - .invalid_date_time_fmt => |ctx| try w.print("Invalid 'fmt' value '{s}' for date/time.", .{ctx.fmt}), + .invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' value for date/time."), .invalid_string_escape => |ctx| if (ctx.codepoint > 0x20 and ctx.codepoint <= 0x7F) try w.print("\\{u} is not a valid escape sequence.", .{ctx.codepoint}) @@ -3866,6 +4064,7 @@ pub const Diagnostic = struct { .footnote_missing_ref => try w.writeAll("\\footnote without a body requires a ref=\"...\" attribute."), .footnote_missing_body => try w.writeAll("\\footnote definitions require a non-empty body."), .footnote_kind_on_reference => try w.writeAll("Attribute 'kind' is only valid on defining \\footnote entries."), + .invalid_heading_sequence => |ctx| try w.print("{t} requires a preceding {t}.", .{ ctx.level, ctx.missing }), .missing_document_language => try w.writeAll("Document language is missing; set 
lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), diff --git a/src/render/html5.zig b/src/render/html5.zig index 5aa9b97..a7acf35 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -973,7 +973,7 @@ fn formatTimeValue(value: hdoc.FormattedDateTime(hdoc.Time), buffer: []u8) Rende switch (value.format) { .short, .rough => try writer.print("{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute }), - .long, .relative => { + .long => { try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute, value.value.second }); if (value.value.microsecond > 0) { try writer.print(".{d:0>6}", .{value.value.microsecond}); diff --git a/src/testsuite.zig b/src/testsuite.zig index e2003c8..7d98959 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -60,7 +60,7 @@ test "parser accept identifier and word tokens" { .diagnostics = null, }; - const ident = try parser.accept_identifier(); + const ident = try parser.accept_identifier(.node); try std.testing.expectEqualStrings("h1", ident.text); try std.testing.expectEqual(@as(usize, 0), ident.location.offset); try std.testing.expectEqual(@as(usize, 2), ident.location.length); @@ -82,7 +82,7 @@ test "parser rejects identifiers with invalid start characters" { .diagnostics = null, }; - try std.testing.expectError(error.InvalidCharacter, parser.accept_identifier()); + try std.testing.expectError(error.InvalidCharacter, parser.accept_identifier(.node)); } test "parser accept string literals and unescape" { @@ -563,10 +563,16 @@ test "table of contents inserts automatic headings when skipping levels" { var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); defer doc.deinit(); - try std.testing.expectEqual(@as(usize, 3), diagnostics.items.items.len); + try std.testing.expectEqual(@as(usize, 5), diagnostics.items.items.len); try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, 
.missing_document_language)); - try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[1].code, .{ .automatic_heading_insertion = .{ .level = .h1 } })); - try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[1].code, .{ + .invalid_heading_sequence = .{ .level = .h3, .missing = .h2 }, + })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ + .invalid_heading_sequence = .{ .level = .h2, .missing = .h1 }, + })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[3].code, .{ .automatic_heading_insertion = .{ .level = .h1 } })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[4].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); const toc = doc.toc; try std.testing.expectEqual(.h1, toc.level); @@ -879,7 +885,7 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{ .misplaced_hdoc_header, .duplicate_hdoc_header }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); ul{ li{ toc; } }", &.{.illegal_child_item}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); ul{ li{ toc; } }", &.{ .illegal_child_item, .list_body_required }); } test "table derives column count from first data row" { diff --git a/test/conformance/accept/inline_escape.yaml b/test/conformance/accept/inline_escape.yaml index c222dd3..4f58ab7 100644 --- a/test/conformance/accept/inline_escape.yaml +++ b/test/conformance/accept/inline_escape.yaml @@ 
-14,6 +14,6 @@ document: - paragraph: lang: "" content: - - [] "backslash \\\\ brace-open \\{ brace-close \\}" + - [] "backslash \\ brace-open { brace-close }" ids: - null diff --git a/test/conformance/reject/container_children.diag b/test/conformance/reject/container_children.diag index e69de29..d6354d0 100644 --- a/test/conformance/reject/container_children.diag +++ b/test/conformance/reject/container_children.diag @@ -0,0 +1,4 @@ +/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:5:5: Node not allowed here. +/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:4:3: Node requires list body. +/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:10:3: Node not allowed here. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/container_children.hdoc": InvalidFile diff --git a/test/conformance/reject/heading_sequence.diag b/test/conformance/reject/heading_sequence.diag index 31568cd..ecae9b9 100644 --- a/test/conformance/reject/heading_sequence.diag +++ b/test/conformance/reject/heading_sequence.diag @@ -1,3 +1,6 @@ -test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. -test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. -test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: h3 requires a preceding h2. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: h3 requires a preceding h2. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. 
+error: failed to parse "/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc": InvalidFile diff --git a/test/conformance/reject/inline_identifier_dash.diag b/test/conformance/reject/inline_identifier_dash.diag index e69de29..0528512 100644 --- a/test/conformance/reject/inline_identifier_dash.diag +++ b/test/conformance/reject/inline_identifier_dash.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc:3:9: Invalid identifier character: '-'. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc": SyntaxError diff --git a/test/conformance/reject/nested_top_level.diag b/test/conformance/reject/nested_top_level.diag index e69de29..064fdbe 100644 --- a/test/conformance/reject/nested_top_level.diag +++ b/test/conformance/reject/nested_top_level.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc:4:3: Node not allowed here. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc": InvalidFile diff --git a/test/conformance/reject/ref_in_heading.diag b/test/conformance/reject/ref_in_heading.diag index e69de29..60d0cd0 100644 --- a/test/conformance/reject/ref_in_heading.diag +++ b/test/conformance/reject/ref_in_heading.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc:5:14: \\ref is not allowed in this context. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc": InvalidFile diff --git a/test/conformance/reject/string_cr_escape.diag b/test/conformance/reject/string_cr_escape.diag index e69de29..f85f8c7 100644 --- a/test/conformance/reject/string_cr_escape.diag +++ b/test/conformance/reject/string_cr_escape.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc:3:8: Forbidden control character U+000D. 
+error: failed to parse "/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc": InvalidFile diff --git a/test/conformance/reject/time_relative_fmt.diag b/test/conformance/reject/time_relative_fmt.diag index e69de29..5cbffa2 100644 --- a/test/conformance/reject/time_relative_fmt.diag +++ b/test/conformance/reject/time_relative_fmt.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc:3:15: Invalid 'fmt' value for date/time. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc": InvalidFile From 53888e53935fff9657a0322a1fe6b504b3a01d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 07:07:57 +0100 Subject: [PATCH 105/116] Pretty-print JSON diagnostics in snapshots --- build.zig | 1 + src/main.zig | 28 ++++++-- .../reject/container_children.diag | 33 ++++++++-- test/conformance/reject/heading_sequence.diag | 65 +++++++++++++++++-- .../reject/inline_identifier_dash.diag | 15 ++++- test/conformance/reject/nested_top_level.diag | 13 +++- test/conformance/reject/ref_in_heading.diag | 15 ++++- test/conformance/reject/string_cr_escape.diag | 15 ++++- .../conformance/reject/time_relative_fmt.diag | 13 +++- 9 files changed, 171 insertions(+), 27 deletions(-) diff --git a/build.zig b/build.zig index 8d8607f..2570831 100644 --- a/build.zig +++ b/build.zig @@ -112,6 +112,7 @@ pub fn build(b: *std.Build) void { const diag_file = b.fmt("{s}.diag", .{path[0 .. 
path.len - 5]}); const test_run = b.addRunArtifact(exe); + test_run.addArgs(&.{"--json-diagnostics"}); test_run.addFileArg(b.path(path)); test_run.expectExitCode(1); const generated_diag = test_run.captureStdErr(); diff --git a/src/main.zig b/src/main.zig index 776d241..fffdc9d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -34,17 +34,25 @@ pub fn main() !u8 { options, ); - for (diagnostics.items.items) |diag| { - try stderr.interface.print("{s}:{f}: {f}\n", .{ - options.file_path, - diag.location, - diag.code, - }); + if (options.json_diagnostics) { + const json_options: std.json.Stringify.Options = .{ .whitespace = .indent_2 }; + try std.json.Stringify.value(diagnostics.items.items, json_options, &stderr.interface); + try stderr.interface.writeByte('\n'); + } else { + for (diagnostics.items.items) |diag| { + try stderr.interface.print("{s}:{f}: {f}\n", .{ + options.file_path, + diag.location, + diag.code, + }); + } } try stderr.interface.flush(); parse_result catch |err| { - std.log.err("failed to parse \"{s}\": {t}", .{ options.file_path, err }); + if (!options.json_diagnostics) { + std.log.err("failed to parse \"{s}\": {t}", .{ options.file_path, err }); + } return 1; }; @@ -73,6 +81,7 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic const CliOptions = struct { format: RenderFormat = .html, file_path: []const u8, + json_diagnostics: bool = false, }; const RenderFormat = enum { @@ -98,6 +107,11 @@ fn parse_options(stderr: *std.Io.Writer, argv: []const []const u8) !CliOptions { i += 1; continue; } + if (std.mem.eql(u8, value, "--json-diagnostics")) { + options.json_diagnostics = true; + i += 1; + continue; + } return error.InvalidCli; } diff --git a/test/conformance/reject/container_children.diag b/test/conformance/reject/container_children.diag index d6354d0..9d4bba4 100644 --- a/test/conformance/reject/container_children.diag +++ b/test/conformance/reject/container_children.diag @@ -1,4 +1,29 @@ 
-/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:5:5: Node not allowed here. -/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:4:3: Node requires list body. -/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:10:3: Node not allowed here. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/container_children.hdoc": InvalidFile +[ + { + "code": { + "illegal_child_item": {} + }, + "location": { + "line": 5, + "column": 5 + } + }, + { + "code": { + "list_body_required": {} + }, + "location": { + "line": 4, + "column": 3 + } + }, + { + "code": { + "illegal_child_item": {} + }, + "location": { + "line": 10, + "column": 3 + } + } +] diff --git a/test/conformance/reject/heading_sequence.diag b/test/conformance/reject/heading_sequence.diag index ecae9b9..02e90f2 100644 --- a/test/conformance/reject/heading_sequence.diag +++ b/test/conformance/reject/heading_sequence.diag @@ -1,6 +1,59 @@ -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: h3 requires a preceding h2. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: h3 requires a preceding h2. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. 
-error: failed to parse "/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc": InvalidFile +[ + { + "code": { + "invalid_heading_sequence": { + "level": "h3", + "missing": "h2" + } + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "invalid_heading_sequence": { + "level": "h3", + "missing": "h2" + } + }, + "location": { + "line": 5, + "column": 1 + } + }, + { + "code": { + "automatic_heading_insertion": { + "level": "h1" + } + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "automatic_heading_insertion": { + "level": "h2" + } + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "automatic_heading_insertion": { + "level": "h2" + } + }, + "location": { + "line": 5, + "column": 1 + } + } +] diff --git a/test/conformance/reject/inline_identifier_dash.diag b/test/conformance/reject/inline_identifier_dash.diag index 0528512..3c8dfb8 100644 --- a/test/conformance/reject/inline_identifier_dash.diag +++ b/test/conformance/reject/inline_identifier_dash.diag @@ -1,2 +1,13 @@ -/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc:3:9: Invalid identifier character: '-'. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc": SyntaxError +[ + { + "code": { + "invalid_identifier_character": { + "char": 45 + } + }, + "location": { + "line": 3, + "column": 9 + } + } +] diff --git a/test/conformance/reject/nested_top_level.diag b/test/conformance/reject/nested_top_level.diag index 064fdbe..76ea6e6 100644 --- a/test/conformance/reject/nested_top_level.diag +++ b/test/conformance/reject/nested_top_level.diag @@ -1,2 +1,11 @@ -/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc:4:3: Node not allowed here. 
-error: failed to parse "/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc": InvalidFile +[ + { + "code": { + "illegal_child_item": {} + }, + "location": { + "line": 4, + "column": 3 + } + } +] diff --git a/test/conformance/reject/ref_in_heading.diag b/test/conformance/reject/ref_in_heading.diag index 60d0cd0..68602ea 100644 --- a/test/conformance/reject/ref_in_heading.diag +++ b/test/conformance/reject/ref_in_heading.diag @@ -1,2 +1,13 @@ -/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc:5:14: \\ref is not allowed in this context. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc": InvalidFile +[ + { + "code": { + "inline_not_allowed": { + "node_type": "\\ref" + } + }, + "location": { + "line": 5, + "column": 14 + } + } +] diff --git a/test/conformance/reject/string_cr_escape.diag b/test/conformance/reject/string_cr_escape.diag index f85f8c7..ac57a8b 100644 --- a/test/conformance/reject/string_cr_escape.diag +++ b/test/conformance/reject/string_cr_escape.diag @@ -1,2 +1,13 @@ -/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc:3:8: Forbidden control character U+000D. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc": InvalidFile +[ + { + "code": { + "illegal_character": { + "codepoint": 13 + } + }, + "location": { + "line": 3, + "column": 8 + } + } +] diff --git a/test/conformance/reject/time_relative_fmt.diag b/test/conformance/reject/time_relative_fmt.diag index 5cbffa2..decc5a5 100644 --- a/test/conformance/reject/time_relative_fmt.diag +++ b/test/conformance/reject/time_relative_fmt.diag @@ -1,2 +1,11 @@ -/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc:3:15: Invalid 'fmt' value for date/time. 
-error: failed to parse "/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc": InvalidFile +[ + { + "code": { + "invalid_date_time_fmt": {} + }, + "location": { + "line": 3, + "column": 15 + } + } +] From 6f8fc9921ff3e1148c0d71632356d70a3903fa7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 07:39:10 +0100 Subject: [PATCH 106/116] Add conformance tests for document headers and images --- build.zig | 14 ++++++++--- .../accept/header_and_title_order.hdoc | 5 ++++ .../accept/header_and_title_order.yaml | 24 +++++++++++++++++++ .../accept/image_with_required_path.hdoc | 3 +++ .../accept/image_with_required_path.yaml | 21 ++++++++++++++++ .../conformance/accept/no_title_document.hdoc | 3 +++ .../conformance/accept/no_title_document.yaml | 19 +++++++++++++++ test/conformance/reject/duplicate_header.diag | 20 ++++++++++++++++ test/conformance/reject/duplicate_header.hdoc | 5 ++++ .../reject/hdoc_body_non_empty.diag | 11 +++++++++ .../reject/hdoc_body_non_empty.hdoc | 1 + .../reject/image_missing_path.diag | 14 +++++++++++ .../reject/image_missing_path.hdoc | 3 +++ test/conformance/reject/missing_header.diag | 11 +++++++++ test/conformance/reject/missing_header.hdoc | 1 + .../reject/title_after_content.diag | 11 +++++++++ .../reject/title_after_content.hdoc | 5 ++++ 17 files changed, 168 insertions(+), 3 deletions(-) create mode 100644 test/conformance/accept/header_and_title_order.hdoc create mode 100644 test/conformance/accept/header_and_title_order.yaml create mode 100644 test/conformance/accept/image_with_required_path.hdoc create mode 100644 test/conformance/accept/image_with_required_path.yaml create mode 100644 test/conformance/accept/no_title_document.hdoc create mode 100644 test/conformance/accept/no_title_document.yaml create mode 100644 test/conformance/reject/duplicate_header.diag create mode 100644 test/conformance/reject/duplicate_header.hdoc create mode 100644 
test/conformance/reject/hdoc_body_non_empty.diag create mode 100644 test/conformance/reject/hdoc_body_non_empty.hdoc create mode 100644 test/conformance/reject/image_missing_path.diag create mode 100644 test/conformance/reject/image_missing_path.hdoc create mode 100644 test/conformance/reject/missing_header.diag create mode 100644 test/conformance/reject/missing_header.hdoc create mode 100644 test/conformance/reject/title_after_content.diag create mode 100644 test/conformance/reject/title_after_content.hdoc diff --git a/build.zig b/build.zig index 2570831..626ffc0 100644 --- a/build.zig +++ b/build.zig @@ -11,18 +11,26 @@ const snapshot_files: []const []const u8 = &.{ }; const conformance_accept_files: []const []const u8 = &.{ + "test/conformance/accept/header_and_title_order.hdoc", + "test/conformance/accept/image_with_required_path.hdoc", "test/conformance/accept/inline_escape.hdoc", + "test/conformance/accept/no_title_document.hdoc", "test/conformance/accept/title_header_redundant.hdoc", }; const conformance_reject_files: []const []const u8 = &.{ - "test/conformance/reject/string_cr_escape.hdoc", - "test/conformance/reject/inline_identifier_dash.hdoc", + "test/conformance/reject/container_children.hdoc", + "test/conformance/reject/duplicate_header.hdoc", + "test/conformance/reject/hdoc_body_non_empty.hdoc", "test/conformance/reject/heading_sequence.hdoc", + "test/conformance/reject/image_missing_path.hdoc", + "test/conformance/reject/inline_identifier_dash.hdoc", + "test/conformance/reject/missing_header.hdoc", "test/conformance/reject/nested_top_level.hdoc", - "test/conformance/reject/container_children.hdoc", "test/conformance/reject/time_relative_fmt.hdoc", "test/conformance/reject/ref_in_heading.hdoc", + "test/conformance/reject/string_cr_escape.hdoc", + "test/conformance/reject/title_after_content.hdoc", }; pub fn build(b: *std.Build) void { diff --git a/test/conformance/accept/header_and_title_order.hdoc 
b/test/conformance/accept/header_and_title_order.hdoc new file mode 100644 index 0000000..3357233 --- /dev/null +++ b/test/conformance/accept/header_and_title_order.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +title { Proper Order } + +p "Body content" diff --git a/test/conformance/accept/header_and_title_order.yaml b/test/conformance/accept/header_and_title_order.yaml new file mode 100644 index 0000000..604bdd5 --- /dev/null +++ b/test/conformance/accept/header_and_title_order.yaml @@ -0,0 +1,24 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Proper Order" + full: + lang: "" + content: + - [] "Proper Order" + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "Body content" + ids: + - null diff --git a/test/conformance/accept/image_with_required_path.hdoc b/test/conformance/accept/image_with_required_path.hdoc new file mode 100644 index 0000000..5152870 --- /dev/null +++ b/test/conformance/accept/image_with_required_path.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +img(path="media/picture.png", alt="Example figure") { Figure caption } diff --git a/test/conformance/accept/image_with_required_path.yaml b/test/conformance/accept/image_with_required_path.yaml new file mode 100644 index 0000000..9376937 --- /dev/null +++ b/test/conformance/accept/image_with_required_path.yaml @@ -0,0 +1,21 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: null + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - image: + lang: "" + alt: "Example figure" + path: "media/picture.png" + content: + - [] "Figure caption" + ids: + - null diff --git a/test/conformance/accept/no_title_document.hdoc b/test/conformance/accept/no_title_document.hdoc new file mode 100644 index 0000000..1c046ef --- /dev/null +++ b/test/conformance/accept/no_title_document.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); 
+ +p "Untitled body" diff --git a/test/conformance/accept/no_title_document.yaml b/test/conformance/accept/no_title_document.yaml new file mode 100644 index 0000000..4be7da4 --- /dev/null +++ b/test/conformance/accept/no_title_document.yaml @@ -0,0 +1,19 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: null + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "Untitled body" + ids: + - null diff --git a/test/conformance/reject/duplicate_header.diag b/test/conformance/reject/duplicate_header.diag new file mode 100644 index 0000000..79d5d3b --- /dev/null +++ b/test/conformance/reject/duplicate_header.diag @@ -0,0 +1,20 @@ +[ + { + "code": { + "misplaced_hdoc_header": {} + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "duplicate_hdoc_header": {} + }, + "location": { + "line": 3, + "column": 1 + } + } +] diff --git a/test/conformance/reject/duplicate_header.hdoc b/test/conformance/reject/duplicate_header.hdoc new file mode 100644 index 0000000..faeb809 --- /dev/null +++ b/test/conformance/reject/duplicate_header.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +hdoc(version="2.0", lang="en"); + +p "Duplicate headers" diff --git a/test/conformance/reject/hdoc_body_non_empty.diag b/test/conformance/reject/hdoc_body_non_empty.diag new file mode 100644 index 0000000..1b0ff8b --- /dev/null +++ b/test/conformance/reject/hdoc_body_non_empty.diag @@ -0,0 +1,11 @@ +[ + { + "code": { + "non_empty_hdoc_body": {} + }, + "location": { + "line": 1, + "column": 1 + } + } +] diff --git a/test/conformance/reject/hdoc_body_non_empty.hdoc b/test/conformance/reject/hdoc_body_non_empty.hdoc new file mode 100644 index 0000000..cf1aa2a --- /dev/null +++ b/test/conformance/reject/hdoc_body_non_empty.hdoc @@ -0,0 +1 @@ +hdoc(version="2.0", lang="en") "not empty" diff --git a/test/conformance/reject/image_missing_path.diag 
b/test/conformance/reject/image_missing_path.diag new file mode 100644 index 0000000..9cc8cbe --- /dev/null +++ b/test/conformance/reject/image_missing_path.diag @@ -0,0 +1,14 @@ +[ + { + "code": { + "missing_attribute": { + "type": "img", + "name": "path" + } + }, + "location": { + "line": 3, + "column": 1 + } + } +] diff --git a/test/conformance/reject/image_missing_path.hdoc b/test/conformance/reject/image_missing_path.hdoc new file mode 100644 index 0000000..3051dc6 --- /dev/null +++ b/test/conformance/reject/image_missing_path.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +img { Figure caption } diff --git a/test/conformance/reject/missing_header.diag b/test/conformance/reject/missing_header.diag new file mode 100644 index 0000000..901fdd5 --- /dev/null +++ b/test/conformance/reject/missing_header.diag @@ -0,0 +1,11 @@ +[ + { + "code": { + "missing_hdoc_header": {} + }, + "location": { + "line": 1, + "column": 1 + } + } +] diff --git a/test/conformance/reject/missing_header.hdoc b/test/conformance/reject/missing_header.hdoc new file mode 100644 index 0000000..f942349 --- /dev/null +++ b/test/conformance/reject/missing_header.hdoc @@ -0,0 +1 @@ +p "No header present" diff --git a/test/conformance/reject/title_after_content.diag b/test/conformance/reject/title_after_content.diag new file mode 100644 index 0000000..8d0abcb --- /dev/null +++ b/test/conformance/reject/title_after_content.diag @@ -0,0 +1,11 @@ +[ + { + "code": { + "misplaced_title_block": {} + }, + "location": { + "line": 5, + "column": 1 + } + } +] diff --git a/test/conformance/reject/title_after_content.hdoc b/test/conformance/reject/title_after_content.hdoc new file mode 100644 index 0000000..8aa7651 --- /dev/null +++ b/test/conformance/reject/title_after_content.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +p "First content" + +title { Late Title } From 27bd7c3be47abf93bddb7a6d43d36fd2afdea65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 
2026 11:13:38 +0100 Subject: [PATCH 107/116] Simplify wasm buffers and logging --- build.zig | 15 +++ src/playground.html | 284 ++++++++++++++++++++++++++++++++++++++++++++ src/wasm.zig | 221 ++++++++++++++++++++++++++++++++++ 3 files changed, 520 insertions(+) create mode 100644 src/playground.html create mode 100644 src/wasm.zig diff --git a/build.zig b/build.zig index 626ffc0..ab971a0 100644 --- a/build.zig +++ b/build.zig @@ -41,6 +41,7 @@ pub fn build(b: *std.Build) void { // Targets: const run_step = b.step("run", "Run the app"); const test_step = b.step("test", "Run unit tests"); + const wasm_target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }); // Build: const hyperdoc = b.addModule("hyperdoc", .{ @@ -60,6 +61,20 @@ pub fn build(b: *std.Build) void { }); b.installArtifact(exe); + const wasm_exe = b.addExecutable(.{ + .name = "hyperdoc_wasm", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/wasm.zig"), + .target = wasm_target, + .optimize = optimize, + .single_threaded = true, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, + }), + }); + b.installArtifact(wasm_exe); + const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); if (b.args) |arg| { diff --git a/src/playground.html b/src/playground.html new file mode 100644 index 0000000..6336eed --- /dev/null +++ b/src/playground.html @@ -0,0 +1,284 @@ + + + + + HyperDoc Playground + + + +
      HyperDoc Playground
      +
      +
      +
      + HyperDoc Source + Waiting for WASM… +
      + +
      +
      +
      + Preview + +
      +
      +
        +
        +
        + + + diff --git a/src/wasm.zig b/src/wasm.zig new file mode 100644 index 0000000..852768a --- /dev/null +++ b/src/wasm.zig @@ -0,0 +1,221 @@ +const std = @import("std"); +const hyperdoc = @import("hyperdoc"); + +const LogLevel = enum(u8) { err, warn, info, debug }; + +extern fn reset_log() void; +extern fn append_log(ptr: [*]const u8, len: usize) void; +extern fn flush_log(level: LogLevel) void; + +const LogWriter = struct { + fn appendWrite(self: LogWriter, chunk: []const u8) error{OutOfMemory}!usize { + _ = self; + append_log(chunk.ptr, chunk.len); + return chunk.len; + } + + fn writer(self: LogWriter) std.io.GenericWriter(LogWriter, error{OutOfMemory}, appendWrite) { + return .{ .context = self }; + } +}; + +fn log_to_host( + comptime level: std.log.Level, + comptime _scope: @TypeOf(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + _ = _scope; + + reset_log(); + + const log_writer = LogWriter{}; + const writer = log_writer.writer(); + _ = std.fmt.format(writer, format, args) catch {}; + + const mapped: LogLevel = switch (level) { + .err => .err, + .warn => .warn, + .info => .info, + .debug => .debug, + }; + + flush_log(mapped); +} + +fn fixedPageSize() usize { + return 4096; +} + +fn zeroRandom(buffer: []u8) void { + @memset(buffer, 0); +} + +pub const std_options: std.Options = .{ + .enable_segfault_handler = false, + .logFn = log_to_host, + .queryPageSize = fixedPageSize, + .cryptoRandomSeed = zeroRandom, +}; + +const allocator = std.heap.wasm_allocator; + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, ret_addr: ?usize) noreturn { + _ = message; + _ = stack_trace; + _ = ret_addr; + @breakpoint(); + unreachable; +} + +pub fn main() !void {} + +const DiagnosticView = struct { + line: u32, + column: u32, + message: []u8, +}; + +var document_buffer: std.array_list.Managed(u8) = std.array_list.Managed(u8).init(allocator); +var html_buffer: std.array_list.Managed(u8) = 
std.array_list.Managed(u8).init(allocator); +var diagnostic_views: std.array_list.Managed(DiagnosticView) = std.array_list.Managed(DiagnosticView).init(allocator); +var diagnostic_text: std.array_list.Managed(u8) = std.array_list.Managed(u8).init(allocator); + +const CountingWriter = struct { + count: usize = 0, + + fn write(self: *CountingWriter, bytes: []const u8) error{}!usize { + self.count += bytes.len; + return bytes.len; + } + + fn generic(self: *CountingWriter) std.Io.GenericWriter(*CountingWriter, error{}, write) { + return .{ .context = self }; + } +}; + +fn capture_diagnostics(source: *hyperdoc.Diagnostics) !void { + diagnostic_views.clearRetainingCapacity(); + diagnostic_text.clearRetainingCapacity(); + + if (source.items.items.len == 0) return; + + var total: usize = 0; + for (source.items.items) |diag| { + var cw: CountingWriter = .{}; + _ = diag.code.format(cw.generic()) catch {}; + total += cw.count; + } + + diagnostic_text.ensureTotalCapacityPrecise(total) catch return; + + var diag_writer = diagnostic_text.writer(); + var adapter_buffer: [256]u8 = undefined; + var adapter = diag_writer.any().adaptToNewApi(&adapter_buffer); + + for (source.items.items) |diag| { + const start = diagnostic_text.items.len; + diag.code.format(&adapter.new_interface) catch { + adapter.err = error.WriteFailed; + }; + if (adapter.err) |_| return; + + const rendered = diagnostic_text.items[start..]; + try diagnostic_views.append(.{ + .line = diag.location.line, + .column = diag.location.column, + .message = rendered, + }); + } +} + +export fn hdoc_set_document_len(len: usize) bool { + document_buffer.clearRetainingCapacity(); + document_buffer.items.len = 0; + + if (len == 0) return true; + + document_buffer.ensureTotalCapacityPrecise(len) catch return false; + document_buffer.items.len = len; + return true; +} + +export fn hdoc_document_ptr() [*]u8 { + return document_buffer.items.ptr; +} + +export fn hdoc_process() bool { + html_buffer.clearRetainingCapacity(); + 
diagnostic_views.clearRetainingCapacity(); + diagnostic_text.clearRetainingCapacity(); + + const source: []const u8 = document_buffer.items; + + var diagnostics = hyperdoc.Diagnostics.init(allocator); + defer diagnostics.deinit(); + + var parsed = hyperdoc.parse(allocator, source, &diagnostics) catch { + capture_diagnostics(&diagnostics) catch {}; + return false; + }; + defer parsed.deinit(); + + if (diagnostics.has_error()) { + capture_diagnostics(&diagnostics) catch {}; + return false; + } + + var html_writer = html_buffer.writer(); + var html_adapter_buffer: [256]u8 = undefined; + var html_adapter = html_writer.any().adaptToNewApi(&html_adapter_buffer); + + hyperdoc.render.html5(parsed, &html_adapter.new_interface) catch { + html_adapter.err = error.WriteFailed; + }; + if (html_adapter.err) |_| { + capture_diagnostics(&diagnostics) catch {}; + return false; + } + + capture_diagnostics(&diagnostics) catch {}; + return true; +} + +export fn hdoc_html_ptr() ?[*]const u8 { + if (html_buffer.items.len == 0) return null; + return html_buffer.items.ptr; +} + +export fn hdoc_html_len() usize { + return html_buffer.items.len; +} + +export fn hdoc_diagnostic_count() usize { + return diagnostic_views.items.len; +} + +export fn hdoc_diagnostic_line(index: usize) u32 { + if (index >= diagnostic_views.items.len) return 0; + + return diagnostic_views.items[index].line; +} + +export fn hdoc_diagnostic_column(index: usize) u32 { + if (index >= diagnostic_views.items.len) return 0; + + return diagnostic_views.items[index].column; +} + +export fn hdoc_diagnostic_message_ptr(index: usize) ?[*]const u8 { + if (index >= diagnostic_views.items.len) return null; + + if (diagnostic_views.items[index].message.len == 0) return null; + + return diagnostic_views.items[index].message.ptr; +} + +export fn hdoc_diagnostic_message_len(index: usize) usize { + if (index >= diagnostic_views.items.len) return 0; + + return diagnostic_views.items[index].message.len; +} From 
ccedc73b1dc61460500e505a9a0fa2c5bcccfabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 12:48:56 +0100 Subject: [PATCH 108/116] Gate WASM Node tests via build --- README.md | 2 + build.zig | 21 ++++ src/wasm.zig | 6 + test/wasm/diagnostic_accepted.hdoc | 3 + test/wasm/diagnostic_rejected.hdoc | 2 + test/wasm/diagnostics_expected.json | 16 +++ test/wasm/validate.js | 177 ++++++++++++++++++++++++++++ 7 files changed, 227 insertions(+) create mode 100644 test/wasm/diagnostic_accepted.hdoc create mode 100644 test/wasm/diagnostic_rejected.hdoc create mode 100644 test/wasm/diagnostics_expected.json create mode 100644 test/wasm/validate.js diff --git a/README.md b/README.md index 6806429..08847f7 100644 --- a/README.md +++ b/README.md @@ -29,3 +29,5 @@ Requires [Zig 0.15.2](https://ziglang.org/) installed. ```sh-session [user@host] hyperdoc$ zig build test ``` + +> Optional: installing Node.js enables the WASM integration tests that exercise the compiled `hyperdoc_wasm.wasm` via `node test/wasm/validate.js`. 
diff --git a/build.zig b/build.zig index ab971a0..5e35295 100644 --- a/build.zig +++ b/build.zig @@ -73,6 +73,18 @@ pub fn build(b: *std.Build) void { }, }), }); + wasm_exe.root_module.export_symbol_names = &.{ + "hdoc_set_document_len", + "hdoc_document_ptr", + "hdoc_process", + "hdoc_html_ptr", + "hdoc_html_len", + "hdoc_diagnostic_count", + "hdoc_diagnostic_line", + "hdoc_diagnostic_column", + "hdoc_diagnostic_message_ptr", + "hdoc_diagnostic_message_len", + }; b.installArtifact(wasm_exe); const run_cmd = b.addRunArtifact(exe); @@ -177,6 +189,15 @@ pub fn build(b: *std.Build) void { .use_llvm = true, }); test_step.dependOn(&b.addRunArtifact(main_tests).step); + + const node_path = b.findProgram(&.{"node"}, &.{}) catch null; + if (node_path) |node| { + const wasm_validate = b.addSystemCommand(&.{ node, "test/wasm/validate.js" }); + wasm_validate.step.dependOn(b.getInstallStep()); + test_step.dependOn(&wasm_validate.step); + } else { + std.debug.print("node not found; skipping WASM integration tests\n", .{}); + } } fn rawFileMod(b: *std.Build, path: []const u8) std.Build.Module.Import { diff --git a/src/wasm.zig b/src/wasm.zig index 852768a..8cc0627 100644 --- a/src/wasm.zig +++ b/src/wasm.zig @@ -118,6 +118,9 @@ fn capture_diagnostics(source: *hyperdoc.Diagnostics) !void { diag.code.format(&adapter.new_interface) catch { adapter.err = error.WriteFailed; }; + adapter.new_interface.flush() catch { + adapter.err = error.WriteFailed; + }; if (adapter.err) |_| return; const rendered = diagnostic_text.items[start..]; @@ -172,6 +175,9 @@ export fn hdoc_process() bool { hyperdoc.render.html5(parsed, &html_adapter.new_interface) catch { html_adapter.err = error.WriteFailed; }; + html_adapter.new_interface.flush() catch { + html_adapter.err = error.WriteFailed; + }; if (html_adapter.err) |_| { capture_diagnostics(&diagnostics) catch {}; return false; diff --git a/test/wasm/diagnostic_accepted.hdoc b/test/wasm/diagnostic_accepted.hdoc new file mode 100644 index 
0000000..fcd9f85 --- /dev/null +++ b/test/wasm/diagnostic_accepted.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0"); +title "WASM Warning Coverage" +p { The header intentionally omits a lang attribute. } diff --git a/test/wasm/diagnostic_rejected.hdoc b/test/wasm/diagnostic_rejected.hdoc new file mode 100644 index 0000000..a43140b --- /dev/null +++ b/test/wasm/diagnostic_rejected.hdoc @@ -0,0 +1,2 @@ +h1 "Missing header" +p { This file lacks the required hdoc header. } diff --git a/test/wasm/diagnostics_expected.json b/test/wasm/diagnostics_expected.json new file mode 100644 index 0000000..703225c --- /dev/null +++ b/test/wasm/diagnostics_expected.json @@ -0,0 +1,16 @@ +{ + "accepted": [ + { + "line": 1, + "column": 1, + "message": "Document language is missing; set lang on the hdoc header." + } + ], + "rejected": [ + { + "line": 1, + "column": 1, + "message": "Document must start with an 'hdoc' header." + } + ] +} diff --git a/test/wasm/validate.js b/test/wasm/validate.js new file mode 100644 index 0000000..45ffd18 --- /dev/null +++ b/test/wasm/validate.js @@ -0,0 +1,177 @@ +#!/usr/bin/env node +'use strict'; + +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const path = require('node:path'); + +const textEncoder = new TextEncoder(); +const textDecoder = new TextDecoder(); + +const repoRoot = path.join(__dirname, '..', '..'); +const wasmPath = path.join(repoRoot, 'zig-out', 'bin', 'hyperdoc_wasm.wasm'); + +const htmlSnapshotTests = [ + { + name: 'document_header', + source: path.join(repoRoot, 'test', 'snapshot', 'document_header.hdoc'), + expected: path.join(repoRoot, 'test', 'snapshot', 'document_header.html'), + }, + { + name: 'paragraph_styles', + source: path.join(repoRoot, 'test', 'snapshot', 'paragraph_styles.hdoc'), + expected: path.join(repoRoot, 'test', 'snapshot', 'paragraph_styles.html'), + }, + { + name: 'tables', + source: path.join(repoRoot, 'test', 'snapshot', 'tables.hdoc'), + expected: path.join(repoRoot, 'test', 
'snapshot', 'tables.html'), + }, +]; + +const diagnosticsInput = { + accepted: path.join(__dirname, 'diagnostic_accepted.hdoc'), + rejected: path.join(__dirname, 'diagnostic_rejected.hdoc'), + expected: path.join(__dirname, 'diagnostics_expected.json'), +}; + +function assertFileExists(filePath) { + if (!fs.existsSync(filePath)) { + throw new Error(`Missing required file: ${filePath}`); + } +} + +function readUtf8(filePath) { + return fs.readFileSync(filePath, 'utf8'); +} + +function createLogImports(memoryRef) { + const state = { buffer: '' }; + return { + reset_log() { + state.buffer = ''; + }, + append_log(ptr, len) { + if (len === 0 || ptr === 0) return; + const memory = memoryRef.current; + if (!memory) return; + const view = new Uint8Array(memory.buffer, ptr, len); + state.buffer += textDecoder.decode(view); + }, + flush_log(level) { + if (state.buffer.length === 0) return; + const method = ['error', 'warn', 'info', 'debug'][level] || 'log'; + console[method](`[wasm ${method}] ${state.buffer}`); + state.buffer = ''; + }, + }; +} + +function getMemory(wasm, memoryRef) { + const memory = wasm.memory || memoryRef.current; + memoryRef.current = memory; + if (!memory) { + throw new Error('WASM memory is unavailable'); + } + return memory; +} + +async function instantiateWasm() { + assertFileExists(wasmPath); + const bytes = await fs.promises.readFile(wasmPath); + const memoryRef = { current: null }; + const env = createLogImports(memoryRef); + const { instance } = await WebAssembly.instantiate(bytes, { env }); + memoryRef.current = instance.exports.memory; + return { wasm: instance.exports, memoryRef }; +} + +function readString(memory, ptr, len) { + if (!ptr || len === 0) return ''; + const view = new Uint8Array(memory.buffer, ptr, len); + return textDecoder.decode(view); +} + +function processDocument(ctx, sourceText) { + const { wasm, memoryRef } = ctx; + const bytes = textEncoder.encode(sourceText); + + if (!wasm.hdoc_set_document_len(bytes.length)) { + throw 
new Error('Failed to allocate WASM document buffer'); + } + + const memoryForInput = getMemory(wasm, memoryRef); + const docPtr = wasm.hdoc_document_ptr(); + if (bytes.length > 0) { + new Uint8Array(memoryForInput.buffer, docPtr, bytes.length).set(bytes); + } + + const ok = wasm.hdoc_process() !== 0; + const memory = getMemory(wasm, memoryRef); + + const htmlPtr = wasm.hdoc_html_ptr(); + const htmlLen = wasm.hdoc_html_len(); + const html = readString(memory, htmlPtr ?? 0, htmlLen); + + const diagnostics = []; + const diagCount = wasm.hdoc_diagnostic_count(); + for (let i = 0; i < diagCount; i += 1) { + const msgPtr = wasm.hdoc_diagnostic_message_ptr(i) ?? 0; + const msgLen = wasm.hdoc_diagnostic_message_len(i); + diagnostics.push({ + line: wasm.hdoc_diagnostic_line(i), + column: wasm.hdoc_diagnostic_column(i), + message: readString(memory, msgPtr, msgLen), + }); + } + + return { ok, html, diagnostics }; +} + +function compareDiagnostics(actual, expected, label) { + assert.deepStrictEqual( + actual, + expected, + `${label} diagnostics differ.\nExpected: ${JSON.stringify(expected, null, 2)}\nActual: ${JSON.stringify(actual, null, 2)}`, + ); +} + +async function runHtmlTests(ctx) { + for (const test of htmlSnapshotTests) { + assertFileExists(test.source); + assertFileExists(test.expected); + const { ok, html, diagnostics } = processDocument(ctx, readUtf8(test.source)); + assert.equal(ok, true, `WASM processing failed for ${test.name}`); + assert.deepStrictEqual(diagnostics, [], `Expected no diagnostics for ${test.name}`); + const expectedHtml = readUtf8(test.expected); + assert.equal(html, expectedHtml, `Rendered HTML mismatch for ${test.name}`); + } +} + +async function runDiagnosticTests(ctx) { + assertFileExists(diagnosticsInput.accepted); + assertFileExists(diagnosticsInput.rejected); + assertFileExists(diagnosticsInput.expected); + + const expectations = JSON.parse(readUtf8(diagnosticsInput.expected)); + + const acceptedResult = processDocument(ctx, 
readUtf8(diagnosticsInput.accepted)); + assert.equal(acceptedResult.ok, true, 'Accepted diagnostic test should render successfully'); + compareDiagnostics(acceptedResult.diagnostics, expectations.accepted, 'Accepted'); + + const rejectedResult = processDocument(ctx, readUtf8(diagnosticsInput.rejected)); + assert.equal(rejectedResult.ok, false, 'Rejected diagnostic test should fail'); + compareDiagnostics(rejectedResult.diagnostics, expectations.rejected, 'Rejected'); +} + +async function main() { + const ctx = await instantiateWasm(); + await runHtmlTests(ctx); + await runDiagnosticTests(ctx); + console.log('WASM integration tests passed.'); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); From 1dec7142bf20a1ad697a4816b0f8231a277a2524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 13:34:55 +0100 Subject: [PATCH 109/116] Improves Wasm playground --- build.zig | 15 ++++++-- examples/guide.hdoc | 20 ++++++----- src/playground.html | 84 +++++++++++++++++++++++++++++++++---------- src/wasm.zig | 11 ++++++ test/wasm/validate.js | 2 +- 5 files changed, 102 insertions(+), 30 deletions(-) diff --git a/build.zig b/build.zig index 5e35295..fbbdf6a 100644 --- a/build.zig +++ b/build.zig @@ -33,6 +33,8 @@ const conformance_reject_files: []const []const u8 = &.{ "test/conformance/reject/title_after_content.hdoc", }; +const www_dir: std.Build.InstallDir = .{ .custom = "www" }; + pub fn build(b: *std.Build) void { // Options: const target = b.standardTargetOptions(.{}); @@ -73,7 +75,7 @@ pub fn build(b: *std.Build) void { }, }), }); - wasm_exe.root_module.export_symbol_names = &.{ + wasm_exe.root_module.export_symbol_names = comptime &.{ "hdoc_set_document_len", "hdoc_document_ptr", "hdoc_process", @@ -82,10 +84,17 @@ pub fn build(b: *std.Build) void { "hdoc_diagnostic_count", "hdoc_diagnostic_line", "hdoc_diagnostic_column", + "hdoc_diagnostic_fatal", "hdoc_diagnostic_message_ptr", 
"hdoc_diagnostic_message_len", }; - b.installArtifact(wasm_exe); + const install_wasm = b.addInstallArtifact(wasm_exe, .{ + .dest_dir = .{ .override = www_dir }, + }); + b.getInstallStep().dependOn(&install_wasm.step); + + const install_web = b.addInstallFileWithDir(b.path("src/playground.html"), www_dir, "index.html"); + b.getInstallStep().dependOn(&install_web.step); const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); @@ -193,7 +202,7 @@ pub fn build(b: *std.Build) void { const node_path = b.findProgram(&.{"node"}, &.{}) catch null; if (node_path) |node| { const wasm_validate = b.addSystemCommand(&.{ node, "test/wasm/validate.js" }); - wasm_validate.step.dependOn(b.getInstallStep()); + wasm_validate.step.dependOn(&install_wasm.step); test_step.dependOn(&wasm_validate.step); } else { std.debug.print("node not found; skipping WASM integration tests\n", .{}); diff --git a/examples/guide.hdoc b/examples/guide.hdoc index b102260..0e7e946 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -1,10 +1,10 @@ -hdoc(version="2.0"); +hdoc(version="2.0", lang="en", author="Felix \"xq\" Queißner", date="2025-12-17T13:45:00+01:00"); -h1(id="intro", lang="en") { HyperDoc 2.0 Examples } +title { HyperDoc 2.0 Examples } toc(depth="2") {} -h2(id="paragraphs") { Paragraphs and Inline Text } +h1(id="paragraphs") { Paragraphs and Inline Text } p(id="p-basic") { This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. @@ -31,7 +31,7 @@ tip "Tips provide actionable hints." quote "Quotes include sourced or emphasized wording." spoiler "Spoilers hide key story information until revealed." -h2(id="literals") { Literal and Preformatted Blocks } +h1(id="literals") { Literal and Preformatted Blocks } p: | Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. 
@@ -45,7 +45,9 @@ test { } } -h2(id="lists") { Lists } +h1(id="lists") { Lists } + +h2 { Unordered Lists } ul { li { p { Apples } } @@ -53,13 +55,15 @@ ul { li { p { \em{Cucumbers} with inline markup. } } } +h2 { Ordered Lists } + ol(first="3") { li { p { Start counting at three. } } li "Continue with a string item." li { p { Finish the sequence. } } } -h2(id="media") { Figures } +h1(id="media") { Figures } p { The image below has a caption, alt text, and a relative asset path. @@ -69,14 +73,14 @@ img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/d HyperDoc is centered inside a rounded rectangle. } -h2(id="dates") { Dates and Times } +h1(id="dates") { Dates and Times } p { The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00Z}. A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. } -h2(id="table-ref") { Tables } +h1(id="table-ref") { Tables } p { See the dedicated tables example file for row groups and colspan usage. diff --git a/src/playground.html b/src/playground.html index 6336eed..325c6b7 100644 --- a/src/playground.html +++ b/src/playground.html @@ -1,11 +1,12 @@ - + + HyperDoc Playground +
        HyperDoc Playground
        @@ -124,6 +143,7 @@ Waiting for WASM…
        +
          @@ -131,7 +151,6 @@
          -
            - + + \ No newline at end of file diff --git a/src/wasm.zig b/src/wasm.zig index 8cc0627..fb6001c 100644 --- a/src/wasm.zig +++ b/src/wasm.zig @@ -74,6 +74,7 @@ const DiagnosticView = struct { line: u32, column: u32, message: []u8, + is_fatal: bool, }; var document_buffer: std.array_list.Managed(u8) = std.array_list.Managed(u8).init(allocator); @@ -128,6 +129,10 @@ fn capture_diagnostics(source: *hyperdoc.Diagnostics) !void { .line = diag.location.line, .column = diag.location.column, .message = rendered, + .is_fatal = switch (diag.code.severity()) { + .warning => false, + .@"error" => true, + }, }); } } @@ -212,6 +217,12 @@ export fn hdoc_diagnostic_column(index: usize) u32 { return diagnostic_views.items[index].column; } +export fn hdoc_diagnostic_fatal(index: usize) bool { + if (index >= diagnostic_views.items.len) return false; + + return diagnostic_views.items[index].is_fatal; +} + export fn hdoc_diagnostic_message_ptr(index: usize) ?[*]const u8 { if (index >= diagnostic_views.items.len) return null; diff --git a/test/wasm/validate.js b/test/wasm/validate.js index 45ffd18..5e4318f 100644 --- a/test/wasm/validate.js +++ b/test/wasm/validate.js @@ -9,7 +9,7 @@ const textEncoder = new TextEncoder(); const textDecoder = new TextDecoder(); const repoRoot = path.join(__dirname, '..', '..'); -const wasmPath = path.join(repoRoot, 'zig-out', 'bin', 'hyperdoc_wasm.wasm'); +const wasmPath = path.join(repoRoot, 'zig-out', 'www', 'hyperdoc_wasm.wasm'); const htmlSnapshotTests = [ { From aa26a71709b382997136bc29c25dc9635663a44d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 13:38:04 +0100 Subject: [PATCH 110/116] Fixes playground for long text --- src/playground.html | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/playground.html b/src/playground.html index 325c6b7..03aa96e 100644 --- a/src/playground.html +++ b/src/playground.html @@ -10,12 +10,19 @@ font-family: 
system-ui, -apple-system, "Segoe UI", sans-serif; } + html { + height: 100%; + } + body { margin: 0; - height: 100vh; + height: 100%; + min-height: 100vh; + min-height: 100dvh; display: flex; flex-direction: column; background: #f8f9fb; + overflow: hidden; } header { @@ -33,6 +40,8 @@ gap: 12px; padding: 12px; box-sizing: border-box; + min-height: 0; + overflow: hidden; } .pane { @@ -42,6 +51,7 @@ border-radius: 8px; background: #ffffff; overflow: hidden; + min-height: 0; } .pane-header { @@ -64,6 +74,8 @@ font-size: 14px; box-sizing: border-box; outline: none; + min-height: 0; + overflow: auto; } .preview { @@ -71,6 +83,7 @@ padding: 12px; overflow: auto; box-sizing: border-box; + min-height: 0; } .preview.outdated { @@ -329,4 +342,4 @@ - \ No newline at end of file + From 5d43542dde9c8f9276aa83065d27eeb4601543c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 14:44:14 +0100 Subject: [PATCH 111/116] Improves example documents and playground --- examples/featurematrix.hdoc | 10 +- examples/guide.hdoc | 109 ++++++++-- src/playground.html | 399 +++++++++++++++++++++++++++++++++++- src/render/html5.zig | 2 +- test/snapshot/tables.html | 4 +- 5 files changed, 490 insertions(+), 34 deletions(-) diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index d6dd2a3..bb2e5fc 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -1,6 +1,6 @@ hdoc(version="2.0"); -h1 { Small Computer Feature Matrix } +title { Small Computer Feature Matrix } table { columns { @@ -63,7 +63,7 @@ table { td "✅" td "✅" td "❌" - td { p { ❌\sup{1} } } + td { p { ❌\footnote{Neotron Pico uses PS/2 for mouse/keyboard and VGA for video.} } } td "❌" } row(title="Modular Design") { @@ -71,7 +71,7 @@ table { td "❌" td "❌" td "✅" - td { p { ✅\sup{2} } } + td { p { ✅\footnote{Cody Computer has a single cartridge that can be added.} } } } row(title="Full Documentation") { td "✅" @@ -96,6 +96,6 @@ table { } } -p { 
\sup{1}: Neotron Pico uses PS/2 for mouse/keyboard and VGA for video. } +footnotes; + -p { \sup{2}: Cody Computer has a single cartridge that can be added. } diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 0e7e946..adac6bb 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -24,26 +24,7 @@ p { Links can target \ref(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } -note "Notes highlight supportive information." -warning "Warnings call out risky behavior." -danger "Danger paragraphs emphasize critical hazards." -tip "Tips provide actionable hints." -quote "Quotes include sourced or emphasized wording." -spoiler "Spoilers hide key story information until revealed." - -h1(id="literals") { Literal and Preformatted Blocks } - -p: -| Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. -| They are introduced by a trailing colon. -| You can capture snippets without escaping anything. - -pre(id="code-sample", syntax="zig") { -test { - const message = "HyperDoc"; - const answer = \mono(syntax="zig"){42}; -} -} +footnotes; h1(id="lists") { Lists } @@ -55,7 +36,7 @@ ul { li { p { \em{Cucumbers} with inline markup. } } } -h2 { Ordered Lists } +h2(id="ol") { Ordered Lists } ol(first="3") { li { p { Start counting at three. } } @@ -63,6 +44,50 @@ ol(first="3") { li { p { Finish the sequence. } } } +h1(id="admonitions") "Admonitions" +tip { + p { Tips provide actionable hints\footnote{You can also write footnotes this way}. } + p { Tips can also include multiple paragraphs and even lists: } + ul { + li "Item 1" + li "Item 2" + } +} + +note "Notes highlight supportive information." + +warning { + p { Warnings call out risky behavior\footnote(key="shared"){You can even share the same footnote in multiple locations.,}. } +} + +danger { + p { Danger paragraphs emphasize critical hazards\footnote(ref="shared");. 
} +} + +quote { + p "Quotes include sourced or emphasized wording." + p { - Unknown author } +} + +spoiler "Spoilers hide key story information until revealed." + +footnotes; + +h1(id="literals") { Literal and Preformatted Blocks } + +p: +| Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. +| They are introduced by a trailing colon. +| You can capture snippets without escaping anything. + +pre(id="code-sample", syntax="zig") {test { + const message = "HyperDoc"; + const answer = \mono(syntax="zig"){42}; +} +} + + + h1(id="media") { Figures } p { @@ -80,8 +105,50 @@ p { A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. } +h1 "References" + +p { + In chapter \ref(ref="paragraphs"); we've read about how we can format paragraphs. + This chapter here teaches about references. +} + +p { + References to another chapter can use the numeric value (like \ref(ref="ol", fmt="index");), use only the name (like \ref(ref="table-ref",fmt="name");) or we can use a \ref(ref="table-ref"){custom name like with regular links}. +} + h1(id="table-ref") { Tables } p { See the dedicated tables example file for row groups and colspan usage. } + +table(id="inventory") { + columns { + td "Item" + td "Quantity" + td "Notes" + } + group { Fresh Produce } + row(title="Fruit") { + td "Apples" + td "12" + td { p { Delivered on \date(fmt="short"){2025-02-08}. } } + } + row(title="Vegetables") { + td "Carrots" + td "7" + td { p { Store at \time(fmt="rough"){08:00:00Z}. } } + } + group { Pantry } + row(title="Dry Goods") { + td "Rice" + td "3" + td { p { Packed on \datetime(fmt="relative"){2025-02-08T08:00:00Z}. } } + } + row(title="Bulk") { + td(colspan="2") { p { This cell spans two columns. 
} } + td "Requires label" + } +} + + diff --git a/src/playground.html b/src/playground.html index 03aa96e..c1000e0 100644 --- a/src/playground.html +++ b/src/playground.html @@ -144,6 +144,252 @@ color: #ecaa04; font-weight: 600; } + + #preview { + color: #111827; + font-size: 15px; + line-height: 1.55; + } + + #preview :is(h1, h2, h3, h4, h5, h6) { + margin: 0.9em 0 0.35em; + line-height: 1.25; + letter-spacing: -0.01em; + } + + #preview :is(p, ul, ol, figure, pre, blockquote) { + margin: 0 0 0.9em; + } + + #preview :is(ul, ol) { + padding-left: 1.35em; + } + + #preview li { + margin: 0.25em 0; + } + + #preview a { + color: #2563eb; + text-decoration-thickness: 1px; + text-underline-offset: 2px; + } + + #preview a:hover { + text-decoration-thickness: 2px; + } + + #preview img { + max-width: 100%; + height: auto; + } + + #preview .hdoc-doc-meta { + margin: 0.25em 0 0; + color: #6b7280; + font-size: 0.95em; + } + + #preview code.hdoc-code { + font-family: ui-monospace, "JetBrains Mono", Consolas, "Courier New", monospace; + font-size: 0.95em; + background: #f3f4f6; + border: 1px solid #e5e7eb; + padding: 0.08em 0.32em; + border-radius: 6px; + } + + #preview pre { + padding: 12px 14px; + border-radius: 10px; + background: #0b1220; + color: #e5e7eb; + border: 1px solid #111827; + overflow: auto; + } + + #preview pre code.hdoc-code { + white-space: pre; + background: transparent; + border: none; + padding: 0; + border-radius: 0; + color: inherit; + font-size: 13px; + } + + #preview pre code.hdoc-code code.hdoc-code { + background: rgba(148, 163, 184, 0.14); + border: 1px solid rgba(148, 163, 184, 0.18); + padding: 0.05em 0.25em; + } + + #preview :is(.hdoc-note, .hdoc-warning, .hdoc-danger, .hdoc-tip) { + margin: 1em 0; + border-radius: 12px; + padding: 12px 12px 12px 44px; + border: 1px solid var(--hdoc-callout-border); + background: var(--hdoc-callout-bg); + color: var(--hdoc-callout-fg); + position: relative; + } + + #preview :is(.hdoc-note, .hdoc-warning, 
.hdoc-danger, .hdoc-tip)::before { + content: var(--hdoc-callout-icon); + position: absolute; + left: 12px; + top: 12px; + line-height: 1; + } + + #preview .hdoc-note { + --hdoc-callout-icon: "ℹ️"; + --hdoc-callout-bg: #eff6ff; + --hdoc-callout-border: #bfdbfe; + --hdoc-callout-fg: #0f172a; + } + + #preview .hdoc-warning { + --hdoc-callout-icon: "⚠️"; + --hdoc-callout-bg: #fffbeb; + --hdoc-callout-border: #fcd34d; + --hdoc-callout-fg: #78350f; + } + + #preview .hdoc-danger { + --hdoc-callout-icon: "🛑"; + --hdoc-callout-bg: #fef2f2; + --hdoc-callout-border: #fecaca; + --hdoc-callout-fg: #7f1d1d; + } + + #preview .hdoc-tip { + --hdoc-callout-icon: "💡"; + --hdoc-callout-bg: #ecfdf5; + --hdoc-callout-border: #a7f3d0; + --hdoc-callout-fg: #064e3b; + } + + #preview :is(.hdoc-note, .hdoc-warning, .hdoc-danger, .hdoc-tip, .hdoc-quote, .hdoc-spoiler) > :first-child { + margin-top: 0; + } + + #preview :is(.hdoc-note, .hdoc-warning, .hdoc-danger, .hdoc-tip, .hdoc-quote, .hdoc-spoiler) > :last-child { + margin-bottom: 0; + } + + #preview .hdoc-quote { + margin: 1em 0; + border-radius: 12px; + padding: 12px 12px 12px 14px; + border-left: 4px solid #10b981; + background: #f0fdf4; + color: #064e3b; + } + + #preview .hdoc-spoiler { + margin: 1em 0; + border-radius: 12px; + padding: 12px; + border: 1px dashed #d1d5db; + background: #f3f4f6; + position: relative; + } + + #preview .hdoc-spoiler::before { + content: "Spoiler (hover to reveal)"; + display: block; + font-size: 12px; + color: #6b7280; + margin-bottom: 8px; + } + + #preview .hdoc-spoiler > * { + filter: blur(6px); + transition: filter 120ms ease; + } + + #preview .hdoc-spoiler:hover > *, + #preview .hdoc-spoiler:focus-within > * { + filter: none; + } + + #preview table { + width: 100%; + border-collapse: separate; + border-spacing: 0; + border: 1px solid #e5e7eb; + border-radius: 12px; + background: #ffffff; + overflow: hidden; + font-size: 14px; + } + + #preview :is(th, td) { + padding: 8px 10px; + border-bottom: 1px 
solid #e5e7eb; + border-right: 1px solid #e5e7eb; + vertical-align: top; + text-align: left; + } + + #preview tr > :last-child { + border-right: none; + } + + #preview tbody tr:last-child > :is(th, td) { + border-bottom: none; + } + + #preview thead th { + background: #f9fafb; + font-weight: 650; + color: #111827; + } + + #preview th[scope="row"] { + background: #f9fafb; + font-weight: 600; + white-space: nowrap; + } + + #preview th[scope="rowgroup"] { + background: #eef2ff; + color: #312e81; + font-weight: 700; + } + + #preview tbody tr:nth-child(even) td { + background: #fcfcfd; + } + + #preview :is(th, td):empty { + padding: 0; + } + + #preview table :is(p, ul, ol) { + margin: 0; + } + + #preview table :is(ul, ol) { + padding-left: 1.2em; + } + + #preview sup.hdoc-footnote-ref { + font-size: 0.75em; + } + + #preview .hdoc-footnotes { + margin: 1.2em 0 0; + padding-top: 0.8em; + border-top: 1px solid #e5e7eb; + color: #374151; + } + + #preview .hdoc-footnote-list { + margin: 0; + padding-left: 1.3em; + } @@ -177,14 +423,157 @@ const decoder = new TextDecoder(); const wasmUrl = "./hyperdoc_wasm.wasm"; - const initialText = `hdoc(version="2.0", lang="en"); -title { - HyperDoc Playground + const initialText = `hdoc(version="2.0", lang="en", author="Felix \\"xq\\" Queißner", date="2025-12-17T13:45:00+01:00"); + +title { HyperDoc 2.0 Examples } + +toc(depth="2") {} + +h1(id="paragraphs") { Paragraphs and Inline Text } + +p(id="p-basic") { + This paragraph shows plain text mixed with \\em{emphasis}, \\strike{strike-through}, and \\mono{monospaced} spans. +} + +p(lang="de") { + Dieser Absatz zeigt das Attribut \\mono{lang} auf Blockebene. +} + +p "This paragraph uses a string literal body instead of a list." + +p { + Inline scripts support \\mono(syntax="zig"){const version = "2.0";} as well as sub/superscripts like H\\sub{2}O and x\\sup{2}. } p { - Type HyperDoc content on the left to render HTML here. 
-}`; + Links can target \\ref(ref="fig-diagram"){other blocks} or external \\link(uri="https://ashet.computer"){resources}. +} + +footnotes; + +h1(id="lists") { Lists } + +h2 { Unordered Lists } + +ul { + li { p { Apples } } + li "Bananas" + li { p { \em{Cucumbers} with inline markup. } } +} + +h2(id="ol") { Ordered Lists } + +ol(first="3") { + li { p { Start counting at three. } } + li "Continue with a string item." + li { p { Finish the sequence. } } +} + +h1(id="admonitions") "Admonitions" +tip { + p { Tips provide actionable hints\\footnote{You can also write footnotes this way}. } + p { Tips can also include multiple paragraphs and even lists: } + ul { + li "Item 1" + li "Item 2" + } +} + +note "Notes highlight supportive information." + +warning { + p { Warnings call out risky behavior\\footnote(key="shared"){You can even share the same footnote in multiple locations.,}. } +} + +danger { + p { Danger paragraphs emphasize critical hazards\\footnote(ref="shared");. } +} + +quote { + p "Quotes include sourced or emphasized wording." + p { - Unknown author } +} + +spoiler "Spoilers hide key story information until revealed." + +footnotes; + +h1(id="literals") { Literal and Preformatted Blocks } + +p: +| Literal blocks keep exact content and skip parsing, so characters like \\{ \\} are not special. +| They are introduced by a trailing colon. +| You can capture snippets without escaping anything. + +pre(id="code-sample", syntax="zig") {test { + const message = "HyperDoc"; + const answer = \\mono(syntax="zig"){42}; +} +} + +h1(id="media") { Figures } + +p { + The image below has a caption, alt text, and a relative asset path. +} + +img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/diagram.svg") { + HyperDoc is centered inside a rounded rectangle. +} + +h1(id="dates") { Dates and Times } + +p { + The event was announced on \\date(fmt="long"){2025-12-17} at \\time(fmt="short"){13:45:00Z}. 
+ A combined timestamp looks like \\datetime(fmt="iso"){2025-12-17T13:45:00Z}. +} + +h1 "References" + +p { + In chapter \\ref(ref="paragraphs"); we've read about how we can format paragraphs. + This chapter here teaches about references. +} + +p { + References to another chapter can use the numeric value (like \\ref(ref="ol", fmt="index");), use only the name (like \\ref(ref="table-ref",fmt="name");) or we can use a \\ref(ref="table-ref"){custom name like with regular links}. +} + +h1(id="table-ref") { Tables } + +p { + See the dedicated tables example file for row groups and colspan usage. +} + +table(id="inventory") { + columns { + td "Item" + td "Quantity" + td "Notes" + } + group { Fresh Produce } + row(title="Fruit") { + td "Apples" + td "12" + td { p { Delivered on \\date(fmt="short"){2025-02-08}. } } + } + row(title="Vegetables") { + td "Carrots" + td "7" + td { p { Store at \\time(fmt="rough"){08:00:00Z}. } } + } + group { Pantry } + row(title="Dry Goods") { + td "Rice" + td "3" + td { p { Packed on \\datetime(fmt="relative"){2025-02-08T08:00:00Z}. } } + } + row(title="Bulk") { + td(colspan="2") { p { This cell spans two columns. } } + td "Requires label" + } +} +`; sourceField.value = initialText; diff --git a/src/render/html5.zig b/src/render/html5.zig index a7acf35..a7ac6e9 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -491,7 +491,7 @@ const RenderContext = struct { try writeIndent(ctx.writer, indent + indent_step); try writeStartTag(ctx.writer, "th", .regular, .{ - .scope = "colgroup", + .scope = "rowgroup", .colspan = @as(u32, @intCast(@max(@as(usize, 1), column_count))), }); try ctx.renderSpans(group.content); diff --git a/test/snapshot/tables.html b/test/snapshot/tables.html index 9384ec5..9ffe48c 100644 --- a/test/snapshot/tables.html +++ b/test/snapshot/tables.html @@ -21,7 +21,7 @@

            §1 Table Coverage

          - + @@ -43,7 +43,7 @@

          §1 Table Coverage

          - + From 4535359e2f1466401bb25548b85f9acb0b5a8853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 14:48:04 +0100 Subject: [PATCH 112/116] Adds pages rendering workflow --- .github/workflows/pages.yml | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/pages.yml diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 0000000..7704611 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,42 @@ +name: Pages + +on: + push: + branches: [hdoc-2.0] + +jobs: + build: + runs-on: ubuntu-latest + # Grant GITHUB_TOKEN the permissions required to make a Pages deployment + permissions: + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source + environment: + name: github-pages + url: ${{steps.deployment.outputs.page_url}} + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Zig + uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + + - name: Build + run: | + zig build install + + - name: Test + run: | + zig build test + + - name: Upload static files as artifact + id: pages-upload + uses: actions/upload-pages-artifact@v3 + with: + path: zig-out/www + + - name: Deploy artifact + id: deployment + uses: actions/deploy-pages@v4 From 1668ff11d68881b3ab3a8f40dcfabec52b816f89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 10:10:09 +0100 Subject: [PATCH 113/116] Add wasm LSP stub and extension CI workflow --- .github/workflows/extension-ci.yml | 75 + build.zig | 17 + src/wasm-lsp.zig | 9 + vscode-ext/.gitignore | 6 + vscode-ext/justfile | 13 + vscode-ext/language-configuration.json | 44 + vscode-ext/package-lock.json | 3140 ++++++++++++++++++ vscode-ext/package.json | 76 + vscode-ext/src/extension.ts | 141 + vscode-ext/src/utils.ts | 98 + vscode-ext/syntaxes/hyperdoc.tmLanguage.json | 74 + 
vscode-ext/test/utils.test.ts | 79 + vscode-ext/tsconfig.json | 26 + 13 files changed, 3798 insertions(+) create mode 100644 .github/workflows/extension-ci.yml create mode 100644 src/wasm-lsp.zig create mode 100644 vscode-ext/.gitignore create mode 100644 vscode-ext/justfile create mode 100644 vscode-ext/language-configuration.json create mode 100644 vscode-ext/package-lock.json create mode 100644 vscode-ext/package.json create mode 100644 vscode-ext/src/extension.ts create mode 100644 vscode-ext/src/utils.ts create mode 100644 vscode-ext/syntaxes/hyperdoc.tmLanguage.json create mode 100644 vscode-ext/test/utils.test.ts create mode 100644 vscode-ext/tsconfig.json diff --git a/.github/workflows/extension-ci.yml b/.github/workflows/extension-ci.yml new file mode 100644 index 0000000..6393547 --- /dev/null +++ b/.github/workflows/extension-ci.yml @@ -0,0 +1,75 @@ +name: Extension CI + +on: + pull_request: + branches: [master, hdoc-2.0] + push: + branches: [master, hdoc-2.0] + +jobs: + build-and-package: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Zig + uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + + - name: Build Zig artifacts + run: zig build install + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 18 + + - name: Install extension dependencies + run: npm ci + working-directory: vscode-ext + + - name: Copy wasm artifacts into extension bundle + run: | + mkdir -p vscode-ext/wasm + cp zig-out/www/hyperdoc_wasm.wasm vscode-ext/wasm/ || true + cp zig-out/www/hyperdoc_wasm_lsp.wasm vscode-ext/wasm/ + + - name: Build extension + run: npm run compile + working-directory: vscode-ext + + - name: Test extension + run: npm test + working-directory: vscode-ext + + - name: Package extension + run: | + npm run package + mv *.vsix hyperdoc-vscode.vsix + working-directory: vscode-ext + + - name: Upload packaged extension + uses: actions/upload-artifact@v4 + with: + name: hyperdoc-vscode.vsix 
+ path: vscode-ext/hyperdoc-vscode.vsix + + publish: + needs: build-and-package + if: github.event_name == 'push' && github.ref == 'refs/heads/hdoc-2.0' + runs-on: ubuntu-latest + environment: + name: vscode-marketplace + steps: + - name: Download packaged extension + uses: actions/download-artifact@v4 + with: + name: hyperdoc-vscode.vsix + path: artifacts + + - name: Publish to VS Code Marketplace + run: npx --yes vsce publish --packagePath artifacts/hyperdoc-vscode.vsix -p "$VSCE_PAT" + env: + VSCE_PAT: ${{ secrets.VSCE_PAT }} diff --git a/build.zig b/build.zig index fbbdf6a..3c33a0f 100644 --- a/build.zig +++ b/build.zig @@ -93,6 +93,23 @@ pub fn build(b: *std.Build) void { }); b.getInstallStep().dependOn(&install_wasm.step); + const wasm_lsp_exe = b.addExecutable(.{ + .name = "hyperdoc_wasm_lsp", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/wasm-lsp.zig"), + .target = wasm_target, + .optimize = optimize, + .single_threaded = true, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, + }), + }); + const install_wasm_lsp = b.addInstallArtifact(wasm_lsp_exe, .{ + .dest_dir = .{ .override = www_dir }, + }); + b.getInstallStep().dependOn(&install_wasm_lsp.step); + const install_web = b.addInstallFileWithDir(b.path("src/playground.html"), www_dir, "index.html"); b.getInstallStep().dependOn(&install_web.step); diff --git a/src/wasm-lsp.zig b/src/wasm-lsp.zig new file mode 100644 index 0000000..0160f40 --- /dev/null +++ b/src/wasm-lsp.zig @@ -0,0 +1,9 @@ +const std = @import("std"); + +pub export fn _start() void {} + +pub export fn hyperdoc_lsp_ping() void { + // Placeholder entrypoint for a wasm-based language server. + // Real initialization will be wired once the wasm server is implemented. 
+ std.mem.doNotOptimizeAway(@as(u32, 0)); +} diff --git a/vscode-ext/.gitignore b/vscode-ext/.gitignore new file mode 100644 index 0000000..b5d8d59 --- /dev/null +++ b/vscode-ext/.gitignore @@ -0,0 +1,6 @@ +node_modules +out +*.vsix +.vscode-test +.DS_Store +wasm/ diff --git a/vscode-ext/justfile b/vscode-ext/justfile new file mode 100644 index 0000000..6cdaa8a --- /dev/null +++ b/vscode-ext/justfile @@ -0,0 +1,13 @@ +default: setup build test + +setup: + npm install + +build: + npm run compile + +test: + npm test + +package: + npm run package diff --git a/vscode-ext/language-configuration.json b/vscode-ext/language-configuration.json new file mode 100644 index 0000000..de94959 --- /dev/null +++ b/vscode-ext/language-configuration.json @@ -0,0 +1,44 @@ +{ + "wordPattern": "(-?\\d*\\.\\d\\w*)|([^\\`\\~\\!\\@\\#\\%\\^\\&\\*\\(\\)\\=\\+\\[\\{\\]\\}\\\\\\|\\;\\:\\'\\\"\\,\\.\\<\\>\\/?\\s]+)", + "brackets": [ + [ + "{", + "}" + ], + [ + "(", + ")" + ] + ], + "autoClosingPairs": [ + { + "open": "{", + "close": "}" + }, + { + "open": "(", + "close": ")" + }, + { + "open": "\"", + "close": "\"", + "notIn": [ + "string" + ] + } + ], + "surroundingPairs": [ + [ + "{", + "}" + ], + [ + "(", + ")" + ], + [ + "\"", + "\"" + ] + ] +} diff --git a/vscode-ext/package-lock.json b/vscode-ext/package-lock.json new file mode 100644 index 0000000..4bfd06c --- /dev/null +++ b/vscode-ext/package-lock.json @@ -0,0 +1,3140 @@ +{ + "name": "hyperdoc-vscode", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "hyperdoc-vscode", + "version": "0.0.1", + "license": "MIT", + "devDependencies": { + "@types/mocha": "^10.0.6", + "@types/node": "^18.19.0", + "@types/vscode": "1.85.0", + "mocha": "^10.4.0", + "ts-node": "^10.9.2", + "typescript": "^5.4.0", + "vsce": "^2.15.0", + "vscode": "^1.1.37", + "vscode-languageclient": "^9.0.1" + }, + "engines": { + "vscode": "^1.85.0" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": 
"0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@tootallnate/once": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-1.1.2.tgz", + "integrity": "sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", + "integrity": 
"sha512-UCYBaeFvM11aU2y3YPZ//O5Rhj+xKyzy7mvcIoAjASbigy8mHMryP5cK7dgjlz2hWxh1g5pLw084E0a/wlUSFQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/mocha": { + "version": "10.0.10", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.10.tgz", + "integrity": "sha512-xPyYSz1cMPnJQhl0CLMH68j3gprKZaTjG3s5Vi+fDgx+uhG9NOXwbVt52eFS8ECyXhyKcjDLCBEqBExKuiZb7Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/vscode": { + "version": "1.85.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.85.0.tgz", + "integrity": "sha512-CF/RBon/GXwdfmnjZj0WTUMZN5H6YITOfBCP4iEZlOtVQXuzw6t7Le7+cR+7JzdMrnlm7Mfp49Oj2TuSXIWo3g==", + "dev": true, + "license": "MIT" + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": 
"https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.4", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", + "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/ansi-colors": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", + "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^1.9.0" + 
}, + "engines": { + "node": ">=4" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, + "license": "Python-2.0" + }, + "node_modules/azure-devops-node-api": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/azure-devops-node-api/-/azure-devops-node-api-11.2.0.tgz", + "integrity": "sha512-XdiGPhrpaT5J8wdERRKs5g8E0Zy1pvOYTli7z9E8nmOn3YGp4FhtjhrOyFmX/8veWCwdI69mCHKJw6l+4J/bHA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tunnel": "0.0.6", + "typed-rest-client": "^1.8.4" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "dev": true, + "license": "ISC" + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + 
}, + "node_modules/browser-stdout": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz", + "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", + "dev": true, + "license": "ISC" + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + 
"resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/cheerio": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.1.2.tgz", + "integrity": "sha512-IkxPpb5rS/d1IiLbHMgfPuS0FgiWTtFIm/Nj+2woXDLTZ7fOT2eqzgYbdMlLweqlHbsZjxEChoVK+7iph7jyQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.0.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.12.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + 
"node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "dev": true, + "license": "ISC" + }, + "node_modules/cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": 
"sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "dev": true, + "license": "MIT" + }, + "node_modules/commander": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", + "integrity": "sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + 
"integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decamelize": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-4.0.0.tgz", + "integrity": "sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": 
"https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/diff": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", + "integrity": "sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dev": true, + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": 
"https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "dev": true, + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "dev": true, + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": 
"https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es6-promise": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.8.tgz", + "integrity": "sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==", + "dev": true, + "license": "MIT" + }, + "node_modules/es6-promisify": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/es6-promisify/-/es6-promisify-5.0.0.tgz", + "integrity": "sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"es6-promise": "^4.0.3" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "dev": true, + "license": "(MIT OR WTFPL)", + "engines": { + "node": ">=6" + } + }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/flat": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", + "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", + "dev": true, + "license": "BSD-3-Clause", + "bin": { + "flat": "cli.js" + } + }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "dev": true, + "license": "MIT" + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true, + "license": "ISC" + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": 
"https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dev": true, + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "dev": true, + "license": "MIT" + }, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Glob versions prior to v9 are no longer supported", + 
"dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/growl": { + "version": "1.10.5", + "resolved": "https://registry.npmjs.org/growl/-/growl-1.10.5.tgz", + "integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4.x" + } + }, + "node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + 
"funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/hosted-git-info": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-4.1.0.tgz", + "integrity": "sha512-kyCuEOWjJqZuDbRHzL8V93NzQhwIB71oFWSyzVo+KPZI+pnQPPxucdkrOZvkLRnrf5URsQM+IJ09Dw29cRALIA==", + "dev": true, + "license": "ISC", + "dependencies": { + "lru-cache": "^6.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/htmlparser2": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz", + "integrity": "sha512-TwAZM+zE5Tq3lrEHvOlvwgj1XLWQCtaaibSN11Q+gGBAS7Y1uZSWwXXRe4iF6OXnaq1riyQAPFOBtYc77Mxq0g==", + "dev": true, + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.1", + "entities": "^6.0.0" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + 
"node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/http-proxy-agent": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz", + "integrity": "sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@tootallnate/once": "1", + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dev": true, + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": 
"sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "dev": true, + "license": "ISC", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "dev": true, + "license": "ISC" + }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", 
+ "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-plain-obj": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", + "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-unicode-supported": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", + "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/keytar": { + "version": "7.9.0", + "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz", + "integrity": 
"sha512-VPD8mtVtm5JNtA2AErl6Chp06JBfy7diFQ7TQQhdpWOl6MrCRB+eRbvAZUsbGQS9kiMq0coJsy0W0vHpDCkWsQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^4.3.0", + "prebuild-install": "^7.0.1" + } + }, + "node_modules/leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/linkify-it": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.3.tgz", + "integrity": "sha512-ynTsyrFSdE5oZ/O9GEf00kPngmOfVwazR5GKDq6EYfhlpFug3J2zybX56a2PRRpc9P+FuSoGNAwjlbDs9jJBPQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "uc.micro": "^1.0.1" + } + }, + "node_modules/locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-locate": "^5.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-symbols": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", + "integrity": "sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^4.1.0", + "is-unicode-supported": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-symbols/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": 
"sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/log-symbols/node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/log-symbols/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/log-symbols/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/log-symbols/node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/log-symbols/node_modules/supports-color": { + "version": "7.2.0", + "resolved": 
"https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/markdown-it": { + "version": "12.3.2", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-12.3.2.tgz", + "integrity": "sha512-TchMembfxfNVpHkbtriWltGWc+m3xszaRD0CZup7GFFhzIgQqxIfn3eGj1yZpfuflzPvfkt611B2Q/Bsk1YnGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1", + "entities": "~2.1.0", + "linkify-it": "^3.0.1", + "mdurl": "^1.0.1", + "uc.micro": "^1.0.5" + }, + "bin": { + "markdown-it": "bin/markdown-it.js" + } + }, + "node_modules/markdown-it/node_modules/entities": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.1.0.tgz", + "integrity": "sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==", + "dev": true, + "license": "BSD-2-Clause", + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": 
"sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mdurl": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", + "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==", + "dev": true, + "license": "MIT" + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "dev": true, + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/mkdirp": { + "version": "0.5.1", + "resolved": 
"https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "integrity": "sha512-SknJC52obPfGQPnjIkXbmA6+5H15E+fR+E4iR2oQ3zzCLbd7/ONua69R/Gw7AgkTLsRG+r5fzksYwWe1AgTyWA==", + "deprecated": "Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)", + "dev": true, + "license": "MIT", + "dependencies": { + "minimist": "0.0.8" + }, + "bin": { + "mkdirp": "bin/cmd.js" + } + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "dev": true, + "license": "MIT" + }, + "node_modules/mkdirp/node_modules/minimist": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "integrity": "sha512-miQKw5Hv4NS1Psg2517mV4e4dYNaO3++hjAvLOAzKqZ61rH8NS1SK+vbfBWZ5PY/Me/bEWhUwqMghEW5Fb9T7Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/mocha": { + "version": "10.8.2", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.8.2.tgz", + "integrity": "sha512-VZlYo/WE8t1tstuRmqgeyBgCbJc/lEdopaa+axcKzTBJ+UIdlAB9XnmvTCAH4pwR4ElNInaedhEBmZD8iCSVEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-colors": "^4.1.3", + "browser-stdout": "^1.3.1", + "chokidar": "^3.5.3", + "debug": "^4.3.5", + "diff": "^5.2.0", + "escape-string-regexp": "^4.0.0", + "find-up": "^5.0.0", + "glob": "^8.1.0", + "he": "^1.2.0", + "js-yaml": "^4.1.0", + "log-symbols": "^4.1.0", + "minimatch": "^5.1.6", + "ms": "^2.1.3", + "serialize-javascript": "^6.0.2", + "strip-json-comments": "^3.1.1", + "supports-color": "^8.1.1", + "workerpool": "^6.5.1", + "yargs": "^16.2.0", + "yargs-parser": "^20.2.9", + "yargs-unparser": "^2.0.0" + }, + "bin": { + "_mocha": "bin/_mocha", + "mocha": "bin/mocha.js" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + 
"node_modules/mocha/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/mocha/node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mocha/node_modules/glob": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", + "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", + "deprecated": "Glob versions prior to v9 are no longer supported", + "dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^5.0.1", + "once": "^1.3.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/mocha/node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/mocha/node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": 
"sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/mocha/node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mocha/node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/mute-stream": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.8.tgz", + "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", + "dev": true, + "license": "ISC" + }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/node-abi": { + "version": "3.85.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.85.0.tgz", + "integrity": "sha512-zsFhmbkAzwhTft6nd3VxcG0cvJsT70rL+BIGHWVq5fi6MwGrHwzqKaxXE+Hl2GmnGItnDKPPkO5/LQqjVkIdFg==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-abi/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-addon-api": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz", + "integrity": "sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": 
"sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-limit": "^3.0.2" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parse-semver": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/parse-semver/-/parse-semver-1.1.1.tgz", + "integrity": "sha512-Eg1OuNntBMH0ojvEKSrvDSnwLmvVuUOSdylH/pSCPNMIspLlweJyIWXCE+k/5hm3cj/EBUYwmWkjhBALNP4LXQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^5.1.0" + } + }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "dev": 
true, + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "dev": true, + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": 
"sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "dev": true, + "license": "MIT" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "dev": true, + "license": "MIT", + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/pump": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", + "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/qs": { + "version": "6.14.1", + "resolved": 
"https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", + "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/randombytes": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "safe-buffer": "^5.1.0" + } + }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "dev": true, + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/read": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/read/-/read-1.0.7.tgz", + "integrity": "sha512-rSOKNYUmaxy0om1BNjMN4ezNT6VKK+2xF4GBhc81mkH7L60i6dp8qPYrkndNLT3QPphoII3maL9PVC9XmhHwVQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "mute-stream": "~0.0.4" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dev": true, + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/readdirp": { + "version": 
"3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true, + "license": "MIT" + }, + "node_modules/sax": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.4.3.tgz", + "integrity": "sha512-yqYn1JhPczigF94DMS+shiDMjDowYO6y9+wB/4WgO0Y19jWYk0lQ4tuG5KI7kj4FTp1wxPj5IFfcrz/s1c3jjQ==", + "dev": true, + "license": "BlueOak-1.0.0" + }, + "node_modules/semver": { + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", + "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==", + 
"dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver" + } + }, + "node_modules/serialize-javascript": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "randombytes": "^2.1.0" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + 
"funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": 
"sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.21", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz", + "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dev": true, + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": 
"sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/tmp": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz", + "integrity": "sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.14" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": 
true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/ts-node/node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/tunnel": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.6.11 <=0.7.0 || >=0.7.3" + } + }, + "node_modules/tunnel-agent": { + 
"version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, + "node_modules/typed-rest-client": { + "version": "1.8.11", + "resolved": "https://registry.npmjs.org/typed-rest-client/-/typed-rest-client-1.8.11.tgz", + "integrity": "sha512-5UvfMpd1oelmUPRbbaVnq+rHP7ng2cE4qoQkQeAqxRL6PklkxsM0g32/HL0yfvruK6ojQ5x8EE+HF4YV6DtuCA==", + "dev": true, + "license": "MIT", + "dependencies": { + "qs": "^6.9.1", + "tunnel": "0.0.6", + "underscore": "^1.12.1" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/uc.micro": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", + "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "dev": true, + "license": "MIT" + }, + "node_modules/underscore": { + "version": "1.13.7", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz", + "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==", + "dev": true, + "license": "MIT" + }, + "node_modules/undici": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.16.0.tgz", + "integrity": "sha512-QEg3HPMll0o3t2ourKwOeUAZ159Kn9mx5pnzHRQO8+Wixmh88YdZRiIwat0iNzNNXn0yoEtXJqFpyW7eM8BV7g==", + "dev": true, + "license": "MIT", + "engines": { + "node": 
">=20.18.1" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true, + "license": "MIT" + }, + "node_modules/url-join": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", + "integrity": "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==", + "dev": true, + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "dev": true, + "license": "MIT" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, + "node_modules/vsce": { + "version": "2.15.0", + "resolved": "https://registry.npmjs.org/vsce/-/vsce-2.15.0.tgz", + "integrity": "sha512-P8E9LAZvBCQnoGoizw65JfGvyMqNGlHdlUXD1VAuxtvYAaHBKLBdKPnpy60XKVDAkQCfmMu53g+gq9FM+ydepw==", + "deprecated": "vsce has been renamed to @vscode/vsce. 
Install using @vscode/vsce instead.", + "dev": true, + "license": "MIT", + "dependencies": { + "azure-devops-node-api": "^11.0.1", + "chalk": "^2.4.2", + "cheerio": "^1.0.0-rc.9", + "commander": "^6.1.0", + "glob": "^7.0.6", + "hosted-git-info": "^4.0.2", + "keytar": "^7.7.0", + "leven": "^3.1.0", + "markdown-it": "^12.3.2", + "mime": "^1.3.4", + "minimatch": "^3.0.3", + "parse-semver": "^1.1.1", + "read": "^1.0.7", + "semver": "^5.1.0", + "tmp": "^0.2.1", + "typed-rest-client": "^1.8.4", + "url-join": "^4.0.1", + "xml2js": "^0.4.23", + "yauzl": "^2.3.1", + "yazl": "^2.2.2" + }, + "bin": { + "vsce": "vsce" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/vscode": { + "version": "1.1.37", + "resolved": "https://registry.npmjs.org/vscode/-/vscode-1.1.37.tgz", + "integrity": "sha512-vJNj6IlN7IJPdMavlQa1KoFB3Ihn06q1AiN3ZFI/HfzPNzbKZWPPuiU+XkpNOfGU5k15m4r80nxNPlM7wcc0wg==", + "deprecated": "This package is deprecated in favor of @types/vscode and vscode-test. For more information please read: https://code.visualstudio.com/updates/v1_36#_splitting-vscode-package-into-typesvscode-and-vscodetest", + "dev": true, + "license": "MIT", + "dependencies": { + "glob": "^7.1.2", + "http-proxy-agent": "^4.0.1", + "https-proxy-agent": "^5.0.0", + "mocha": "^5.2.0", + "semver": "^5.4.1", + "source-map-support": "^0.5.0", + "vscode-test": "^0.4.1" + }, + "bin": { + "vscode-install": "bin/install" + }, + "engines": { + "node": ">=8.9.3" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": 
"sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "dev": true, + "license": "MIT", + "dependencies": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + }, + "engines": { + "vscode": "^1.82.0" + } + }, + "node_modules/vscode-languageclient/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/vscode-languageclient/node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-languageclient/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "dev": true, + "license": "MIT", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + 
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode-test": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/vscode-test/-/vscode-test-0.4.3.tgz", + "integrity": "sha512-EkMGqBSefZH2MgW65nY05rdRSko15uvzq4VAPM5jVmwYuFQKE7eikKXNJDRxL+OITXHB6pI+a3XqqD32Y3KC5w==", + "deprecated": "This package has been renamed to @vscode/test-electron, please update to the new name", + "dev": true, + "license": "MIT", + "dependencies": { + "http-proxy-agent": "^2.1.0", + "https-proxy-agent": "^2.2.1" + }, + "engines": { + "node": ">=8.9.3" + } + }, + "node_modules/vscode-test/node_modules/agent-base": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-4.3.0.tgz", + "integrity": "sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==", + "dev": true, + "license": "MIT", + "dependencies": { + "es6-promisify": "^5.0.0" + }, + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/vscode-test/node_modules/debug": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", + "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/vscode-test/node_modules/http-proxy-agent": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-2.1.0.tgz", + "integrity": "sha512-qwHbBLV7WviBl0rQsOzH6o5lwyOIvwp/BdFnvVxXORldu5TmjFfjzBcWUWS5kWAZhmv+JtiDhSuQCp4sBfbIgg==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "4", + "debug": "3.1.0" + }, + "engines": { + "node": ">= 4.5.0" + } + }, + 
"node_modules/vscode-test/node_modules/https-proxy-agent": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz", + "integrity": "sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^4.3.0", + "debug": "^3.1.0" + }, + "engines": { + "node": ">= 4.5.0" + } + }, + "node_modules/vscode-test/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode/node_modules/commander": { + "version": "2.15.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.15.1.tgz", + "integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode/node_modules/debug": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", + "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/vscode/node_modules/diff": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-3.5.0.tgz", + "integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/vscode/node_modules/glob": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.2.tgz", + "integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==", + "deprecated": "Glob versions prior to v9 are no longer 
supported", + "dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + } + }, + "node_modules/vscode/node_modules/he": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/he/-/he-1.1.1.tgz", + "integrity": "sha512-z/GDPjlRMNOa2XJiB4em8wJpuuBfrFOlYKTZxtpkdr1uPdibHI8rYA3MY0KDObpVyaes0e/aunid/t88ZI2EKA==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/vscode/node_modules/minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/vscode/node_modules/mocha": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-5.2.0.tgz", + "integrity": "sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "browser-stdout": "1.3.1", + "commander": "2.15.1", + "debug": "3.1.0", + "diff": "3.5.0", + "escape-string-regexp": "1.0.5", + "glob": "7.1.2", + "growl": "1.10.5", + "he": "1.1.1", + "minimatch": "3.0.4", + "mkdirp": "0.5.1", + "supports-color": "5.4.0" + }, + "bin": { + "_mocha": "bin/_mocha", + "mocha": "bin/mocha" + }, + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/vscode/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode/node_modules/supports-color": { + "version": "5.4.0", + 
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.4.0.tgz", + "integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "dev": true, + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/workerpool": { + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.5.1.tgz", + "integrity": "sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-styles": { + "version": 
"4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/wrap-ansi/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/xml2js": { + "version": "0.4.23", + "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz", + "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==", + "dev": true, + "license": "MIT", + "dependencies": { + "sax": ">=0.6.0", + "xmlbuilder": "~11.0.0" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/xmlbuilder": { + "version": "11.0.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", + "integrity": 
"sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true, + "license": "ISC" + }, + "node_modules/yargs": { + "version": "16.2.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", + "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs-parser": { + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs-unparser": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/yargs-unparser/-/yargs-unparser-2.0.0.tgz", + "integrity": "sha512-7pRTIA9Qc1caZ0bZ6RYRGbHJthJWuakf+WmHK0rVeLkNrrGhfoabBNdue6kdINI6r4if7ocq9aD/n7xwKOdzOA==", + "dev": true, + "license": "MIT", + "dependencies": { + "camelcase": "^6.0.0", + "decamelize": "^4.0.0", + "flat": "^5.0.2", + 
"is-plain-obj": "^2.1.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, + "node_modules/yazl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/yazl/-/yazl-2.5.1.tgz", + "integrity": "sha512-phENi2PLiHnHb6QBVot+dJnaAZ0xosj7p3fWl+znIjBDlnMI2PsZCJZ306BPTFOaHf5qdDEI8x5qFrSOBN5vrw==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3" + } + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/vscode-ext/package.json b/vscode-ext/package.json new file mode 100644 index 0000000..64d354a --- /dev/null +++ b/vscode-ext/package.json @@ -0,0 +1,76 @@ +{ + "name": "hyperdoc-vscode", + "displayName": "HyperDoc", + "description": "HyperDoc language basics, highlighting, and completion.", + "version": "0.0.1", + "publisher": "hyperdoc", + "license": "MIT", + "engines": { + "vscode": "^1.85.0" + }, + "categories": [ + "Programming Languages" + ], + "activationEvents": [ + "onLanguage:hyperdoc", + "onCommand:hyperdoc.startWasmLanguageServer" + 
], + "main": "./out/extension.js", + "contributes": { + "languages": [ + { + "id": "hyperdoc", + "aliases": [ + "HyperDoc", + "hyperdoc" + ], + "extensions": [ + ".hdoc" + ], + "configuration": "./language-configuration.json" + } + ], + "grammars": [ + { + "language": "hyperdoc", + "scopeName": "source.hyperdoc", + "path": "./syntaxes/hyperdoc.tmLanguage.json" + } + ], + "commands": [ + { + "command": "hyperdoc.startWasmLanguageServer", + "title": "HyperDoc: Start Wasm Language Server" + } + ], + "configuration": { + "title": "HyperDoc", + "properties": { + "hyperdoc.languageServer.wasmPath": { + "type": "string", + "default": "", + "description": "Path to a HyperDoc language server WebAssembly binary. Leave empty to disable the wasm language server stub." + } + } + } + }, + "scripts": { + "vscode:prepublish": "npm run compile", + "compile": "tsc -p ./", + "watch": "tsc -watch -p ./", + "lint": "echo \"No lint configured\"", + "test": "mocha -r ts-node/register test/**/*.test.ts", + "package": "vsce package" + }, + "devDependencies": { + "@types/node": "^18.19.0", + "@types/vscode": "1.85.0", + "@types/mocha": "^10.0.6", + "mocha": "^10.4.0", + "ts-node": "^10.9.2", + "typescript": "^5.4.0", + "vsce": "^2.15.0", + "vscode": "^1.1.37", + "vscode-languageclient": "^9.0.1" + } +} diff --git a/vscode-ext/src/extension.ts b/vscode-ext/src/extension.ts new file mode 100644 index 0000000..59da2fc --- /dev/null +++ b/vscode-ext/src/extension.ts @@ -0,0 +1,141 @@ +import * as fs from "fs/promises"; +import * as vscode from "vscode"; +import { + ATTRIBUTE_SUGGESTIONS, + ELEMENT_SUGGESTIONS, + Suggestion, + computeIsInAttributeList, + mapSuggestionKind, + resolveWasmPath +} from "./utils"; + +class HyperdocCompletionProvider implements vscode.CompletionItemProvider { + provideCompletionItems( + document: vscode.TextDocument, + position: vscode.Position + ): vscode.ProviderResult { + const inAttributeList = isInAttributeList(document, position); + const pool = inAttributeList 
? ATTRIBUTE_SUGGESTIONS : ELEMENT_SUGGESTIONS; + + return pool.map((item) => createCompletionItem(item)); + } +} + +function createCompletionItem(item: Suggestion): vscode.CompletionItem { + const completion = new vscode.CompletionItem( + item.label, + mapSuggestionKind(item.kind) + ); + completion.detail = item.detail; + return completion; +} + +export function isInAttributeList( + document: vscode.TextDocument, + position: vscode.Position +): boolean { + const text = document.getText( + new vscode.Range(new vscode.Position(0, 0), position) + ); + return computeIsInAttributeList(text); +} + +class WasmLanguageServerController { + private wasmModule: WebAssembly.Module | undefined; + private readonly output: vscode.OutputChannel; + + constructor(private readonly context: vscode.ExtensionContext) { + this.output = vscode.window.createOutputChannel("HyperDoc"); + } + + async prepareFromConfiguration(): Promise { + const configuredPath = vscode.workspace + .getConfiguration("hyperdoc") + .get("languageServer.wasmPath") + ?.trim(); + + if (!configuredPath) { + this.wasmModule = undefined; + this.output.appendLine( + "HyperDoc wasm language server is disabled (no path configured)." + ); + return; + } + + await this.loadWasmModule(configuredPath); + } + + dispose(): void { + this.wasmModule = undefined; + this.output.dispose(); + } + + private async loadWasmModule(rawPath: string): Promise { + const resolvedPath = resolveWasmPath(rawPath, { + extensionPath: this.context.extensionPath, + workspaceFolders: vscode.workspace.workspaceFolders?.map( + (folder) => folder.uri.fsPath + ) + }); + this.output.appendLine( + `Preparing HyperDoc wasm language server stub from: ${resolvedPath}` + ); + + try { + const bytes = await fs.readFile(resolvedPath); + const wasmBytes = Uint8Array.from(bytes); + this.wasmModule = await WebAssembly.compile(wasmBytes); + this.output.appendLine( + "Wasm module compiled. 
Language client wiring is intentionally disabled until the server shim is available." + ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + vscode.window.showWarningMessage( + `HyperDoc: failed to load wasm language server (${message}).` + ); + } + } +} + +export async function activate(context: vscode.ExtensionContext): Promise { + const completionProvider = vscode.languages.registerCompletionItemProvider( + { language: "hyperdoc" }, + new HyperdocCompletionProvider(), + "\\", + "{", + "(" + ); + + const wasmController = new WasmLanguageServerController(context); + + const startWasmCommand = vscode.commands.registerCommand( + "hyperdoc.startWasmLanguageServer", + async () => { + await wasmController.prepareFromConfiguration(); + vscode.window.showInformationMessage( + "HyperDoc wasm language server stub prepared (when configured)." + ); + } + ); + + const configChangeListener = vscode.workspace.onDidChangeConfiguration( + async (event) => { + if (event.affectsConfiguration("hyperdoc.languageServer.wasmPath")) { + await wasmController.prepareFromConfiguration(); + } + } + ); + + context.subscriptions.push( + completionProvider, + wasmController, + startWasmCommand, + configChangeListener + ); + + await wasmController.prepareFromConfiguration(); +} + +export function deactivate(): void { + // No-op +} diff --git a/vscode-ext/src/utils.ts b/vscode-ext/src/utils.ts new file mode 100644 index 0000000..9d896be --- /dev/null +++ b/vscode-ext/src/utils.ts @@ -0,0 +1,98 @@ +import * as path from "path"; + +export type Suggestion = { + label: string; + detail: string; + kind: "class" | "function" | "property"; +}; + +export const ELEMENT_SUGGESTIONS: Suggestion[] = [ + { label: "hdoc", detail: "Document header", kind: "class" }, + { label: "title", detail: "Document title", kind: "class" }, + { label: "h1", detail: "Heading level 1", kind: "class" }, + { label: "h2", detail: "Heading level 2", kind: "class" }, + { label: "h3", 
detail: "Heading level 3", kind: "class" }, + { label: "toc", detail: "Table of contents", kind: "class" }, + { label: "footnotes", detail: "Footnote dump", kind: "class" }, + { label: "p", detail: "Paragraph", kind: "class" }, + { label: "note", detail: "Admonition block: note", kind: "class" }, + { label: "warning", detail: "Admonition block: warning", kind: "class" }, + { label: "danger", detail: "Admonition block: danger", kind: "class" }, + { label: "tip", detail: "Admonition block: tip", kind: "class" }, + { label: "quote", detail: "Admonition block: quote", kind: "class" }, + { label: "spoiler", detail: "Admonition block: spoiler", kind: "class" }, + { label: "ul", detail: "Unordered list", kind: "class" }, + { label: "ol", detail: "Ordered list", kind: "class" }, + { label: "li", detail: "List item", kind: "class" }, + { label: "img", detail: "Figure/image", kind: "class" }, + { label: "pre", detail: "Preformatted block", kind: "class" }, + { label: "table", detail: "Table", kind: "class" }, + { label: "columns", detail: "Table columns header", kind: "class" }, + { label: "row", detail: "Table row", kind: "class" }, + { label: "group", detail: "Table row group", kind: "class" }, + { label: "td", detail: "Table cell", kind: "class" }, + { label: "\\em", detail: "Inline emphasis", kind: "function" }, + { label: "\\mono", detail: "Inline monospace", kind: "function" }, + { label: "\\strike", detail: "Inline strikethrough", kind: "function" }, + { label: "\\sub", detail: "Inline subscript", kind: "function" }, + { label: "\\sup", detail: "Inline superscript", kind: "function" }, + { label: "\\link", detail: "Inline link", kind: "function" }, + { label: "\\date", detail: "Inline date", kind: "function" }, + { label: "\\time", detail: "Inline time", kind: "function" }, + { label: "\\datetime", detail: "Inline datetime", kind: "function" }, + { label: "\\ref", detail: "Inline reference", kind: "function" }, + { label: "\\footnote", detail: "Inline footnote", kind: 
"function" } +]; + +export const ATTRIBUTE_SUGGESTIONS: Suggestion[] = [ + { label: "id", detail: "Block identifier", kind: "property" }, + { label: "title", detail: "Title attribute", kind: "property" }, + { label: "lang", detail: "Language override", kind: "property" }, + { label: "fmt", detail: "Format selection", kind: "property" }, + { label: "ref", detail: "Reference target", kind: "property" }, + { label: "key", detail: "Footnote key", kind: "property" } +]; + +export function computeIsInAttributeList(text: string): boolean { + const lastOpen = text.lastIndexOf("("); + if (lastOpen === -1) { + return false; + } + + const lastClose = text.lastIndexOf(")"); + if (lastClose > lastOpen) { + return false; + } + + const afterOpen = text.slice(lastOpen + 1); + return !afterOpen.includes("{") && !afterOpen.includes("}"); +} + +export function mapSuggestionKind(kind: Suggestion["kind"]): number { + switch (kind) { + case "class": + return 6; + case "function": + return 3; + case "property": + return 10; + default: + return 9; + } +} + +export function resolveWasmPath( + rawPath: string, + context: { extensionPath: string; workspaceFolders?: string[] } +): string { + if (path.isAbsolute(rawPath)) { + return rawPath; + } + + const workspaceFolder = context.workspaceFolders?.[0]; + if (workspaceFolder) { + return path.join(workspaceFolder, rawPath); + } + + return path.join(context.extensionPath, rawPath); +} diff --git a/vscode-ext/syntaxes/hyperdoc.tmLanguage.json b/vscode-ext/syntaxes/hyperdoc.tmLanguage.json new file mode 100644 index 0000000..0fa088e --- /dev/null +++ b/vscode-ext/syntaxes/hyperdoc.tmLanguage.json @@ -0,0 +1,74 @@ +{ + "name": "HyperDoc", + "scopeName": "source.hyperdoc", + "patterns": [ + { + "name": "entity.name.type.hyperdoc", + "match": "^(\\s*)(\\\\?[A-Za-z0-9_]+)", + "captures": { + "1": { + "name": "punctuation.whitespace.leading.hyperdoc" + }, + "2": { + "name": "entity.name.tag.hyperdoc" + } + } + }, + { + "name": 
"support.function.inline.hyperdoc", + "match": "\\\\[A-Za-z0-9_]+" + }, + { + "name": "variable.parameter.attribute.hyperdoc", + "match": "([A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)?)(\\s*)(=)", + "captures": { + "1": { + "name": "variable.parameter.attribute.hyperdoc" + }, + "2": { + "name": "punctuation.separator.attribute.hyperdoc" + }, + "3": { + "name": "keyword.operator.assignment.hyperdoc" + } + } + }, + { + "name": "string.quoted.double.hyperdoc", + "match": "\"(?:\\\\.|[^\"\\\\])*\"" + }, + { + "name": "punctuation.section.braces.hyperdoc", + "match": "[{}]" + }, + { + "name": "punctuation.section.parens.hyperdoc", + "match": "[()]" + }, + { + "name": "punctuation.terminator.empty-body.hyperdoc", + "match": ";" + }, + { + "name": "punctuation.definition.verbatim.hyperdoc", + "match": "(:)(?=\\s*(?:$|\\n|\\|))", + "captures": { + "1": { + "name": "punctuation.definition.verbatim.start.hyperdoc" + } + } + }, + { + "name": "meta.verbatim.line.hyperdoc", + "match": "^\\s*(\\|)(.*)$", + "captures": { + "1": { + "name": "punctuation.definition.verbatim.bar.hyperdoc" + }, + "2": { + "name": "string.unquoted.verbatim.hyperdoc" + } + } + } + ] +} diff --git a/vscode-ext/test/utils.test.ts b/vscode-ext/test/utils.test.ts new file mode 100644 index 0000000..e201bd8 --- /dev/null +++ b/vscode-ext/test/utils.test.ts @@ -0,0 +1,79 @@ +import { strict as assert } from "node:assert"; +import path from "path"; +import { + ATTRIBUTE_SUGGESTIONS, + ELEMENT_SUGGESTIONS, + computeIsInAttributeList, + mapSuggestionKind, + resolveWasmPath +} from "../src/utils"; + +describe("computeIsInAttributeList", () => { + it("returns false when no opening paren is present", () => { + assert.equal(computeIsInAttributeList("hdoc "), false); + }); + + it("returns true between parentheses before closing", () => { + const text = 'node(attr="1"'; + assert.equal(computeIsInAttributeList(text), true); + }); + + it("returns false after the closing parenthesis", () => { + const text = 'node(attr="1") '; + 
assert.equal(computeIsInAttributeList(text), false); + }); + + it("returns false if a block brace appears after the last open paren", () => { + const text = 'node(attr="1"{'; + assert.equal(computeIsInAttributeList(text), false); + }); +}); + +describe("completion suggestions", () => { + it("exposes element suggestions with both block and inline names", () => { + const labels = ELEMENT_SUGGESTIONS.map((s) => s.label); + assert(labels.includes("hdoc")); + assert(labels.includes("\\em")); + }); + + it("exposes attribute suggestions", () => { + const labels = ATTRIBUTE_SUGGESTIONS.map((s) => s.label); + assert(labels.includes("id")); + assert(labels.includes("fmt")); + }); +}); + +describe("mapSuggestionKind", () => { + it("maps to completion item kinds", () => { + assert.equal(mapSuggestionKind("class"), 6); + assert.equal(mapSuggestionKind("function"), 3); + assert.equal(mapSuggestionKind("property"), 10); + }); +}); + +describe("resolveWasmPath", () => { + const extPath = "/extension"; + + it("returns absolute paths unchanged", () => { + const input = "/tmp/server.wasm"; + assert.equal( + resolveWasmPath(input, { extensionPath: extPath }), + input + ); + }); + + it("uses workspace folder when available", () => { + const output = resolveWasmPath("server.wasm", { + extensionPath: extPath, + workspaceFolders: ["/workspace/project"] + }); + assert.equal(output, path.join("/workspace/project", "server.wasm")); + }); + + it("falls back to the extension path", () => { + const output = resolveWasmPath("server.wasm", { + extensionPath: extPath + }); + assert.equal(output, path.join(extPath, "server.wasm")); + }); +}); diff --git a/vscode-ext/tsconfig.json b/vscode-ext/tsconfig.json new file mode 100644 index 0000000..c24d51b --- /dev/null +++ b/vscode-ext/tsconfig.json @@ -0,0 +1,26 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "ES2020", + "outDir": "out", + "lib": [ + "ES2020", + "DOM" + ], + "sourceMap": true, + "rootDir": ".", + "strict": true, + 
"moduleResolution": "node", + "esModuleInterop": true, + "skipLibCheck": true, + "types": [ + "node", + "vscode", + "mocha" + ] + }, + "include": [ + "src", + "test" + ] +} From 7a07e4692cc20357bc1767c3c192cf7068c5637d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 10:16:30 +0100 Subject: [PATCH 114/116] Use Node 20 for extension packaging workflow --- .github/workflows/extension-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/extension-ci.yml b/.github/workflows/extension-ci.yml index 6393547..a4b123c 100644 --- a/.github/workflows/extension-ci.yml +++ b/.github/workflows/extension-ci.yml @@ -24,7 +24,7 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: 18 + node-version: 20 - name: Install extension dependencies run: npm ci From a4e2b0cbe668bb4c3f22ab1fd14aeb8a953525ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 11:01:35 +0100 Subject: [PATCH 115/116] Point extension main to compiled output --- vscode-ext/package.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vscode-ext/package.json b/vscode-ext/package.json index 64d354a..84b47d0 100644 --- a/vscode-ext/package.json +++ b/vscode-ext/package.json @@ -15,7 +15,11 @@ "onLanguage:hyperdoc", "onCommand:hyperdoc.startWasmLanguageServer" ], - "main": "./out/extension.js", + "main": "./out/src/extension.js", + "repository": { + "type": "git", + "url": "https://github.com/ashet-hypertext/hyperdoc.git" + }, "contributes": { "languages": [ { From cc5e7206784153a2b32c6d72249e72ed331c7967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 11:01:39 +0100 Subject: [PATCH 116/116] Add repository link and license for extension packaging --- LICENSE | 21 +++++++++++++++++++++ vscode-ext/package.json | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE 
new file mode 100644 index 0000000..637c23a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 HyperDoc Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vscode-ext/package.json b/vscode-ext/package.json index 84b47d0..a72ce19 100644 --- a/vscode-ext/package.json +++ b/vscode-ext/package.json @@ -18,7 +18,7 @@ "main": "./out/src/extension.js", "repository": { "type": "git", - "url": "https://github.com/ashet-hypertext/hyperdoc.git" + "url": "https://github.com/Ashet-Technologies/hyperdoc.git" }, "contributes": { "languages": [
          "Section One""Section One"
          Row 1
          "Section Two""Section Two"
          Row 3