From b7f53610e44104a77487c0e1adef7f8eb2b2acfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 10:40:36 +0100 Subject: [PATCH 001/116] Works towards HyperDoc 2.0 --- .github/workflows/validate.yml | 2 +- README.md | 87 ++---- build.zig | 33 +-- docs/specification.md | 373 ++++++++++++++++++++++++++ examples/featureset.hdoc | 80 ------ examples/html-excerciser.hdoc | 2 - examples/hyperdoc.hdoc | 43 --- flake.lock | 147 ---------- flake.nix | 49 ---- src/data/default.css | 40 --- src/hyperdoc.zig | 472 ++++----------------------------- src/main.zig | 133 ++-------- src/renderer/Html.zig | 167 ------------ src/renderer/HyperDoc.zig | 158 ----------- src/renderer/Markdown.zig | 131 --------- src/testsuite.zig | 212 +-------------- 16 files changed, 488 insertions(+), 1641 deletions(-) create mode 100644 docs/specification.md delete mode 100644 examples/featureset.hdoc delete mode 100644 examples/html-excerciser.hdoc delete mode 100644 examples/hyperdoc.hdoc delete mode 100644 flake.lock delete mode 100644 flake.nix delete mode 100644 src/data/default.css delete mode 100644 src/renderer/Html.zig delete mode 100644 src/renderer/HyperDoc.zig delete mode 100644 src/renderer/Markdown.zig diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index f8d28d6..0b8538c 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -16,7 +16,7 @@ jobs: - name: Setup Zig uses: mlugg/setup-zig@v2 with: - version: 0.15.1 + version: 0.15.2 - name: Build run: | diff --git a/README.md b/README.md index 1755e29..6806429 100644 --- a/README.md +++ b/README.md @@ -1,62 +1,31 @@ # Ashet HyperDocument Format -This format is used for both the _Hyper Wiki_ as well as the _Gateway_ application to store and display -hyperlinked documents. 
- -The format is a rich-text format that can encode/store/display the following document blocks: - -- paragraphs (consisting of a sequence of spans) - - regular text - - links - - bold/emphasised text - - monospaced text - - line break -- 3 levels of headings -- ordered and unordered lists - - each list item is a paragraph or another list -- quotes (paragraph with special styling) -- preformatted text (code blocks, also uses the paragraph formatting) -- images - -Regular text is assumed to use a proportional font, while preformatted text is required to be rendered as monospace. - -## Storage - -HyperDocument is stored as a trivial-to-parse plain text format, not necessarily meant to be edited by humans, -but still human readable. - -**Example:** - -```lua -hdoc "1.0" -p { - span "Hello, World!\n" - link "http://google.com" "Visit Google!" - span "\n" - emph "This is fat!" - span "\n" - mono "int main()" - span "\n" -} -enumerate { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} -itemize { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} -quote { - span "Life is what happens when you're busy making other plans.\n - John Lennon" -} -pre { - span "const std = @import(\"std\");\n" - span "\n" - span "pub fn main() !void {\n" - span " std.debug.print(\"Hello, World!\\n\", .{});\n" - span "}\n" -} -image "dog.png" +## Motivation + +> TODO: Write motivation + +## Specification + +[Read the specification](docs/specification.md). + +## Building + +Requires [Zig 0.15.2](https://ziglang.org/) installed. 
+ +### Build debug application + +```sh-session +[user@host] hyperdoc$ zig build +``` + +### Build release application + +```sh-session +[user@host] hyperdoc$ zig build -Drelease +``` + +### Run test suite + +```sh-session +[user@host] hyperdoc$ zig build test ``` diff --git a/build.zig b/build.zig index a6f8daa..0c845f2 100644 --- a/build.zig +++ b/build.zig @@ -2,27 +2,17 @@ const std = @import("std"); pub fn build(b: *std.Build) void { // Options: - const target = b.standardTargetOptions(.{}); - const optimize = b.standardOptimizeOption(.{}); + const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseSafe }); // Targets: - const run_step = b.step("run", "Run the app"); const test_step = b.step("test", "Run unit tests"); // Build: - - const pt_dep = b.dependency("parser_toolkit", .{}); - const args = b.dependency("args", .{}); - - const hyperdoc = b.addModule( - "hyperdoc", - .{ - .root_source_file = b.path("src/hyperdoc.zig"), - }, - ); - hyperdoc.addImport("parser-toolkit", pt_dep.module("parser-toolkit")); + const hyperdoc = b.addModule("hyperdoc", .{ + .root_source_file = b.path("src/hyperdoc.zig"), + }); const exe = b.addExecutable(.{ .name = "hyperdoc", @@ -30,13 +20,11 @@ pub fn build(b: *std.Build) void { .root_source_file = b.path("src/main.zig"), .target = target, .optimize = optimize, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, }), - .use_llvm = true, }); - - exe.root_module.addImport("hyperdoc", hyperdoc); - exe.root_module.addImport("args", args.module("args")); - b.installArtifact(exe); const run_cmd = b.addRunArtifact(exe); @@ -52,11 +40,10 @@ pub fn build(b: *std.Build) void { .root_source_file = b.path("src/testsuite.zig"), .target = target, .optimize = optimize, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, }), - .use_llvm = true, }); - - exe_tests.root_module.addImport("hyperdoc", hyperdoc); - test_step.dependOn(&b.addRunArtifact(exe_tests).step); } diff --git 
a/docs/specification.md b/docs/specification.md new file mode 100644 index 0000000..3e08e00 --- /dev/null +++ b/docs/specification.md @@ -0,0 +1,373 @@ +# HyperDoc 2.0 + +This specification describes the document markup language "HyperDoc 2.0", that tries to be a simple to parse, easy to write markup language for hypertext documents. + +It sits somewhat between LaTeX and Markdown and tries to be way simpler to parse than Markdown, but keep useful semantics around. + +## Syntax Overview + +```hdoc +hdoc "2.0" + +h1{HyperDoc 2.0} + +toc{} + +h2{Paragraphs} + +p { This is a simple paragraph containing text. } + +p(id="foo") { + This is a paragraph with an attribute "id" with the value "foo". +} + +p { + This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. + Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript (x\sub{2}). + We can also \link(ref="foo"){link to other parts of a document} or \link(url="https://ashet.computer"){to websites}. + With \mono(lang="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. +} + +h2{Special Paragraphs} + +note { HyperDoc 2.0 also supports different types of paragraphs. } +warning { These should affect rendering, and have well-defined semantics attached to them. } +danger { You shall not assume any specific formatting of these elements though. } +tip { They typically have a standardized style though. } +quote { You shall not pass! } +spoiler { Nobody expects the Spanish Inquisition! } + +h2{Literals and Preformatted Text} + +p: +| we can also use literal lines. +| these are introduced by a trailing colon (':') at the end of a line. +| each following line that starts with whitespace followed by a pipe character ('|') +| is then part of the contents. +| Literal lines don't perform any parsing, so they don't require any escaping of characters. 
+| This is really useful for code blocks: + +pre(lang="c"): +| #include +| int main(int argc, char const * argv[]) { +| printf("Hello, World!\n"); +| return 0; +| } + +h2{String Literals} + +p "It's also possible to use a string literal for bodies if desired." + +p { \em "Magic" is a simple way to highlight single words or text with escaping in inlines. } + +h2{Images & Figures} + +p { We can also add images to our documents: } + +img(id="fig1", path="./preview.jpeg") { If this is non-empty, it's a figure caption. } + +h2{Lists} + +p { Also lists are possible: } + +h3{Unordered Lists} + +ul { + li { p { Apples } } + li { p { Bananas } } + li { p { Cucumbers } } +} + +h3{Ordered Lists} + +ol { + li { p { Collect underpants } } + li { p { ? } } + li { p { Profit } } +} + +h2{Tables} + +p { And last, but not least, we can have tables: } + +table { + columns { + td "Key" + td "Value" + } + row { + td "Author" + td { Felix "xq" Queißner } + } + row { + td "Date of Invention" + td { \date{2025-12-17} } + } +} +``` + +## Grammar + +This grammar describes the text format + +Short notes on grammar notation: + +- `{ ... }` is a repetition +- `[ ... ]` is an option +- `a | b | c` is alternatives +- `( ... )` is a group +- `"foo"` is a literal token sequence +- `/.../` is a regex +- Whitespace is assumed to be ignored unless matched by a literal, so tokens are typically separated by whitespace +- Upper case elements are roughly tokens, while lowercase elements are rules. 
+ +``` +document := HEADER { block } + +block := IDENTIFIER [ attribute_list ] body + +body := list | literal | STRING +literal := ":" "\n" { LITERAL_LINE } + +list := "{" { escape | inline | block | WORD } "}" +escape := "\\" | "\{" | "\}" +inline := "\" IDENTIFIER [ attribute_list ] body + +attribute_list := "(" [ attribute { "," attribute } ] ")" +attribute := IDENTIFIER "=" STRING + +IDENTIFIER := /\b\w+\b/ +HEADER := /^hdoc\s+"2.0"\s*$/ +STRING := /"(\\.|[^"\r\n])*"/ +LITERAL_LINE := /^\s*\|(.*)$/ +WORD := /[^\s\{\}\\]+/ +``` + +## Semantic Structure + +All elements have these attributes: + +| Attribute | Function | +| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `lang` | Marks the (human) language of the contents of that element. This must be an [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag). | + + +## Top-Level / Block Elements + +All top-level elements have these attributes: + +| Attribute | Function | +| --------- | -------------------------------------------------------------------------------- | +| `id` | Marks a target for a `\link(ref="...")`. Must be unique throughout the document. | + +### Headings: `h1`, `h2`, `h3` + +**Allowed Items:** Inline Text + +These elements are all rendered as headings of different levels. + +- `h1` is the top-level heading. +- `h2` is the level below `h1`. +- `h3` is the level below `h2`. + +### Paragraphs: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` + +**Allowed Items:** Inline Text + +These elements are all rendered as paragraphs. + +The type of the paragraph includes a semantic hint: + +- `p`: A normal paragraph. +- `note`: A paragraph that informs the reader. This is typically rendered with a blue/white color hint. The associated icon is a white i in a blue box/circle. +- `warning`: A paragraph that warns the reader. 
This is typically rendered with a yellow/black color hint. The associated icon is a yellow triangle with a black exclamation mark. +- `danger`: A paragraph that warns the reader of danger. This is typically rendered with a red/white color hint. The associated icon is a red octagon with a white exclamation mark. +- `tip`: A paragraph that gives the reader a tip. The associated icon is a lightbulb. +- `quote`: A paragraph that quotes a foreign source. This is typically rendered with a small indentation and a distinct font. +- `spoiler`: A paragraph that contains information about things the reader might not want to know. This is typically visually hidden/blurred so it's unreadable until a reader action is performed. + +### Lists: `ul`, `ol` + +**Allowed Items:** `li` + +- `ul` is an unordered list rendered with typically either dashes or dots as list enumerators. +- `ol` is an ordered list rendered with typically either roman or arabic numerals as list enumerators. + +#### Ordered List `ol` + +| Attribute | Function | +| --------- | -------------------------------------------------------------------------------------------------------------------- | +| `first` | An integer that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | + +### Figures: `img` + +**Allowed Items:** Inline Text + +| Attribute | Function | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `alt` | A textual description of the image contents for vision-impaired users. Similar to the [HTML alt tag](https://en.wikipedia.org/wiki/Alt_attribute). | +| `path` | A path relative to the current file that points to an image file that should be shown. | + +This element shows a full-width image or figure. Its contents are the figure description. + +If the contents are empty, the figure may be rendered in a simpler form. 
+ +### Preformatted: `pre` + +**Allowed Items:** Inline Text + +| Attribute | Function | +| --------- | ------------------------------------------------------------------------------------------------------- | +| `syntax` | If present, hints a syntax highlighter that this preformatted block contains programming language code. | + +In contrast to all other block types, a `pre` block retains whitespace and line-break information and lays out the text as-is. + +It does not allow automatic line break insertion or word-wrapping. + +If a pre contains inline elements, these will still be parsed and apply their styles to the text spans. + +### Table Of Contents: `toc` + +**Allowed Items:** *none* + +| Attribute | Function | +| --------- | ----------------------------------------------------------------------- | +| `depth` | `1`, `2` or `3`. Defines how many levels of headings shall be included. | + +Renders a table of contents for the current document. + +This element allows no child items. + +## Lists + +### List Items `li` + +**Allowed Items:** Block Elements *or* String Content. + +These elements wrap a sequence of blocks that will be rendered for this list item. + +It also allows a string to be used as it's content directly, this will be equivalent to having a nested paragraph with that strings content: + +``` +ul { + li { p { This is a normal item. } } + li "This is a normal item." +} +``` + +will have two identical list items. + +### Tables: `table` + +Allowed Items: `columns`, `row`, `group` + +> TODO: Spec out tables proper. 
+> `columns` is basically a `row` with only column headings +> `row` is just a row with cells +> all rows must contain the same amount of cell span +> `group` is a heading for subsequent rows +> `row.title` attribute is displayed in a column left of the first column, the top-left element is always empty + +## Table Elements + +### Column Headers: `columns` + +**Allowed Items:** `td` + +This element contains cells + +### Rows: `row` + +**Allowed Items:** `td` + +| Attribute | Function | +| --------- | ---------------------------------------------------------------------------- | +| `title` | A title caption for this row. If present, will be shown left of all columns. | + +### Row Groups: `group` + +**Allowed Items:** Inline Text + +A *row group* is a row that contains a single heading-style cell that labels the rows below. + +### Cells: `td` + +**Allowed Items:** Block Elements *or* String Content. + +| Attribute | Function | +| --------- | -------------------------------------------------- | +| `colspan` | Integer defining how many columns this cell spans. | + +This element contains the contents of a table cell. + +> TODO: Similar to `li`, it can be string or block-sequence. + +## Inline Text + +These elements are all allowed inside a paragraph-like content and can typically be nested. + +### Emphasis: `em` + +**Nesting:** Yes + +Formats the text as emphasised. This is typically bold or italic rendering. + +### Monospaced: `mono` + +**Nesting:** Yes + +| Attribute | Function | +| --------- | ----------------------------------------------------------------------------------------- | +| `syntax` | If present, hints a syntax highlighter that this span contains programming language code. | + +Formats the text in a monospaced font. This is useful for code-like structures. + +### Strike-through: `strike` + +**Nesting:** Yes + +Renders the text with a horizontal line through the text, striking it out. 
+ +### Sub/Superscript: `sub`, `sup` + +**Nesting:** Yes + +Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to allow sub- or superscript rendering. + +### Linking: `link` + +**Nesting:** Yes + +| Attribute | Function | +| --------- | ---------------------------------------------------------------------------------------------------------- | +| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `url`. | +| `url` | Points the link to the resource inside the `url`. Mutually exclusive to `ref`. | + +Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. + +### Localized Date/Time: `date`, `time`, `datetime` + +**Nesting:** No + +Renders an [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. + +> TODO: Add `fmt` attribute: +> `\date` takes an attribute fmt which can be +> - "year" (2025) +> - "month" (December), +> - "day" (22nd) +> - "weekday" (monday) +> - "short" (22.12.2025) +> - "long" (22nd of December 2025) +> - "relative" (two days ago, two months ago, ...) +> +> `\time` takes an attribute fmt which can be +> - "short" (09:41) +> - "long" (09:41:25) +> - "rough" (early morning, morning, noon, afternoon, evening, late in the night, ...) +> - "relative" (two minutes ago, two days ago, ...) +> +> `\datetime` takes an attribute fmt which can be +> - *To be done* +> - ... +> \ No newline at end of file diff --git a/examples/featureset.hdoc b/examples/featureset.hdoc deleted file mode 100644 index 9d3a3af..0000000 --- a/examples/featureset.hdoc +++ /dev/null @@ -1,80 +0,0 @@ -hdoc "1.0" -h1 "intro" "Introduction" -toc { } -h2 "" "Basic Features" - -h3 "" "Spans" -p { - span "Hello, World!\n" - link "http://google.com" "Visit Google!" - span "\n" - emph "This is fat!" 
- span "\n" - mono "int main()" -} - -h3 "" "Lists" -enumerate { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} -itemize { - item { p { span "first" } } - item { p { span "second" } } - item { p { span "third" } } -} - -h3 "" "Block Quote" -quote { - span "Life is what happens when you're busy making other plans.\n - John Lennon" -} - -h3 "" "Code Example" -pre "zig" { - span "const std = @import(\"std\");\n" - span "\n" - span "pub fn main() !void {\n" - span " std.debug.print(\"Hello, World!\\n\", .{});\n" - span "}" -} -image "dog.png" - -h2 "" "Nested lists" -itemize { - item { p { span "first" } } - item { p { span "second" } } - item { itemize { - item { p { span "third.first" } } - item { p { span "third.second" } } - } } - item {enumerate { - item { p { span "fourth.first" } } - item { p { span "fourth.second" } } - } } -} - -enumerate { - item { p { span "Item 1" } } - item { p { span "Item 2" } } - item { p { span "Item 3" } } - item { p { span "Item 4" } } - item { p { span "Item 5" } } - item { p { span "Item 6" } } - item { p { span "Item 7" } } - item { p { span "Item 8" } } - item { p { span "Item 9" } } -} - -enumerate { - item { p { span "Item 1" } } - item { p { span "Item 2" } } - item { p { span "Item 3" } } - item { p { span "Item 4" } } - item { p { span "Item 5" } } - item { p { span "Item 6" } } - item { p { span "Item 7" } } - item { p { span "Item 8" } } - item { p { span "Item 9" } } - item { p { span "Item 10" } } -} \ No newline at end of file diff --git a/examples/html-excerciser.hdoc b/examples/html-excerciser.hdoc deleted file mode 100644 index d620a88..0000000 --- a/examples/html-excerciser.hdoc +++ /dev/null @@ -1,2 +0,0 @@ -hdoc "1.0" -p { span "" } diff --git a/examples/hyperdoc.hdoc b/examples/hyperdoc.hdoc deleted file mode 100644 index ae3a25e..0000000 --- a/examples/hyperdoc.hdoc +++ /dev/null @@ -1,43 +0,0 @@ -hdoc "1.0" -h1 "" "HyperDocument File Format" -toc {} -h2 "intro" 
"Introduction" -p { - span "The HyperDocument file format is meant to descibe rich text files that link between each other. " -} -h2 "structure" "Structure" -p { - span "The structure of HyperDocument files is pretty simple. Each file starts with a " - mono "hdoc \"1.0\"" - span " sequence that will mark both \"magic number\" and format version. " -} -p { - span "After the header, an arbitrary number of block elements follows." - span "Each block can be considered similar to a paragraph in text documents, but isn't necessarily only a pure text element. " - span "The following blocks types are available:" -} -itemize { - item { p { mono "h1" span ", " mono "h2" span ", " mono "h3" span " - First, second and third level headings" } } - item { p { mono "toc" span " - A table of contents" } } - item { p { mono "p" span " - A regular, plain text paragraph" } } - item { p { mono "quote" span " - A block quote" } } - item { p { mono "enumerate" span " - An ordered list, using numbering" } } - item { p { mono "itemize" span " - An unordered list, using bullet points" } } - item { p { mono "pre" span " - A preformatted block of text, optionally with a language tag" } } - item { p { mono "image" span " - A picture that is inserted into the document." } } -} -p { - span "The " mono "p" span ", " mono "quote" span " and " mono "pre" span " blocks contain a sequence of non-nested spans. " - span "The following span types are available:" -} -itemize { - item { p { mono "span" span " - Regular, unformatted plain text" } } - item { p { mono "emph" span " - Emphasises text" } } - item { p { mono "mono" span " - Monospaced font" } } - item { p { mono "link" span " - Plain text with a hyperlink." } } -} - -p { - span "The blocks " mono "enumerate" span " and " mono "itemize" span " contain elements of type " mono "item" span ".\n" - span "Each of those items contain a list of blocks." 
-} diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 1ba4434..0000000 --- a/flake.lock +++ /dev/null @@ -1,147 +0,0 @@ -{ - "nodes": { - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_2": { - "inputs": { - "systems": "systems_2" - }, - "locked": { - "lastModified": 1705309234, - "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1718229064, - "narHash": "sha256-ZFav8A9zPNfjZg/wrxh1uZeMJHELRfRgFP+meq01XYk=", - "owner": "nixos", - "repo": "nixpkgs", - "rev": "5c2ec3a5c2ee9909904f860dadc19bc12cd9cc44", - "type": "github" - }, - "original": { - "owner": "nixos", - "ref": "nixos-23.11", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_2": { - "locked": { - "lastModified": 1708161998, - "narHash": "sha256-6KnemmUorCvlcAvGziFosAVkrlWZGIc6UNT9GUYr0jQ=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "84d981bae8b5e783b3b548de505b22880559515f", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-23.11", - "repo": "nixpkgs", - "type": 
"github" - } - }, - "root": { - "inputs": { - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs", - "zig": "zig" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_2": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "zig": { - "inputs": { - "flake-compat": "flake-compat", - "flake-utils": "flake-utils_2", - "nixpkgs": "nixpkgs_2" - }, - "locked": { - "lastModified": 1718324667, - "narHash": "sha256-AZGskEGjvUmeb+fgBv4lxtCUtXmYBI+ABOlV+og9X14=", - "owner": "mitchellh", - "repo": "zig-overlay", - "rev": "b2c14e5f842af6b2bf03e634f73fd84f6956d4ba", - "type": "github" - }, - "original": { - "owner": "mitchellh", - "repo": "zig-overlay", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix deleted file mode 100644 index e1f2933..0000000 --- a/flake.nix +++ /dev/null @@ -1,49 +0,0 @@ -{ - description = "HyperDoc, a simple hyper document format"; - - inputs = { - nixpkgs.url = "github:nixos/nixpkgs/nixos-23.11"; - flake-utils.url = "github:numtide/flake-utils"; - zig.url = "github:mitchellh/zig-overlay"; - }; - - outputs = { - self, - nixpkgs, - flake-utils, - ... 
- } @ inputs: let - overlays = [ - # Other overlays - (final: prev: { - zigpkgs = inputs.zig.packages.${prev.system}; - }) - ]; - - # Our supported systems are the same supported systems as the Zig binaries - systems = builtins.attrNames inputs.zig.packages; - in - flake-utils.lib.eachSystem systems ( - system: let - pkgs = import nixpkgs {inherit overlays system;}; - in let - zig = pkgs.zigpkgs."0.13.0"; - in rec { - packages.default = pkgs.stdenv.mkDerivation { - name = "hyperdoc"; - src = ./.; - nativeBuildInputs = [zig]; - - configurePhase = ""; - - buildPhase = '' - zig build - ''; - - installPhase = '' - mv zig-out $out - ''; - }; - } - ); -} diff --git a/src/data/default.css b/src/data/default.css deleted file mode 100644 index 6040b76..0000000 --- a/src/data/default.css +++ /dev/null @@ -1,40 +0,0 @@ -* { - box-sizing: border-box; -} - - -body { - max-width: 60em; - margin-left: auto; - margin-right: auto; - font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; -} - -blockquote { - margin: 0; - padding: 0.5em; - border-left: 4px solid green; - background: rgba(0.7, 0.7, 0.7, 17%); -} - -pre { - padding: 0.5em; - border: 3px solid black; - font-family: 'Courier New', Courier, monospace -} - -ol, -ul { - margin: 0; - padding: 0; - padding-left: 1em; -} - -em { - font-style: normal; - font-weight: bold; -} - -code { - font-family: 'Courier New', Courier, monospace -} \ No newline at end of file diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 7900095..c04b0f2 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -19,450 +19,90 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. pub const Block = union(enum) { - paragraph: Paragraph, - ordered_list: []Item, - unordered_list: []Item, - quote: Paragraph, - preformatted: CodeBlock, - image: Image, - heading: Heading, - table_of_contents, + // TODO }; -/// A paragraph is a sequence of spans. 
-pub const Paragraph = struct { - contents: []Span, -}; - -/// A list item is a sequence of blocks -pub const Item = struct { - contents: []Block, -}; - -/// A code block is a paragraph with a programming language attachment -pub const CodeBlock = struct { - contents: []Span, - language: []const u8, // empty=none -}; - -/// An image is a block that will display non-text content. -pub const Image = struct { - path: []const u8, -}; - -/// A heading is a block that will be rendered in a bigger/different font -/// and introduces a new section of the document. -/// It has an anchor that can be referenced. -pub const Heading = struct { - level: Level, - title: []const u8, - anchor: []const u8, - - pub const Level = enum(u2) { - document = 0, - chapter = 1, - section = 2, - }; -}; - -/// Spans are the building blocks of paragraphs. Each span is -/// defining a sequence of text with a certain formatting. -pub const Span = union(enum) { - text: []const u8, - emphasis: []const u8, - monospace: []const u8, - link: Link, -}; - -/// Links are spans that can refer to other documents or elements. -pub const Link = struct { - href: []const u8, - text: []const u8, -}; - -pub const ErrorLocation = parser_toolkit.Location; - /// Parses a HyperDoc document. pub fn parse( allocator: std.mem.Allocator, + /// The source code to be parsed plain_text: []const u8, - error_location: ?*ErrorLocation, + /// An optional diagnostics element that receives diagnostic messages like errors and warnings. + /// If present, will be filled out by the parser. 
+ diagnostics: ?*Diagnostics, ) !Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); - var tokenizer: Tokenizer = .init(plain_text, null); - - var parser: Parser = .{ - .allocator = arena.allocator(), - .core = .init(&tokenizer), - }; - - defer if (error_location) |err| { - err.* = tokenizer.current_location; - }; - - const root_id = parser.acceptIdentifier() catch return error.InvalidFormat; - if (root_id != .hdoc) - return error.InvalidFormat; - const version_number = parser.accept(.text) catch return error.InvalidFormat; - if (!std.mem.eql(u8, version_number.text, "\"1.0\"")) - return error.InvalidVersion; + _ = plain_text; + _ = diagnostics; - const root_elements = try parser.acceptBlockSequence(.eof); - - return Document{ - .arena = arena, - .contents = root_elements, - }; + @panic("TODO: Implement this"); } -const Parser = struct { - allocator: std.mem.Allocator, - core: ParserCore, - - fn save(parser: *Parser) Tokenizer.State { - return parser.core.saveState(); - } - - fn restore(parser: *Parser, state: Tokenizer.State) void { - return parser.core.restoreState(state); - } - - fn accept(parser: *Parser, token_type: TokenType) !Token { - const state = parser.save(); - errdefer parser.restore(state); - - const token = (try parser.core.nextToken()) orelse return error.EndOfFile; - if (token.type != token_type) - return error.UnexpectedToken; - return token; - } - - fn consume(parser: *Parser, token_type: TokenType) !void { - _ = try parser.accept(token_type); - } +/// A diagnostic message. 
+pub const Diagnostic = struct { + pub const Severity = enum { warning, @"error" }; - const Identifier = enum { - // management - hdoc, - - // blocks - h1, - h2, - h3, - toc, - p, - enumerate, - itemize, - quote, - pre, - image, - - // spans - span, - link, - emph, - mono, - - // list of blocks - item, + pub const Location = struct { + line: u32, + column: u32, }; - fn acceptIdentifier(parser: *Parser) !Identifier { - const tok = try parser.accept(.identifier); - return std.meta.stringToEnum(Identifier, tok.text) orelse return error.InvalidIdentifier; - } - - fn acceptText(parser: *Parser) ![]const u8 { - const text_tok = try parser.accept(.text); - - const text = text_tok.text; - - std.debug.assert(text.len >= 2); - std.debug.assert(text[0] == text[text.len - 1]); - - const string_body = text[1 .. text.len - 1]; - - const allocator = parser.allocator; - var temp_string: std.ArrayList(u8) = .empty; - defer temp_string.deinit(allocator); - - try temp_string.ensureTotalCapacity(allocator, string_body.len); - - { - var i: usize = 0; - while (i < string_body.len) { - const c = string_body[i]; - if (c != '\\') { - try temp_string.append(allocator, c); - i += 1; - continue; - } - i += 1; - if (i >= string_body.len) - return error.InvalidEscapeSequence; - const selector = string_body[i]; - i += 1; - switch (selector) { - 'n' => try temp_string.append(allocator, '\n'), - 'r' => try temp_string.append(allocator, '\r'), - 'e' => try temp_string.append(allocator, '\x1B'), - // TODO: Implement the following cases: - // '\xFF' - // '\u{ABCD}' + /// An diagnostic code encoded as a 16 bit integer. + /// The upper 4 bit encode the severity of the code, the lower 12 bit the number. 
+ pub const Code = enum(u16) { + // bitmasks: + const ERROR = 0x1000; + const WARNING = 0x2000; - else => { - try temp_string.append(allocator, selector); - }, - } - } - } - - return try temp_string.toOwnedSlice(allocator); - } + // TODO: Add other diagnostic codes - const BlockSequenceTerminator = enum { @"}", eof }; + // errors: + invalid_character = ERROR | 1, - fn acceptBlockSequence(parser: *Parser, terminator: BlockSequenceTerminator) ![]Block { - const allocator = parser.allocator; - var seq: std.ArrayList(Block) = .empty; - defer seq.deinit(allocator); + // warnings: + missing_space_in_literal = WARNING | 1, - accept_loop: while (true) { - const id = switch (terminator) { - .@"}" => if (parser.acceptIdentifier()) |id| - id - else |_| if (parser.accept(.@"}")) |_| - break :accept_loop - else |_| - return error.UnexpectedToken, - .eof => if (parser.acceptIdentifier()) |id| - id - else |err| switch (err) { - error.EndOfFile => break :accept_loop, - else => |e| return e, - }, + pub fn get_severity(code: Code) Severity { + const num = @intFromEnum(code); + return switch (num & 0xF000) { + ERROR => .@"error", + WARNING => .warning, + else => @panic("invalid error code!"), }; - - switch (id) { - .toc => { - try parser.consume(.@"{"); - try parser.consume(.@"}"); - try seq.append(allocator, .table_of_contents); - }, - - .h1, .h2, .h3 => { - const anchor = try parser.acceptText(); - const title = try parser.acceptText(); - - try seq.append(allocator, .{ - .heading = .{ - .level = switch (id) { - .h1 => .document, - .h2 => .chapter, - .h3 => .section, - else => unreachable, - }, - .title = title, - .anchor = anchor, - }, - }); - }, - - .p, .quote => { - try parser.consume(.@"{"); - const items = try parser.acceptSpanSequence(); - - try seq.append(allocator, if (id == .p) - .{ .paragraph = .{ .contents = items } } - else - .{ .quote = .{ .contents = items } }); - }, - - .pre => { - const language = try parser.acceptText(); - try parser.consume(.@"{"); - const items = 
try parser.acceptSpanSequence(); - - try seq.append(allocator, .{ - .preformatted = .{ - .language = language, - .contents = items, - }, - }); - }, - - .enumerate, .itemize => { - try parser.consume(.@"{"); - - var list: std.ArrayList(Item) = .empty; - defer list.deinit(allocator); - - while (true) { - if (parser.consume(.@"}")) |_| { - break; - } else |_| {} - - const ident = try parser.acceptIdentifier(); - if (ident != .item) { - return error.UnexpectedToken; - } - - try parser.consume(.@"{"); - - const sequence = try parser.acceptBlockSequence(.@"}"); - - try list.append(allocator, .{ - .contents = sequence, - }); - } - - const list_slice = try list.toOwnedSlice(allocator); - - try seq.append(allocator, if (id == .enumerate) - .{ .ordered_list = list_slice } - else - .{ .unordered_list = list_slice }); - }, - - .image => { - const file_path = try parser.acceptText(); - try seq.append(allocator, .{ - .image = .{ - .path = file_path, - }, - }); - }, - - .item, - .hdoc, - .link, - .emph, - .mono, - .span, - => return error.InvalidTopLevelItem, - } - } - - return try seq.toOwnedSlice(allocator); - } - - fn acceptSpanSequence(parser: *Parser) ![]Span { - const allocator = parser.allocator; - var seq: std.ArrayList(Span) = .empty; - defer seq.deinit(allocator); - - accept_loop: while (true) { - const id = if (parser.acceptIdentifier()) |id| - id - else |_| if (parser.accept(.@"}")) |_| - break :accept_loop - else |_| - return error.UnexpectedToken; - - switch (id) { - .item, - .toc, - .h1, - .h2, - .h3, - .p, - .quote, - .pre, - .enumerate, - .itemize, - .image, - .hdoc, - => return error.InvalidSpan, - - .span => { - const text = try parser.acceptText(); - try seq.append(allocator, .{ .text = text }); - }, - .emph => { - const text = try parser.acceptText(); - try seq.append(allocator, .{ .emphasis = text }); - }, - .mono => { - const text = try parser.acceptText(); - try seq.append(allocator, .{ .monospace = text }); - }, - - .link => { - const href = try 
parser.acceptText(); - const text = try parser.acceptText(); - try seq.append(allocator, .{ - .link = .{ - .href = href, - .text = text, - }, - }); - }, - } } + }; - return try seq.toOwnedSlice(allocator); - } + code: Code, + location: Location, + message: []const u8, }; -const ParserCore = parser_toolkit.ParserCore(Tokenizer, .{ .whitespace, .comment }); - -const Pattern = parser_toolkit.Pattern(TokenType); - -const Token = Tokenizer.Token; - -const Tokenizer = parser_toolkit.Tokenizer(TokenType, &[_]Pattern{ - Pattern.create( - .comment, - parser_toolkit.matchers.withPrefix( - "#", - parser_toolkit.matchers.takeNoneOf("\n"), - ), - ), - - Pattern.create(.@"{", parser_toolkit.matchers.literal("{")), - Pattern.create(.@"}", parser_toolkit.matchers.literal("}")), - Pattern.create(.text, matchStringLiteral('\"')), - - Pattern.create(.identifier, parser_toolkit.matchers.identifier), - - Pattern.create(.whitespace, parser_toolkit.matchers.whitespace), -}); - -fn matchStringLiteral(comptime boundary: u8) parser_toolkit.Matcher { - const T = struct { - fn match(str: []const u8) ?usize { - if (str.len < 2) - return null; - - if (str[0] != boundary) - return null; +/// A collection of diagnostic messages. 
+pub const Diagnostics = struct { + arena: std.heap.ArenaAllocator, + items: std.ArrayList(Diagnostic) = .empty, - var i: usize = 1; - while (i < str.len) { - if (str[i] == boundary) - return i + 1; + pub fn init(allocator: std.mem.Allocator) Diagnostic { + return .{ .arena = .init(allocator) }; + } - if (str[i] == '\\') { - i += 2; // skip over the escape and the escaped char - } else { - i += 1; // just go to the next char - } - } + pub fn deinit(diag: *Diagnostics) void { + diag.arena.deinit(); + diag.* = undefined; + } - return null; - } - }; + pub fn add(diag: *Diagnostics, code: Diagnostic.Code, location: Diagnostic.Location, comptime fmt: []const u8, args: anytype) !void { + const allocator = diag.arena.allocator(); - return T.match; -} + const msg = try std.fmt.allocPrint(allocator, fmt, args); + errdefer allocator.free(msg); -const TokenType = enum { - comment, - whitespace, - identifier, - text, - @"{", - @"}", + try diag.items.append(allocator, .{ + .location = location, + .code = code, + .message = msg, + }); + } }; diff --git a/src/main.zig b/src/main.zig index 462bfe7..3cdb76c 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,127 +1,28 @@ const std = @import("std"); +const builtin = @import("builtin"); const hdoc = @import("hyperdoc"); -const args_parser = @import("args"); -pub fn main() !u8 { - var stdout_buf: [1024]u8 = undefined; - const stdout_file: std.fs.File = .stdout(); - var stdout_writer = stdout_file.writer(&stdout_buf); - const stdout = &stdout_writer.interface; - var stderr_buf: [1024]u8 = undefined; - const stderr_file: std.fs.File = .stderr(); - var stderr_writer = stderr_file.writer(&stderr_buf); - const stderr = &stderr_writer.interface; - - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - - const allocator = gpa.allocator(); - - var cli = args_parser.parseForCurrentProcess(CliOptions, allocator, .print) catch return 1; - defer cli.deinit(); - - if (cli.options.help) { - try 
printUsage(cli.executable_name.?, stdout); - return 0; - } - - if (cli.positionals.len != 1) { - try printUsage(cli.executable_name.?, stderr); - return 1; - } +var debug_allocator: std.heap.DebugAllocator(.{}) = .init; - var error_location: hdoc.ErrorLocation = undefined; - - var document: hdoc.Document = blk: { - const source_text = try std.fs.cwd().readFileAlloc( - allocator, - cli.positionals[0], - 512 << 20, - ); // 512MB - defer allocator.free(source_text); - - break :blk hdoc.parse(allocator, source_text, &error_location) catch |err| { - error_location.source = cli.positionals[0]; - std.log.err("{f}: Failed to parse document: {s}", .{ - error_location, - switch (err) { - error.UnexpectedToken, - error.InvalidIdentifier, - error.UnexpectedCharacter, - error.InvalidTopLevelItem, - error.InvalidSpan, - => "syntax error", - error.InvalidFormat => "not a HyperDocument file", - error.InvalidVersion => "unsupported file version", - error.OutOfMemory => "out of memory", - error.EndOfFile => "unexpected end of file", - error.InvalidEscapeSequence => "illegal escape sequence", - // else => |e| @errorName(e), - }, - }); - return 1; - }; +pub fn main() !u8 { + defer if (builtin.mode == .Debug) { + std.debug.assert(debug_allocator.deinit() == .ok); }; - defer document.deinit(); - - const output_file: ?std.fs.File = if (cli.options.output != null and !std.mem.eql(u8, cli.options.output.?, "-")) - try std.fs.cwd().createFile(cli.options.output.?, .{}) + const allocator = if (builtin.mode == .Debug) + debug_allocator.allocator() else - null; - defer if (output_file) |f| f.close(); + std.heap.smp_allocator; - const renderDocument = switch (cli.options.format) { - .hdoc => &@import("renderer/HyperDoc.zig").render, - .html => &@import("renderer/Html.zig").render, - .markdown => &@import("renderer/Markdown.zig").render, - }; - - if (output_file) |f| { - var out_buf: [1024]u8 = undefined; - var out_writer = f.writer(&out_buf); - const output_stream = &out_writer.interface; - try 
renderDocument(output_stream, document); - try output_stream.flush(); - } else { - try renderDocument(stdout, document); - try stdout.flush(); - } - - return 0; -} - -const TargetFormat = enum { - hdoc, - html, - markdown, -}; + // TODO: Parse arguments and load file. + const document = + \\hdoc "2.0" + \\ + ; -const CliOptions = struct { - help: bool = false, - format: TargetFormat = .hdoc, - output: ?[]const u8 = null, + var doc = try hdoc.parse(allocator, document, null); + defer doc.deinit(); - pub const shorthands = .{ - .h = "help", - .f = "format", - .o = "output", - }; -}; + // TODO: Dump AST -fn printUsage(exe_name: []const u8, stream: *std.Io.Writer) !void { - try stream.print("{s} [-h] [-f ] \n", .{ - std.fs.path.basename(exe_name), - }); - try stream.writeAll( - \\ - \\Options: - \\ -h, --help Prints this text - \\ -f, --format Converts the given into . Legal values are: - \\ - hdoc - Formats the input file into canonical format. - \\ - html - Renders the HyperDocument as HTML. - \\ - markdown - Renders the HyperDocument as CommonMark. - \\ -o, --output Instead of printing to stdout, will put the output into . 
- \\ - ); - try stream.flush(); + return 0; } diff --git a/src/renderer/Html.zig b/src/renderer/Html.zig deleted file mode 100644 index 94ad957..0000000 --- a/src/renderer/Html.zig +++ /dev/null @@ -1,167 +0,0 @@ -const std = @import("std"); -const hdoc = @import("hyperdoc"); - -pub const WriteError = std.Io.Writer.Error; - -pub fn render(writer: *std.Io.Writer, document: hdoc.Document) WriteError!void { - try writer.writeAll( - \\ - \\ - \\ - \\ - \\ - \\ - ); - - try renderBlocks(writer, document, document.contents); - - try writer.writeAll( - \\ - \\ - ); -} - -fn renderBlocks( - writer: *std.Io.Writer, - document: hdoc.Document, - blocks: []const hdoc.Block, -) WriteError!void { - for (blocks) |block| { - try renderBlock(writer, document, block); - } -} - -fn renderBlock( - writer: *std.Io.Writer, - document: hdoc.Document, - block: hdoc.Block, -) WriteError!void { - switch (block) { - .paragraph => |content| { - try writer.writeAll("

"); - try renderSpans(writer, content.contents); - try writer.writeAll("

\n"); - }, - - .ordered_list => |content| { - try writer.writeAll("
    \n"); - for (content) |item| { - try writer.writeAll("
  1. "); - try renderBlocks(writer, document, item.contents); - try writer.writeAll("
  2. \n"); - } - try writer.writeAll("
\n"); - }, - - .unordered_list => |content| { - try writer.writeAll("
    \n"); - for (content) |item| { - try writer.writeAll("
  • "); - try renderBlocks(writer, document, item.contents); - try writer.writeAll("
  • \n"); - } - try writer.writeAll("
\n"); - }, - - .quote => |content| { - try writer.writeAll("
"); - try renderSpans(writer, content.contents); - try writer.writeAll("
\n"); - }, - - .preformatted => |content| { - if (!std.mem.eql(u8, content.language, "")) { - try writer.print("
", .{content.language});
-            } else {
-                try writer.writeAll("
");
-            }
-            try renderSpans(writer, content.contents);
-            try writer.writeAll("
\n"); - }, - .image => |content| { - try writer.print("\n", .{content.path}); - }, - .heading => |content| { - try writer.writeAll(switch (content.level) { - .document => " " " 0) { - try writer.print(" id=\"{s}\"", .{content.anchor}); - } - try writer.writeAll(">"); - - try writer.print("{f}", .{escapeHtml(content.title)}); - - try writer.writeAll(switch (content.level) { - .document => "\n", - .chapter => "\n", - .section => "\n", - }); - }, - .table_of_contents => |content| { - // TODO: Render TOC - _ = content; - }, - } -} - -fn renderSpans( - writer: *std.Io.Writer, - spans: []const hdoc.Span, -) WriteError!void { - for (spans) |span| { - try renderSpan(writer, span); - } -} - -fn renderSpan(writer: *std.Io.Writer, span: hdoc.Span) WriteError!void { - switch (span) { - .text => |val| { - try writer.print("{f}", .{escapeHtml(val)}); - }, - .emphasis => |val| { - try writer.writeAll(""); - try writer.print("{f}", .{escapeHtml(val)}); - try writer.writeAll(""); - }, - .monospace => |val| { - try writer.writeAll(""); - try writer.print("{f}", .{escapeHtml(val)}); - try writer.writeAll(""); - }, - .link => |val| { - try writer.print("{f}", .{ - val.href, - escapeHtml(val.text), - }); - }, - } -} - -fn escapeHtml(string: []const u8) HtmlEscaper { - return .{ .string = string }; -} - -const HtmlEscaper = struct { - string: []const u8, - - pub fn format(html: HtmlEscaper, writer: *std.Io.Writer) !void { - for (html.string) |char| { - switch (char) { - '&' => try writer.writeAll("&"), - '<' => try writer.writeAll("<"), - '>' => try writer.writeAll(">"), - '\"' => try writer.writeAll("""), - '\'' => try writer.writeAll("'"), - '\n' => try writer.writeAll("
"), - else => try writer.writeByte(char), - } - } - } -}; diff --git a/src/renderer/HyperDoc.zig b/src/renderer/HyperDoc.zig deleted file mode 100644 index 5aa508f..0000000 --- a/src/renderer/HyperDoc.zig +++ /dev/null @@ -1,158 +0,0 @@ -const std = @import("std"); -const hdoc = @import("hyperdoc"); - -pub const WriteError = std.Io.Writer.Error; - -pub fn render(writer: *std.Io.Writer, document: hdoc.Document) WriteError!void { - try writer.writeAll("hdoc \"1.0\"\n"); - try renderBlocks(writer, document, document.contents, 0); -} - -fn renderBlocks( - writer: *std.Io.Writer, - document: hdoc.Document, - blocks: []const hdoc.Block, - indent: usize, -) WriteError!void { - for (blocks) |block| { - try renderBlock(writer, document, block, indent); - } -} - -fn renderBlock( - writer: *std.Io.Writer, - document: hdoc.Document, - block: hdoc.Block, - indent: usize, -) WriteError!void { - try writer.splatByteAll(' ', 2 * indent); - switch (block) { - .paragraph => |content| { - try writer.writeAll("p {\n"); - try renderSpans(writer, content.contents, indent + 1); - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .ordered_list => |content| { - try writer.writeAll("enumerate {\n"); - for (content) |item| { - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("item {\n"); - - try renderBlocks(writer, document, item.contents, indent + 2); - - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("}\n"); - } - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .unordered_list => |content| { - try writer.writeAll("itemize {\n"); - for (content) |item| { - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("item {\n"); - - try renderBlocks(writer, document, item.contents, indent + 2); - - try writer.splatByteAll(' ', 2 * indent + 2); - try writer.writeAll("}\n"); - } - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .quote => |content| { 
- try writer.writeAll("quote {\n"); - try renderSpans(writer, content.contents, indent + 1); - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - - .preformatted => |content| { - try writer.print("pre \"{f}\" {{\n", .{ - escape(content.language), - }); - try renderSpans(writer, content.contents, indent + 1); - try writer.splatByteAll(' ', 2 * indent); - try writer.writeAll("}\n"); - }, - .image => |content| { - try writer.print("image \"{f}\"\n", .{ - escape(content.path), - }); - }, - .heading => |content| { - try writer.writeAll(switch (content.level) { - .document => "h1", - .chapter => "h2", - .section => "h3", - }); - try writer.print(" \"{f}\" \"{f}\"\n", .{ - escape(content.anchor), - escape(content.title), - }); - }, - .table_of_contents => { - try writer.writeAll("toc {}\n"); - }, - } -} - -fn renderSpans( - writer: *std.Io.Writer, - spans: []const hdoc.Span, - indent: usize, -) WriteError!void { - for (spans) |span| { - try renderSpan(writer, span, indent); - } -} - -fn renderSpan( - writer: *std.Io.Writer, - span: hdoc.Span, - indent: usize, -) WriteError!void { - try writer.splatByteAll(' ', 2 * indent); - switch (span) { - .text => |val| { - try writer.print("span \"{f}\"\n", .{escape(val)}); - }, - .emphasis => |val| { - try writer.print("emph \"{f}\"\n", .{escape(val)}); - }, - .monospace => |val| { - try writer.print("mono \"{f}\"\n", .{escape(val)}); - }, - .link => |val| { - try writer.print("link \"{f}\" \"{f}\"\n", .{ - escape(val.href), - escape(val.text), - }); - }, - } -} - -fn escape(string: []const u8) HDocEscaper { - return .{ .string = string }; -} - -const HDocEscaper = struct { - string: []const u8, - - pub fn format(html: HDocEscaper, writer: *std.Io.Writer) !void { - for (html.string) |char| { - switch (char) { - '\n' => try writer.writeAll("\\n"), - '\r' => try writer.writeAll("\\r"), - '\x1B' => try writer.writeAll("\\e"), - '\'' => try writer.writeAll("\\\'"), - '\"' => try writer.writeAll("\\\""), - 
else => try writer.writeByte(char), - } - } - } -}; diff --git a/src/renderer/Markdown.zig b/src/renderer/Markdown.zig deleted file mode 100644 index e8ba9ab..0000000 --- a/src/renderer/Markdown.zig +++ /dev/null @@ -1,131 +0,0 @@ -const std = @import("std"); -const hdoc = @import("hyperdoc"); - -const WriteError = std.Io.Writer.Error; - -pub fn render(writer: *std.Io.Writer, document: hdoc.Document) WriteError!void { - try renderBlocks(writer, document, document.contents); -} - -fn renderBlocks( - writer: *std.Io.Writer, - document: hdoc.Document, - blocks: []const hdoc.Block, -) WriteError!void { - for (blocks) |block| { - try renderBlock(writer, document, block); - } -} - -fn renderBlock( - writer: *std.Io.Writer, - document: hdoc.Document, - block: hdoc.Block, -) WriteError!void { - switch (block) { - .paragraph => |content| { - try renderSpans(writer, content.contents); - try writer.writeAll("\n\n"); - }, - - .ordered_list => |content| { - for (content) |item| { - try writer.writeAll("- "); - try renderBlocks(writer, document, item.contents); - } - }, - - .unordered_list => |content| { - for (content, 1..) |item, index| { - try writer.print("{}. 
", .{index}); - try renderBlocks(writer, document, item.contents); - } - }, - - .quote => |content| { - try writer.writeAll("> "); - try renderSpans(writer, content.contents); - try writer.writeAll("\n\n"); - }, - - .preformatted => |content| { - try writer.print("```{s}\n", .{content.language}); - try renderSpans(writer, content.contents); - try writer.writeAll("```\n\n"); - }, - .image => |content| { - try writer.print("![]({s})\n\n", .{content.path}); - }, - .heading => |content| { - try writer.writeAll(switch (content.level) { - .document => "# ", - .chapter => "## ", - .section => "### ", - }); - if (content.anchor.len > 0) { - std.log.warn("anchor not supported in markdown!", .{}); - } - - try writer.print("{f}\n\n", .{escapeMd(content.title)}); - }, - .table_of_contents => |content| { - // TODO: Render TOC - _ = content; - }, - } -} - -fn renderSpans( - writer: *std.Io.Writer, - spans: []const hdoc.Span, -) WriteError!void { - for (spans) |span| { - try renderSpan(writer, span); - } -} - -fn renderSpan(writer: *std.Io.Writer, span: hdoc.Span) WriteError!void { - switch (span) { - .text => |val| { - try writer.print("{f}", .{escapeMd(val)}); - }, - .emphasis => |val| { - try writer.writeAll("**"); - try writer.print("{f}", .{escapeMd(val)}); - try writer.writeAll("**"); - }, - .monospace => |val| { - try writer.writeAll("`"); - try writer.print("{f}", .{escapeMd(val)}); - try writer.writeAll("`"); - }, - .link => |val| { - try writer.print("[{f}]({s})", .{ - escapeMd(val.text), - val.href, - }); - }, - } -} - -fn escapeMd(string: []const u8) MarkdownEscaper { - return .{ .string = string }; -} - -const MarkdownEscaper = struct { - string: []const u8, - - pub fn format(html: MarkdownEscaper, writer: *std.Io.Writer) !void { - for (html.string) |char| { - switch (char) { - '&' => try writer.writeAll("&"), - '<' => try writer.writeAll("<"), - '>' => try writer.writeAll(">"), - '\"' => try writer.writeAll("""), - '\'' => try writer.writeAll("'"), - '\n' => try 
writer.writeAll(" \n"), - else => try writer.writeByte(char), - } - } - } -}; diff --git a/src/testsuite.zig b/src/testsuite.zig index 4c0d4ac..58450b5 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -8,7 +8,7 @@ fn testAcceptDocument(document: []const u8) !void { test "empty document" { try testAcceptDocument( - \\hdoc "1.0" + \\hdoc "2.0" ); } @@ -22,17 +22,11 @@ test "invalid document" { try std.testing.expectError(error.InvalidFormat, testAcceptDocument( \\hdoc { )); - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\span - )); - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\blob - )); } test "invalid version" { try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc 1.0 + \\hdoc 2.0 )); try std.testing.expectError(error.InvalidVersion, testAcceptDocument( \\hdoc "" @@ -40,207 +34,7 @@ test "invalid version" { try std.testing.expectError(error.InvalidVersion, testAcceptDocument( \\hdoc "1.2" )); -} - -test "accept toc" { - try testAcceptDocument( - \\hdoc "1.0" - \\toc {} - ); -} - -test "accept multiple blocks" { - try testAcceptDocument( - \\hdoc "1.0" - \\toc {} - \\toc {} - \\toc {} - \\toc {} - ); -} - -test "accept image" { - try testAcceptDocument( - \\hdoc "1.0" - \\image "dog.png" - ); -} - -test "accept headers" { - try testAcceptDocument( - \\hdoc "1.0" - \\h1 "" "Empty anchor" - \\h2 "chapter" "Chapter anchor" - \\h3 "section" "Section anchor" - ); -} - -test "invalid top level items" { - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( - \\hdoc "1.0" - \\span - )); - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( - \\hdoc "1.0" - \\link - )); - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( - \\hdoc "1.0" - \\emph - )); - try std.testing.expectError(error.InvalidTopLevelItem, testAcceptDocument( + try std.testing.expectError(error.InvalidVersion, testAcceptDocument( \\hdoc "1.0" - 
\\mono )); } - -test "empty ordered lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\enumerate {} - ); -} - -test "ordered lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\enumerate { - \\ item { toc {} } - \\ item { toc {} } - \\ item { toc {} } - \\} - ); -} - -test "unordered lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\itemize { - \\ item { toc {} } - \\ item { toc {} } - \\ item { toc {} } - \\} - ); -} - -test "nested lists" { - try testAcceptDocument( - \\hdoc "1.0" - \\enumerate { - \\ item { itemize { } } - \\ item { enumerate { } } - \\ item { toc { } } - \\ item { itemize { item { toc { } } } } - \\ item { enumerate { item { toc { } } } } - \\} - ); -} - -test "empty paragraph" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{} - \\p{} - \\p{} - ); -} - -test "empty quote" { - try testAcceptDocument( - \\hdoc "1.0" - \\quote{} - \\quote{} - \\quote{} - ); -} - -test "spans" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ span "hello" } - \\p{ span "\n" } - \\p{ span "" } - ); -} - -test "mono" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ mono "hello" } - \\p{ mono "\n" } - \\p{ mono "" } - ); -} - -test "emph" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ emph "hello" } - \\p{ emph "\n" } - \\p{ emph "" } - ); -} - -test "links" { - try testAcceptDocument( - \\hdoc "1.0" - \\p{ link "" "hello" } - \\p{ link "" "\n" } - \\p{ link "" "" } - \\p{ link "https://www.example.com/deep/path.txt" "hello" } - \\p{ link "https://www.example.com/deep/path.txt" "\n" } - \\p{ link "https://www.example.com/deep/path.txt" "" } - \\p{ link "#anchor" "hello" } - \\p{ link "#anchor" "\n" } - \\p{ link "#anchor" "" } - ); -} - -test "code block" { - try testAcceptDocument( - \\hdoc "1.0" - \\pre "" { } - \\pre "c++" { } - \\pre "zig" { } - \\pre "c++" { span "#include " } - \\pre "zig" { span "const std = @import(\"std\");" } - ); -} - -test "example document" { - try testAcceptDocument( - \\hdoc "1.0" - \\h1 "intro" "Introduction" - \\toc { } 
- \\p { - \\ span "Hello, World!\n" - \\ link "http://google.com" "Visit Google!" - \\ span "\n" - \\ emph "This is fat!" - \\ span "\n" - \\ mono "int main()" - \\ span "\n" - \\} - \\enumerate { - \\ item { p { span "first" } } - \\ item { p { span "second" } } - \\ item { p { span "third" } } - \\} - \\itemize { - \\ item { p { span "first" } } - \\ item { p { span "second" } } - \\ item { p { span "third" } } - \\} - \\quote { - \\ span "Life is what happens when you're busy making other plans.\n - John Lennon" - \\} - \\pre "zig" { - \\ span "const std = @import(\"std\");\n" - \\ span "\n" - \\ span "pub fn main() !void {\n" - \\ span " std.debug.print(\"Hello, World!\\n\", .{});\n" - \\ span "}\n" - \\} - \\image "dog.png" - ); -} From f2d3b5624b795ce80a9e939aac6f99174b49957e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 11:07:01 +0100 Subject: [PATCH 002/116] Fixes some spec parts --- docs/specification.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 3e08e00..f3790b0 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -24,8 +24,8 @@ p(id="foo") { p { This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). - We can also \link(ref="foo"){link to other parts of a document) or \link(url="https://ashet.computer"){to websites}. - With \mono(lang="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. + We can also \link(ref="foo"){link to other parts of a document} or \link(url="https://ashet.computer"){to websites}. + With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. 
} h2{Special Paragraphs} @@ -47,7 +47,7 @@ p: | Literal lines don't perform any parsing, so they don't require any escaping of characters. | This is really useful for code blocks: -pre(lang="c"): +pre(syntax="c"): | #include | int main(int argc, char const * argv[]) { | printf("Hello, World!\n"); @@ -116,9 +116,9 @@ Short notes on grammar notation: - `[ ... ]` is an option - `a | b | c` is alternatives - `( ... )` is a group -- `"foo"` is a literal token sequence +- `"foo"` is a literal token sequence, no escape sequences (So `"\"` is a single backslash) - `/.../` is a regex -- Whitespace is assumed to be ignored unless matched by a literal, so tokens are typically separated by whitespace +- Whitespace is assumed to be ignored between tokens unless matched by a literal or regex, so tokens are typically separated by whitespace - Upper case elements are roughly tokens, while lowercase elements are rules. ``` @@ -143,6 +143,8 @@ LITERAL_LINE := /^\s*\|(.*)$/ WORD := /[^\s\{\}\\]+/ ``` +**NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. + ## Semantic Structure All elements have these attributes: @@ -306,6 +308,10 @@ This element contains the contents of a table cell. These elements are all allowed inside a paragraph-like content and can typically be nested. +### Plain Text + +This is normal plain text and has no special meaning. + ### Emphasis: `em` **Nesting:** Yes @@ -332,7 +338,7 @@ Renders the text with a horizontal line through the text, striking it out. **Nesting:** Yes -Renders the text a bit smaller and moved upwards (`sub`) or downwards (`sub`) to allow sub- or superscript rendering. +Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to allow sub- or superscript rendering. 
### Linking: `link` @@ -345,7 +351,7 @@ Renders the text a bit smaller and moved upwards (`sub`) or downwards (`sub`) to Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. -### Localized Date/Time: `date`, `time`, `datedate` +### Localized Date/Time: `date`, `time`, `datetime` **Nesting:** No From 13b8b6b13a28a9f6a3666bf425c13806a84f22b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 11:45:51 +0100 Subject: [PATCH 003/116] More spec improvements --- docs/specification.md | 60 +++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index f3790b0..6757a25 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -145,6 +145,8 @@ WORD := /[^\s\{\}\\]+/ **NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. +**NOTE:** All attribute values are strings, so numeric-looking values are still expressed as strings (e.g. `depth="1"`). + ## Semantic Structure All elements have these attributes: @@ -199,7 +201,7 @@ The type of the paragraph includes a semantic hint: | Attribute | Function | | --------- | -------------------------------------------------------------------------------------------------------------------- | -| `first` | An integer that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | +| `first` | An integer string that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. 
| ### Figures: `img` @@ -234,7 +236,7 @@ If a pre contains inline elements, these will still be parsed and apply their st | Attribute | Function | | --------- | ----------------------------------------------------------------------- | -| `depth` | `1`, `2` or `3`. Defines how many levels of headings shall be included. | +| `depth` | String `1`, `2` or `3`. Defines how many levels of headings shall be included. | Renders a table of contents for the current document. @@ -261,14 +263,17 @@ will have two identical list items. ### Tables: `table` -Allowed Items: `columns`, `row`, `group` +**Allowed Items:** `columns`, `row`, `group` + +Tables are made up of an optional header row (`columns`) followed by a sequence of `row` and `group` elements. -> TODO: Spec out tables proper. -> `columns` is basically a `row` with only column headings -> `row` is just a row with cells -> all rows must contain the same amount of cell span -> `group` is a heading for subsequent rows -> `row.title` attribute is displayed in a column left of the first column, the top-left element is always empty +- `columns` defines the header labels and the column count. +- `row` defines a data row. +- `group` provides a section heading that applies to subsequent rows until the next group or the end of the table. + +All `row` and `columns` elements must resolve to the same number of columns after applying `colspan`. +If a `row` uses the `title` attribute or a `group` is present, renderers must reserve a leading title column. +In that case, the header row should have an empty leading cell before the column headers. ## Table Elements @@ -276,7 +281,7 @@ Allowed Items: `columns`, `row`, `group` **Allowed Items:** `td` -This element contains cells +This element contains the header cells for each column. 
### Rows: `row` @@ -298,16 +303,20 @@ A *row group* is a row that contains a single heading-style cell that labels the | Attribute | Function | | --------- | -------------------------------------------------- | -| `colspan` | Integer defining how many columns this cell spans. | +| `colspan` | Integer string defining how many columns this cell spans. | This element contains the contents of a table cell. -> TODO: Similar to `li`, it can be string or block-sequence. +Like `li`, a `td` can either contain a single string or a nested block sequence. ## Inline Text These elements are all allowed inside a paragraph-like content and can typically be nested. +*Inline Text* can either be a string literal, a literal block or a list. + +If the text is a list, it allows the use of inline elements like `\em` or `\mono`. + ### Plain Text This is normal plain text and has no special meaning. @@ -355,25 +364,10 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically **Nesting:** No -Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. +| Element | Attribute | Function | +| ----------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | +| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | +| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | -> TODO: Add `fmt` attribute: -> `\date` takes an attribute fmt which can be -> - "year" (2025) -> - "month" (December), -> - "day" (22th) -> - "weekday" (monday) -> - "short" (22.12.2025) -> - "long" (22th of December 2025) -> - "relative" (two days ago, two months ago, ...) 
-> -> `\time` takes an attribute fmt which can be -> - "short" (09:41) -> - "long" (09:41:25) -> - "rough" (early morning, morning, noon, afternoon, evening, late in the night, ...) -> - "relative" (two minutes ago, two days ago, ...) -> -> `\datetime` takes an attribute fmt which can be -> - *To be done* -> - ... -> \ No newline at end of file +Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. From 71f0bf86da836dabfdb623176012607e6827f388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 12:05:05 +0100 Subject: [PATCH 004/116] Adds example files. --- examples/assets/diagram.svg | 5 ++ examples/featurematrix.hdoc | 101 ++++++++++++++++++++++++++++++++++++ examples/guide.hdoc | 83 +++++++++++++++++++++++++++++ examples/tables.hdoc | 36 +++++++++++++ 4 files changed, 225 insertions(+) create mode 100644 examples/assets/diagram.svg create mode 100644 examples/featurematrix.hdoc create mode 100644 examples/guide.hdoc create mode 100644 examples/tables.hdoc diff --git a/examples/assets/diagram.svg b/examples/assets/diagram.svg new file mode 100644 index 0000000..a9d4754 --- /dev/null +++ b/examples/assets/diagram.svg @@ -0,0 +1,5 @@ + + + HyperDoc + Example Asset + \ No newline at end of file diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc new file mode 100644 index 0000000..c900af3 --- /dev/null +++ b/examples/featurematrix.hdoc @@ -0,0 +1,101 @@ +hdoc "2.0" + +h1 { Small Computer Feature Matrix } + +table { + columns { + td "Ashet Home Computer" + td { \link(url="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } + td { \link(url="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } + td { \link(url="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } + td { \link(url="https://www.codycomputer.org/") "Cody Computer" } + } + 
row(title="CPU Bus Width") { + td "32 bit" + td "64 bit" + td "8 bit" + td "32 bit" + td "8 bit" + } + row(title="CPU Architecture") { + td "Arm Cortex-M33" + td "Arm Cortex-A72" + td "AVRe+" + td "Arm Cortex-M0+" + td "6502" + } + row(title="CPU Model") { + td { \link(url="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } + td { \link(url="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } + td { \link(url="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } + td { \link(url="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } + td { \link(url="https://wdc65xx.com/integrated-circuit") "W65C02S" } + } + row(title="CPU Cores") { + td "2" + td "4" + td "1" + td "2" + td "1" + } + row(title="CPU Clock") { + td "150 MHz" + td "1.8 GHz" + td "16 MHz" + td "133 MHz" + td "1 MHz" + } + row(title="System Memory") { + td "8 MB" + td "1, 2, 4 or 8 GB" + td "2 KB" + td "264 kB" + td "64 kB" + } + row(title="Comprehensible") { + td "✅" + td "❌" + td "✅" + td "✅" + td "✅" + } + row(title="Modern I/O") { + td "✅" + td "✅" + td "❌" + td { ❌\sup{1} } + td "❌" + } + row(title="Modular Design") { + td "✅" + td "❌" + td "❌" + td "✅" + td { ✅\sup{2} } + } + row(title="Full Documentation") { + td "✅" + td "❌" + td "✅" + td "✅" + td "✅" + } + row(title="Ethernet") { + td "✅" + td "✅" + td "❌" + td "❌" + td "❌" + } + row(title="Parallax Propeller") { + td { ✅ (\link(url="https://www.parallax.com/propeller-2"){Propeller 2}) } + td "❌" + td "❌" + td "❌" + td { ✅ (\link(url="https://www.parallax.com/propeller-1"){Propeller 1}) } + } +} + +p { \sup{1}: Neotron Pico uses PS/2 for mouse/keyboard and VGA for video. } + +p { \sup{2}: Cody Computer has a single cartridge that can be added. 
} diff --git a/examples/guide.hdoc b/examples/guide.hdoc new file mode 100644 index 0000000..82ed458 --- /dev/null +++ b/examples/guide.hdoc @@ -0,0 +1,83 @@ +hdoc "2.0" + +h1(id="intro", lang="en") { HyperDoc 2.0 Examples } + +toc(depth="2") {} + +h2(id="paragraphs") { Paragraphs and Inline Text } + +p(id="p-basic") { + This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. +} + +p(lang="de") { + Dieser Absatz zeigt das Attribut \mono{lang} auf Blockebene. +} + +p "This paragraph uses a string literal body instead of a list." + +p { + Inline scripts support \mono(syntax="zig"){const version = "2.0";} as well as sub/superscripts like H\sub{2}O and x\sup{2}. +} + +p { + Links can target \link(ref="fig-diagram"){other blocks} or external \link(url="https://ashet.computer"){resources}. +} + +note { Notes highlight supportive information. } +warning { Warnings call out risky behavior. } +danger { Danger paragraphs emphasize critical hazards. } +tip { Tips provide actionable hints. } +quote { Quotes include sourced or emphasized wording. } +spoiler { Spoilers hide key story information until revealed. } + +h2(id="literals") { Literal and Preformatted Blocks } + +p: +| Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. +| They are introduced by a trailing colon. +| You can capture snippets without escaping anything. + +pre(id="code-sample", syntax="zig") { +test { + const message = "HyperDoc"; + const answer = \mono(syntax="zig"){42}; +} +} + +h2(id="lists") { Lists } + +ul { + li { p { Apples } } + li "Bananas" + li { p { \em{Cucumbers} with inline markup. } } +} + +ol(first="3") { + li { p { Start counting at three. } } + li "Continue with a string item." + li { p { Finish the sequence. } } +} + +h2(id="media") { Figures } + +p { + The image below has a caption, alt text, and a relative asset path. 
+} + +img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/diagram.svg") { + HyperDoc is centered inside a rounded rectangle. +} + +h2(id="dates") { Dates and Times } + +p { + The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00}. + A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. +} + +h2(id="table-ref") { Tables } + +p { + See the dedicated tables example file for row groups and colspan usage. +} diff --git a/examples/tables.hdoc b/examples/tables.hdoc new file mode 100644 index 0000000..5adb144 --- /dev/null +++ b/examples/tables.hdoc @@ -0,0 +1,36 @@ +hdoc "2.0" + +h1(id="tables") { HyperDoc 2.0 Table Examples } + +toc(depth="1") {} + +h2(id="table-basic") { Table Structure } + +table(id="inventory") { + columns { + td "Item" + td "Quantity" + td "Notes" + } + group { Fresh Produce } + row(title="Fruit") { + td "Apples" + td "12" + td { p { Delivered on \date(fmt="short"){2025-02-08}. } } + } + row(title="Vegetables") { + td "Carrots" + td "7" + td { p { Store at \time(fmt="rough"){08:00:00}. } } + } + group { Pantry } + row(title="Dry Goods") { + td "Rice" + td "3" + td { p { Packed on \datetime(fmt="relative"){2025-02-08T08:00:00Z}. } } + } + row(title="Bulk") { + td(colspan="2") { p { This cell spans two columns. } } + td "Requires label" + } +} From 6a8f864011efaaf24cbccf918a81531ce0e66c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 13:22:22 +0100 Subject: [PATCH 005/116] Removes the dependencies for now. 
--- build.zig.zon | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 9b78c87..00a368a 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -4,14 +4,14 @@ .fingerprint = 0xfd1a4802abc4739e, .dependencies = .{ - .parser_toolkit = .{ - .url = "git+https://github.com/ikskuh/parser-toolkit.git#62e0a3dca3632bb361df59407b2d7805280ab1b9", - .hash = "parser_toolkit-0.1.0-baYGPUVCEwBaVmu09ORh0lLlVjRaJ489TdSIdTa_8VWg", - }, - .args = .{ - .url = "git+https://github.com/ikskuh/zig-args.git#8ae26b44a884ff20dca98ee84c098e8f8e94902f", - .hash = "args-0.0.0-CiLiqojRAACGzDRO7A9dw7kWSchNk29caJZkXuMCb0Cn", - }, + // .parser_toolkit = .{ + // .url = "git+https://github.com/ikskuh/parser-toolkit.git#62e0a3dca3632bb361df59407b2d7805280ab1b9", + // .hash = "parser_toolkit-0.1.0-baYGPUVCEwBaVmu09ORh0lLlVjRaJ489TdSIdTa_8VWg", + // }, + // .args = .{ + // .url = "git+https://github.com/ikskuh/zig-args.git#8ae26b44a884ff20dca98ee84c098e8f8e94902f", + // .hash = "args-0.0.0-CiLiqojRAACGzDRO7A9dw7kWSchNk29caJZkXuMCb0Cn", + // }, }, .paths = .{""}, From 83bf570b18e241d21d77c6567f9051bc1de49e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 15:24:10 +0100 Subject: [PATCH 006/116] Adds AGENTS.md --- AGENTS.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..05ef7d4 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,12 @@ +# AGENTS + +## General guidelines + +- Keep changes focused and incremental; prefer small, reviewable commits. +- Follow existing code style and formatting conventions. +- Use `zig fmt` on Zig source files after edits. +- Ensure new tests are added or updated when behavior changes. +- Run relevant tests (`zig build test`) when making code changes. +- Run `zig build` to validate the main application still compiles +- Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/`. 
+- Avoid editing documentation unless the request explicitly asks for it. From 460bd942f42b0119967ccccf232d264e180878e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 22 Dec 2025 15:36:08 +0100 Subject: [PATCH 007/116] Vibecoded: Adds tokenizer for the new HyperDoc format --- docs/specification.md | 9 +- src/hyperdoc.zig | 198 +++++++++++++++++++++++++++++++++++++++++- src/main.zig | 27 ++++-- src/testsuite.zig | 123 ++++++++++++++++++++------ 4 files changed, 314 insertions(+), 43 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 6757a25..9802700 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -124,23 +124,22 @@ Short notes on grammar notation: ``` document := HEADER { block } -block := IDENTIFIER [ attribute_list ] body +block := WORD [ attribute_list ] body body := list | literal | STRING literal := ":" "\n" { LITERAL_LINE } list := "{" { escape | inline | block | WORD } "}" escape := "\\" | "\{" | "\}" -inline := "\" IDENTIFIER [ attribute_list ] body +inline := "\" WORD [ attribute_list ] body attribute_list := "(" [ attribute { "," attribute } ] ")" -attribute := IDENTIFIER "=" STRING +attribute := WORD "=" STRING -IDENTIFIER := /\b\w+\b/ HEADER := /^hdoc\s+"2.0"\s*$/ STRING := /"(\\.|[^"\r\n])*"/ LITERAL_LINE := /^\s*\|(.*)$/ -WORD := /[^\s\{\}\\]+/ +WORD := /[^\s\{\}\\\"(),=:]+/ ``` **NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c04b0f2..2a49308 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -19,7 +19,198 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. 
pub const Block = union(enum) { - // TODO + placeholder: void, +}; + +/// A token emitted by the HyperDoc tokenizer. +pub const Token = struct { + pub const Tag = enum { + eof, + word, + string_literal, + unterminated_string_literal, + literal_line, + newline, + @"{", + @"}", + @"(", + @")", + @",", + @"=", + @":", + @"\\", + invalid_character, + }; + + tag: Tag, + offset: usize, + len: usize, + + /// Returns the slice of the original input covered by this token. + pub fn slice(token: Token, input: []const u8) []const u8 { + return input[token.offset .. token.offset + token.len]; + } +}; + +/// Tokenizes HyperDoc source text incrementally. +pub const Tokenizer = struct { + input: []const u8, + index: usize = 0, + line_start: bool = true, + finished: bool = false, + + /// Creates a tokenizer for the provided input. + pub fn init(input: []const u8) Tokenizer { + return .{ .input = input }; + } + + /// Returns the next token, or null after emitting EOF once. + pub fn next(tok: *Tokenizer) ?Token { + if (tok.finished) { + return null; + } + + while (tok.index < tok.input.len) { + const start = tok.index; + const ch = tok.input[tok.index]; + + if (tok.line_start) { + const literal = tok.scanLiteralLine(); + if (literal) |token| { + return token; + } + } + + if (tok.isNewline(ch)) { + const consumed = tok.consumeNewline(); + tok.line_start = true; + return .{ .tag = .newline, .offset = start, .len = consumed }; + } + + if (tok.isHorizontalWhitespace(ch)) { + tok.index += 1; + tok.line_start = false; + continue; + } + + tok.line_start = false; + + switch (ch) { + '{' => return tok.simpleToken(.@"{"), + '}' => return tok.simpleToken(.@"}"), + '(' => return tok.simpleToken(.@"("), + ')' => return tok.simpleToken(.@")"), + ',' => return tok.simpleToken(.@","), + '=' => return tok.simpleToken(.@"="), + ':' => return tok.simpleToken(.@":"), + '\\' => return tok.simpleToken(.@"\\"), + '"' => return tok.scanStringLiteral(), + else => {}, + } + + if (tok.isWordChar(ch)) { + 
return tok.scanWord(); + } + + // Non-obvious fallback: we still emit a token for unknown bytes + // so callers can recover and keep walking the stream. + tok.index += 1; + return .{ .tag = .invalid_character, .offset = start, .len = 1 }; + } + + tok.finished = true; + return .{ .tag = .eof, .offset = tok.input.len, .len = 0 }; + } + + /// Emits a single-character token at the current offset. + fn simpleToken(tok: *Tokenizer, tag: Token.Tag) Token { + const start = tok.index; + tok.index += 1; + return .{ .tag = tag, .offset = start, .len = 1 }; + } + + /// Scans a quoted string or an unterminated string literal. + fn scanStringLiteral(tok: *Tokenizer) Token { + const start = tok.index; + tok.index += 1; + while (tok.index < tok.input.len) { + const ch = tok.input[tok.index]; + if (ch == '"') { + tok.index += 1; + return .{ .tag = .string_literal, .offset = start, .len = tok.index - start }; + } + if (tok.isNewline(ch)) { + // We stop before the newline so the next call can emit it. + return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; + } + if (ch == '\\') { + // Escape sequences consume the next byte, even if it is a quote. + if (tok.index + 1 >= tok.input.len) { + tok.index = tok.input.len; + break; + } + tok.index += 2; + continue; + } + tok.index += 1; + } + + return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; + } + + /// Scans a WORD token as defined by the grammar. + fn scanWord(tok: *Tokenizer) Token { + const start = tok.index; + tok.index += 1; + while (tok.index < tok.input.len and tok.isWordChar(tok.input[tok.index])) { + tok.index += 1; + } + return .{ .tag = .word, .offset = start, .len = tok.index - start }; + } + + /// Scans a literal line token if the current position is at a line start. 
+ fn scanLiteralLine(tok: *Tokenizer) ?Token { + const start = tok.index; + var cursor = tok.index; + while (cursor < tok.input.len and tok.isHorizontalWhitespace(tok.input[cursor])) { + cursor += 1; + } + if (cursor >= tok.input.len or tok.input[cursor] != '|') { + return null; + } + cursor += 1; + while (cursor < tok.input.len and !tok.isNewline(tok.input[cursor])) { + cursor += 1; + } + tok.index = cursor; + tok.line_start = false; + return .{ .tag = .literal_line, .offset = start, .len = cursor - start }; + } + + /// Consumes a newline, including CRLF sequences. + fn consumeNewline(tok: *Tokenizer) usize { + if (tok.input[tok.index] == '\r') { + if (tok.index + 1 < tok.input.len and tok.input[tok.index + 1] == '\n') { + tok.index += 2; + return 2; + } + tok.index += 1; + return 1; + } + tok.index += 1; + return 1; + } + fn isWordChar(_: *Tokenizer, ch: u8) bool { + return !std.ascii.isControl(ch) and !std.ascii.isWhitespace(ch) and ch != '{' and ch != '}' and ch != '\\' and ch != '"' and ch != '(' and ch != ')' and ch != ',' and ch != '=' and ch != ':'; + } + + fn isHorizontalWhitespace(_: *Tokenizer, ch: u8) bool { + return ch == ' ' or ch == '\t'; + } + + fn isNewline(_: *Tokenizer, ch: u8) bool { + return ch == '\n' or ch == '\r'; + } }; /// Parses a HyperDoc document. @@ -37,7 +228,10 @@ pub fn parse( _ = plain_text; _ = diagnostics; - @panic("TODO: Implement this"); + return .{ + .arena = arena, + .contents = &[_]Block{}, + }; } /// A diagnostic message. diff --git a/src/main.zig b/src/main.zig index 3cdb76c..6810065 100644 --- a/src/main.zig +++ b/src/main.zig @@ -13,16 +13,27 @@ pub fn main() !u8 { else std.heap.smp_allocator; - // TODO: Parse arguments and load file. 
- const document = - \\hdoc "2.0" - \\ - ; + const args = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, args); - var doc = try hdoc.parse(allocator, document, null); - defer doc.deinit(); + if (args.len < 2) { + const stderr = std.fs.File.stderr().deprecatedWriter(); + try stderr.print("usage: {s} \n", .{args[0]}); + return 1; + } - // TODO: Dump AST + const path = args[1]; + const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); + defer allocator.free(document); + + var tokenizer = hdoc.Tokenizer.init(document); + var stdout = std.fs.File.stdout().deprecatedWriter(); + while (tokenizer.next()) |token| { + try stdout.print("{s} \"{f}\"\n", .{ @tagName(token.tag), std.zig.fmtString(token.slice(document)) }); + if (token.tag == .eof) { + break; + } + } return 0; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 58450b5..ee42e38 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -6,35 +6,102 @@ fn testAcceptDocument(document: []const u8) !void { defer doc.deinit(); } -test "empty document" { - try testAcceptDocument( - \\hdoc "2.0" +const TokenExpect = struct { + tag: hdoc.Token.Tag, + lexeme: []const u8, +}; + +fn expectTokens(input: []const u8, expected: []const TokenExpect) !void { + var tokenizer = hdoc.Tokenizer.init(input); + var index: usize = 0; + while (true) { + const token_opt = tokenizer.next(); + if (token_opt == null) { + break; + } + const token = token_opt.?; + try std.testing.expect(index < expected.len); + try std.testing.expectEqual(expected[index].tag, token.tag); + try std.testing.expectEqualStrings(expected[index].lexeme, token.slice(input)); + index += 1; + if (token.tag == .eof) { + break; + } + } + try std.testing.expectEqual(expected.len, index); + try std.testing.expect(tokenizer.next() == null); +} + +test "tokenizes header line" { + try expectTokens("hdoc \"2.0\"\n", &.{ + .{ .tag = .word, .lexeme = "hdoc" }, + .{ .tag = .string_literal, .lexeme = "\"2.0\"" }, + 
.{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes literal lines" { + try expectTokens("p:\n| code\n |more\n", &.{ + .{ .tag = .word, .lexeme = "p" }, + .{ .tag = .@":", .lexeme = ":" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .literal_line, .lexeme = "| code" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .literal_line, .lexeme = " |more" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes unterminated string" { + try expectTokens("\"oops\n", &.{ + .{ .tag = .unterminated_string_literal, .lexeme = "\"oops" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes word and escapes" { + try expectTokens("{alpha \\{ -dash}", &.{ + .{ .tag = .@"{", .lexeme = "{" }, + .{ .tag = .word, .lexeme = "alpha" }, + .{ .tag = .@"\\", .lexeme = "\\" }, + .{ .tag = .@"{", .lexeme = "{" }, + .{ .tag = .word, .lexeme = "-dash" }, + .{ .tag = .@"}", .lexeme = "}" }, + .{ .tag = .eof, .lexeme = "" }, + }); +} + +test "tokenizes mixed sequences" { + try expectTokens( + "note(id=\"x\"){\n\\em \"hi\", -dash\n}\n", + &.{ + .{ .tag = .word, .lexeme = "note" }, + .{ .tag = .@"(", .lexeme = "(" }, + .{ .tag = .word, .lexeme = "id" }, + .{ .tag = .@"=", .lexeme = "=" }, + .{ .tag = .string_literal, .lexeme = "\"x\"" }, + .{ .tag = .@")", .lexeme = ")" }, + .{ .tag = .@"{", .lexeme = "{" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .@"\\", .lexeme = "\\" }, + .{ .tag = .word, .lexeme = "em" }, + .{ .tag = .string_literal, .lexeme = "\"hi\"" }, + .{ .tag = .@",", .lexeme = "," }, + .{ .tag = .word, .lexeme = "-dash" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .@"}", .lexeme = "}" }, + .{ .tag = .newline, .lexeme = "\n" }, + .{ .tag = .eof, .lexeme = "" }, + }, ); } -test "invalid document" { - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\ - )); - try 
std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc - )); - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc { - )); -} - -test "invalid version" { - try std.testing.expectError(error.InvalidFormat, testAcceptDocument( - \\hdoc 2.0 - )); - try std.testing.expectError(error.InvalidVersion, testAcceptDocument( - \\hdoc "" - )); - try std.testing.expectError(error.InvalidVersion, testAcceptDocument( - \\hdoc "1.2" - )); - try std.testing.expectError(error.InvalidVersion, testAcceptDocument( - \\hdoc "1.0" - )); +test "tokenizes invalid characters" { + try expectTokens("\x00", &.{ + .{ .tag = .invalid_character, .lexeme = "\x00" }, + .{ .tag = .eof, .lexeme = "" }, + }); } From f6e67573ebb79a3760835d84a551d37d48d8343b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 23 Dec 2025 09:29:06 +0100 Subject: [PATCH 008/116] Starts to rework specification to have improved definition and setup. --- AGENTS.md | 2 +- docs/specification.md | 186 +++++++++++++++-------------------- examples/demo.hdoc | 97 ++++++++++++++++++ examples/featurematrix.hdoc | 24 ++--- examples/guide.hdoc | 8 +- src/hyperdoc.zig | 191 ------------------------------------ src/main.zig | 9 +- src/testsuite.zig | 100 ------------------- test/parser/stress.hdoc | 74 ++++++++++++++ 9 files changed, 267 insertions(+), 424 deletions(-) create mode 100644 examples/demo.hdoc create mode 100644 test/parser/stress.hdoc diff --git a/AGENTS.md b/AGENTS.md index 05ef7d4..0bb6695 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,5 +8,5 @@ - Ensure new tests are added or updated when behavior changes. - Run relevant tests (`zig build test`) when making code changes. - Run `zig build` to validate the main application still compiles -- Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/`. +- Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. 
- Avoid editing documentation unless the request explicitly asks for it. diff --git a/docs/specification.md b/docs/specification.md index 9802700..996c00a 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -7,108 +7,25 @@ It sits somewhat between LaTeX and Markdown and tries to be way simpler to parse ## Syntax Overview ```hdoc -hdoc "2.0" +hdoc(version="2.0"); -h1{HyperDoc 2.0} - -toc{} - -h2{Paragraphs} - -p { This is a simple paragraph containing text. } - -p(id="foo") { - This is a paragraph with an attribute "id" with the value "foo". -} +h1 "Introduction" p { - This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. - Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). - We can also \link(ref="foo"){link to other parts of a document} or \link(url="https://ashet.computer"){to websites}. - With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. + This is my first HyperDoc 2.0 document! } -h2{Special Paragraphs} - -note { HyperDoc 2.0 also supports different types of paragraphs. } -warning { These should affect rendering, and have well-defined semantics attached to them. } -danger { You shall not assume any specific formatting of these elements though. } -tip { They typically have a standardized style though. } -quote { You shall not pass! } -spoiler { Nobody expects the Spanish Inquisition! } - -h2{Literals and Preformatted Text} - -p: -| we can also use literal lines. -| these are introduced by a trailing colon (':') at the end of a line. -| each following line that starts with whitespace followed by a pipe character ('|') -| is then part of the contents. -| Literal lines don't perform any parsing, so they don't require any escaping of characters. 
-| This is really useful for code blocks: - pre(syntax="c"): | #include -| int main(int argc, char const * argv[]) { -| printf("Hello, World!\n"); +| int main(int argc, char *argv[]) { +| printf("Hello, World!"); | return 0; | } - -h2{String Literals} - -p "It's also possible to use a string literal for bodies if desired." - -p { \em "Magic" is a simple way to highlight single words or text with escaping in inlines. } - -h2{Images & Figures} - -p { We can also add images to our documents: } - -img(id="fig1", path="./preview.jpeg") { If this is non-empty, it's a figure caption. } - -h2{Lists} - -p { Also lists are possible: } - -h3{Unordered Lists} - -ul { - li { p { Apples } } - li { p { Bananas } } - li { p { Cucumbers } } -} - -h3{Ordered Lists} - -ol { - li { p { Collect underpants } } - li { p { ? } } - li { p { Profit } } -} - -h2{Tables} - -p { And last, but not least, we can have tables: } - -table { - columns { - td "Key" - td "Value" - } - row { - td "Author" - td { Felix "xq" Queißner } - } - row { - td "Date of Invention" - td { \date{2025-12-17} } - } -} ``` ## Grammar -This grammar describes the text format +This grammar describes the hypertext format. Short notes on grammar notation: @@ -121,13 +38,13 @@ Short notes on grammar notation: - Whitespace is assumed to be ignored between tokens unless matched by a literal or regex, so tokens are typically separated by whitespace - Upper case elements are roughly tokens, while lowercase elements are rules. 
-``` -document := HEADER { block } +```ebnf +document := { block } block := WORD [ attribute_list ] body -body := list | literal | STRING -literal := ":" "\n" { LITERAL_LINE } +body := ";" | list | verbatim | STRING +verbatim := ":" "\n" { VERBATIM_LINE } list := "{" { escape | inline | block | WORD } "}" escape := "\\" | "\{" | "\}" @@ -136,9 +53,8 @@ inline := "\" WORD [ attribute_list ] body attribute_list := "(" [ attribute { "," attribute } ] ")" attribute := WORD "=" STRING -HEADER := /^hdoc\s+"2.0"\s*$/ STRING := /"(\\.|[^"\r\n])*"/ -LITERAL_LINE := /^\s*\|(.*)$/ +VERBATIM_LINE := /^\s*\|(.*)$/ WORD := /[^\s\{\}\\\"(),=:]+/ ``` @@ -146,6 +62,60 @@ WORD := /[^\s\{\}\\\"(),=:]+/ **NOTE:** All attribute values are strings, so numeric-looking values are still expressed as strings (e.g. `depth="1"`). +## Element Overview + +| Element | Element Type | Allowed Children | Attributes | +| ----------------------------------------------------------- | ------------ | ---------------------------- | ------------------------------------ | +| *Document* | Document | `hdoc`, Blocks | | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author` | +| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | +| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | +| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | +| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | +| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | +| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | +| `toc` | Block | - | `lang`, \[`id`\], `depth` | +| `table` | Block | Table Rows | `lang`, \[`id`\] | +| `columns` | Table Row | `td` ≥ 1 | `lang` | +| `group` | Table Row | Text Body | `lang`, | +| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `\em` | Text Body | Text Body | `lang` | +| `\mono` | Text Body | 
Text Body | `lang`, `syntax` |
+| `\strike` | Text Body | Text Body | `lang` |
+| `\sub`, `\sup` | Text Body | Text Body | `lang` |
+| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) |
+| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` |
+| *Plain Text* | Text Body | - | |
+| *String* | Text Body | - | |
+| *Verbatim* | Text Body | - | |
+
+Notes:
+
+- The attribute `id` is only allowed when the element is a top-level element (direct child of the document)
+- The attributes `ref` and `uri` on a `\link` are mutually exclusive
+- `\date`, `\time` and `\datetime` cannot contain other text body items except for plain text, string or verbatim content.
+
+## Attribute Overview
+
+| Attribute | Required | Allowed Values | Description |
+| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- |
+| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. |
+| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the element's contents. |
+| `title` | No | *Any* | Sets the title of the document or the table row. |
+| `author` | No | *Any* | Sets the author of the document. |
+| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. |
+| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. |
+| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. |
+| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. |
+| `syntax` | No | *See element documentation* | Hints the syntax highlighter how the element's contents shall be highlighted. |
+| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included.
| +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | | + ## Semantic Structure All elements have these attributes: @@ -198,8 +168,8 @@ The type of the paragraph includes a semantic hint: #### Ordered List `ol` -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------------------------------------------- | +| Attribute | Function | +| --------- | --------------------------------------------------------------------------------------------------------------------------- | | `first` | An integer string that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | ### Figures: `img` @@ -233,8 +203,8 @@ If a pre contains inline elements, these will still be parsed and apply their st **Allowed Items:** *none* -| Attribute | Function | -| --------- | ----------------------------------------------------------------------- | +| Attribute | Function | +| --------- | ------------------------------------------------------------------------------ | | `depth` | String `1`, `2` or `3`. Defines how many levels of headings shall be included. | Renders a table of contents for the current document. @@ -300,8 +270,8 @@ A *row group* is a row that contains a single heading-style cell that labels the **Allowed Items:** Block Elements *or* String Content. -| Attribute | Function | -| --------- | -------------------------------------------------- | +| Attribute | Function | +| --------- | --------------------------------------------------------- | | `colspan` | Integer string defining how many columns this cell spans. 
| This element contains the contents of a table cell. @@ -354,8 +324,8 @@ Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to | Attribute | Function | | --------- | -------------------------------------------------------------------------------------------------------- | -| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `url`. | -| `url` | Points the link to the resource inside the `url`. Mutually exclusive to `ref`. | +| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `uri`. | +| `uri` | Points the link to the resource inside the `uri`. Mutually exclusive to `ref`. | Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. @@ -363,10 +333,10 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically **Nesting:** No -| Element | Attribute | Function | -| ----------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | -| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | -| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | +| Element | Attribute | Function | +| ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | +| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | +| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | +| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). 
| Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. diff --git a/examples/demo.hdoc b/examples/demo.hdoc new file mode 100644 index 0000000..a092e91 --- /dev/null +++ b/examples/demo.hdoc @@ -0,0 +1,97 @@ +hdoc(version="2.0", lang="en", title="HyperDoc \"2.0\" Demonstration"); + +h1{HyperDoc 2.0} + +toc; + +h2{Paragraphs} + +p { This is a simple paragraph containing text. } + +p(id="foo") { + This is a paragraph with an attribute "id" with the value "foo". +} + +p { + This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. + Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). + We can also \link(ref="foo"){link to other parts of a document} or \link(uri="https://ashet.computer"){to websites}. + With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. +} + +h2{Special Paragraphs} + +note { HyperDoc 2.0 also supports different types of paragraphs. } +warning { These should affect rendering, and have well-defined semantics attached to them. } +danger { You shall not assume any specific formatting of these elements though. } +tip { They typically have a standardized style though. } +quote { You shall not pass! } +spoiler { Nobody expects the Spanish Inquisition! } + +h2{Verbatim and Preformatted Text} + +p: +| We can also use verbatim text mode. +| This is introduced by a trailing colon (':') at the end of a line. +| Each following line that starts with whitespace followed by a pipe character ('|') +| is then part of the contents. +| Verbatim text doesn't perform any interpretation of its contents, so no escaping is required. 
+| This is really useful for code blocks: + +pre(syntax="c"): +| #include +| int main(int argc, char const * argv[]) { +| printf("Hello, World!\n"); +| return 0; +| } + +h2{String Literals} + +p "It's also possible to use a string literal for bodies if desired." + +p { \em "Magic" is a simple way to highlight single words or text with escaping in inlines. } + +h2{Images & Figures} + +p { We can also add images to our documents: } + +img(id="fig1", path="./preview.jpeg") { If this is non-empty, it's a figure caption. } + +h2{Lists} + +p { Also lists are possible: } + +h3{Unordered Lists} + +ul { + li { p { Apples } } + li { p { Bananas } } + li { p { Cucumbers } } +} + +h3{Ordered Lists} + +ol { + li { p { Collect underpants } } + li { p { ? } } + li { p { Profit } } +} + +h2{Tables} + +p { And last, but not least, we can have tables: } + +table { + columns { + td "Key" + td "Value" + } + row { + td "Author" + td { p { Felix "xq" Queißner } } + } + row { + td "Date of Invention" + td { p { \date{2025-12-17} } } + } +} diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index c900af3..d72f095 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -1,14 +1,14 @@ -hdoc "2.0" +hdoc(version="2.0") h1 { Small Computer Feature Matrix } table { columns { td "Ashet Home Computer" - td { \link(url="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } - td { \link(url="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } - td { \link(url="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } - td { \link(url="https://www.codycomputer.org/") "Cody Computer" } + td { \link(uri="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } + td { \link(uri="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } + td { \link(uri="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } + td { 
\link(uri="https://www.codycomputer.org/") "Cody Computer" } } row(title="CPU Bus Width") { td "32 bit" @@ -25,11 +25,11 @@ table { td "6502" } row(title="CPU Model") { - td { \link(url="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } - td { \link(url="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } - td { \link(url="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } - td { \link(url="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } - td { \link(url="https://wdc65xx.com/integrated-circuit") "W65C02S" } + td { \link(uri="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } + td { \link(uri="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } + td { \link(uri="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } + td { \link(uri="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } + td { \link(uri="https://wdc65xx.com/integrated-circuit") "W65C02S" } } row(title="CPU Cores") { td "2" @@ -88,11 +88,11 @@ table { td "❌" } row(title="Parallax Propeller") { - td { ✅ (\link(url="https://www.parallax.com/propeller-2"){Propeller 2}) } + td { ✅ (\link(uri="https://www.parallax.com/propeller-2"){Propeller 2}) } td "❌" td "❌" td "❌" - td { ✅ (\link(url="https://www.parallax.com/propeller-1"){Propeller 1}) } + td { ✅ (\link(uri="https://www.parallax.com/propeller-1"){Propeller 1}) } } } diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 82ed458..94c70c4 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -1,4 +1,4 @@ -hdoc "2.0" +hdoc(version="2.0") h1(id="intro", lang="en") { HyperDoc 2.0 Examples } @@ -7,11 +7,11 @@ toc(depth="2") {} h2(id="paragraphs") { Paragraphs and Inline Text } p(id="p-basic") { - This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. 
+ This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. } p(lang="de") { - Dieser Absatz zeigt das Attribut \mono{lang} auf Blockebene. + Dieser Absatz zeigt das Attribut \mono{lang} auf Blockebene. } p "This paragraph uses a string literal body instead of a list." @@ -21,7 +21,7 @@ p { } p { - Links can target \link(ref="fig-diagram"){other blocks} or external \link(url="https://ashet.computer"){resources}. + Links can target \link(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } note { Notes highlight supportive information. } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2a49308..c91ef12 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -22,197 +22,6 @@ pub const Block = union(enum) { placeholder: void, }; -/// A token emitted by the HyperDoc tokenizer. -pub const Token = struct { - pub const Tag = enum { - eof, - word, - string_literal, - unterminated_string_literal, - literal_line, - newline, - @"{", - @"}", - @"(", - @")", - @",", - @"=", - @":", - @"\\", - invalid_character, - }; - - tag: Tag, - offset: usize, - len: usize, - - /// Returns the slice of the original input covered by this token. - pub fn slice(token: Token, input: []const u8) []const u8 { - return input[token.offset .. token.offset + token.len]; - } -}; - -/// Tokenizes HyperDoc source text incrementally. -pub const Tokenizer = struct { - input: []const u8, - index: usize = 0, - line_start: bool = true, - finished: bool = false, - - /// Creates a tokenizer for the provided input. - pub fn init(input: []const u8) Tokenizer { - return .{ .input = input }; - } - - /// Returns the next token, or null after emitting EOF once. 
- pub fn next(tok: *Tokenizer) ?Token { - if (tok.finished) { - return null; - } - - while (tok.index < tok.input.len) { - const start = tok.index; - const ch = tok.input[tok.index]; - - if (tok.line_start) { - const literal = tok.scanLiteralLine(); - if (literal) |token| { - return token; - } - } - - if (tok.isNewline(ch)) { - const consumed = tok.consumeNewline(); - tok.line_start = true; - return .{ .tag = .newline, .offset = start, .len = consumed }; - } - - if (tok.isHorizontalWhitespace(ch)) { - tok.index += 1; - tok.line_start = false; - continue; - } - - tok.line_start = false; - - switch (ch) { - '{' => return tok.simpleToken(.@"{"), - '}' => return tok.simpleToken(.@"}"), - '(' => return tok.simpleToken(.@"("), - ')' => return tok.simpleToken(.@")"), - ',' => return tok.simpleToken(.@","), - '=' => return tok.simpleToken(.@"="), - ':' => return tok.simpleToken(.@":"), - '\\' => return tok.simpleToken(.@"\\"), - '"' => return tok.scanStringLiteral(), - else => {}, - } - - if (tok.isWordChar(ch)) { - return tok.scanWord(); - } - - // Non-obvious fallback: we still emit a token for unknown bytes - // so callers can recover and keep walking the stream. - tok.index += 1; - return .{ .tag = .invalid_character, .offset = start, .len = 1 }; - } - - tok.finished = true; - return .{ .tag = .eof, .offset = tok.input.len, .len = 0 }; - } - - /// Emits a single-character token at the current offset. - fn simpleToken(tok: *Tokenizer, tag: Token.Tag) Token { - const start = tok.index; - tok.index += 1; - return .{ .tag = tag, .offset = start, .len = 1 }; - } - - /// Scans a quoted string or an unterminated string literal. 
- fn scanStringLiteral(tok: *Tokenizer) Token { - const start = tok.index; - tok.index += 1; - while (tok.index < tok.input.len) { - const ch = tok.input[tok.index]; - if (ch == '"') { - tok.index += 1; - return .{ .tag = .string_literal, .offset = start, .len = tok.index - start }; - } - if (tok.isNewline(ch)) { - // We stop before the newline so the next call can emit it. - return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; - } - if (ch == '\\') { - // Escape sequences consume the next byte, even if it is a quote. - if (tok.index + 1 >= tok.input.len) { - tok.index = tok.input.len; - break; - } - tok.index += 2; - continue; - } - tok.index += 1; - } - - return .{ .tag = .unterminated_string_literal, .offset = start, .len = tok.index - start }; - } - - /// Scans a WORD token as defined by the grammar. - fn scanWord(tok: *Tokenizer) Token { - const start = tok.index; - tok.index += 1; - while (tok.index < tok.input.len and tok.isWordChar(tok.input[tok.index])) { - tok.index += 1; - } - return .{ .tag = .word, .offset = start, .len = tok.index - start }; - } - - /// Scans a literal line token if the current position is at a line start. - fn scanLiteralLine(tok: *Tokenizer) ?Token { - const start = tok.index; - var cursor = tok.index; - while (cursor < tok.input.len and tok.isHorizontalWhitespace(tok.input[cursor])) { - cursor += 1; - } - if (cursor >= tok.input.len or tok.input[cursor] != '|') { - return null; - } - cursor += 1; - while (cursor < tok.input.len and !tok.isNewline(tok.input[cursor])) { - cursor += 1; - } - tok.index = cursor; - tok.line_start = false; - return .{ .tag = .literal_line, .offset = start, .len = cursor - start }; - } - - /// Consumes a newline, including CRLF sequences. 
- fn consumeNewline(tok: *Tokenizer) usize { - if (tok.input[tok.index] == '\r') { - if (tok.index + 1 < tok.input.len and tok.input[tok.index + 1] == '\n') { - tok.index += 2; - return 2; - } - tok.index += 1; - return 1; - } - tok.index += 1; - return 1; - } - fn isWordChar(_: *Tokenizer, ch: u8) bool { - return !std.ascii.isControl(ch) and !std.ascii.isWhitespace(ch) and ch != '{' and ch != '}' and ch != '\\' and ch != '"' and ch != '(' and ch != ')' and ch != ',' and ch != '=' and ch != ':'; - } - - fn isHorizontalWhitespace(_: *Tokenizer, ch: u8) bool { - return ch == ' ' or ch == '\t'; - } - - fn isNewline(_: *Tokenizer, ch: u8) bool { - return ch == '\n' or ch == '\r'; - } -}; - /// Parses a HyperDoc document. pub fn parse( allocator: std.mem.Allocator, diff --git a/src/main.zig b/src/main.zig index 6810065..44013de 100644 --- a/src/main.zig +++ b/src/main.zig @@ -26,14 +26,7 @@ pub fn main() !u8 { const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); defer allocator.free(document); - var tokenizer = hdoc.Tokenizer.init(document); - var stdout = std.fs.File.stdout().deprecatedWriter(); - while (tokenizer.next()) |token| { - try stdout.print("{s} \"{f}\"\n", .{ @tagName(token.tag), std.zig.fmtString(token.slice(document)) }); - if (token.tag == .eof) { - break; - } - } + // TODO: Parse document return 0; } diff --git a/src/testsuite.zig b/src/testsuite.zig index ee42e38..961cef5 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -5,103 +5,3 @@ fn testAcceptDocument(document: []const u8) !void { var doc = try hdoc.parse(std.testing.allocator, document, null); defer doc.deinit(); } - -const TokenExpect = struct { - tag: hdoc.Token.Tag, - lexeme: []const u8, -}; - -fn expectTokens(input: []const u8, expected: []const TokenExpect) !void { - var tokenizer = hdoc.Tokenizer.init(input); - var index: usize = 0; - while (true) { - const token_opt = tokenizer.next(); - if (token_opt == null) { - break; - } - const token = 
token_opt.?; - try std.testing.expect(index < expected.len); - try std.testing.expectEqual(expected[index].tag, token.tag); - try std.testing.expectEqualStrings(expected[index].lexeme, token.slice(input)); - index += 1; - if (token.tag == .eof) { - break; - } - } - try std.testing.expectEqual(expected.len, index); - try std.testing.expect(tokenizer.next() == null); -} - -test "tokenizes header line" { - try expectTokens("hdoc \"2.0\"\n", &.{ - .{ .tag = .word, .lexeme = "hdoc" }, - .{ .tag = .string_literal, .lexeme = "\"2.0\"" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes literal lines" { - try expectTokens("p:\n| code\n |more\n", &.{ - .{ .tag = .word, .lexeme = "p" }, - .{ .tag = .@":", .lexeme = ":" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .literal_line, .lexeme = "| code" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .literal_line, .lexeme = " |more" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes unterminated string" { - try expectTokens("\"oops\n", &.{ - .{ .tag = .unterminated_string_literal, .lexeme = "\"oops" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes word and escapes" { - try expectTokens("{alpha \\{ -dash}", &.{ - .{ .tag = .@"{", .lexeme = "{" }, - .{ .tag = .word, .lexeme = "alpha" }, - .{ .tag = .@"\\", .lexeme = "\\" }, - .{ .tag = .@"{", .lexeme = "{" }, - .{ .tag = .word, .lexeme = "-dash" }, - .{ .tag = .@"}", .lexeme = "}" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} - -test "tokenizes mixed sequences" { - try expectTokens( - "note(id=\"x\"){\n\\em \"hi\", -dash\n}\n", - &.{ - .{ .tag = .word, .lexeme = "note" }, - .{ .tag = .@"(", .lexeme = "(" }, - .{ .tag = .word, .lexeme = "id" }, - .{ .tag = .@"=", .lexeme = "=" }, - .{ .tag = .string_literal, .lexeme = "\"x\"" }, - .{ .tag = .@")", .lexeme = ")" }, - .{ .tag = .@"{", .lexeme = "{" }, - .{ 
.tag = .newline, .lexeme = "\n" }, - .{ .tag = .@"\\", .lexeme = "\\" }, - .{ .tag = .word, .lexeme = "em" }, - .{ .tag = .string_literal, .lexeme = "\"hi\"" }, - .{ .tag = .@",", .lexeme = "," }, - .{ .tag = .word, .lexeme = "-dash" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .@"}", .lexeme = "}" }, - .{ .tag = .newline, .lexeme = "\n" }, - .{ .tag = .eof, .lexeme = "" }, - }, - ); -} - -test "tokenizes invalid characters" { - try expectTokens("\x00", &.{ - .{ .tag = .invalid_character, .lexeme = "\x00" }, - .{ .tag = .eof, .lexeme = "" }, - }); -} diff --git a/test/parser/stress.hdoc b/test/parser/stress.hdoc new file mode 100644 index 0000000..aca067a --- /dev/null +++ b/test/parser/stress.hdoc @@ -0,0 +1,74 @@ +hdoc(version="2.0") + +p { + On Monday at 09:07, the support desk logged a ticket titled "Login loop (again)". The user wrote, "I click + 'Sign in', the page flashes, and I'm back where I started." Someone replied, "That sounds like a cookie + issue—clear your cache," but the customer insisted they’d already tried: "Chrome, Safari, even a private window." + The message ended with an oddly specific note: "It only happens when the Wi-Fi name is 'Office-Guest'." Nobody + knew whether to laugh or worry. +} + +p { + The product manager’s summary was short but loaded: "We shipped the hotfix; we didn't fix the root + cause." In the same breath, she added, "Don't roll back unless you absolutely have to—it's worse." Later, + in a longer thread, she used quotes inside quotes: "When QA says 'it’s fine', what they mean is 'it hasn’t + exploded yet'." The tone wasn’t cruel, just tired, and the timestamps (11:58, 12:01, 12:03) made it feel like + a miniature drama. +} + +p { + In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. 
They listed + steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained + parentheses): "Use the staging key (not the production key (seriously))". A different person pasted a pseudo-path, + /var/tmp/builds/2025-12-23/, and then warned, "If you see `NULL` in the output, don't 'fix' it by replacing it with + '0'—that's how we broke reporting last time." +} + +p { + When the vendor called, they insisted everything was "within spec"; our engineer disagreed. "Within spec" can mean + two opposite things, she said: either the spec is strict, or the spec is meaningless. She pulled up a screenshot and + quoted the line: "Error: expected ']' but found '\}'". Then she joked, "At least it’s honest," and forwarded the log + snippet with a subject line that read, "Re: Re: RE: Please confirm ASAP!!!" (three exclamation marks included, unfortunately). +} + +p { + The draft contract read like a puzzle: "Client shall provide 'reasonable access' to systems," while another clause said, + "Provider may request access at any time." Someone circled the phrase "reasonable access" and wrote, "Reasonable for whom?" + A lawyer suggested adding: "…as mutually agreed in writing," but the team worried that "in writing" would exclude Slack, email, + and tickets—so they proposed: "…in writing (including electronic messages)". Even that sparked debate: does "electronic messages" + include chat reactions, like 👍 or ✅? +} + +p { + A teammate tried to reproduce the bug and wrote a narrative that sounded like a short story: "I opened the dashboard, clicked + 'Reports', then typed 'Q4' into the search field." The UI responded with "No results found" even though the sidebar clearly showed "Q4 + Forecast". The odd part: if you type "Q4 " (with a trailing space), results appear. He ended the note with, "Yes, I know that sounds + fake," and added, "But watch: 'Q4' ≠ 'Q4 '." It’s the sort of thing parsers and humans both hate. 
+} + +p { + The incident timeline included times in different formats—09:15 CET, 08:15 UTC, and "around 8-ish"—which made the postmortem messy. + One line said, "Database CPU hit 92% (again)," another said, "CPU was fine; it was I/O." Someone pasted a link: + \link(uri="https://example.com/status?from=2025-12-23T08:00:00Z&to=2025-12-23T10:00:00Z"){https://example.com/status?from=2025-12-23T08:00:00Z&to=2025-12-23T10:00:00Z} and then asked, "Why does + the graph show a dip at 08:37?" The answer was "maintenance," but the maintenance note was filed under "misc"—not "maintenance." +} + +p { + In the customer’s feedback form, the message was polite but pointed: "Your app is great—until it isn't." They described a checkout flow where the + total briefly showed €19.99, then flipped to €1,999.00, then back again. "I know it's probably formatting," they wrote, "but seeing that number + made me think I'd been scammed." Another sentence used both quote styles: "The label says 'Total', but the tooltip says "Estimated total"." The + difference matters when people are anxious. +} + +p { + A developer left a comment in the code review: "This is readable, but it's not maintainable." When asked what that meant, he replied, "Readable + means I can understand it today; maintainable means I can change it tomorrow without breaking it." He pointed at a line that looked harmless — \mono(syntax="c"){if (a < b && c > d) return;} — and said, "It encodes policy with no explanation." Then, in the same comment, he used + markdown-like fragments that shouldn’t be parsed as such: *not emphasis*, *not italics*, and [not a link](just text). +} + +p { + Before the release, the checklist included items that were half instruction, half superstition: "Update changelog; tag release; don't forget the + 'README' typo." Someone wrote "DONE" in all caps, then later edited it to "done" because the automation treats "DONE" as a keyword. 
Another item + read: "Confirm the 'rollback plan' exists (even if we never use it)." The final note—"If anything feels off, stop"—was simple, but it carried the + weight of every prior incident, every "it’s fine," every quiet "…maybe not." +} From f5c13eaadb43cd6750b9505c0f93e5999170ed39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 23 Dec 2025 11:31:44 +0100 Subject: [PATCH 009/116] Implements new parser for HyperDoc 2.0 --- examples/demo.hdoc | 2 +- examples/featurematrix.hdoc | 10 +- examples/guide.hdoc | 2 +- examples/tables.hdoc | 2 +- src/hyperdoc.zig | 666 +++++++++++++++++++++++++++++++++++- src/main.zig | 3 +- test/parser/stress.hdoc | 2 +- 7 files changed, 672 insertions(+), 15 deletions(-) diff --git a/examples/demo.hdoc b/examples/demo.hdoc index a092e91..68ef189 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0", lang="en", title="HyperDoc \"2.0\" Demonstration"); +hdoc(version="2.0" , lang="en", title="HyperDoc \"2.0\" Demonstration"); h1{HyperDoc 2.0} diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index d72f095..3600e02 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0") +hdoc(version="2.0"); h1 { Small Computer Feature Matrix } @@ -63,7 +63,7 @@ table { td "✅" td "✅" td "❌" - td { ❌\sup{1} } + td { p { ❌\sup{1} } } td "❌" } row(title="Modular Design") { @@ -71,7 +71,7 @@ table { td "❌" td "❌" td "✅" - td { ✅\sup{2} } + td { p { ✅\sup{2} } } } row(title="Full Documentation") { td "✅" @@ -88,11 +88,11 @@ table { td "❌" } row(title="Parallax Propeller") { - td { ✅ (\link(uri="https://www.parallax.com/propeller-2"){Propeller 2}) } + td { p { ✅ (\link(uri="https://www.parallax.com/propeller-2"){Propeller 2}) } } td "❌" td "❌" td "❌" - td { ✅ (\link(uri="https://www.parallax.com/propeller-1"){Propeller 1}) } + td { p { ✅ (\link(uri="https://www.parallax.com/propeller-1"){Propeller 1}) } } } 
} diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 94c70c4..50f7b64 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0") +hdoc(version="2.0"); h1(id="intro", lang="en") { HyperDoc 2.0 Examples } diff --git a/examples/tables.hdoc b/examples/tables.hdoc index 5adb144..28f73f9 100644 --- a/examples/tables.hdoc +++ b/examples/tables.hdoc @@ -1,4 +1,4 @@ -hdoc "2.0" +hdoc(version="2.0"); h1(id="tables") { HyperDoc 2.0 Table Examples } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c91ef12..5bf47e0 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -19,7 +19,13 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. pub const Block = union(enum) { - placeholder: void, + header: Header, + + pub const Header = struct { + title: ?[]const u8, + author: ?[]const u8, + date: ?[]const u8, + }; }; /// Parses a HyperDoc document. @@ -30,19 +36,665 @@ pub fn parse( /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) !Document { +) error{ OutOfMemory, SyntaxError }!Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); - _ = plain_text; - _ = diagnostics; + var parser: Parser = .{ + .code = plain_text, + .arena = arena.allocator(), + .diagnostics = diagnostics, + }; + + var sema: SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = diagnostics, + }; + + var blocks: std.ArrayList(Block) = .empty; + + while (true) { + errdefer |err| { + std.log.debug("error at examples/demo.hdoc:{f}: {t}", .{ + parser.make_diagnostic_location(parser.offset), + err, + }); + } + + const node = parser.accept_node(.top_level) catch |err| switch (err) { + error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), // TODO: What the fuck? Bug report! 
+ + error.EndOfFile => break, + + error.UnexpectedEndOfFile, + error.InvalidCharacter, + error.UnexpectedCharacter, + error.UnterminatedStringLiteral, + error.UnterminatedList, + => return error.SyntaxError, + }; + + const block = sema.translate_toplevel_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), + + error.InvalidNodeType => continue, + }; + + try blocks.append(arena.allocator(), block); + } return .{ .arena = arena, - .contents = &[_]Block{}, + .contents = try blocks.toOwnedSlice(arena.allocator()), }; } +pub const SemanticAnalyzer = struct { + arena: std.mem.Allocator, + diagnostics: ?*Diagnostics, + + fn translate_toplevel_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType }!Block { + _ = sema; + switch (node.type) { + else => { + return error.InvalidNodeType; + }, + } + } +}; + +pub const Parser = struct { + code: []const u8, + offset: usize = 0, + + arena: std.mem.Allocator, + diagnostics: ?*Diagnostics, + + pub const ScopeType = enum { top_level, nested }; + + pub fn accept_node(parser: *Parser, comptime scope_type: ScopeType) !Node { + const type_ident = parser.accept_identifier() catch |err| switch (err) { + error.UnexpectedEndOfFile => |e| switch (scope_type) { + .nested => return e, + .top_level => return error.EndOfFile, + }, + else => |e| return e, + }; + const node_type: NodeType = if (std.meta.stringToEnum(NodeType, type_ident.text)) |node_type| + node_type + else if (std.mem.startsWith(u8, type_ident.text, "\\")) + .unknown_inline + else + .unknown_block; + + var attributes: std.StringArrayHashMapUnmanaged(Attribute) = .empty; + errdefer attributes.deinit(parser.arena); + + if (parser.try_accept_char('(')) { + if (!parser.try_accept_char(')')) { + // We 're not at the end of the attribute list, + // so we know that the next token must be the attribute name. 
+ + while (true) { + const start = parser.offset; + const attr_name = try parser.accept_identifier(); + _ = try parser.accept_char('='); + const attr_value = try parser.accept_string(); + const attr_location = parser.location(start, parser.offset); + + const gop_entry = try attributes.getOrPut(parser.arena, attr_name.text); + if (gop_entry.found_existing) { + // TODO: Emit diagnostic + } + gop_entry.value_ptr.* = .{ + .location = attr_location, + .value = try parser.unescape_string(attr_value), + }; + + if (!parser.try_accept_char(',')) { + break; + } + } + try parser.accept_char(')'); + } + } + + if (parser.try_accept_char(';')) { + // block has empty content + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .empty, + }; + } + + if (parser.try_accept_char(':')) { + // block has verbatim content + + var lines: std.ArrayList(Token) = .empty; + + while (try parser.try_accept_verbatim_line()) |line| { + try lines.append(parser.arena, line); + } + + if (lines.items.len == 0) { + // TODO: Emit diagnostic about verbatim block with no lines + } + + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .{ .verbatim = try lines.toOwnedSlice(parser.arena) }, + }; + } + + if (try parser.try_accept_string()) |string_body| { + // block has string content + + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .{ .string = string_body }, + }; + } + + var children = if (node_type.has_inline_body()) + try parser.accept_inline_node_list() + else + try parser.accept_block_node_list(); + + return .{ + .location = parser.location(type_ident.position.offset, null), + .type = node_type, + .attributes = attributes, + .body = .{ .list = try children.toOwnedSlice(parser.arena) }, + }; + } + + pub fn accept_block_node_list(parser: *Parser) error{ 
+ OutOfMemory, + InvalidCharacter, + UnterminatedStringLiteral, + UnexpectedEndOfFile, + UnterminatedList, + UnexpectedCharacter, + }!std.ArrayList(Node) { + var children: std.ArrayList(Node) = .empty; + errdefer children.deinit(parser.arena); + + try parser.accept_char('{'); + + while (true) { + parser.skip_whitespace(); + + if (parser.try_accept_char('}')) + break; + + const child = try parser.accept_node(.nested); + try children.append(parser.arena, child); + } + + return children; + } + + pub fn accept_inline_node_list(parser: *Parser) error{ + OutOfMemory, + InvalidCharacter, + UnterminatedStringLiteral, + UnexpectedEndOfFile, + UnterminatedList, + UnexpectedCharacter, + }!std.ArrayList(Node) { + var children: std.ArrayList(Node) = .empty; + errdefer children.deinit(parser.arena); + + try parser.accept_char('{'); + + var nesting: usize = 0; + + while (true) { + parser.skip_whitespace(); + + const head = parser.peek_char() orelse { + // TODO: Emit diagnostic + return error.UnterminatedList; + }; + + switch (head) { + '{' => { + nesting += 1; + parser.offset += 1; + }, + + '}' => { + parser.offset += 1; + + if (nesting == 0) + break; + + nesting -= 1; + }, + + '\\' => backslash: { + if (parser.offset < parser.code.len - 1) { + const next_char = parser.code[parser.offset + 1]; + switch (next_char) { + '{', '}', '\\' => { + // Escaped brace + parser.offset += 2; + break :backslash; + }, + else => {}, + } + } + + const child = try parser.accept_node(.nested); + + // This will only be a non-inline node if we have a bug. 
+ std.debug.assert(child.type.is_inline()); + + try children.append(parser.arena, child); + }, + + else => { + const word = try parser.accept_word(); + try children.append(parser.arena, .{ + .location = word.position, + .type = .text, + .attributes = .empty, + .body = .empty, + }); + }, + } + } + + return children; + } + + pub fn try_accept_verbatim_line(parser: *Parser) !?Token { + parser.skip_whitespace(); + + const head = parser.offset; + if (!parser.try_accept_char('|')) { + return null; + } + + while (!parser.at_end()) { + const c = parser.code[parser.offset]; + if (c == '\n') { + break; + } + + // we don't consume the LF character, as each verbatim line should be prefixed with exactly a single LF character + parser.offset += 1; + } + if (parser.at_end()) { + // TODO: Emit diagnostic about verbatim lines should have an empty line feed at the end of the file. + } + + const token = parser.slice(head, parser.offset); + std.debug.assert(std.mem.startsWith(u8, token.text, "|")); + return token; + } + + pub fn peek_char(parser: *Parser) ?u8 { + if (parser.at_end()) + return null; + return parser.code[parser.offset]; + } + + pub fn accept_char(parser: *Parser, expected: u8) error{ UnexpectedEndOfFile, UnexpectedCharacter }!void { + if (parser.try_accept_char(expected)) + return; + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + return error.UnexpectedCharacter; + } + + pub fn try_accept_char(parser: *Parser, expected: u8) bool { + std.debug.assert(!is_space(expected)); + parser.skip_whitespace(); + + if (parser.at_end()) + return false; + + if (parser.code[parser.offset] != expected) + return false; + + parser.offset += 1; + return true; + } + + pub fn try_accept_string(parser: *Parser) !?Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return null; + + if (parser.code[parser.offset] != '"') + return null; + + return try parser.accept_string(); + } + + pub fn accept_string(parser: *Parser) error{ OutOfMemory, UnexpectedEndOfFile, 
UnexpectedCharacter, UnterminatedStringLiteral }!Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + const start = parser.offset; + if (parser.code[start] != '"') + return error.UnexpectedCharacter; + + parser.offset += 1; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + parser.offset += 1; + + switch (c) { + '"' => return parser.slice(start, parser.offset), + + '\\' => { + // Escape sequence + if (parser.at_end()) + return error.UnterminatedStringLiteral; + + const escaped = parser.code[parser.offset]; + parser.offset += 1; + + switch (escaped) { + '\n', '\r' => return error.UnterminatedStringLiteral, + else => {}, + } + }, + + else => {}, + } + } + + return error.UnterminatedStringLiteral; + } + + pub fn accept_identifier(parser: *Parser) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + const start = parser.offset; + const first = parser.code[start]; + if (!is_ident_char(first)) + return error.InvalidCharacter; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (!is_ident_char(c)) + break; + parser.offset += 1; + } + + return parser.slice(start, parser.offset); + } + + /// Accepts a word token (a sequence of non-whitespace characters). + pub fn accept_word(parser: *Parser) error{UnexpectedEndOfFile}!Token { + parser.skip_whitespace(); + + if (parser.at_end()) + return error.UnexpectedEndOfFile; + + const start = parser.offset; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (is_space(c)) + break; + switch (c) { + // These are word-terminating characters: + '{', '}', '\\' => break, + else => {}, + } + parser.offset += 1; + } + + return parser.slice(start, parser.offset); + } + + /// Skips forward until the first non-whitespace character. 
+ pub fn skip_whitespace(parser: *Parser) void {
+ while (!parser.at_end()) {
+ const c = parser.code[parser.offset];
+ if (!is_space(c)) {
+ break;
+ }
+ parser.offset += 1;
+ }
+ }
+
+ pub fn at_end(parser: *Parser) bool {
+ return parser.offset >= parser.code.len;
+ }
+
+ /// Returns the contents of a string literal token with the surrounding quotes stripped; escape sequences are not decoded yet.
+ pub fn unescape_string(parser: *Parser, token: Token) error{OutOfMemory}![]const u8 {
+ std.debug.assert(token.text.len >= 2);
+ std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"');
+
+ _ = parser;
+ // TODO: Implement unescaping logic here.
+
+ // For now, we just return the raw text.
+ return token.text[1 .. token.text.len - 1];
+ }
+
+ pub fn location(parser: *Parser, start: usize, end: ?usize) Location {
+ return .{ .offset = start, .length = (end orelse parser.offset) - start };
+ }
+
+ pub fn slice(parser: *Parser, start: usize, end: usize) Token {
+ return .{
+ .text = parser.code[start..end],
+ .position = .{ .offset = start, .length = end - start },
+ };
+ }
+
+ pub fn make_diagnostic_location(parser: Parser, offset: usize) Diagnostic.Location {
+ var line: u32 = 1;
+ var column: u32 = 1;
+
+ var i: usize = 0;
+ while (i < offset and i < parser.code.len) : (i += 1) {
+ if (parser.code[i] == '\n') {
+ line += 1;
+ column = 1;
+ } else {
+ column += 1;
+ }
+ }
+
+ return .{ .line = line, .column = column };
+ }
+
+ pub fn is_space(c: u8) bool {
+ return switch (c) {
+ ' ', '\t', '\n', '\r' => true,
+ else => false,
+ };
+ }
+
+ pub fn is_ident_char(c: u8) bool {
+ return switch (c) {
+ 'a'...'z', 'A'...'Z', '0'...'9', '_', '\\' => true,
+ else => false,
+ };
+ }
+
+ pub const Token = struct {
+ text: []const u8,
+ position: Location,
+ };
+
+ pub const Location = struct {
+ offset: usize,
+ length: usize,
+ };
+
+ pub const NodeType = enum {
+ hdoc,
+ h1,
+ h2,
+ h3,
+ p,
+ note,
+ warning,
+ danger,
+ tip,
+ quote,
+ spoiler,
+ ul,
+ ol,
+ img,
+ pre,
+ toc,
+ table,
+ columns,
+ group,
+ row,
+ td, + li, + + text, + @"\\em", + @"\\mono", + @"\\strike", + @"\\sub", + @"\\sup", + @"\\link", + @"\\date", + @"\\time", + @"\\datetime", + + unknown_block, + unknown_inline, + + pub fn is_inline(node_type: NodeType) bool { + return switch (node_type) { + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", + .@"\\date", + .@"\\time", + .@"\\datetime", + .unknown_inline, + .text, + => true, + + .hdoc, + .h1, + .h2, + .h3, + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + .ul, + .ol, + .img, + .pre, + .toc, + .table, + .columns, + .group, + .row, + .td, + .li, + .unknown_block, + => false, + }; + } + + pub fn has_inline_body(node_type: NodeType) bool { + return switch (node_type) { + .h1, + .h2, + .h3, + + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + + .img, + .pre, + .toc, + .group, + + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", + .@"\\date", + .@"\\time", + .@"\\datetime", + + .unknown_inline, + => true, + + .hdoc, + .ul, + .ol, + .table, + .columns, + .row, + .td, + .li, + + .text, + .unknown_block, + => false, + }; + } + }; + + pub const Node = struct { + location: Location, + type: NodeType, + attributes: std.StringArrayHashMapUnmanaged(Attribute), + + body: Body, + + pub const Body = union(enum) { + empty, + string: Token, + verbatim: []Token, + list: []Node, + }; + }; + + pub const Attribute = struct { + location: Location, + value: []const u8, + }; +}; + /// A diagnostic message. pub const Diagnostic = struct { pub const Severity = enum { warning, @"error" }; @@ -50,6 +702,10 @@ pub const Diagnostic = struct { pub const Location = struct { line: u32, column: u32, + + pub fn format(loc: Location, w: *std.Io.Writer) !void { + try w.print("{d}:{d}", .{ loc.line, loc.column }); + } }; /// An diagnostic code encoded as a 16 bit integer. 
diff --git a/src/main.zig b/src/main.zig index 44013de..32e30a0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -26,7 +26,8 @@ pub fn main() !u8 { const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); defer allocator.free(document); - // TODO: Parse document + var parsed = try hdoc.parse(allocator, document, null); + defer parsed.deinit(); return 0; } diff --git a/test/parser/stress.hdoc b/test/parser/stress.hdoc index aca067a..bec3b0f 100644 --- a/test/parser/stress.hdoc +++ b/test/parser/stress.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0") +hdoc(version="2.0"); p { On Monday at 09:07, the support desk logged a ticket titled "Login loop (again)". The user wrote, "I click From 0bd2f1ae884ece34510b062b0d81753f4e432955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 23 Dec 2025 12:01:48 +0100 Subject: [PATCH 010/116] Adds functions for test suite, adds fuzzer code --- build.zig | 16 ++- src/hyperdoc.zig | 30 +++++- src/testsuite.zig | 265 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 308 insertions(+), 3 deletions(-) diff --git a/build.zig b/build.zig index 0c845f2..4c71fd2 100644 --- a/build.zig +++ b/build.zig @@ -41,9 +41,23 @@ pub fn build(b: *std.Build) void { .target = target, .optimize = optimize, .imports = &.{ - .{ .name = "hyperdoc", .module = hyperdoc }, + rawFileMod(b, "examples/tables.hdoc"), + rawFileMod(b, "examples/featurematrix.hdoc"), + rawFileMod(b, "examples/demo.hdoc"), + rawFileMod(b, "examples/guide.hdoc"), + rawFileMod(b, "test/parser/stress.hdoc"), }, }), + .use_llvm = true, }); test_step.dependOn(&b.addRunArtifact(exe_tests).step); } + +fn rawFileMod(b: *std.Build, path: []const u8) std.Build.Module.Import { + return .{ + .name = path, + .module = b.createModule(.{ + .root_source_file = b.path(path), + }), + }; +} diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 5bf47e0..b55dbf2 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -743,7 +743,7 @@ pub 
const Diagnostics = struct { arena: std.heap.ArenaAllocator, items: std.ArrayList(Diagnostic) = .empty, - pub fn init(allocator: std.mem.Allocator) Diagnostic { + pub fn init(allocator: std.mem.Allocator) Diagnostics { return .{ .arena = .init(allocator) }; } @@ -765,3 +765,31 @@ pub const Diagnostics = struct { }); } }; + +test "fuzz parser" { + const Impl = struct { + fn testOne(impl: @This(), data: []const u8) !void { + _ = impl; + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = parse(std.testing.allocator, data, &diagnostics) catch return; + defer doc.deinit(); + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + "hdoc(version=\"2.0\");", + @embedFile("examples/tables.hdoc"), + @embedFile("examples/featurematrix.hdoc"), + @embedFile("examples/demo.hdoc"), + @embedFile("examples/guide.hdoc"), + @embedFile("test/parser/stress.hdoc"), + }, + }); +} diff --git a/src/testsuite.zig b/src/testsuite.zig index 961cef5..052e95f 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -1,7 +1,270 @@ const std = @import("std"); -const hdoc = @import("hyperdoc"); +const hdoc = @import("./hyperdoc.zig"); fn testAcceptDocument(document: []const u8) !void { var doc = try hdoc.parse(std.testing.allocator, document, null); defer doc.deinit(); } + +fn parseFile(path: []const u8) !void { + const source = try std.fs.cwd().readFileAlloc(std.testing.allocator, path, 10 * 1024 * 1024); + defer std.testing.allocator.free(source); + try testAcceptDocument(source); +} + +fn parseDirectoryTree(path: []const u8) !void { + var dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); + defer dir.close(); + + var walker = try dir.walk(std.testing.allocator); + defer walker.deinit(); + + while (try walker.next()) |entry| { + if (entry.kind != .file) + continue; + if (!std.mem.endsWith(u8, entry.path, ".hdoc")) + 
continue; + + const full_path = try std.fs.path.join(std.testing.allocator, &.{ path, entry.path }); + defer std.testing.allocator.free(full_path); + + try parseFile(full_path); + } +} + +test "parser accepts examples and test documents" { + try parseDirectoryTree("examples"); + try parseDirectoryTree("test"); +} + +test "parser accept identifier and word tokens" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "h1 word\\em{test}", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const ident = try parser.accept_identifier(); + try std.testing.expectEqualStrings("h1", ident.text); + try std.testing.expectEqual(@as(usize, 0), ident.position.offset); + try std.testing.expectEqual(@as(usize, 2), ident.position.length); + + const word = try parser.accept_word(); + try std.testing.expectEqualStrings("word", word.text); + try std.testing.expectEqual(@as(usize, 3), word.position.offset); + try std.testing.expectEqual(@as(usize, 4), word.position.length); + try std.testing.expectEqual(@as(usize, 7), parser.offset); +} + +test "parser rejects identifiers with invalid start characters" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "-abc", + .arena = arena.allocator(), + .diagnostics = null, + }; + + try std.testing.expectError(error.InvalidCharacter, parser.accept_identifier()); +} + +test "parser accept string literals and unescape" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "\"hello\\\\n\"", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const token = try parser.accept_string(); + try std.testing.expectEqualStrings("\"hello\\\\n\"", token.text); +} + +test "parser reports unterminated string literals" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer 
arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "\"unterminated\n", + .arena = arena.allocator(), + .diagnostics = null, + }; + + try std.testing.expectError(error.UnterminatedStringLiteral, parser.accept_string()); +} + +test "parser handles attributes and empty bodies" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "h1(title=\"Hello\",author=\"World\");", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.h1, node.type); + try std.testing.expectEqual(@as(usize, 2), node.attributes.count()); + + const title = node.attributes.get("title") orelse return error.TestExpectedEqual; + try std.testing.expectEqualStrings("Hello", title.value); + + const author = node.attributes.get("author") orelse return error.TestExpectedEqual; + try std.testing.expectEqualStrings("World", author.value); + + try std.testing.expect(node.body == .empty); +} + +test "parser handles string bodies" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "p \"Hello world\"", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.p, node.type); + switch (node.body) { + .string => |token| try std.testing.expectEqualStrings("\"Hello world\"", token.text), + else => return error.TestExpectedEqual, + } +} + +test "parser handles verbatim blocks" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "pre:\n|line one\n|line two\n", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.pre, node.type); + switch (node.body) { 
+ .verbatim => |lines| { + try std.testing.expectEqual(@as(usize, 2), lines.len); + try std.testing.expectEqualStrings("|line one", lines[0].text); + try std.testing.expectEqualStrings("|line two", lines[1].text); + }, + else => return error.TestExpectedEqual, + } +} + +test "parser handles block node lists" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "hdoc{h1 \"Title\" p \"Body\"}", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.hdoc, node.type); + switch (node.body) { + .list => |children| { + try std.testing.expectEqual(@as(usize, 2), children.len); + try std.testing.expectEqual(hdoc.Parser.NodeType.h1, children[0].type); + try std.testing.expectEqual(hdoc.Parser.NodeType.p, children[1].type); + }, + else => return error.TestExpectedEqual, + } +} + +test "parser handles inline node lists" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "p { Hello \\em{world} }", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.p, node.type); + switch (node.body) { + .list => |children| { + try std.testing.expectEqual(@as(usize, 2), children.len); + try std.testing.expectEqual(hdoc.Parser.NodeType.text, children[0].type); + try std.testing.expectEqual(@as(usize, 5), children[0].location.length); + + try std.testing.expectEqual(hdoc.Parser.NodeType.@"\\em", children[1].type); + switch (children[1].body) { + .list => |inline_children| { + try std.testing.expectEqual(@as(usize, 1), inline_children.len); + try std.testing.expectEqual(hdoc.Parser.NodeType.text, inline_children[0].type); + try std.testing.expectEqual(@as(usize, 5), inline_children[0].location.length); + }, + else => 
return error.TestExpectedEqual, + } + }, + else => return error.TestExpectedEqual, + } +} + +test "parser handles unknown node types" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "\\madeup{} mystery{}", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const inline_node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.unknown_inline, inline_node.type); + switch (inline_node.body) { + .list => |children| try std.testing.expectEqual(@as(usize, 0), children.len), + else => return error.TestExpectedEqual, + } + + const block_node = try parser.accept_node(.top_level); + try std.testing.expectEqual(hdoc.Parser.NodeType.unknown_block, block_node.type); + switch (block_node.body) { + .list => |children| try std.testing.expectEqual(@as(usize, 0), children.len), + else => return error.TestExpectedEqual, + } +} + +test "parser reports unterminated inline lists" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "p { word", + .arena = arena.allocator(), + .diagnostics = null, + }; + + try std.testing.expectError(error.UnterminatedList, parser.accept_node(.top_level)); +} + +test "parser maps diagnostic locations" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var parser: hdoc.Parser = .{ + .code = "a\nb\nc", + .arena = arena.allocator(), + .diagnostics = null, + }; + + const loc = parser.make_diagnostic_location(4); + try std.testing.expectEqual(@as(u32, 3), loc.line); + try std.testing.expectEqual(@as(u32, 1), loc.column); +} From 715f53f9784af090f3c6f36371a9eefae3b1c680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 12:46:26 +0100 Subject: [PATCH 011/116] Vibecoded: Implements DOM datastructure, adds diagnostic error messages and very crude semantic 
analysis --- docs/specification.md | 3 +- src/hyperdoc.zig | 357 +++++++++++++++++++++++++++++++++++++----- src/main.zig | 8 +- src/testsuite.zig | 71 +++++++++ 4 files changed, 396 insertions(+), 43 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 996c00a..fd0e433 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -67,7 +67,7 @@ WORD := /[^\s\{\}\\\"(),=:]+/ | Element | Element Type | Allowed Children | Attributes | | ----------------------------------------------------------- | ------------ | ---------------------------- | ------------------------------------ | | *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author` | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | | `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | | `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | | `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | @@ -105,6 +105,7 @@ Notes: | `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | | `title` | No | *Any* | Sets the title of the document or the table row. | | `author` | No | *Any* | Sets the author of the document. | +| `date` | No | [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) timestamp | Sets the authoring date of the document. | | `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | | `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | | `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. 
| diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b55dbf2..d557866 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -6,6 +6,7 @@ const parser_toolkit = @import("parser-toolkit"); pub const Document = struct { arena: std.heap.ArenaAllocator, contents: []Block, + ids: []?[]const u8, pub fn deinit(doc: *Document) void { doc.arena.deinit(); @@ -20,12 +21,129 @@ pub const Document = struct { /// from the full document size. pub const Block = union(enum) { header: Header, + heading: Heading, + paragraph: Paragraph, + list: List, + image: Image, + preformatted: Preformatted, + toc: TableOfContents, + table: Table, pub const Header = struct { + lang: ?[]const u8, title: ?[]const u8, + version: ?[]const u8, author: ?[]const u8, date: ?[]const u8, }; + + pub const Heading = struct { + level: HeadingLevel, + lang: ?[]const u8, + content: []Span, + }; + + pub const HeadingLevel = enum { h1, h2, h3 }; + + pub const Paragraph = struct { + kind: ParagraphKind, + lang: ?[]const u8, + content: []Span, + }; + + pub const ParagraphKind = enum { p, note, warning, danger, tip, quote, spoiler }; + + pub const List = struct { + lang: ?[]const u8, + first: ?u32, + items: []ListItem, + }; + + pub const ListItem = struct { + lang: ?[]const u8, + content: []Span, + }; + + pub const Image = struct { + lang: ?[]const u8, + alt: ?[]const u8, + path: ?[]const u8, + content: []Span, + }; + + pub const Preformatted = struct { + lang: ?[]const u8, + syntax: ?[]const u8, + content: []Span, + }; + + pub const TableOfContents = struct { + lang: ?[]const u8, + depth: ?u8, + }; + + pub const Table = struct { + lang: ?[]const u8, + rows: []TableRow, + }; + + pub const TableRow = union(enum) { + columns: TableColumns, + row: TableDataRow, + group: TableGroup, + }; + + pub const TableColumns = struct { + lang: ?[]const u8, + cells: []TableCell, + }; + + pub const TableDataRow = struct { + lang: ?[]const u8, + title: ?[]const u8, + cells: []TableCell, + }; + + pub const TableGroup = struct { + 
lang: ?[]const u8, + content: []Span, + }; + + pub const TableCell = struct { + lang: ?[]const u8, + colspan: ?u32, + content: []Span, + }; +}; + +pub const SpanContent = union(enum) { + text: []const u8, + date: DateTime, + time: DateTime, + datetime: DateTime, +}; + +pub const DateTime = struct { + value: []const u8, + format: ?[]const u8 = null, +}; + +pub const Span = struct { + content: SpanContent, + lang: ?[]const u8 = null, + em: bool = false, + mono: bool = false, + strike: bool = false, + sub: bool = false, + sup: bool = false, + link: Link = .none, + syntax: ?[]const u8 = null, +}; + +pub const Link = union(enum) { + none, + ref: []const u8, + uri: []const u8, }; /// Parses a HyperDoc document. @@ -49,9 +167,11 @@ pub fn parse( var sema: SemanticAnalyzer = .{ .arena = arena.allocator(), .diagnostics = diagnostics, + .code = plain_text, }; var blocks: std.ArrayList(Block) = .empty; + var ids: std.ArrayList(?[]const u8) = .empty; while (true) { errdefer |err| { @@ -81,25 +201,78 @@ pub fn parse( }; try blocks.append(arena.allocator(), block); + try ids.append(arena.allocator(), null); } return .{ .arena = arena, .contents = try blocks.toOwnedSlice(arena.allocator()), + .ids = try ids.toOwnedSlice(arena.allocator()), }; } pub const SemanticAnalyzer = struct { arena: std.mem.Allocator, diagnostics: ?*Diagnostics, + code: []const u8, + seen_header: bool = false, fn translate_toplevel_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType }!Block { - _ = sema; - switch (node.type) { - else => { + if (!sema.seen_header) { + sema.seen_header = true; + if (node.type != .hdoc) { + sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); return error.InvalidNodeType; - }, + } + } else if (node.type == .hdoc) { + sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); + return error.InvalidNodeType; + } + + if (node.type == .hdoc) { + return .{ + .header = .{ + .lang = null, + .title = 
sema.attr_value(node.attributes, "title"), + .version = sema.attr_value(node.attributes, "version"), + .author = sema.attr_value(node.attributes, "author"), + .date = sema.attr_value(node.attributes, "date"), + }, + }; + } + + return error.InvalidNodeType; + } + + fn attr_value(sema: *SemanticAnalyzer, attrs: std.StringArrayHashMapUnmanaged(Parser.Attribute), name: []const u8) ?[]const u8 { + _ = sema; + if (attrs.get(name)) |attr| { + return attr.value; + } + return null; + } + + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) void { + if (sema.diagnostics) |diag| { + diag.add(code, sema.make_location(offset)) catch {}; + } + } + + fn make_location(sema: *SemanticAnalyzer, offset: usize) Diagnostic.Location { + var line: u32 = 1; + var column: u32 = 1; + + var i: usize = 0; + while (i < offset and i < sema.code.len) : (i += 1) { + if (sema.code[i] == '\n') { + line += 1; + column = 1; + } else { + column += 1; + } } + + return .{ .line = line, .column = column }; } }; @@ -112,7 +285,22 @@ pub const Parser = struct { pub const ScopeType = enum { top_level, nested }; + fn emitDiagnostic( + parser: *Parser, + code: Diagnostic.Code, + diag_location: Diagnostic.Location, + ) void { + if (parser.diagnostics) |diag| { + diag.add(code, diag_location) catch {}; + } + } + pub fn accept_node(parser: *Parser, comptime scope_type: ScopeType) !Node { + parser.skip_whitespace(); + if (scope_type == .top_level and parser.at_end()) { + return error.EndOfFile; + } + const type_ident = parser.accept_identifier() catch |err| switch (err) { error.UnexpectedEndOfFile => |e| switch (scope_type) { .nested => return e, @@ -144,7 +332,7 @@ pub const Parser = struct { const gop_entry = try attributes.getOrPut(parser.arena, attr_name.text); if (gop_entry.found_existing) { - // TODO: Emit diagnostic + emitDiagnostic(parser, .{ .duplicate_attribute = .{ .name = attr_name.text } }, parser.make_diagnostic_location(attr_location.offset)); } gop_entry.value_ptr.* 
= .{ .location = attr_location, @@ -179,7 +367,7 @@ pub const Parser = struct { } if (lines.items.len == 0) { - // TODO: Emit diagnostic about verbatim block with no lines + emitDiagnostic(parser, .empty_verbatim_block, parser.make_diagnostic_location(type_ident.position.offset)); } return .{ @@ -230,6 +418,11 @@ pub const Parser = struct { while (true) { parser.skip_whitespace(); + if (parser.at_end()) { + emitDiagnostic(parser, .unterminated_block_list, parser.make_diagnostic_location(parser.offset)); + return error.UnterminatedList; + } + if (parser.try_accept_char('}')) break; @@ -259,7 +452,7 @@ pub const Parser = struct { parser.skip_whitespace(); const head = parser.peek_char() orelse { - // TODO: Emit diagnostic + emitDiagnostic(parser, .unterminated_inline_list, parser.make_diagnostic_location(parser.offset)); return error.UnterminatedList; }; @@ -322,6 +515,13 @@ pub const Parser = struct { return null; } + const after_pipe = if (!parser.at_end()) parser.code[parser.offset] else null; + if (after_pipe) |c| { + if (c != ' ' and c != '\n') { + emitDiagnostic(parser, .verbatim_missing_space, parser.make_diagnostic_location(head)); + } + } + while (!parser.at_end()) { const c = parser.code[parser.offset]; if (c == '\n') { @@ -332,11 +532,17 @@ pub const Parser = struct { parser.offset += 1; } if (parser.at_end()) { - // TODO: Emit diagnostic about verbatim lines should have an empty line feed at the end of the file. 
+ emitDiagnostic(parser, .verbatim_missing_trailing_newline, parser.make_diagnostic_location(parser.offset)); } const token = parser.slice(head, parser.offset); std.debug.assert(std.mem.startsWith(u8, token.text, "|")); + if (token.text.len > 0) { + const last = token.text[token.text.len - 1]; + if (last == ' ' or last == '\t') { + emitDiagnostic(parser, .trailing_whitespace, parser.make_diagnostic_location(parser.offset - 1)); + } + } return token; } @@ -350,9 +556,12 @@ pub const Parser = struct { if (parser.try_accept_char(expected)) return; - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "character", .expected_char = expected } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } + emitDiagnostic(parser, .{ .unexpected_character = .{ .expected = expected, .found = parser.code[parser.offset] } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedCharacter; } @@ -373,8 +582,10 @@ pub const Parser = struct { pub fn try_accept_string(parser: *Parser) !?Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "string literal" } }, parser.make_diagnostic_location(parser.offset)); return null; + } if (parser.code[parser.offset] != '"') return null; @@ -385,12 +596,16 @@ pub const Parser = struct { pub fn accept_string(parser: *Parser) error{ OutOfMemory, UnexpectedEndOfFile, UnexpectedCharacter, UnterminatedStringLiteral }!Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "string literal" } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } const start = parser.offset; - if (parser.code[start] != '"') + if (parser.code[start] != '"') { + emitDiagnostic(parser, .{ .unexpected_character = .{ .expected = '"', .found = parser.code[start] } }, 
parser.make_diagnostic_location(parser.offset)); return error.UnexpectedCharacter; + } parser.offset += 1; @@ -419,19 +634,24 @@ pub const Parser = struct { } } + emitDiagnostic(parser, .unterminated_string, parser.make_diagnostic_location(start)); return error.UnterminatedStringLiteral; } pub fn accept_identifier(parser: *Parser) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "identifier" } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } const start = parser.offset; const first = parser.code[start]; - if (!is_ident_char(first)) + if (!is_ident_char(first)) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); return error.InvalidCharacter; + } while (parser.offset < parser.code.len) { const c = parser.code[parser.offset]; @@ -447,8 +667,10 @@ pub const Parser = struct { pub fn accept_word(parser: *Parser) error{UnexpectedEndOfFile}!Token { parser.skip_whitespace(); - if (parser.at_end()) + if (parser.at_end()) { + emitDiagnostic(parser, .{ .unexpected_eof = .{ .context = "word" } }, parser.make_diagnostic_location(parser.offset)); return error.UnexpectedEndOfFile; + } const start = parser.offset; @@ -708,34 +930,77 @@ pub const Diagnostic = struct { } }; - /// An diagnostic code encoded as a 16 bit integer. - /// The upper 4 bit encode the severity of the code, the lower 12 bit the number. 
- pub const Code = enum(u16) { - // bitmasks: - const ERROR = 0x1000; - const WARNING = 0x2000; - - // TODO: Add other diagnostic codes + pub const UnexpectedEof = struct { context: []const u8, expected_char: ?u8 = null }; + pub const UnexpectedCharacter = struct { expected: u8, found: u8 }; + pub const InvalidIdentifierStart = struct { char: u8 }; + pub const DuplicateAttribute = struct { name: []const u8 }; + pub const MissingHdocHeader = struct {}; + pub const DuplicateHdocHeader = struct {}; + pub const Code = union(enum) { // errors: - invalid_character = ERROR | 1, + unterminated_inline_list, + unexpected_eof: UnexpectedEof, + unexpected_character: UnexpectedCharacter, + unterminated_string, + invalid_identifier_start: InvalidIdentifierStart, + unterminated_block_list, + missing_hdoc_header: MissingHdocHeader, + duplicate_hdoc_header: DuplicateHdocHeader, // warnings: - missing_space_in_literal = WARNING | 1, - - pub fn get_severity(code: Code) Severity { - const num = @intFromEnum(code); - return switch (num & 0xF000) { - ERROR => .@"error", - WARNING => .warning, - else => @panic("invalid error code!"), + duplicate_attribute: DuplicateAttribute, + empty_verbatim_block, + verbatim_missing_trailing_newline, + verbatim_missing_space, + trailing_whitespace, + + pub fn severity(code: Code) Severity { + return switch (code) { + .unterminated_inline_list => .@"error", + .unexpected_eof => .@"error", + .unexpected_character => .@"error", + .unterminated_string => .@"error", + .invalid_identifier_start => .@"error", + .unterminated_block_list => .@"error", + .missing_hdoc_header => .@"error", + .duplicate_hdoc_header => .@"error", + + .duplicate_attribute => .warning, + .empty_verbatim_block => .warning, + .verbatim_missing_trailing_newline => .warning, + .verbatim_missing_space => .warning, + .trailing_whitespace => .warning, }; } + + pub fn format(code: Code, w: anytype) !void { + switch (code) { + .unterminated_inline_list => try w.writeAll("Inline list body is 
unterminated (missing '}' before end of file)."), + .unexpected_eof => |ctx| { + if (ctx.expected_char) |ch| { + try w.print("Unexpected end of file while expecting '{c}'.", .{ch}); + } else { + try w.print("Unexpected end of file while expecting {s}.", .{ctx.context}); + } + }, + .unexpected_character => |ctx| try w.print("Expected '{c}' but found '{c}'.", .{ ctx.expected, ctx.found }), + .unterminated_string => try w.writeAll("Unterminated string literal (missing closing \")."), + .invalid_identifier_start => |ctx| try w.print("Invalid identifier start character: '{c}'.", .{ctx.char}), + .unterminated_block_list => try w.writeAll("Block list body is unterminated (missing '}' before end of file)."), + .missing_hdoc_header => try w.writeAll("Document must start with an 'hdoc' header."), + .duplicate_hdoc_header => try w.writeAll("Only one 'hdoc' header is allowed; additional header found."), + .duplicate_attribute => |ctx| try w.print("Duplicate attribute '{s}' will overwrite the earlier value.", .{ctx.name}), + .empty_verbatim_block => try w.writeAll("Verbatim block has no lines."), + .verbatim_missing_trailing_newline => try w.writeAll("Verbatim line should end with a newline."), + .verbatim_missing_space => try w.writeAll("Expected a space after '|' in verbatim line."), + .trailing_whitespace => try w.writeAll("Trailing whitespace at end of line."), + } + } }; code: Code, location: Location, - message: []const u8, }; /// A collection of diagnostic messages. 
@@ -752,18 +1017,28 @@ pub const Diagnostics = struct { diag.* = undefined; } - pub fn add(diag: *Diagnostics, code: Diagnostic.Code, location: Diagnostic.Location, comptime fmt: []const u8, args: anytype) !void { - const allocator = diag.arena.allocator(); - - const msg = try std.fmt.allocPrint(allocator, fmt, args); - errdefer allocator.free(msg); - - try diag.items.append(allocator, .{ + pub fn add(diag: *Diagnostics, code: Diagnostic.Code, location: Diagnostic.Location) !void { + try diag.items.append(diag.arena.allocator(), .{ .location = location, .code = code, - .message = msg, }); } + + pub fn has_error(diag: Diagnostics) bool { + for (diag.items.items) |item| { + if (item.code.severity() == .@"error") + return true; + } + return false; + } + + pub fn has_warning(diag: Diagnostics) bool { + for (diag.items.items) |item| { + if (item.code.severity() == .warning) + return true; + } + return false; + } }; test "fuzz parser" { diff --git a/src/main.zig b/src/main.zig index 32e30a0..d9ecaf0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -26,8 +26,14 @@ pub fn main() !u8 { const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); defer allocator.free(document); - var parsed = try hdoc.parse(allocator, document, null); + var diagnostics: hdoc.Diagnostics = .init(allocator); + defer diagnostics.deinit(); + + var parsed = try hdoc.parse(allocator, document, &diagnostics); defer parsed.deinit(); + if (diagnostics.has_error()) + return 1; + return 0; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 052e95f..634c428 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -241,6 +241,77 @@ test "parser handles unknown node types" { } } +fn diagnosticsContain(diag: *const hdoc.Diagnostics, expected: hdoc.Diagnostic.Code) bool { + for (diag.items.items) |item| { + if (std.meta.activeTag(item.code) == std.meta.activeTag(expected)) { + return true; + } + } + return false; +} + +test "parsing valid document yields empty diagnostics" { + 
var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, "hdoc(version=\"2.0\");", &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expect(!diagnostics.has_warning()); + try std.testing.expectEqual(@as(usize, 0), diagnostics.items.items.len); +} + +test "diagnostic codes are emitted for expected samples" { + const Case = struct { + code: hdoc.Diagnostic.Code, + samples: []const []const u8, + }; + + const cases = [_]Case{ + .{ .code = .{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }, .samples = &.{"hdoc(version=\"2.0\"); h1("} }, + .{ .code = .{ .unexpected_character = .{ .expected = '{', .found = '1' } }, .samples = &.{"hdoc(version=\"2.0\"); h1 123"} }, + .{ .code = .unterminated_string, .samples = &.{"hdoc(version=\"2.0\"); h1 \"unterminated"} }, + .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, + .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, + .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, + .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, + .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, + .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, + .{ .code = .verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, + .{ .code = .trailing_whitespace, .samples = &.{"hdoc(version=\"2.0\"); pre:\n| trailing \n"} }, + .{ .code = .missing_hdoc_header, .samples = &.{"h1 \"Title\""} }, + .{ .code = .duplicate_hdoc_header, .samples = &.{"hdoc(version=\"2.0\"); hdoc(version=\"2.0\");"} }, + }; + + inline for (cases) |case| { + for (case.samples) |sample| { + var diagnostics: 
hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const maybe_doc = hdoc.parse(std.testing.allocator, sample, &diagnostics) catch |err| switch (err) { + error.OutOfMemory => return err, + else => null, + }; + + if (maybe_doc) |doc| { + var owned_doc = doc; + defer owned_doc.deinit(); + } + + try std.testing.expect(diagnosticsContain(&diagnostics, case.code)); + + const expected_severity = case.code.severity(); + if (expected_severity == .@"error") { + try std.testing.expect(diagnostics.has_error()); + } else { + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expect(diagnostics.has_warning()); + } + } + } +} + test "parser reports unterminated inline lists" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); From 46cdc65e79b2cc86a8e25d5724726e3b9ce7ddc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 13:58:07 +0100 Subject: [PATCH 012/116] Introduces more types into the DOM, adds basics for attribute parsing. 
--- docs/specification.md | 85 +++++----- examples/demo.hdoc | 2 +- src/hyperdoc.zig | 360 +++++++++++++++++++++++++++++++++--------- src/main.zig | 2 + 4 files changed, 334 insertions(+), 115 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index fd0e433..4c00749 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -64,32 +64,32 @@ WORD := /[^\s\{\}\\\"(),=:]+/ ## Element Overview -| Element | Element Type | Allowed Children | Attributes | -| ----------------------------------------------------------- | ------------ | ---------------------------- | ------------------------------------ | -| *Document* | Document | `hdoc`, Blocks | | +| Element | Element Type | Allowed Children | Attributes | +| ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------- | +| *Document* | Document | `hdoc`, Blocks | | | `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | -| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | -| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | -| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | -| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | -| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | -| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | -| `toc` | Block | - | `lang`, \[`id`\], `depth` | -| `table` | Block | Table Rows | `lang`, \[`id`\] | -| `columns` | Table Row | `td` ≥ 1 | `lang` | -| `group` | Table Row | Text Body | `lang`, | -| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `li` | List Item | Blocks, String, Verbatim | `lang` | -| `\em` | Text Body | Text Body | `lang` | -| `\mono` | Text Body | Text Body | `lang`, `syntax` | -| `\strike` | Text Body | Text Body | `lang` | -| `\sub`, `\sup` | Text Body | Text Body | `lang` | -| `\link` | 
Text Body | Text Body | `lang`, (`ref` \| `uri`) | -| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | -| *Plain Text* | Text Body | - | | -| *String* | Text Body | - | | -| *Verbatim* | Text Body | - | | +| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | +| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | +| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | +| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | +| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | +| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | +| `toc` | Block | - | `lang`, \[`id`\], `depth` | +| `table` | Block | Table Rows | `lang`, \[`id`\] | +| `columns` | Table Row | `td` ≥ 1 | `lang` | +| `group` | Table Row | Text Body | `lang`, | +| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `\em` | Text Body | Text Body | `lang` | +| `\mono` | Text Body | Text Body | `lang`, `syntax` | +| `\strike` | Text Body | Text Body | `lang` | +| `\sub`, `\sup` | Text Body | Text Body | `lang` | +| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | +| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | +| *Plain Text* | Text Body | - | | +| *String* | Text Body | - | | +| *Verbatim* | Text Body | - | | Notes: @@ -99,23 +99,23 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) timestamp | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | | +| Attribute | Required | Allowed Values | Description | +| --------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| +| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | +| `title` | No | *Any* | Sets the title of the document or the table row. | +| `author` | No | *Any* | Sets the author of the document. | +| `date` | No | A date-time value using the format specified below (intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601)) | Sets the authoring date of the document. | +| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | +| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | | ## Semantic Structure @@ -341,3 +341,4 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically | `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. 
+ diff --git a/examples/demo.hdoc b/examples/demo.hdoc index 68ef189..a092e91 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0" , lang="en", title="HyperDoc \"2.0\" Demonstration"); +hdoc(version="2.0", lang="en", title="HyperDoc \"2.0\" Demonstration"); h1{HyperDoc 2.0} diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index d557866..32aacc9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -5,9 +5,19 @@ const parser_toolkit = @import("parser-toolkit"); /// tree structure of the document. pub const Document = struct { arena: std.heap.ArenaAllocator, + + version: Version, + + // document contents: contents: []Block, ids: []?[]const u8, + // header information + lang: ?[]const u8, + title: ?[]const u8, + author: ?[]const u8, + date: ?DateTime, + pub fn deinit(doc: *Document) void { doc.arena.deinit(); doc.* = undefined; @@ -20,7 +30,6 @@ pub const Document = struct { /// Depending on the level of nesting, the width might decrease /// from the full document size. 
pub const Block = union(enum) { - header: Header, heading: Heading, paragraph: Paragraph, list: List, @@ -29,14 +38,6 @@ pub const Block = union(enum) { toc: TableOfContents, table: Table, - pub const Header = struct { - lang: ?[]const u8, - title: ?[]const u8, - version: ?[]const u8, - author: ?[]const u8, - date: ?[]const u8, - }; - pub const Heading = struct { level: HeadingLevel, lang: ?[]const u8, @@ -118,15 +119,17 @@ pub const Block = union(enum) { pub const SpanContent = union(enum) { text: []const u8, - date: DateTime, - time: DateTime, - datetime: DateTime, + date: FormattedDateTime(Date), + time: FormattedDateTime(Time), + datetime: FormattedDateTime(DateTime), }; -pub const DateTime = struct { - value: []const u8, - format: ?[]const u8 = null, -}; +pub fn FormattedDateTime(comptime DT: type) type { + return struct { + value: DT, + format: DT.Format = .default, + }; +} pub const Span = struct { content: SpanContent, @@ -146,6 +149,95 @@ pub const Link = union(enum) { uri: []const u8, }; +/// HyperDoc Version Number +pub const Version = struct { + major: u16, + minor: u16, + + pub fn parse(text: []const u8) !Version { + const split_index = std.mem.indexOfScalar(u8, text, '.') orelse return error.InvalidValue; + + const head = text[0..split_index]; + const tail = text[split_index + 1 ..]; + + return .{ + .major = std.fmt.parseInt(u16, head, 10) catch return error.InvalidValue, + .minor = std.fmt.parseInt(u16, tail, 10) catch return error.InvalidValue, + }; + } +}; + +pub const DateTime = struct { + pub const Format = enum { + pub const default: Format = .short; + + short, + long, + relative, + iso, + }; + + date: Date, + time: Time, + + pub fn parse(text: []const u8) !DateTime { + const split_index = std.mem.indexOfScalar(u8, text, 'T') orelse return error.InvalidValue; + + const head = text[0..split_index]; + const tail = text[split_index + 1 ..]; + + return .{ + .date = try Date.parse(head), + .time = try Time.parse(tail), + }; + } +}; + +pub const Date 
= struct { + pub const Format = enum { + pub const default: Format = .short; + year, + month, + day, + weekday, + short, + long, + relative, + iso, + }; + + year: i32, // e.g., 2024 + month: u4, // 1-12 + day: u5, // 1-31 + + pub fn parse(text: []const u8) !Date { + _ = text; + @panic("TODO: Implement this"); + } +}; + +pub const Time = struct { + pub const Format = enum { + pub const default: Format = .short; + + long, + short, + rough, + relative, + iso, + }; + + hour: u5, // 0-23 + minute: u6, // 0-59 + second: u6, // 0-59 + microsecond: u20, // 0-999999 + + pub fn parse(text: []const u8) !Time { + _ = text; + @panic("TODO: Implement this"); + } +}; + /// Parses a HyperDoc document. pub fn parse( allocator: std.mem.Allocator, @@ -154,7 +246,7 @@ pub fn parse( /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) error{ OutOfMemory, SyntaxError }!Document { +) error{ OutOfMemory, SyntaxError, MalformedDocument }!Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); @@ -170,9 +262,6 @@ pub fn parse( .code = plain_text, }; - var blocks: std.ArrayList(Block) = .empty; - var ids: std.ArrayList(?[]const u8) = .empty; - while (true) { errdefer |err| { std.log.debug("error at examples/demo.hdoc:{f}: {t}", .{ @@ -194,67 +283,185 @@ pub fn parse( => return error.SyntaxError, }; - const block = sema.translate_toplevel_node(node) catch |err| switch (err) { - error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), - - error.InvalidNodeType => continue, - }; - - try blocks.append(arena.allocator(), block); - try ids.append(arena.allocator(), null); + try sema.append_node(node); } + const header = sema.header orelse return error.MalformedDocument; + return .{ .arena = arena, - .contents = try blocks.toOwnedSlice(arena.allocator()), - .ids = try ids.toOwnedSlice(arena.allocator()), + .contents = try 
sema.blocks.toOwnedSlice(arena.allocator()), + .ids = try sema.ids.toOwnedSlice(arena.allocator()), + + .lang = header.lang, + .title = header.title, + .version = header.version, + .author = header.author, + .date = header.date, }; } pub const SemanticAnalyzer = struct { + const Header = struct { + version: Version, + lang: ?[]const u8, + title: ?[]const u8, + author: ?[]const u8, + date: ?DateTime, + }; + arena: std.mem.Allocator, diagnostics: ?*Diagnostics, code: []const u8, - seen_header: bool = false, - - fn translate_toplevel_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType }!Block { - if (!sema.seen_header) { - sema.seen_header = true; - if (node.type != .hdoc) { - sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); - return error.InvalidNodeType; + + header: ?Header = null, + blocks: std.ArrayList(Block) = .empty, + ids: std.ArrayList(?[]const u8) = .empty, + + fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{OutOfMemory}!void { + switch (node.type) { + .hdoc => { + if (sema.header != null) { + try sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); + } + sema.header = sema.translate_header_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.BadAttributes => null, + }; + }, + + else => { + if (sema.header == null) { + if (sema.blocks.items.len == 0) { + // Emit error for the first encountered block. + // This can only happen exactly once, as we either: + // - have already set a header block when the first non-header nodes arrives. + // - we have processed another block already, so the previous block would've emitted the warning already. 
+ try sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); + } + } + + const block, const id = sema.translate_block_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.InvalidNodeType, error.BadAttributes => { + return; + }, + }; + + try sema.blocks.append(sema.arena, block); + try sema.ids.append(sema.arena, id); + }, + } + } + + fn translate_header_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }!Header { + std.debug.assert(node.type == .hdoc); + + const attrs = try sema.get_attributes(node, struct { + version: Version, + title: ?[]const u8 = null, + author: ?[]const u8 = null, + date: ?DateTime = null, + lang: ?[]const u8 = null, + }); + + return .{ + .version = attrs.version, + .lang = attrs.lang, + .title = attrs.title, + .author = attrs.author, + .date = attrs.date, + }; + } + + fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes }!struct { Block, ?[]const u8 } { + std.debug.assert(node.type != .hdoc); + + _ = sema; + + return error.InvalidNodeType; + } + + fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { + const Fields = std.meta.FieldEnum(Attrs); + const fields = @typeInfo(Attrs).@"struct".fields; + + var required: std.EnumSet(Fields) = .initEmpty(); + + var attrs: Attrs = undefined; + inline for (fields) |fld| { + if (fld.default_value_ptr) |default_value_ptr| { + @field(attrs, fld.name) = @as(*const fld.type, @ptrCast(@alignCast(default_value_ptr))).*; + } else { + @field(attrs, fld.name) = undefined; + required.insert(@field(Fields, fld.name)); } - } else if (node.type == .hdoc) { - sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); - return error.InvalidNodeType; } - if (node.type == .hdoc) { - return .{ - .header = .{ - .lang = null, - .title = sema.attr_value(node.attributes, "title"), - .version = 
sema.attr_value(node.attributes, "version"), - .author = sema.attr_value(node.attributes, "author"), - .date = sema.attr_value(node.attributes, "date"), - }, + var any_invalid = false; + var found: std.EnumSet(Fields) = .initEmpty(); + for (node.attributes.keys(), node.attributes.values()) |key, attrib| { + const fld = std.meta.stringToEnum(Fields, key) orelse { + try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + continue; }; + if (found.contains(fld)) { + try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, node.location.offset); + } + found.insert(fld); + + switch (fld) { + inline else => |tag| @field(attrs, @tagName(tag)) = sema.cast_value(attrib, @FieldType(Attrs, @tagName(tag))) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + + else => { + any_invalid = true; + + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + + continue; + }, + }, + } } - return error.InvalidNodeType; + // Check if we have any required attributes missing: + var any_missing = false; + { + var iter = required.iterator(); + while (iter.next()) |req_field| { + if (!found.contains(req_field)) { + try sema.emit_diagnostic(.{ .missing_attribute = .{ .type = node.type, .name = @tagName(req_field) } }, node.location.offset); + any_missing = true; + } + } + } + if (any_missing or any_invalid) + return error.BadAttributes; + + return attrs; } - fn attr_value(sema: *SemanticAnalyzer, attrs: std.StringArrayHashMapUnmanaged(Parser.Attribute), name: []const u8) ?[]const u8 { - _ = sema; - if (attrs.get(name)) |attr| { - return attr.value; + fn cast_value(sema: *SemanticAnalyzer, attrib: Parser.Attribute, comptime T: type) error{ OutOfMemory, InvalidValue }!T { + if (@typeInfo(T) == .optional) { + return try sema.cast_value(attrib, @typeInfo(T).optional.child); } - return null; + + return switch (T) { + []const u8 => attrib.value, + + Version => 
Version.parse(attrib.value) catch return error.InvalidValue, + DateTime => DateTime.parse(attrib.value) catch return error.InvalidValue, + Date => Date.parse(attrib.value) catch return error.InvalidValue, + Time => Time.parse(attrib.value) catch return error.InvalidValue, + + else => @compileError("Unsupported attribute type: " ++ @typeName(T)), + }; } - fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) void { + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) !void { if (sema.diagnostics) |diag| { - diag.add(code, sema.make_location(offset)) catch {}; + try diag.add(code, sema.make_location(offset)); } } @@ -934,6 +1141,7 @@ pub const Diagnostic = struct { pub const UnexpectedCharacter = struct { expected: u8, found: u8 }; pub const InvalidIdentifierStart = struct { char: u8 }; pub const DuplicateAttribute = struct { name: []const u8 }; + pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; @@ -947,8 +1155,11 @@ pub const Diagnostic = struct { unterminated_block_list, missing_hdoc_header: MissingHdocHeader, duplicate_hdoc_header: DuplicateHdocHeader, + missing_attribute: NodeAttributeError, + invalid_attribute: NodeAttributeError, // warnings: + unknown_attribute: NodeAttributeError, duplicate_attribute: DuplicateAttribute, empty_verbatim_block, verbatim_missing_trailing_newline, @@ -957,20 +1168,25 @@ pub const Diagnostic = struct { pub fn severity(code: Code) Severity { return switch (code) { - .unterminated_inline_list => .@"error", - .unexpected_eof => .@"error", - .unexpected_character => .@"error", - .unterminated_string => .@"error", - .invalid_identifier_start => .@"error", - .unterminated_block_list => .@"error", - .missing_hdoc_header => .@"error", - .duplicate_hdoc_header => .@"error", - - .duplicate_attribute => .warning, - .empty_verbatim_block => .warning, - 
.verbatim_missing_trailing_newline => .warning, - .verbatim_missing_space => .warning, - .trailing_whitespace => .warning, + .unterminated_inline_list, + .unexpected_eof, + .unexpected_character, + .unterminated_string, + .invalid_identifier_start, + .unterminated_block_list, + .missing_hdoc_header, + .duplicate_hdoc_header, + .invalid_attribute, + .missing_attribute, + => .@"error", + + .unknown_attribute, + .duplicate_attribute, + .empty_verbatim_block, + .verbatim_missing_trailing_newline, + .verbatim_missing_space, + .trailing_whitespace, + => .warning, }; } diff --git a/src/main.zig b/src/main.zig index d9ecaf0..ccc0786 100644 --- a/src/main.zig +++ b/src/main.zig @@ -35,5 +35,7 @@ pub fn main() !u8 { if (diagnostics.has_error()) return 1; + // TODO: Implement dumping of "parsed " + return 0; } From 986cf055921e2204ad9507993e23a8983c89cb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 14:15:38 +0100 Subject: [PATCH 013/116] Vibecoded: Adds dump code in main. 
--- build.zig | 13 ++ src/main.zig | 452 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 464 insertions(+), 1 deletion(-) diff --git a/build.zig b/build.zig index 4c71fd2..5018027 100644 --- a/build.zig +++ b/build.zig @@ -51,6 +51,19 @@ pub fn build(b: *std.Build) void { .use_llvm = true, }); test_step.dependOn(&b.addRunArtifact(exe_tests).step); + + const main_tests = b.addTest(.{ + .root_module = b.createModule(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, + }), + .use_llvm = true, + }); + test_step.dependOn(&b.addRunArtifact(main_tests).step); } fn rawFileMod(b: *std.Build, path: []const u8) std.Build.Module.Import { diff --git a/src/main.zig b/src/main.zig index ccc0786..98ea655 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,6 +4,455 @@ const hdoc = @import("hyperdoc"); var debug_allocator: std.heap.DebugAllocator(.{}) = .init; +const indent_step: usize = 2; + +fn writeIndent(writer: anytype, indent: usize) !void { + var i: usize = 0; + while (i < indent) : (i += 1) { + try writer.writeByte(' '); + } +} + +fn writeStringValue(writer: anytype, value: []const u8) !void { + try writer.print("\"{f}\"", .{std.zig.fmtString(value)}); +} + +fn writeOptionalStringValue(writer: anytype, value: ?[]const u8) !void { + if (value) |text| { + try writeStringValue(writer, text); + } else { + try writer.writeAll("null"); + } +} + +fn writeOptionalIntValue(writer: anytype, value: anytype) !void { + if (value) |number| { + try writer.print("{}", .{number}); + } else { + try writer.writeAll("null"); + } +} + +fn dumpOptionalStringField(writer: anytype, indent: usize, key: []const u8, value: ?[]const u8) !void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpOptionalNumberField(writer: anytype, indent: usize, key: []const u8, 
value: anytype) !void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalIntValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpBoolField(writer: anytype, indent: usize, key: []const u8, value: bool) !void { + try writeIndent(writer, indent); + try writer.print("{s}: {}\n", .{ key, value }); +} + +fn dumpEnumField(writer: anytype, indent: usize, key: []const u8, value: anytype) !void { + try writeIndent(writer, indent); + try writer.print("{s}: {s}\n", .{ key, @tagName(value) }); +} + +fn dumpVersion(writer: anytype, indent: usize, version: hdoc.Version) !void { + try writeIndent(writer, indent); + try writer.writeAll("version:\n"); + try writeIndent(writer, indent + indent_step); + try writer.print("major: {}\n", .{version.major}); + try writeIndent(writer, indent + indent_step); + try writer.print("minor: {}\n", .{version.minor}); +} + +fn dumpDate(writer: anytype, indent: usize, date: hdoc.Date) !void { + try writeIndent(writer, indent); + try writer.print("year: {}\n", .{date.year}); + try writeIndent(writer, indent); + try writer.print("month: {}\n", .{date.month}); + try writeIndent(writer, indent); + try writer.print("day: {}\n", .{date.day}); +} + +fn dumpTime(writer: anytype, indent: usize, time: hdoc.Time) !void { + try writeIndent(writer, indent); + try writer.print("hour: {}\n", .{time.hour}); + try writeIndent(writer, indent); + try writer.print("minute: {}\n", .{time.minute}); + try writeIndent(writer, indent); + try writer.print("second: {}\n", .{time.second}); + try writeIndent(writer, indent); + try writer.print("microsecond: {}\n", .{time.microsecond}); +} + +fn dumpDateTime(writer: anytype, indent: usize, datetime: hdoc.DateTime) !void { + try writeIndent(writer, indent); + try writer.writeAll("date:\n"); + try dumpDate(writer, indent + indent_step, datetime.date); + try writeIndent(writer, indent); + try writer.writeAll("time:\n"); + try dumpTime(writer, indent + indent_step, datetime.time); 
+} + +fn dumpFormattedDate(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { + try writeIndent(writer, indent); + try writer.writeAll("value:\n"); + try dumpDate(writer, indent + indent_step, formatted.value); + try writeIndent(writer, indent); + try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +} + +fn dumpFormattedTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { + try writeIndent(writer, indent); + try writer.writeAll("value:\n"); + try dumpTime(writer, indent + indent_step, formatted.value); + try writeIndent(writer, indent); + try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +} + +fn dumpFormattedDateTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.DateTime)) !void { + try writeIndent(writer, indent); + try writer.writeAll("value:\n"); + try dumpDateTime(writer, indent + indent_step, formatted.value); + try writeIndent(writer, indent); + try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +} + +fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.SpanContent) !void { + switch (content) { + .text => |text| { + try writeIndent(writer, indent); + try writer.writeAll("text: "); + try writeStringValue(writer, text); + try writer.writeByte('\n'); + }, + .date => |date| { + try writeIndent(writer, indent); + try writer.writeAll("date:\n"); + try dumpFormattedDate(writer, indent + indent_step, date); + }, + .time => |time| { + try writeIndent(writer, indent); + try writer.writeAll("time:\n"); + try dumpFormattedTime(writer, indent + indent_step, time); + }, + .datetime => |datetime| { + try writeIndent(writer, indent); + try writer.writeAll("datetime:\n"); + try dumpFormattedDateTime(writer, indent + indent_step, datetime); + }, + } +} + +fn dumpLink(writer: anytype, indent: usize, link: hdoc.Link) !void { + switch (link) { + .none => { + try writeIndent(writer, indent); + try writer.writeAll("link: 
none\n"); + }, + .ref => |value| { + try writeIndent(writer, indent); + try writer.writeAll("link:\n"); + try writeIndent(writer, indent + indent_step); + try writer.writeAll("ref: "); + try writeStringValue(writer, value); + try writer.writeByte('\n'); + }, + .uri => |value| { + try writeIndent(writer, indent); + try writer.writeAll("link:\n"); + try writeIndent(writer, indent + indent_step); + try writer.writeAll("uri: "); + try writeStringValue(writer, value); + try writer.writeByte('\n'); + }, + } +} + +fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { + try writeIndent(writer, indent); + try writer.writeAll("content:\n"); + try dumpSpanContent(writer, indent + indent_step, span.content); + try dumpOptionalStringField(writer, indent, "lang", span.lang); + try dumpBoolField(writer, indent, "em", span.em); + try dumpBoolField(writer, indent, "mono", span.mono); + try dumpBoolField(writer, indent, "strike", span.strike); + try dumpBoolField(writer, indent, "sub", span.sub); + try dumpBoolField(writer, indent, "sup", span.sup); + try dumpLink(writer, indent, span.link); + try dumpOptionalStringField(writer, indent, "syntax", span.syntax); +} + +fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { + try writeIndent(writer, indent); + if (spans.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (spans) |span| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpSpan(writer, indent + indent_step * 2, span); + } +} + +fn dumpListItem(writer: anytype, indent: usize, item: hdoc.Block.ListItem) !void { + try dumpOptionalStringField(writer, indent, "lang", item.lang); + try dumpSpanListField(writer, indent, "content", item.content); +} + +fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) !void { + try writeIndent(writer, indent); + if (items.len == 0) 
{ + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (items) |item| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpListItem(writer, indent + indent_step * 2, item); + } +} + +fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { + try dumpOptionalStringField(writer, indent, "lang", cell.lang); + try dumpOptionalNumberField(writer, indent, "colspan", cell.colspan); + try dumpSpanListField(writer, indent, "content", cell.content); +} + +fn dumpTableCellsField(writer: anytype, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) !void { + try writeIndent(writer, indent); + if (cells.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (cells) |cell| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpTableCell(writer, indent + indent_step * 2, cell); + } +} + +fn dumpTableColumns(writer: anytype, indent: usize, columns: hdoc.Block.TableColumns) !void { + try dumpOptionalStringField(writer, indent, "lang", columns.lang); + try dumpTableCellsField(writer, indent, "cells", columns.cells); +} + +fn dumpTableDataRow(writer: anytype, indent: usize, row: hdoc.Block.TableDataRow) !void { + try dumpOptionalStringField(writer, indent, "lang", row.lang); + try dumpOptionalStringField(writer, indent, "title", row.title); + try dumpTableCellsField(writer, indent, "cells", row.cells); +} + +fn dumpTableGroup(writer: anytype, indent: usize, group: hdoc.Block.TableGroup) !void { + try dumpOptionalStringField(writer, indent, "lang", group.lang); + try dumpSpanListField(writer, indent, "content", group.content); +} + +fn dumpTableRow(writer: anytype, indent: usize, row: hdoc.Block.TableRow) !void { + switch (row) { + .columns => |columns| { + try writeIndent(writer, indent); + try writer.writeAll("columns:\n"); + try dumpTableColumns(writer, indent + 
indent_step, columns); + }, + .row => |data_row| { + try writeIndent(writer, indent); + try writer.writeAll("row:\n"); + try dumpTableDataRow(writer, indent + indent_step, data_row); + }, + .group => |group| { + try writeIndent(writer, indent); + try writer.writeAll("group:\n"); + try dumpTableGroup(writer, indent + indent_step, group); + }, + } +} + +fn dumpTableRowsField(writer: anytype, indent: usize, key: []const u8, rows: []const hdoc.Block.TableRow) !void { + try writeIndent(writer, indent); + if (rows.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (rows) |row| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpTableRow(writer, indent + indent_step * 2, row); + } +} + +fn dumpBlock(writer: anytype, indent: usize, block: hdoc.Block) !void { + switch (block) { + .heading => |heading| { + try writeIndent(writer, indent); + try writer.writeAll("heading:\n"); + try dumpEnumField(writer, indent + indent_step, "level", heading.level); + try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); + try dumpSpanListField(writer, indent + indent_step, "content", heading.content); + }, + .paragraph => |paragraph| { + try writeIndent(writer, indent); + try writer.writeAll("paragraph:\n"); + try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); + try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); + try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); + }, + .list => |list| { + try writeIndent(writer, indent); + try writer.writeAll("list:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); + try dumpListItemsField(writer, indent + indent_step, "items", list.items); + }, + .image => |image| { + try writeIndent(writer, indent); + try 
writer.writeAll("image:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); + try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); + try dumpOptionalStringField(writer, indent + indent_step, "path", image.path); + try dumpSpanListField(writer, indent + indent_step, "content", image.content); + }, + .preformatted => |preformatted| { + try writeIndent(writer, indent); + try writer.writeAll("preformatted:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); + try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); + try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); + }, + .toc => |toc| { + try writeIndent(writer, indent); + try writer.writeAll("toc:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); + }, + .table => |table| { + try writeIndent(writer, indent); + try writer.writeAll("table:\n"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); + try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); + }, + } +} + +fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) !void { + try writeIndent(writer, indent); + if (blocks.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (blocks) |block| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpBlock(writer, indent + indent_step * 2, block); + } +} + +fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, values: []?[]const u8) !void { + try writeIndent(writer, indent); + if (values.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (values) |value| { + try 
writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); + } +} + +fn dumpOptionalDateTimeField(writer: anytype, indent: usize, key: []const u8, value: ?hdoc.DateTime) !void { + try writeIndent(writer, indent); + if (value) |datetime| { + try writer.print("{s}:\n", .{key}); + try dumpDateTime(writer, indent + indent_step, datetime); + } else { + try writer.print("{s}: null\n", .{key}); + } +} + +fn dumpDocument(writer: anytype, doc: *const hdoc.Document) !void { + try writer.writeAll("document:\n"); + try dumpVersion(writer, indent_step, doc.version); + try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); + try dumpOptionalStringField(writer, indent_step, "title", doc.title); + try dumpOptionalStringField(writer, indent_step, "author", doc.author); + try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); + try dumpBlockListField(writer, indent_step, "contents", doc.contents); + try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); +} + +test "dumpDocument escapes string values" { + const title = "Doc \"Title\"\n"; + const span_text = "Hello \"world\"\n"; + const link_ref = "section \"A\""; + const id_value = "id:1\n"; + + var doc: hdoc.Document = .{ + .arena = std.heap.ArenaAllocator.init(std.testing.allocator), + .version = .{ .major = 1, .minor = 2 }, + .contents = &.{}, + .ids = &.{}, + .lang = null, + .title = title, + .author = null, + .date = null, + }; + defer doc.deinit(); + + const arena_alloc = doc.arena.allocator(); + + const spans = try arena_alloc.alloc(hdoc.Span, 1); + spans[0] = .{ + .content = .{ .text = span_text }, + .link = .{ .ref = link_ref }, + }; + + const blocks = try arena_alloc.alloc(hdoc.Block, 1); + blocks[0] = .{ + .heading = .{ + .level = .h1, + .lang = null, + .content = spans, + }, + }; + doc.contents = blocks; + + const ids = try arena_alloc.alloc(?[]const u8, 1); + ids[0] = id_value; + doc.ids = 
ids; + + var buffer: std.ArrayList(u8) = .empty; + defer buffer.deinit(std.testing.allocator); + + try dumpDocument(buffer.writer(std.testing.allocator), &doc); + const output = buffer.items; + + const expected_title = try std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); + defer std.testing.allocator.free(expected_title); + try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); + + const expected_span = try std.fmt.allocPrint(std.testing.allocator, "text: \"{f}\"\n", .{std.zig.fmtString(span_text)}); + defer std.testing.allocator.free(expected_span); + try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); + + const expected_link = try std.fmt.allocPrint(std.testing.allocator, "ref: \"{f}\"\n", .{std.zig.fmtString(link_ref)}); + defer std.testing.allocator.free(expected_link); + try std.testing.expect(std.mem.indexOf(u8, output, expected_link) != null); + + const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value)}); + defer std.testing.allocator.free(expected_id); + try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); +} + pub fn main() !u8 { defer if (builtin.mode == .Debug) { std.debug.assert(debug_allocator.deinit() == .ok); @@ -35,7 +484,8 @@ pub fn main() !u8 { if (diagnostics.has_error()) return 1; - // TODO: Implement dumping of "parsed " + const stdout = std.fs.File.stdout().deprecatedWriter(); + try dumpDocument(stdout, &parsed); return 0; } From 5ccfcbf3fd5ee8ed6e3ee989b9d84b4f18e5d368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 24 Dec 2025 14:22:39 +0100 Subject: [PATCH 014/116] Prepares node translation by stubbing out block layer. 
--- src/hyperdoc.zig | 107 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 32aacc9..2956179 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -377,11 +377,109 @@ pub const SemanticAnalyzer = struct { fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes }!struct { Block, ?[]const u8 } { std.debug.assert(node.type != .hdoc); - _ = sema; + switch (node.type) { + .hdoc => unreachable, + + .h1, .h2, .h3 => { + const heading, const id = try sema.translate_heading_node(node); + return .{ .{ .heading = heading }, id }; + }, + .p, .note, .warning, .danger, .tip, .quote, .spoiler => { + const paragraph, const id = try sema.translate_paragraph_node(node); + return .{ .{ .paragraph = paragraph }, id }; + }, + .ul, .ol => { + const list, const id = try sema.translate_list_node(node); + return .{ .{ .list = list }, id }; + }, + .img => { + const image, const id = try sema.translate_image_node(node); + return .{ .{ .image = image }, id }; + }, + .pre => { + const preformatted, const id = try sema.translate_preformatted_node(node); + return .{ .{ .preformatted = preformatted }, id }; + }, + .toc => { + const toc, const id = try sema.translate_toc_node(node); + return .{ .{ .toc = toc }, id }; + }, + .table => { + const table, const id = try sema.translate_table_node(node); + return .{ .{ .table = table }, id }; + }, + + .unknown_block, .unknown_inline => { + try sema.emit_diagnostic(.{ .unknown_block_type = .{ .name = sema.code[node.location.offset .. 
node.location.offset + node.location.length] } }, node.location.offset); + return error.InvalidNodeType; + }, + + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", + .@"\\time", + .@"\\date", + .@"\\datetime", + .text, + .columns, + .group, + .row, + .td, + .li, + => { + try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location.offset); + return error.InvalidNodeType; + }, + } return error.InvalidNodeType; } + fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?[]const u8 } { + _ = sema; + _ = node; + @panic("Not yet implemented"); + } + fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; 
@@ -448,7 +546,7 @@ pub const SemanticAnalyzer = struct { } return switch (T) { - []const u8 => attrib.value, + []const u8 => attrib.value, Version => Version.parse(attrib.value) catch return error.InvalidValue, DateTime => DateTime.parse(attrib.value) catch return error.InvalidValue, @@ -1144,6 +1242,7 @@ pub const Diagnostic = struct { pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; + pub const InvalidBlockError = struct { name: []const u8 }; pub const Code = union(enum) { // errors: @@ -1157,6 +1256,8 @@ pub const Diagnostic = struct { duplicate_hdoc_header: DuplicateHdocHeader, missing_attribute: NodeAttributeError, invalid_attribute: NodeAttributeError, + unknown_block_type: InvalidBlockError, + invalid_block_type: InvalidBlockError, // warnings: unknown_attribute: NodeAttributeError, @@ -1178,6 +1279,8 @@ pub const Diagnostic = struct { .duplicate_hdoc_header, .invalid_attribute, .missing_attribute, + .unknown_block_type, + .invalid_block_type, => .@"error", .unknown_attribute, From c9b32d521dbe283f1fa6a5cfb79fd1f212e60ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 25 Dec 2025 23:22:36 +0100 Subject: [PATCH 015/116] Streamlines some parts of the processor to improve the code structure. 
--- src/hyperdoc.zig | 117 +++++++++++++++++++++++++++++++++++----------- src/testsuite.zig | 4 +- 2 files changed, 91 insertions(+), 30 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2956179..08c1b6f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -346,6 +346,10 @@ pub const SemanticAnalyzer = struct { error.InvalidNodeType, error.BadAttributes => { return; }, + error.Unimplemented => { + std.log.warn("unimplemented translation of {} node", .{node.type}); + return; + }, }; try sema.blocks.append(sema.arena, block); @@ -374,7 +378,7 @@ pub const SemanticAnalyzer = struct { }; } - fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes }!struct { Block, ?[]const u8 } { + fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?[]const u8 } { std.debug.assert(node.type != .hdoc); switch (node.type) { @@ -439,45 +443,100 @@ pub const SemanticAnalyzer = struct { } fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?[]const u8 } { + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?[]const u8 = null, + }); + + const heading: Block.Heading = .{ + .level = switch (node.type) { + .h1 => .h1, + .h2 => .h2, + .h3 => .h3, + else => unreachable, + }, + .lang = attrs.lang, + .content = try sema.translate_inline_list(node.body), + }; + + return .{ heading, attrs.id }; + } + + fn translate_inline_list(sema: *SemanticAnalyzer, body: Parser.Node.Body) error{ OutOfMemory, Unimplemented }![]Span { + switch (body) { + .empty => return &.{}, + + .string => { + std.log.warn("TODO: Implement string span translation", .{}); + return error.Unimplemented; + }, + .verbatim => { + std.log.warn("TODO: Implement verbatim span translation", .{}); + return error.Unimplemented; + }, + + .list => { + var spans: std.ArrayList(Span) = 
.empty; + errdefer spans.deinit(sema.arena); + + // TODO: Insert a space span between two regular text spans if they are not consecutive to each other. + + for (body.list) |child_node| { + const span = try sema.translate_span_node(child_node); + try spans.append(sema.arena, span); + } + + // TODO: Compact spans by joining spans with equal properties + + return try spans.toOwnedSlice(sema.arena); + }, + } + } + + fn translate_span_node(sema: *SemanticAnalyzer, node: Parser.Node) !Span { + // _ = sema; - _ = node; - @panic("Not yet implemented"); + std.log.warn("TODO: Translate spans of type {}", .{node.type}); + + return .{ + .content = .{ .text = "???" }, + }; } fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return error.Unimplemented; // TODO: Implement this node type } fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?[]const u8 } { _ = sema; _ = node; - @panic("Not yet implemented"); + return 
error.Unimplemented; // TODO: Implement this node type } fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { @@ -545,13 +604,15 @@ pub const SemanticAnalyzer = struct { return try sema.cast_value(attrib, @typeInfo(T).optional.child); } + const value = try sema.unescape_string(attrib.value); + return switch (T) { - []const u8 => attrib.value, + []const u8 => value, - Version => Version.parse(attrib.value) catch return error.InvalidValue, - DateTime => DateTime.parse(attrib.value) catch return error.InvalidValue, - Date => Date.parse(attrib.value) catch return error.InvalidValue, - Time => Time.parse(attrib.value) catch return error.InvalidValue, + Version => Version.parse(value) catch return error.InvalidValue, + DateTime => DateTime.parse(value) catch return error.InvalidValue, + Date => Date.parse(value) catch return error.InvalidValue, + Time => Time.parse(value) catch return error.InvalidValue, else => @compileError("Unsupported attribute type: " ++ @typeName(T)), }; @@ -579,6 +640,18 @@ pub const SemanticAnalyzer = struct { return .{ .line = line, .column = column }; } + + /// Accepts a string literal, including the surrounding quotes. + pub fn unescape_string(sema: *SemanticAnalyzer, token: Parser.Token) error{OutOfMemory}![]const u8 { + std.debug.assert(token.text.len >= 2); + std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"'); + + _ = sema; + // TODO: Implement unescaping logic here. + + // For now, we just return the raw text. + return token.text[1 .. 
token.text.len - 1]; + } }; pub const Parser = struct { @@ -641,7 +714,7 @@ pub const Parser = struct { } gop_entry.value_ptr.* = .{ .location = attr_location, - .value = try parser.unescape_string(attr_value), + .value = attr_value, }; if (!parser.try_accept_char(',')) { @@ -1009,18 +1082,6 @@ pub const Parser = struct { return parser.offset >= parser.code.len; } - /// Accepts a string literal, including the surrounding quotes. - pub fn unescape_string(parser: *Parser, token: Token) error{OutOfMemory}![]const u8 { - std.debug.assert(token.text.len >= 2); - std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"'); - - _ = parser; - // TODO: Implement unescaping logic here. - - // For now, we just return the raw text. - return token.text[1 .. token.text.len - 1]; - } - pub fn location(parser: *Parser, start: usize, end: ?usize) Location { return .{ .offset = start, .length = (end orelse parser.offset) - start }; } @@ -1218,7 +1279,7 @@ pub const Parser = struct { pub const Attribute = struct { location: Location, - value: []const u8, + value: Token, }; }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 634c428..a0700b4 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -114,10 +114,10 @@ test "parser handles attributes and empty bodies" { try std.testing.expectEqual(@as(usize, 2), node.attributes.count()); const title = node.attributes.get("title") orelse return error.TestExpectedEqual; - try std.testing.expectEqualStrings("Hello", title.value); + try std.testing.expectEqualStrings("\"Hello\"", title.value.text); const author = node.attributes.get("author") orelse return error.TestExpectedEqual; - try std.testing.expectEqualStrings("World", author.value); + try std.testing.expectEqualStrings("\"World\"", author.value.text); try std.testing.expect(node.body == .empty); } From 5979ab79ab27e54bdb02ff80b3376d1ae8a655b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 25 Dec 2025 23:56:33 +0100 
Subject: [PATCH 016/116] Vibecoded: Implements date/time parsing, improves spec --- docs/specification.md | 72 ++++++++++++++++++++++------- src/hyperdoc.zig | 103 ++++++++++++++++++++++++++++++++++++++++-- src/testsuite.zig | 66 +++++++++++++++++++++++++++ 3 files changed, 220 insertions(+), 21 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 4c00749..4494022 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -99,23 +99,23 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. | -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | A date-time value using the format specified below (intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601)) | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. 
| -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | | +| Attribute | Required | Allowed Values | Description | +| --------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. | +| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | +| `title` | No | *Any* | Sets the title of the document or the table row. | +| `author` | No | *Any* | Sets the author of the document. | +| `date` | No | A date-time value using the format specified below (a conservative intersection of [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), compatible with both) | Sets the authoring date of the document. | +| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. 
| +| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | | ## Semantic Structure @@ -342,3 +342,41 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. +## Date/Time Formatting + +All date/time values MUST use the formats defined in this section. This is a conservative, interoperable intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), so values that conform here are valid under both specifications. Digits are ASCII decimal unless stated otherwise. + +### Date Format + +Date strings MUST follow `YYYY-MM-DD`. + +- `YYYY` is a year with one or more digits. +- `MM` is a two-digit month in the range `01` to `12`. +- `DD` is a two-digit day in the range `01` to `31`. +- The `-` separators are mandatory. + +Examples: `2025-12-25`, `1-01-01`. + +### Time Format + +Time strings MUST follow `hh:mm:ss` with a required time zone. + +- `hh`, `mm`, `ss` are two-digit hour, minute, second fields. +- Hour MUST be in `00` to `23`, minute and second MUST be in `00` to `59`. +- An optional fractional seconds component MAY follow the seconds field as `.` plus + 1, 2, 3, 6, or 9 digits. +- The fractional separator MUST be `.`. Comma is not allowed. 
+- A time zone is required and MUST be either `Z` (UTC) or a numeric offset + in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. +- Offset hours MUST be in `00` to `23`, offset minutes MUST be in `00` to `59`. + +Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`. + +### Date/Time Format + +Date/time strings MUST combine a date and time with a literal `T`. + +- Format: `YYYY-MM-DD` + `T` + `hh:mm:ss` (with optional fraction and required zone). + +Examples: `2025-12-25T22:31:50.13+01:00`, `2025-12-25T21:31:43Z`. + diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 08c1b6f..84514ee 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -211,8 +211,31 @@ pub const Date = struct { day: u5, // 1-31 pub fn parse(text: []const u8) !Date { - _ = text; - @panic("TODO: Implement this"); + const first_dash = std.mem.indexOfScalar(u8, text, '-') orelse return error.InvalidValue; + const tail = text[first_dash + 1 ..]; + const second_dash_rel = std.mem.indexOfScalar(u8, tail, '-') orelse return error.InvalidValue; + const second_dash = first_dash + 1 + second_dash_rel; + + const year_text = text[0..first_dash]; + const month_text = text[first_dash + 1 .. 
second_dash]; + const day_text = text[second_dash + 1 ..]; + + if (year_text.len == 0 or month_text.len != 2 or day_text.len != 2) return error.InvalidValue; + + const year_value = std.fmt.parseInt(u32, year_text, 10) catch return error.InvalidValue; + if (year_value > std.math.maxInt(i32)) return error.InvalidValue; + + const month_value = std.fmt.parseInt(u8, month_text, 10) catch return error.InvalidValue; + const day_value = std.fmt.parseInt(u8, day_text, 10) catch return error.InvalidValue; + + if (month_value < 1 or month_value > 12) return error.InvalidValue; + if (day_value < 1 or day_value > 31) return error.InvalidValue; + + return .{ + .year = @intCast(year_value), + .month = @intCast(month_value), + .day = @intCast(day_value), + }; } }; @@ -231,10 +254,82 @@ pub const Time = struct { minute: u6, // 0-59 second: u6, // 0-59 microsecond: u20, // 0-999999 + zone_offset: i32, // in minutes pub fn parse(text: []const u8) !Time { - _ = text; - @panic("TODO: Implement this"); + if (text.len < 9) return error.InvalidValue; + + const hour = std.fmt.parseInt(u8, text[0..2], 10) catch return error.InvalidValue; + if (text[2] != ':') return error.InvalidValue; + const minute = std.fmt.parseInt(u8, text[3..5], 10) catch return error.InvalidValue; + if (text[5] != ':') return error.InvalidValue; + const second = std.fmt.parseInt(u8, text[6..8], 10) catch return error.InvalidValue; + + if (hour > 23 or minute > 59 or second > 59) return error.InvalidValue; + + var index: usize = 8; + var microsecond: u20 = 0; + + if (index >= text.len) return error.InvalidValue; + + if (text[index] == '.') { + const start = index + 1; + var end = start; + while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {} + if (end == start) return error.InvalidValue; + + const fraction_value = std.fmt.parseInt(u64, text[start..end], 10) catch return error.InvalidValue; + microsecond = fractionToMicrosecond(end - start, fraction_value) orelse return error.InvalidValue; + index = 
end; + } + + if (index >= text.len) return error.InvalidValue; + + if (text[index] == 'Z') { + if (index + 1 != text.len) return error.InvalidValue; + return .{ + .hour = @intCast(hour), + .minute = @intCast(minute), + .second = @intCast(second), + .microsecond = microsecond, + .zone_offset = 0, + }; + } + + const sign_char = text[index]; + if (sign_char != '+' and sign_char != '-') return error.InvalidValue; + const sign: i32 = if (sign_char == '+') 1 else -1; + + if (text.len - index != 6) return error.InvalidValue; + const zone_hour = std.fmt.parseInt(u8, text[index + 1 .. index + 3], 10) catch return error.InvalidValue; + if (text[index + 3] != ':') return error.InvalidValue; + const zone_minute = std.fmt.parseInt(u8, text[index + 4 .. index + 6], 10) catch return error.InvalidValue; + + if (zone_hour > 23 or zone_minute > 59) return error.InvalidValue; + + const zone_total: u16 = @as(u16, zone_hour) * 60 + zone_minute; + const offset_minutes: i32 = sign * @as(i32, zone_total); + + return .{ + .hour = @intCast(hour), + .minute = @intCast(minute), + .second = @intCast(second), + .microsecond = microsecond, + .zone_offset = offset_minutes, + }; + } + + fn fractionToMicrosecond(len: usize, value: u64) ?u20 { + const micro: u64 = switch (len) { + 1 => value * 100_000, + 2 => value * 10_000, + 3 => value * 1_000, + 6 => value, + 9 => value / 1_000, + else => return null, + }; + if (micro > 999_999) return null; + return @intCast(micro); } }; diff --git a/src/testsuite.zig b/src/testsuite.zig index a0700b4..0212271 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -339,3 +339,69 @@ test "parser maps diagnostic locations" { try std.testing.expectEqual(@as(u32, 3), loc.line); try std.testing.expectEqual(@as(u32, 1), loc.column); } + +test "Version.parse accepts dotted versions" { + const version = try hdoc.Version.parse("2.0"); + try std.testing.expectEqual(@as(u16, 2), version.major); + try std.testing.expectEqual(@as(u16, 0), version.minor); + + try 
std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2.")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2.0.1")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse(".1")); + try std.testing.expectError(error.InvalidValue, hdoc.Version.parse("2.a")); +} + +test "Date.parse accepts ISO dates" { + const date = try hdoc.Date.parse("2025-12-25"); + try std.testing.expectEqual(@as(i32, 2025), date.year); + try std.testing.expectEqual(@as(u4, 12), date.month); + try std.testing.expectEqual(@as(u5, 25), date.day); + + const short_year = try hdoc.Date.parse("1-01-01"); + try std.testing.expectEqual(@as(i32, 1), short_year.year); + try std.testing.expectEqual(@as(u4, 1), short_year.month); + try std.testing.expectEqual(@as(u5, 1), short_year.day); + + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-1-01")); + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-13-01")); + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-12-32")); +} + +test "Time.parse accepts ISO times with zones" { + const utc = try hdoc.Time.parse("22:30:46Z"); + try std.testing.expectEqual(@as(u5, 22), utc.hour); + try std.testing.expectEqual(@as(u6, 30), utc.minute); + try std.testing.expectEqual(@as(u6, 46), utc.second); + try std.testing.expectEqual(@as(u20, 0), utc.microsecond); + try std.testing.expectEqual(@as(i32, 0), utc.zone_offset); + + const fractional = try hdoc.Time.parse("22:30:46.136+01:00"); + try std.testing.expectEqual(@as(u20, 136_000), fractional.microsecond); + try std.testing.expectEqual(@as(i32, 60), fractional.zone_offset); + + const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30"); + try std.testing.expectEqual(@as(u20, 136_797), nanos.microsecond); + try std.testing.expectEqual(@as(i32, -330), nanos.zone_offset); + + try std.testing.expectError(error.InvalidValue, 
hdoc.Time.parse("21:30:46,1Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("22:30:46")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("24:00:00Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:60:00Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:60Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:59.1234Z")); +} + +test "DateTime.parse accepts ISO date-time" { + const datetime = try hdoc.DateTime.parse("2025-12-25T22:31:50.13+01:00"); + try std.testing.expectEqual(@as(i32, 2025), datetime.date.year); + try std.testing.expectEqual(@as(u4, 12), datetime.date.month); + try std.testing.expectEqual(@as(u5, 25), datetime.date.day); + try std.testing.expectEqual(@as(u5, 22), datetime.time.hour); + try std.testing.expectEqual(@as(u6, 31), datetime.time.minute); + try std.testing.expectEqual(@as(u6, 50), datetime.time.second); + try std.testing.expectEqual(@as(u20, 130_000), datetime.time.microsecond); + try std.testing.expectEqual(@as(i32, 60), datetime.time.zone_offset); + + try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z")); +} From 53f09469bea45299ceb14b24bdde991b6d60b5ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 00:15:15 +0100 Subject: [PATCH 017/116] Improves spec a lot --- docs/specification.md | 429 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 397 insertions(+), 32 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 4494022..d3c0959 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -2,7 +2,7 @@ This specification describes the document markup language "HyperDoc 2.0", that tries to be a simple to parse, easy to write markup language for hypertext documents. -It sits somewhat between LaTeX and Markdown and tries to be way simpler to parse than Markdown, but keep useful semantics around. 
+It sits in a space where it's unambiguous to parse, but still relatively convenient to write.
 
 ## Syntax Overview
 
@@ -23,51 +23,416 @@ pre(syntax="c"):
 | }
 ```
 
-## Grammar
+## Document encoding
 
-This grammar describes the hypertext format.
+This section defines the required byte-level encoding and line structure of HyperDoc documents.
 
-Short notes on grammar notation:
+### Character encoding
 
-- `{ ... }` is a repetition
-- `[ ... ]` is an option
-- `a | b | c` is alternatives
-- `( ... )` is a group
-- `"foo"` is a literal token sequence, no escape sequences (So `"\"` is a single backslash)
-- `/.../` is a regex
-- Whitespace is assumed to be ignored between tokens unless matched by a literal or regex, so tokens are typically separated by whitespace
-- Upper case elements are roughly tokens, while lowercase elements are rules.
+- A HyperDoc document **MUST** be encoded as **UTF-8**.
+- A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences.
+
+**Byte Order Mark (BOM):**
+
+- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as U+FEFF at the beginning of the document.
+
+### Line endings
+
+- Lines **MUST** be terminated by either:
+  - `LF` (U+000A), or
+  - `CRLF` (U+000D U+000A).
+- A bare `CR` **MUST NOT** appear except as part of a `CRLF` sequence.
+
+A document **MAY** mix `LF` and `CRLF` line endings, but tooling **SHOULD** normalize to a single convention when rewriting documents.
+
+The canonical line ending emitted by tooling **SHOULD** be `LF`.
+
+### Control characters
+
+- The only permitted control character **within a line** is:
+  - `TAB` (U+0009).
+- Apart from line terminators (`LF`, and `CR` only as part of `CRLF`), all other Unicode control characters (General Category `Cc`) **MUST NOT** appear anywhere in a HyperDoc document.
+
+### Unicode text
+
+- Apart from the restrictions above, arbitrary Unicode text is allowed. 
+ +### Recommendations for writing systems and directionality (non-normative) + +HyperDoc does not define special handling for right-to-left scripts, bidirectional layout, or writing system segmentation. For readability and to reduce ambiguity across renderers and editors: + +- Authors **SHOULD** keep each paragraph primarily in a **single writing system/directionality** where practical. +- Tooling **MAY** warn when a paragraph mixes strongly different directional scripts or contains invisible bidirectional formatting characters (e.g., bidi overrides/isolates), since these can be confusing in editors and reviews. + +## Syntax + +This chapter defines the **syntactic structure** of HyperDoc documents: how characters form tokens, how tokens form **nodes**, and how nodes nest. It intentionally does **not** define meaning (required elements, allowed attributes per node type, ID/refs, allowed escape sequences, etc.). Those are handled in later chapters as **semantic validity** rules. + +A HyperDoc document is a sequence of **nodes**. Each node has: + +- a **node name** (identifier), +- an optional **attribute list** `(key="value", ...)`, +- and a mandatory **body**, which is one of: + - `;` empty body, + - `"..."` string literal body, + - `:` verbatim body (one or more `|` lines), + - `{ ... }` list body. + +A list body `{ ... }` is parsed in one of two modes: + +- **Block-list mode**: the list contains nested nodes. +- **Inline-list mode**: the list contains a token stream of text items, escape tokens, inline nodes, and balanced brace groups. + +The grammar below is syntax-only and intentionally leaves the choice between block-list and inline-list content to an **external disambiguation rule**. 
+ +### Grammar (EBNF) ```ebnf -document := { block } +(* ---------- Top level ---------- *) + +document ::= ws , { node , ws } , EOF ; + +(* ---------- Nodes ---------- *) + +node ::= node_name , ws , [ attribute_list , ws ] , body ; + +body ::= empty_body + | string_body + | verbatim_body + | list_body ; + +empty_body ::= ";" ; + +string_body ::= string_literal ; + +verbatim_body ::= ":" , { ws , piped_line } ; + +list_body ::= "{" , list_content , "}" ; + +(* + IMPORTANT: list_content is intentionally ambiguous. + A conforming parser chooses either inline_content or block_content by an + EXTERNAL rule (see “Disambiguation for list bodies”). +*) +list_content ::= inline_content | block_content ; + + +(* ---------- Attributes ---------- *) + +attribute_list ::= "(" , ws , + [ attribute , + { ws , "," , ws , attribute } , + [ ws , "," ] (* trailing comma allowed *) + ] , + ws , ")" ; + +attribute ::= attr_key , ws , "=" , ws , string_literal ; + +(* + Attribute keys may include '-' and ':' in addition to node-name characters. +*) +attr_key ::= attr_key_char , { attr_key_char } ; + +attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | ":" | "\" ; + + +(* ---------- Block-list content ---------- *) -block := WORD [ attribute_list ] body +block_content ::= ws , { node , ws } ; -body := ";" | list | verbatim | STRING -verbatim := ":" "\n" { VERBATIM_LINE } -list := "{" { escape | inline | block | WORD } "}" -escape := "\\" | "\{" | "\}" -inline := "\" WORD [ attribute_list ] body +(* ---------- Inline-list content ---------- *) -attribute_list := "(" [ attribute { "," attribute } ] ")" -attribute := WORD "=" STRING +inline_content ::= ws , { inline_item , ws } ; -STRING := /"(\\.|[^"\r\n])*"/ -VERBATIM_LINE := /^\s*\|(.*)$/ -WORD := /[^\s\{\}\\\"(),=:]+/ +inline_item ::= word + | escape_text + | inline_node + | inline_group ; + +(* + Balanced braces in inline content are represented as inline_group. 
+ If braces cannot be balanced, they must be written as \{ and \}. +*) +inline_group ::= "{" , inline_content , "}" ; + +(* + Backslash dispatch inside inline content: + - If next char is one of '\', '{', '}', emit escape_text. + - Otherwise begin an inline_node. +*) +escape_text ::= "\" , ( "\" | "{" | "}" ) ; + +inline_node ::= inline_name , ws , [ attribute_list , ws ] , body ; + +(* + Inline node names start with '\' and then continue with node-name characters. +*) +inline_name ::= "\" , node_name_char_no_backslash , { node_name_char } ; + + +(* ---------- Words / node names ---------- *) + +(* + Node names intentionally do NOT include ':' because ':' is also a body marker + (e.g. 'p:' for verbatim body) and adjacency is allowed. +*) +node_name ::= node_name_char , { node_name_char } ; + +node_name_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; + +node_name_char_no_backslash + ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" ; + +word ::= word_char , { word_char } ; + +(* + word_char matches any Unicode scalar value except: + - whitespace + - '{' or '}' + - '\' (because '\' begins escape_text or inline_node) +*) +word_char ::= ? any scalar value except WS, "{", "}", "\" ? ; + + +(* ---------- String literals (syntax only; no escape validation here) ---------- *) + +string_literal ::= "\"" , { string_unit } , "\"" ; + +(* + string_unit is permissive enough that malformed escapes remain parsable, + BUT forbids escaping control characters (including LF/CR/TAB). + Raw TAB is allowed as a normal string_char. +*) +string_unit ::= string_char | "\" , escaped_noncontrol ; + +string_char ::= ? any scalar value except '"', '\', LF, CR ? ; + +escaped_noncontrol + ::= ? any scalar value except control chars (Unicode category Cc) ? ; + + +(* ---------- Verbatim lines ---------- *) + +piped_line ::= "|" , { not_line_end } , line_terminator ; + +not_line_end ::= ? any scalar value except CR and LF ? 
; + +line_terminator ::= LF | CR , LF | EOF ; + + +(* ---------- Whitespace ---------- *) + +ws ::= { WS } ; + +WS ::= " " | "\t" | CR | LF ; + +CR ::= "\r" ; +LF ::= "\n" ; ``` -**NOTE:** `list` also allows `block` for `inline` elements, as this enables us to have support for balanced braces without special care. The `block` elements will be flattened when rendering an inline list body into the document. +### Additional syntax rules and notes (normative) + +#### 1) Maximal-munch for identifiers + +When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the **longest possible** sequence of allowed identifier characters (maximal munch). This is required because `\` is a legal identifier character and must not be arbitrarily split. + +#### 2) Disambiguation for list bodies (external chooser) + +The production `list_content ::= inline_content | block_content` is resolved by a deterministic, non-backtracking rule: + +1. Before parsing the content of a `{ ... }` list body, the parser **MUST** choose exactly one list mode: **Inline-list mode** or **Block-list mode**. +2. The mode is determined solely from the syntactic **node name token** (not attributes, not body contents, not document state). +3. Required behavior (recovery-friendly): + - If the node name begins with `\`, the parser **MUST** choose **Inline-list mode**. + - If the node name is recognized as a built-in name with a specified list mode, the parser **MUST** choose that mode. + - Otherwise (unknown / misspelled / unsupported node name), the parser **MUST** choose **Inline-list mode**. + +This rule ensures unknown nodes accept rich inline content for typo recovery (e.g. `prre { ... }`). + +#### 3) Inline-list mode: brace balancing and escape-text tokens + +In **Inline-list mode**: + +- `{` and `}` that appear as literal characters in the inline stream are represented structurally as `inline_group` and therefore **must be balanced**. 
+- If braces cannot be balanced, they **must** be written using the escape-text tokens `\{` and `\}`. +- A backslash in inline content is interpreted as: + - one of the three **escape-text tokens** `\\`, `\{`, `\}`, or + - the start of an `inline_node` otherwise. + +The escape-text tokens exist primarily so the three characters `\`, `{`, `}` can be represented literally within inline content without always starting an inline node. -**NOTE:** All attribute values are strings, so numeric-looking values are still expressed as strings (e.g. `depth="1"`). +#### 4) String literals are syntax-only at this stage + +String literals are delimited by `"` and parsed without interpreting escape meanings. This is intentional: documents with malformed or unknown escape sequences remain **syntactically valid**, allowing formatters and other tooling to round-trip source reliably. + +However, the following are **syntactically invalid** inside string literals: + +- raw LF or CR characters (line breaks are not allowed within `"..."`), +- a backslash immediately followed by a **control character** (Unicode General Category `Cc`), which includes TAB. + +(Separately: which escape sequences are *semantically* valid is defined later.) + +#### 5) Verbatim bodies are line-oriented + +In a verbatim body (`:`): + +- The body consists of zero or more `piped_line` entries. +- Each `piped_line` starts with `|` after optional whitespace skipping. +- The content of a verbatim line is everything up to the line terminator; it is not tokenized into nodes. + +A file ending without a final newline is syntactically allowed (`EOF` as a line terminator), though tooling may warn. + +#### 6) Syntactic validity vs semantic validity + +A document is **syntactically valid** if it matches the grammar and the additional syntax rules above (maximal munch, list-mode disambiguation, inline brace balancing, and the string-literal constraints). + +A syntactically valid document may still be **semantically invalid**. 
Semantic validation is defined later and may include rules such as required header nodes, attribute constraints, reference resolution, allowed escape sequences, encoding policy, and disallowed control characters in source text. + +## Escape encoding + +This chapter defines how **escape sequences are interpreted** to produce decoded Unicode text. Escape processing is part of **semantic validation**: a document may be syntactically valid even if it contains unknown or malformed escapes, but it is not semantically valid unless all escapes decode successfully under the rules below. + +HyperDoc documents are UTF-8 text. Unless explicitly stated otherwise, all “characters” in this chapter refer to Unicode scalar values. + +### Scope + +Escape sequences are recognized in two places: + +1. **STRING literals** (the `"..."` body form, and attribute values which are also STRING literals). +2. **Inline escape-text tokens** inside inline-list bodies: `\\`, `\{`, `\}` (these are emitted as text spans by the parser and can be decoded to literal characters during semantic processing). + +No other part of the syntax performs escape decoding (not node names, not verbatim bodies, not block-list structure). + +## Control character policy + +HyperDoc forbids control characters except **LF** and **CR**. + +- A semantically valid document **MUST NOT** contain any Unicode control characters (General Category `Cc`) anywhere **except**: + - U+000A LINE FEED (LF) + - U+000D CARRIAGE RETURN (CR) + +This rule applies both to: + +- the raw document text (source), and +- any decoded text produced from escapes. + +Implications: + +- TAB (U+0009) is forbidden, including if introduced via `\u{9}`. +- NUL (U+0000) is forbidden, including if introduced via `\u{0}`. + +(Structural line breaks in the file may be LF or CRLF or CR as allowed by the syntax rules; decoded strings may contain LF/CR only via escapes.) 
+ +### String literal escape sequences + +#### Overview + +Within a STRING literal, a backslash (`\`) begins an escape sequence. The set of valid escapes is deliberately small. + +A semantic validator/decoder **MUST** accept exactly the escape forms listed below and **MUST** reject all others. + +#### Supported escapes (STRING literals) + +The following escapes are valid inside STRING literals: + +| Escape | Decodes to | +| ---------- | ---------------------------- | +| `\\` | U+005C REVERSE SOLIDUS (`\`) | +| `\"` | U+0022 QUOTATION MARK (`"`) | +| `\n` | U+000A LINE FEED (LF) | +| `\r` | U+000D CARRIAGE RETURN (CR) | +| `\u{H...}` | Unicode scalar value U+H... | + +No other escapes exist. In particular, `\0`, `\xHH`, `\e`, and similar are not part of HyperDoc. + +#### Unicode escape `\u{H...}` + +`H...` is a non-empty sequence of hexadecimal digits (`0–9`, `A–F`, `a–f`) representing a Unicode code point in hexadecimal. + +Rules: + +- The hex sequence **MUST** contain **1 to 6** hex digits. +- The value **MUST** be within `0x0 .. 0x10FFFF` inclusive. +- The value **MUST NOT** be in the surrogate range `0xD800 .. 0xDFFF`. +- The value **MUST NOT** decode to a forbidden control character (see Control character policy). The only allowed controls are LF and CR. + +Notes: + +- Leading zeros are allowed (`\u{000041}` is `A`). +- `\u{20}` is ASCII space. (`\u{032}` is U+0032, the digit `"2"`, because the digits are hexadecimal.) + +#### Invalid escapes (STRING literals) + +A semantic validator/decoder **MUST** reject a document (or at least reject that literal) if any STRING literal contains: + +- an unknown escape (e.g. `\q`, `\uFFFF`, `\x20`, `\t`, `\b`, …), +- an unterminated escape (string ends immediately after `\`), +- a malformed Unicode escape (`\u{}`, missing `{`/`}`, non-hex digits, more than 6 hex digits), +- a Unicode escape outside the valid scalar range or within the surrogate range, +- a Unicode escape that produces a forbidden control character. 
+ +#### Canonical encoding recommendations (non-normative) + +For authors and formatters: + +- Prefer `\\` and `\"` for literal backslash and quote. +- Prefer `\n` and `\r` for LF/CR instead of `\u{A}` / `\u{D}`. +- Prefer the shortest hex form for `\u{...}` without leading zeros unless alignment/readability benefits. + +### Inline escape-text tokens in inline-list bodies + +Inside **inline-list bodies**, the syntax defines three special two-character text tokens: + +- `\\` +- `\{` +- `\}` + +These exist so that inline content can contain literal `\`, `{`, and `}` without always starting an inline node (`\name{...}`) or requiring brace balancing. + +#### Decoding rule + +During semantic text construction, an implementation **MAY** decode these tokens as: + +- `\\` → `\` +- `\{` → `{` +- `\}` → `}` + +This decoding is independent of STRING literal escapes: these tokens occur in inline text streams, not inside `"..."` literals. + +#### Round-tripping note (normative intent) + +A formatter or tooling that aims to preserve the author’s intent **SHOULD** preserve the distinction between: + +- a literal `{`/`}` that is part of a balanced inline group, and +- an escaped brace token `\{`/`\}` that was used to avoid imbalance. + +This distinction matters for reliable reconstruction and for edits that may reflow or restructure inline content. + +### Interaction with syntax + +- Escape decoding is performed **after** syntactic parsing. +- Syntactic parsing of STRING literals is delimiter-based and does not validate escape *meaning*. +- Semantic validation determines whether escapes are valid and produces the decoded Unicode text. + +This separation is intentional: it allows autoformatters to parse and rewrite documents that may contain malformed escapes without losing information, while still allowing strict validators to enforce the escape rules above. + +## Semantic Validity + +> TO BE DONE. 
+> +> - Attribute uniqueness +> - Attribute must be defined on a node +> - Non-optional attributes must be present +> - id is only valid on top-level nodes +> - id must be unique +> - id is case sensitive +> - ref must point to an existing id ## Element Overview | Element | Element Type | Allowed Children | Attributes | | ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------- | -| *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | | `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | | `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | | `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | @@ -76,11 +441,13 @@ WORD := /[^\s\{\}\\\"(),=:]+/ | `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | | `toc` | Block | - | `lang`, \[`id`\], `depth` | | `table` | Block | Table Rows | `lang`, \[`id`\] | +| *Document* | Document | `hdoc`, Blocks | | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | | `columns` | Table Row | `td` ≥ 1 | `lang` | | `group` | Table Row | Text Body | `lang`, | | `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `li` | List Item | Blocks, String, Verbatim | `lang` | | `\em` | Text Body | Text Body | `lang` | | `\mono` | Text Body | Text Body | `lang`, `syntax` | | `\strike` | Text Body | Text Body | `lang` | @@ -125,7 +492,6 @@ All elements have these attributes: | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | | `lang` | Marks the (human) language of the contents of that element. 
This must be an [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag). | - ## Top-Level / Block Elements All top-level elements have these attributes: @@ -379,4 +745,3 @@ Date/time strings MUST combine a date and time with a literal `T`. - Format: `YYYY-MM-DD` + `T` + `hh:mm:ss` (with optional fraction and required zone). Examples: `2025-12-25T22:31:50.13+01:00`, `2025-12-25T21:31:43Z`. - From ba40f2c34f54b1f67fdee3694bfb8075da13aa69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 07:52:19 +0100 Subject: [PATCH 018/116] Refactors Parser.Node.attributes from map to list, so the lexical structure is retained --- src/hyperdoc.zig | 50 ++++++++++++++++++++++------------------------- src/testsuite.zig | 20 +++++++++++-------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 84514ee..d2b7f0f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -652,24 +652,26 @@ pub const SemanticAnalyzer = struct { var any_invalid = false; var found: std.EnumSet(Fields) = .initEmpty(); - for (node.attributes.keys(), node.attributes.values()) |key, attrib| { + for (node.attributes.items) |attrib| { + const key = attrib.name.text; + const fld = std.meta.stringToEnum(Fields, key) orelse { - try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, attrib.name.location.offset); continue; }; if (found.contains(fld)) { - try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, node.location.offset); + try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, attrib.name.location.offset); } found.insert(fld); switch (fld) { - inline else => |tag| @field(attrs, @tagName(tag)) = sema.cast_value(attrib, @FieldType(Attrs, @tagName(tag))) catch |err| switch (err) { + inline else => |tag| @field(attrs, 
@tagName(tag)) = sema.cast_value(attrib.value, @FieldType(Attrs, @tagName(tag))) catch |err| switch (err) { error.OutOfMemory => |e| return e, else => { any_invalid = true; - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, node.location.offset); + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, attrib.value.location.offset); continue; }, @@ -694,12 +696,12 @@ pub const SemanticAnalyzer = struct { return attrs; } - fn cast_value(sema: *SemanticAnalyzer, attrib: Parser.Attribute, comptime T: type) error{ OutOfMemory, InvalidValue }!T { + fn cast_value(sema: *SemanticAnalyzer, attrib: Parser.Token, comptime T: type) error{ OutOfMemory, InvalidValue }!T { if (@typeInfo(T) == .optional) { return try sema.cast_value(attrib, @typeInfo(T).optional.child); } - const value = try sema.unescape_string(attrib.value); + const value = try sema.unescape_string(attrib); return switch (T) { []const u8 => value, @@ -788,7 +790,7 @@ pub const Parser = struct { else .unknown_block; - var attributes: std.StringArrayHashMapUnmanaged(Attribute) = .empty; + var attributes: std.ArrayList(Attribute) = .empty; errdefer attributes.deinit(parser.arena); if (parser.try_accept_char('(')) { @@ -797,20 +799,14 @@ pub const Parser = struct { // so we know that the next token must be the attribute name. 
while (true) { - const start = parser.offset; const attr_name = try parser.accept_identifier(); _ = try parser.accept_char('='); const attr_value = try parser.accept_string(); - const attr_location = parser.location(start, parser.offset); - const gop_entry = try attributes.getOrPut(parser.arena, attr_name.text); - if (gop_entry.found_existing) { - emitDiagnostic(parser, .{ .duplicate_attribute = .{ .name = attr_name.text } }, parser.make_diagnostic_location(attr_location.offset)); - } - gop_entry.value_ptr.* = .{ - .location = attr_location, + try attributes.append(parser.arena, .{ + .name = attr_name, .value = attr_value, - }; + }); if (!parser.try_accept_char(',')) { break; @@ -823,7 +819,7 @@ pub const Parser = struct { if (parser.try_accept_char(';')) { // block has empty content return .{ - .location = parser.location(type_ident.position.offset, null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .empty, @@ -840,11 +836,11 @@ pub const Parser = struct { } if (lines.items.len == 0) { - emitDiagnostic(parser, .empty_verbatim_block, parser.make_diagnostic_location(type_ident.position.offset)); + emitDiagnostic(parser, .empty_verbatim_block, parser.make_diagnostic_location(type_ident.location.offset)); } return .{ - .location = parser.location(type_ident.position.offset, null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .{ .verbatim = try lines.toOwnedSlice(parser.arena) }, @@ -855,7 +851,7 @@ pub const Parser = struct { // block has string content return .{ - .location = parser.location(type_ident.position.offset, null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .{ .string = string_body }, @@ -868,7 +864,7 @@ pub const Parser = struct { try parser.accept_block_node_list(); return .{ - .location = parser.location(type_ident.position.offset, 
null), + .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, .body = .{ .list = try children.toOwnedSlice(parser.arena) }, @@ -968,7 +964,7 @@ pub const Parser = struct { else => { const word = try parser.accept_word(); try children.append(parser.arena, .{ - .location = word.position, + .location = word.location, .type = .text, .attributes = .empty, .body = .empty, @@ -1184,7 +1180,7 @@ pub const Parser = struct { pub fn slice(parser: *Parser, start: usize, end: usize) Token { return .{ .text = parser.code[start..end], - .position = .{ .offset = start, .length = end - start }, + .location = .{ .offset = start, .length = end - start }, }; } @@ -1221,7 +1217,7 @@ pub const Parser = struct { pub const Token = struct { text: []const u8, - position: Location, + location: Location, }; pub const Location = struct { @@ -1360,7 +1356,7 @@ pub const Parser = struct { pub const Node = struct { location: Location, type: NodeType, - attributes: std.StringArrayHashMapUnmanaged(Attribute), + attributes: std.ArrayList(Attribute) = .empty, body: Body, @@ -1373,7 +1369,7 @@ pub const Parser = struct { }; pub const Attribute = struct { - location: Location, + name: Token, value: Token, }; }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 0212271..5b7ed99 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -49,13 +49,13 @@ test "parser accept identifier and word tokens" { const ident = try parser.accept_identifier(); try std.testing.expectEqualStrings("h1", ident.text); - try std.testing.expectEqual(@as(usize, 0), ident.position.offset); - try std.testing.expectEqual(@as(usize, 2), ident.position.length); + try std.testing.expectEqual(@as(usize, 0), ident.location.offset); + try std.testing.expectEqual(@as(usize, 2), ident.location.length); const word = try parser.accept_word(); try std.testing.expectEqualStrings("word", word.text); - try std.testing.expectEqual(@as(usize, 3), word.position.offset); - try 
std.testing.expectEqual(@as(usize, 4), word.position.length); + try std.testing.expectEqual(@as(usize, 3), word.location.offset); + try std.testing.expectEqual(@as(usize, 4), word.location.length); try std.testing.expectEqual(@as(usize, 7), parser.offset); } @@ -111,12 +111,16 @@ test "parser handles attributes and empty bodies" { const node = try parser.accept_node(.top_level); try std.testing.expectEqual(hdoc.Parser.NodeType.h1, node.type); - try std.testing.expectEqual(@as(usize, 2), node.attributes.count()); + try std.testing.expectEqual(@as(usize, 2), node.attributes.items.len); - const title = node.attributes.get("title") orelse return error.TestExpectedEqual; + const attribs = node.attributes.items; + + const title = attribs[0]; + try std.testing.expectEqualStrings("title", title.name.text); try std.testing.expectEqualStrings("\"Hello\"", title.value.text); - const author = node.attributes.get("author") orelse return error.TestExpectedEqual; + const author = attribs[1]; + try std.testing.expectEqualStrings("author", author.name.text); try std.testing.expectEqualStrings("\"World\"", author.value.text); try std.testing.expect(node.body == .empty); @@ -275,7 +279,7 @@ test "diagnostic codes are emitted for expected samples" { .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, - .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, + // TODO: .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, .{ .code = 
.verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, From 50b409371b2b8aa7113752c889359584876f1711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 07:59:10 +0100 Subject: [PATCH 019/116] Fixes failing test. --- src/hyperdoc.zig | 6 ++++++ src/testsuite.zig | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index d2b7f0f..4a3c94c 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1466,6 +1466,12 @@ pub const Diagnostic = struct { .verbatim_missing_trailing_newline => try w.writeAll("Verbatim line should end with a newline."), .verbatim_missing_space => try w.writeAll("Expected a space after '|' in verbatim line."), .trailing_whitespace => try w.writeAll("Trailing whitespace at end of line."), + + .missing_attribute => |ctx| try w.print("Missing required attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .invalid_attribute => |ctx| try w.print("Invalid value for attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .unknown_attribute => |ctx| try w.print("Unknown attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .unknown_block_type => |ctx| try w.print("Unknown block type '{s}'.", .{ctx.name}), + .invalid_block_type => |ctx| try w.print("Invalid block type '{s}' in this context.", .{ctx.name}), } } }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 5b7ed99..6a79530 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -279,7 +279,7 @@ test "diagnostic codes are emitted for expected samples" { .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, - // TODO: .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = 
&.{"hdoc(version=\"2.0\"); h1(title=\"a\",title=\"b\");"} }, + .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");"} }, .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, .{ .code = .verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, @@ -303,7 +303,13 @@ test "diagnostic codes are emitted for expected samples" { defer owned_doc.deinit(); } - try std.testing.expect(diagnosticsContain(&diagnostics, case.code)); + if (!diagnosticsContain(&diagnostics, case.code)) { + std.log.err("Diagnostics did not contain expected code: '{t}'", .{case.code}); + for (diagnostics.items.items) |item| { + std.log.err(" Emitted diagnostic: {f}", .{item.code}); + } + return error.MissingDiagnosticCode; + } const expected_severity = case.code.severity(); if (expected_severity == .@"error") { From 8f83120190ebc7dd1b863289d9c344baffd41b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 14:05:57 +0100 Subject: [PATCH 020/116] Implements translation of H1 nodes and basic inline contents. 
--- src/hyperdoc.zig | 260 +++++++++++++++++++++++++++++++-------- src/main.zig | 20 +-- test/parser/workset.hdoc | 4 + 3 files changed, 225 insertions(+), 59 deletions(-) create mode 100644 test/parser/workset.hdoc diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 4a3c94c..43b1ffe 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -117,13 +117,6 @@ pub const Block = union(enum) { }; }; -pub const SpanContent = union(enum) { - text: []const u8, - date: FormattedDateTime(Date), - time: FormattedDateTime(Time), - datetime: FormattedDateTime(DateTime), -}; - pub fn FormattedDateTime(comptime DT: type) type { return struct { value: DT, @@ -132,15 +125,47 @@ pub fn FormattedDateTime(comptime DT: type) type { } pub const Span = struct { - content: SpanContent, - lang: ?[]const u8 = null, - em: bool = false, - mono: bool = false, - strike: bool = false, - sub: bool = false, - sup: bool = false, - link: Link = .none, - syntax: ?[]const u8 = null, + pub const Content = union(enum) { + text: []const u8, + date: FormattedDateTime(Date), + time: FormattedDateTime(Time), + datetime: FormattedDateTime(DateTime), + }; + + pub const Attributes = struct { + lang: ?[]const u8 = null, + em: bool = false, + mono: bool = false, + strike: bool = false, + sub: bool = false, + sup: bool = false, + link: Link = .none, + syntax: ?[]const u8 = null, + + pub const Overrides = struct { + lang: ?[]const u8 = null, + em: ?bool = null, + mono: ?bool = null, + strike: ?bool = null, + sub: ?bool = null, + sup: ?bool = null, + link: ?Link = null, + syntax: ?[]const u8 = null, + }; + + pub fn derive(base: Attributes, overlay: Overrides) Attributes { + var new = base; + inline for (@typeInfo(Attributes).@"struct".fields) |fld| { + if (@field(overlay, fld.name)) |new_value| { + @field(new, fld.name) = new_value; + } + } + return new; + } + }; + + content: Content, + attribs: Attributes, }; pub const Link = union(enum) { @@ -551,51 +576,148 @@ pub const SemanticAnalyzer = struct { else => 
unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline_list(node.body), + .content = try sema.translate_inline(node), }; return .{ heading, attrs.id }; } - fn translate_inline_list(sema: *SemanticAnalyzer, body: Parser.Node.Body) error{ OutOfMemory, Unimplemented }![]Span { - switch (body) { - .empty => return &.{}, + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, Unimplemented }![]Span { + var spans: std.ArrayList(Span) = .empty; + errdefer spans.deinit(sema.arena); + + // TODO: Implement automatic space insertion. + // This must be done when two consecutive nodes are separated by a space + + try sema.translate_inline_body(&spans, node.body, .{}); + + // TODO: Compact spans by joining spans with equal properties + + return try spans.toOwnedSlice(sema.arena); + } + + fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes) !void { + switch (node.type) { + .unknown_inline, + .text, + => { + try sema.translate_inline_body(spans, node.body, attribs); + }, + + .@"\\em", + .@"\\mono", + .@"\\strike", + .@"\\sub", + .@"\\sup", + .@"\\link", - .string => { - std.log.warn("TODO: Implement string span translation", .{}); - return error.Unimplemented; + .@"\\date", + .@"\\time", + .@"\\datetime", + => { + // TODO: Implement date/time translation }, - .verbatim => { - std.log.warn("TODO: Implement verbatim span translation", .{}); - return error.Unimplemented; + + .hdoc, + .h1, + .h2, + .h3, + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + .ul, + .ol, + .img, + .pre, + .toc, + .table, + .columns, + .group, + .row, + .td, + .li, + .unknown_block, + => @panic("PARSER ERROR: The parser emitted a block node inside an inline context"), + } + } + + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, Unimplemented }!void { + switch (body) { + 
.empty => |location| { + try sema.emit_diagnostic(.empty_inline_body, location.offset); }, - .list => { - var spans: std.ArrayList(Span) = .empty; - errdefer spans.deinit(sema.arena); + .string => |string_body| { + const text = try sema.unescape_string(string_body); - // TODO: Insert a space span between two regular text spans if they are not consecutive to each other. + try spans.append(sema.arena, .{ + .content = .{ .text = text }, + .attribs = attribs, + }); + }, + + .verbatim => |verbatim_lines| { + var text_buffer: std.ArrayList(u8) = .empty; + defer text_buffer.deinit(sema.arena); - for (body.list) |child_node| { - const span = try sema.translate_span_node(child_node); - try spans.append(sema.arena, span); + var size: usize = verbatim_lines.len -| 1; + for (verbatim_lines) |line| { + size += line.text.len; } + try text_buffer.ensureTotalCapacityPrecise(sema.arena, size); + + var first_unpadded = true; + for (verbatim_lines, 0..) |line, index| { + if (index != 0) { + try text_buffer.append(sema.arena, '\n'); + } + std.debug.assert(std.mem.startsWith(u8, line.text, "|")); + + const is_padded = std.mem.startsWith(u8, line.text, "| "); - // TODO: Compact spans by joining spans with equal properties + if (!is_padded) { + if (first_unpadded) { + try sema.emit_diagnostic(.unpadded_verbatim_line, line.location.offset); + first_unpadded = false; + } + } + + const text = if (is_padded) + line.text[2..] 
+ else + line.text[1..]; + + const stripped = std.mem.trimRight(u8, text, " \t"); + if (text.len != stripped.len) { + try sema.emit_diagnostic(.trailing_whitespace_in_verbatim_line, line.location.offset + stripped.len); + } - return try spans.toOwnedSlice(sema.arena); + text_buffer.appendSliceAssumeCapacity(stripped); + } + + try spans.append(sema.arena, .{ + .content = .{ .text = try text_buffer.toOwnedSlice(sema.arena) }, + .attribs = attribs, + }); }, - } - } - fn translate_span_node(sema: *SemanticAnalyzer, node: Parser.Node) !Span { - // - _ = sema; - std.log.warn("TODO: Translate spans of type {}", .{node.type}); + .list => |list| { + for (list) |child_node| { + try sema.translate_inline_node(spans, child_node, attribs); + } + }, - return .{ - .content = .{ .text = "???" }, - }; + .text_span => |text_span| { + try spans.append(sema.arena, .{ + .content = .{ .text = text_span.text }, + .attribs = attribs, + }); + }, + } } fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { @@ -822,7 +944,7 @@ pub const Parser = struct { .location = parser.location(type_ident.location.offset, null), .type = node_type, .attributes = attributes, - .body = .empty, + .body = .{ .empty = parser.location(parser.offset - 1, null) }, }; } @@ -929,6 +1051,15 @@ pub const Parser = struct { '{' => { nesting += 1; parser.offset += 1; + + const token = parser.slice(parser.offset - 1, parser.offset); + try children.append(parser.arena, .{ + .location = token.location, + .type = .text, + .body = .{ + .text_span = token, + }, + }); }, '}' => { @@ -938,6 +1069,15 @@ pub const Parser = struct { break; nesting -= 1; + + const token = parser.slice(parser.offset - 1, parser.offset); + try children.append(parser.arena, .{ + .location = token.location, + .type = .text, + .body = .{ + .text_span = token, + }, + }); }, '\\' => backslash: { @@ -946,7 +1086,18 @@ pub const Parser = struct { switch (next_char) { '{', '}', '\\' => { // Escaped 
brace + + const token = parser.slice(parser.offset, parser.offset + 2); + try children.append(parser.arena, .{ + .location = token.location, + .type = .text, + .body = .{ + .text_span = token, + }, + }); + parser.offset += 2; + break :backslash; }, else => {}, @@ -966,8 +1117,7 @@ pub const Parser = struct { try children.append(parser.arena, .{ .location = word.location, .type = .text, - .attributes = .empty, - .body = .empty, + .body = .{ .text_span = word }, }); }, } @@ -1335,6 +1485,7 @@ pub const Parser = struct { .@"\\datetime", .unknown_inline, + .unknown_block, // Unknown blocks must also have inline bodies to optimally retain body contents => true, .hdoc, @@ -1347,7 +1498,6 @@ pub const Parser = struct { .li, .text, - .unknown_block, => false, }; } @@ -1361,10 +1511,11 @@ pub const Parser = struct { body: Body, pub const Body = union(enum) { - empty, + empty: Location, string: Token, verbatim: []Token, list: []Node, + text_span: Token, }; }; @@ -1418,6 +1569,9 @@ pub const Diagnostic = struct { verbatim_missing_trailing_newline, verbatim_missing_space, trailing_whitespace, + empty_inline_body, + unpadded_verbatim_line, + trailing_whitespace_in_verbatim_line, pub fn severity(code: Code) Severity { return switch (code) { @@ -1441,6 +1595,9 @@ pub const Diagnostic = struct { .verbatim_missing_trailing_newline, .verbatim_missing_space, .trailing_whitespace, + .empty_inline_body, + .unpadded_verbatim_line, + .trailing_whitespace_in_verbatim_line, => .warning, }; } @@ -1472,6 +1629,11 @@ pub const Diagnostic = struct { .unknown_attribute => |ctx| try w.print("Unknown attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), .unknown_block_type => |ctx| try w.print("Unknown block type '{s}'.", .{ctx.name}), .invalid_block_type => |ctx| try w.print("Invalid block type '{s}' in this context.", .{ctx.name}), + + .empty_inline_body => try w.writeAll("Inline body is empty."), + + .unpadded_verbatim_line => try w.writeAll("Verbatim line is not properly padded 
with a space character at the start."), + .trailing_whitespace_in_verbatim_line => try w.writeAll("Trailing whitespace at end of verbatim line."), } } }; diff --git a/src/main.zig b/src/main.zig index 98ea655..df3e962 100644 --- a/src/main.zig +++ b/src/main.zig @@ -119,7 +119,7 @@ fn dumpFormattedDateTime(writer: anytype, indent: usize, formatted: hdoc.Formatt try writer.print("format: {s}\n", .{@tagName(formatted.format)}); } -fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.SpanContent) !void { +fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.Span.Content) !void { switch (content) { .text => |text| { try writeIndent(writer, indent); @@ -174,14 +174,14 @@ fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { try writeIndent(writer, indent); try writer.writeAll("content:\n"); try dumpSpanContent(writer, indent + indent_step, span.content); - try dumpOptionalStringField(writer, indent, "lang", span.lang); - try dumpBoolField(writer, indent, "em", span.em); - try dumpBoolField(writer, indent, "mono", span.mono); - try dumpBoolField(writer, indent, "strike", span.strike); - try dumpBoolField(writer, indent, "sub", span.sub); - try dumpBoolField(writer, indent, "sup", span.sup); - try dumpLink(writer, indent, span.link); - try dumpOptionalStringField(writer, indent, "syntax", span.syntax); + try dumpOptionalStringField(writer, indent, "lang", span.attribs.lang); + try dumpBoolField(writer, indent, "em", span.attribs.em); + try dumpBoolField(writer, indent, "mono", span.attribs.mono); + try dumpBoolField(writer, indent, "strike", span.attribs.strike); + try dumpBoolField(writer, indent, "sub", span.attribs.sub); + try dumpBoolField(writer, indent, "sup", span.attribs.sup); + try dumpLink(writer, indent, span.attribs.link); + try dumpOptionalStringField(writer, indent, "syntax", span.attribs.syntax); } fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { @@ -413,7 +413,7 
@@ test "dumpDocument escapes string values" { const spans = try arena_alloc.alloc(hdoc.Span, 1); spans[0] = .{ .content = .{ .text = span_text }, - .link = .{ .ref = link_ref }, + .attribs = .{ .link = .{ .ref = link_ref } }, }; const blocks = try arena_alloc.alloc(hdoc.Block, 1); diff --git a/test/parser/workset.hdoc b/test/parser/workset.hdoc new file mode 100644 index 0000000..e4b2e8f --- /dev/null +++ b/test/parser/workset.hdoc @@ -0,0 +1,4 @@ +hdoc(version="2.0"); + +h1: +| Hello, World! From c3ff7bcb78520458de79ea3123723a664b33130b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 14:32:36 +0100 Subject: [PATCH 021/116] Implements most guts of the inline composition system, but only exposes \em right now. --- src/hyperdoc.zig | 139 +++++++++++++++++++++++++++++++-------- src/main.zig | 3 +- test/parser/workset.hdoc | 2 + 3 files changed, 114 insertions(+), 30 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 43b1ffe..dd44a87 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -133,41 +133,25 @@ pub const Span = struct { }; pub const Attributes = struct { - lang: ?[]const u8 = null, + lang: []const u8 = "", // empty is absence + position: ScriptPosition = .baseline, em: bool = false, mono: bool = false, strike: bool = false, - sub: bool = false, - sup: bool = false, link: Link = .none, - syntax: ?[]const u8 = null, - - pub const Overrides = struct { - lang: ?[]const u8 = null, - em: ?bool = null, - mono: ?bool = null, - strike: ?bool = null, - sub: ?bool = null, - sup: ?bool = null, - link: ?Link = null, - syntax: ?[]const u8 = null, - }; - - pub fn derive(base: Attributes, overlay: Overrides) Attributes { - var new = base; - inline for (@typeInfo(Attributes).@"struct".fields) |fld| { - if (@field(overlay, fld.name)) |new_value| { - @field(new, fld.name) = new_value; - } - } - return new; - } + syntax: []const u8 = "", // empty is absence }; content: Content, attribs: Attributes, }; +pub 
const ScriptPosition = enum { + baseline, + superscript, + subscript, +}; + pub const Link = union(enum) { none, ref: []const u8, @@ -596,15 +580,91 @@ pub const SemanticAnalyzer = struct { return try spans.toOwnedSlice(sema.arena); } + pub const AttribOverrides = struct { + lang: ?[]const u8 = null, + em: ?bool = null, + mono: ?bool = null, + strike: ?bool = null, + position: ?ScriptPosition = null, + link: ?Link = null, + syntax: []const u8 = "", + }; + + fn derive_attribute(sema: *SemanticAnalyzer, location: Parser.Location, old: Span.Attributes, overlay: AttribOverrides) !Span.Attributes { + comptime std.debug.assert(@typeInfo(Span.Attributes).@"struct".fields.len == @typeInfo(AttribOverrides).@"struct".fields.len); + + var new = old; + if (overlay.lang) |lang| { + new.lang = lang; + } + + if (overlay.em) |v| { + if (old.em) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .em } }, location.offset); + } + new.em = v; + } + + if (overlay.mono) |mono| { + if (old.mono) { + if (std.mem.eql(u8, old.syntax, overlay.syntax)) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .mono } }, location.offset); + } + } + new.mono = mono; + new.syntax = overlay.syntax; + } else { + // can't override syntax without also enabling mono! + std.debug.assert(overlay.syntax.len == 0); + } + + if (overlay.strike) |strike| { + if (old.strike) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .strike } }, location.offset); + } + new.strike = strike; + } + + if (overlay.position) |new_pos| { + std.debug.assert(new_pos != .baseline); // we can never return to baseline script. 
+ if (old.position == new_pos) { + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = if (new_pos == .subscript) .sub else .sup } }, location.offset); + } else if (old.position != .baseline) { + try sema.emit_diagnostic(.{ .invalid_inline_combination = .{ + .first = switch (old.position) { + .superscript => .sup, + .subscript => .sub, + .baseline => unreachable, + }, + .second = switch (new_pos) { + .superscript => .sup, + .subscript => .sub, + .baseline => unreachable, + }, + } }, location.offset); + } + new.position = new_pos; + } + + if (overlay.link) |link| { + if (old.link != .none) { + try sema.emit_diagnostic(.link_not_nestable, location.offset); + } + new.link = link; + } + + return new; + } + fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes) !void { switch (node.type) { .unknown_inline, .text, - => { - try sema.translate_inline_body(spans, node.body, attribs); - }, + => try sema.translate_inline_body(spans, node.body, attribs), .@"\\em", + => try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .em = true })), + .@"\\mono", .@"\\strike", .@"\\sub", @@ -1546,6 +1606,8 @@ pub const Diagnostic = struct { pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; pub const InvalidBlockError = struct { name: []const u8 }; + pub const InlineUsageError = struct { attribute: InlineAttribute }; + pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; pub const Code = union(enum) { // errors: @@ -1561,6 +1623,8 @@ pub const Diagnostic = struct { invalid_attribute: NodeAttributeError, unknown_block_type: InvalidBlockError, invalid_block_type: InvalidBlockError, + invalid_inline_combination: InlineCombinationError, + link_not_nestable, // warnings: unknown_attribute: NodeAttributeError, @@ -1572,6 +1636,7 @@ pub const Diagnostic = struct { empty_inline_body, unpadded_verbatim_line, 
trailing_whitespace_in_verbatim_line, + redundant_inline: InlineUsageError, pub fn severity(code: Code) Severity { return switch (code) { @@ -1587,6 +1652,8 @@ pub const Diagnostic = struct { .missing_attribute, .unknown_block_type, .invalid_block_type, + .invalid_inline_combination, + .link_not_nestable, => .@"error", .unknown_attribute, @@ -1598,6 +1665,7 @@ pub const Diagnostic = struct { .empty_inline_body, .unpadded_verbatim_line, .trailing_whitespace_in_verbatim_line, + .redundant_inline, => .warning, }; } @@ -1634,6 +1702,10 @@ pub const Diagnostic = struct { .unpadded_verbatim_line => try w.writeAll("Verbatim line is not properly padded with a space character at the start."), .trailing_whitespace_in_verbatim_line => try w.writeAll("Trailing whitespace at end of verbatim line."), + + .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), + .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), + .link_not_nestable => try w.writeAll("Links are not nestable"), } } }; @@ -1680,6 +1752,17 @@ pub const Diagnostics = struct { } }; +pub const InlineAttribute = enum { + lang, + em, + mono, + strike, + sub, + sup, + link, + syntax, +}; + test "fuzz parser" { const Impl = struct { fn testOne(impl: @This(), data: []const u8) !void { diff --git a/src/main.zig b/src/main.zig index df3e962..b9e8e7a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -178,8 +178,7 @@ fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { try dumpBoolField(writer, indent, "em", span.attribs.em); try dumpBoolField(writer, indent, "mono", span.attribs.mono); try dumpBoolField(writer, indent, "strike", span.attribs.strike); - try dumpBoolField(writer, indent, "sub", span.attribs.sub); - try dumpBoolField(writer, indent, "sup", span.attribs.sup); + try dumpEnumField(writer, indent, "position", span.attribs.position); try dumpLink(writer, indent, span.attribs.link); try 
dumpOptionalStringField(writer, indent, "syntax", span.attribs.syntax); } diff --git a/test/parser/workset.hdoc b/test/parser/workset.hdoc index e4b2e8f..fbfaf77 100644 --- a/test/parser/workset.hdoc +++ b/test/parser/workset.hdoc @@ -2,3 +2,5 @@ hdoc(version="2.0"); h1: | Hello, World! + +h2{Hello \em{World}!} From 349ada5367eb481683e6049deb4b3f32582fd13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 14:48:24 +0100 Subject: [PATCH 022/116] Implements most inline node translations except for the date/time parts. --- src/hyperdoc.zig | 97 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index dd44a87..b2f1fe1 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -566,7 +566,7 @@ pub const SemanticAnalyzer = struct { return .{ heading, attrs.id }; } - fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, Unimplemented }![]Span { + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); @@ -662,20 +662,96 @@ pub const SemanticAnalyzer = struct { .text, => try sema.translate_inline_body(spans, node.body, attribs), - .@"\\em", - => try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .em = true })), + .@"\\em" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); - .@"\\mono", - .@"\\strike", - .@"\\sub", - .@"\\sup", - .@"\\link", + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .em = true, + })); + }, + + .@"\\strike" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + try sema.translate_inline_body(spans, node.body, try 
sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .strike = true, + })); + }, + + .@"\\sub" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .position = .subscript, + })); + }, + + .@"\\sup" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .position = .superscript, + })); + }, + + .@"\\link" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + uri: ?[]const u8 = null, + ref: ?[]const u8 = null, + }); + + if (props.uri != null and props.ref != null) { + try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + } + + const link: Link = if (props.uri) |uri| blk: { + // TODO: Figure out where to put URI validation (not empty, no leading/trailing whitespace) + break :blk .{ .uri = uri }; + } else if (props.ref) |ref| blk: { + // TODO: Figure out where to put reference validation (no leading/trailing whitespace) + // TODO: Reference validation must also happen for "id" attribute + break :blk .{ .ref = ref }; + } else blk: { + try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + break :blk .none; + }; + + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .link = link, + })); + }, + + .@"\\mono" => { + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + syntax: []const u8 = "", + }); + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .mono = true, + .lang = props.lang, + .syntax = props.syntax, + })); + }, .@"\\date", .@"\\time", 
.@"\\datetime", => { // TODO: Implement date/time translation + std.log.err("TODO: Implement {t}", .{node.type}); }, .hdoc, @@ -705,7 +781,7 @@ pub const SemanticAnalyzer = struct { } } - fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, Unimplemented }!void { + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { try sema.emit_diagnostic(.empty_inline_body, location.offset); @@ -1625,6 +1701,7 @@ pub const Diagnostic = struct { invalid_block_type: InvalidBlockError, invalid_inline_combination: InlineCombinationError, link_not_nestable, + invalid_link, // warnings: unknown_attribute: NodeAttributeError, @@ -1654,6 +1731,7 @@ pub const Diagnostic = struct { .invalid_block_type, .invalid_inline_combination, .link_not_nestable, + .invalid_link, => .@"error", .unknown_attribute, @@ -1706,6 +1784,7 @@ pub const Diagnostic = struct { .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), .link_not_nestable => try w.writeAll("Links are not nestable"), + .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), } } }; From 0f6815576f8836c00d1fb208ebe8b780f533a9f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 26 Dec 2025 15:12:23 +0100 Subject: [PATCH 023/116] Vibecoded: Reduces the verbosity of the debug dump output --- src/main.zig | 196 +++++++++++++++++++++++++++++---------------------- 1 file changed, 112 insertions(+), 84 deletions(-) diff --git a/src/main.zig b/src/main.zig index b9e8e7a..3791de7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -95,92 +95,126 @@ fn dumpDateTime(writer: 
anytype, indent: usize, datetime: hdoc.DateTime) !void { try dumpTime(writer, indent + indent_step, datetime.time); } -fn dumpFormattedDate(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { - try writeIndent(writer, indent); - try writer.writeAll("value:\n"); - try dumpDate(writer, indent + indent_step, formatted.value); - try writeIndent(writer, indent); - try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +fn writeAttrSeparator(writer: anytype, first: *bool) !void { + if (first.*) { + first.* = false; + } else { + try writer.writeByte(' '); + } } -fn dumpFormattedTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { - try writeIndent(writer, indent); - try writer.writeAll("value:\n"); - try dumpTime(writer, indent + indent_step, formatted.value); - try writeIndent(writer, indent); - try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +fn writeSpanAttributes(writer: anytype, span: hdoc.Span) !void { + try writer.writeByte('['); + var first = true; + if (span.attribs.em) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("em"); + } + if (span.attribs.mono) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("mono"); + } + if (span.attribs.strike) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("strike"); + } + if (span.attribs.position != .baseline) { + try writeAttrSeparator(writer, &first); + try writer.print("position=\"{s}\"", .{@tagName(span.attribs.position)}); + } + switch (span.attribs.link) { + .none => {}, + .ref => |value| { + try writeAttrSeparator(writer, &first); + try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value)}); + }, + .uri => |value| { + try writeAttrSeparator(writer, &first); + try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value)}); + }, + } + if (span.attribs.lang.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("lang=\"{f}\"", 
.{std.zig.fmtString(span.attribs.lang)}); + } + if (span.attribs.syntax.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("syntax=\"{f}\"", .{std.zig.fmtString(span.attribs.syntax)}); + } + try writer.writeByte(']'); } -fn dumpFormattedDateTime(writer: anytype, indent: usize, formatted: hdoc.FormattedDateTime(hdoc.DateTime)) !void { - try writeIndent(writer, indent); - try writer.writeAll("value:\n"); - try dumpDateTime(writer, indent + indent_step, formatted.value); - try writeIndent(writer, indent); - try writer.print("format: {s}\n", .{@tagName(formatted.format)}); +fn writeDateValue(writer: anytype, date: hdoc.Date) !void { + try writer.print("{d:0>4}-{d:0>2}-{d:0>2}", .{ date.year, date.month, date.day }); +} + +fn writeTimeValue(writer: anytype, time: hdoc.Time) !void { + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ time.hour, time.minute, time.second }); + if (time.microsecond != 0) { + try writer.print(".{d:0>6}", .{time.microsecond}); + } +} + +fn writeDateTimeValue(writer: anytype, datetime: hdoc.DateTime) !void { + try writeDateValue(writer, datetime.date); + try writer.writeByte('T'); + try writeTimeValue(writer, datetime.time); +} + +fn writeFormattedDateInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { + try writer.writeAll("date:"); + try writeDateValue(writer, formatted.value); + if (formatted.format != hdoc.Date.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } } -fn dumpSpanContent(writer: anytype, indent: usize, content: hdoc.Span.Content) !void { +fn writeFormattedTimeInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { + try writer.writeAll("time:"); + try writeTimeValue(writer, formatted.value); + if (formatted.format != hdoc.Time.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeFormattedDateTimeInline(writer: anytype, formatted: 
hdoc.FormattedDateTime(hdoc.DateTime)) !void { + try writer.writeAll("datetime:"); + try writeDateTimeValue(writer, formatted.value); + if (formatted.format != hdoc.DateTime.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeSpanContentInline(writer: anytype, content: hdoc.Span.Content) !void { switch (content) { .text => |text| { - try writeIndent(writer, indent); - try writer.writeAll("text: "); try writeStringValue(writer, text); - try writer.writeByte('\n'); }, .date => |date| { - try writeIndent(writer, indent); - try writer.writeAll("date:\n"); - try dumpFormattedDate(writer, indent + indent_step, date); + try writer.writeByte('"'); + try writeFormattedDateInline(writer, date); + try writer.writeByte('"'); }, .time => |time| { - try writeIndent(writer, indent); - try writer.writeAll("time:\n"); - try dumpFormattedTime(writer, indent + indent_step, time); + try writer.writeByte('"'); + try writeFormattedTimeInline(writer, time); + try writer.writeByte('"'); }, .datetime => |datetime| { - try writeIndent(writer, indent); - try writer.writeAll("datetime:\n"); - try dumpFormattedDateTime(writer, indent + indent_step, datetime); + try writer.writeByte('"'); + try writeFormattedDateTimeInline(writer, datetime); + try writer.writeByte('"'); }, } } -fn dumpLink(writer: anytype, indent: usize, link: hdoc.Link) !void { - switch (link) { - .none => { - try writeIndent(writer, indent); - try writer.writeAll("link: none\n"); - }, - .ref => |value| { - try writeIndent(writer, indent); - try writer.writeAll("link:\n"); - try writeIndent(writer, indent + indent_step); - try writer.writeAll("ref: "); - try writeStringValue(writer, value); - try writer.writeByte('\n'); - }, - .uri => |value| { - try writeIndent(writer, indent); - try writer.writeAll("link:\n"); - try writeIndent(writer, indent + indent_step); - try writer.writeAll("uri: "); - try writeStringValue(writer, value); - try writer.writeByte('\n'); - }, 
- } -} - -fn dumpSpan(writer: anytype, indent: usize, span: hdoc.Span) !void { - try writeIndent(writer, indent); - try writer.writeAll("content:\n"); - try dumpSpanContent(writer, indent + indent_step, span.content); - try dumpOptionalStringField(writer, indent, "lang", span.attribs.lang); - try dumpBoolField(writer, indent, "em", span.attribs.em); - try dumpBoolField(writer, indent, "mono", span.attribs.mono); - try dumpBoolField(writer, indent, "strike", span.attribs.strike); - try dumpEnumField(writer, indent, "position", span.attribs.position); - try dumpLink(writer, indent, span.attribs.link); - try dumpOptionalStringField(writer, indent, "syntax", span.attribs.syntax); +fn dumpSpanInline(writer: anytype, span: hdoc.Span) !void { + try writeSpanAttributes(writer, span); + try writer.writeByte(' '); + try writeSpanContentInline(writer, span.content); } fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { @@ -192,8 +226,9 @@ fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []c try writer.print("{s}:\n", .{key}); for (spans) |span| { try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpSpan(writer, indent + indent_step * 2, span); + try writer.writeAll("- "); + try dumpSpanInline(writer, span); + try writer.writeByte('\n'); } } @@ -286,31 +321,27 @@ fn dumpTableRowsField(writer: anytype, indent: usize, key: []const u8, rows: []c } } -fn dumpBlock(writer: anytype, indent: usize, block: hdoc.Block) !void { +fn dumpBlockInline(writer: anytype, indent: usize, block: hdoc.Block) !void { switch (block) { .heading => |heading| { - try writeIndent(writer, indent); try writer.writeAll("heading:\n"); try dumpEnumField(writer, indent + indent_step, "level", heading.level); try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); try dumpSpanListField(writer, indent + indent_step, "content", heading.content); }, .paragraph => |paragraph| { 
- try writeIndent(writer, indent); try writer.writeAll("paragraph:\n"); try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); }, .list => |list| { - try writeIndent(writer, indent); try writer.writeAll("list:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); try dumpListItemsField(writer, indent + indent_step, "items", list.items); }, .image => |image| { - try writeIndent(writer, indent); try writer.writeAll("image:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); @@ -318,20 +349,17 @@ fn dumpBlock(writer: anytype, indent: usize, block: hdoc.Block) !void { try dumpSpanListField(writer, indent + indent_step, "content", image.content); }, .preformatted => |preformatted| { - try writeIndent(writer, indent); try writer.writeAll("preformatted:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); }, .toc => |toc| { - try writeIndent(writer, indent); try writer.writeAll("toc:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); }, .table => |table| { - try writeIndent(writer, indent); try writer.writeAll("table:\n"); try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); @@ -348,8 +376,8 @@ fn dumpBlockListField(writer: anytype, indent: usize, key: []const 
u8, blocks: [ try writer.print("{s}:\n", .{key}); for (blocks) |block| { try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpBlock(writer, indent + indent_step * 2, block); + try writer.writeAll("- "); + try dumpBlockInline(writer, indent + indent_step, block); } } @@ -439,14 +467,14 @@ test "dumpDocument escapes string values" { defer std.testing.allocator.free(expected_title); try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); - const expected_span = try std.fmt.allocPrint(std.testing.allocator, "text: \"{f}\"\n", .{std.zig.fmtString(span_text)}); + const expected_span = try std.fmt.allocPrint( + std.testing.allocator, + "- [link=\"ref:{f}\"] \"{f}\"\n", + .{ std.zig.fmtString(link_ref), std.zig.fmtString(span_text) }, + ); defer std.testing.allocator.free(expected_span); try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); - const expected_link = try std.fmt.allocPrint(std.testing.allocator, "ref: \"{f}\"\n", .{std.zig.fmtString(link_ref)}); - defer std.testing.allocator.free(expected_link); - try std.testing.expect(std.mem.indexOf(u8, output, expected_link) != null); - const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value)}); defer std.testing.allocator.free(expected_id); try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); From 16123edff0c3c8f09386fe45950ac07a0bb232d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 12:47:36 +0100 Subject: [PATCH 024/116] Implements basic validation of reference and uri attributes. 
--- build.zig.zon | 1 + src/hyperdoc.zig | 114 ++++++++++++++++++++++++++++++----------------- src/main.zig | 18 ++++---- 3 files changed, 84 insertions(+), 49 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 00a368a..e098508 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -2,6 +2,7 @@ .name = .hyperdoc, .version = "0.1.0", .fingerprint = 0xfd1a4802abc4739e, + .minimum_zig_version = "0.15.0", .dependencies = .{ // .parser_toolkit = .{ diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b2f1fe1..dc2fe0b 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -10,7 +10,7 @@ pub const Document = struct { // document contents: contents: []Block, - ids: []?[]const u8, + ids: []?Reference, // header information lang: ?[]const u8, @@ -154,8 +154,8 @@ pub const ScriptPosition = enum { pub const Link = union(enum) { none, - ref: []const u8, - uri: []const u8, + ref: Reference, + uri: Uri, }; /// HyperDoc Version Number @@ -342,6 +342,30 @@ pub const Time = struct { } }; +/// Type-safe wrapper around a URI attribute. +pub const Uri = struct { + pub const empty: Uri = .{ .text = "" }; + + text: []const u8, + + pub fn init(text: []const u8) Uri { + // TODO: Add correctness validation here + return .{ .text = text }; + } +}; + +/// Type-safe wrapper around a reference value (id/ref) attribute. +pub const Reference = struct { + pub const empty: Reference = .{ .text = "" }; + + text: []const u8, + + pub fn init(text: []const u8) Reference { + // TODO: Add correctness validation here + return .{ .text = text }; + } +}; + /// Parses a HyperDoc document. 
pub fn parse( allocator: std.mem.Allocator, @@ -406,6 +430,8 @@ pub fn parse( } pub const SemanticAnalyzer = struct { + const whitespace_chars = " \t"; + const Header = struct { version: Version, lang: ?[]const u8, @@ -420,7 +446,7 @@ pub const SemanticAnalyzer = struct { header: ?Header = null, blocks: std.ArrayList(Block) = .empty, - ids: std.ArrayList(?[]const u8) = .empty, + ids: std.ArrayList(?Reference) = .empty, fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{OutOfMemory}!void { switch (node.type) { @@ -482,7 +508,7 @@ pub const SemanticAnalyzer = struct { }; } - fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?[]const u8 } { + fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?Reference } { std.debug.assert(node.type != .hdoc); switch (node.type) { @@ -546,10 +572,10 @@ pub const SemanticAnalyzer = struct { return error.InvalidNodeType; } - fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?[]const u8 } { + fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?Reference } { const attrs = try sema.get_attributes(node, struct { lang: ?[]const u8 = null, - id: ?[]const u8 = null, + id: ?Reference = null, }); const heading: Block.Heading = .{ @@ -709,23 +735,20 @@ pub const SemanticAnalyzer = struct { .@"\\link" => { const props = try sema.get_attributes(node, struct { lang: ?[]const u8 = null, - uri: ?[]const u8 = null, - ref: ?[]const u8 = null, + uri: ?Uri = null, + ref: ?Reference = null, }); if (props.uri != null and props.ref != null) { - try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + try sema.emit_diagnostic(.invalid_link, node.location.offset); } const link: Link = if (props.uri) |uri| blk: { 
- // TODO: Figure out where to put URI validation (not empty, no leading/trailing whitespace) break :blk .{ .uri = uri }; } else if (props.ref) |ref| blk: { - // TODO: Figure out where to put reference validation (no leading/trailing whitespace) - // TODO: Reference validation must also happen for "id" attribute break :blk .{ .ref = ref }; } else blk: { - try sema.emit_diagnostic(.invalid_link, node.location.offset); // TODO: Use proper attribute location + try sema.emit_diagnostic(.invalid_link, node.location.offset); break :blk .none; }; @@ -806,7 +829,6 @@ pub const SemanticAnalyzer = struct { } try text_buffer.ensureTotalCapacityPrecise(sema.arena, size); - var first_unpadded = true; for (verbatim_lines, 0..) |line, index| { if (index != 0) { try text_buffer.append(sema.arena, '\n'); @@ -814,23 +836,12 @@ pub const SemanticAnalyzer = struct { std.debug.assert(std.mem.startsWith(u8, line.text, "|")); const is_padded = std.mem.startsWith(u8, line.text, "| "); - - if (!is_padded) { - if (first_unpadded) { - try sema.emit_diagnostic(.unpadded_verbatim_line, line.location.offset); - first_unpadded = false; - } - } - const text = if (is_padded) line.text[2..] 
else line.text[1..]; - const stripped = std.mem.trimRight(u8, text, " \t"); - if (text.len != stripped.len) { - try sema.emit_diagnostic(.trailing_whitespace_in_verbatim_line, line.location.offset + stripped.len); - } + const stripped = std.mem.trimRight(u8, text, whitespace_chars); text_buffer.appendSliceAssumeCapacity(stripped); } @@ -856,42 +867,52 @@ pub const SemanticAnalyzer = struct { } } - fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?[]const u8 } { + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?[]const u8 } { + fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?[]const u8 } { + fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?[]const u8 } { + fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?[]const u8 } { + fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } - fn translate_table_node(sema: 
*SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?[]const u8 } { + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { _ = sema; _ = node; return error.Unimplemented; // TODO: Implement this node type } + fn get_attribute_location(node: Parser.Node, attrib_name: []const u8, comptime key: enum { name, value }) ?Parser.Location { + var i = node.attributes.items.len; + while (i > 0) { + i -= 1; + if (std.mem.eql(u8, node.attributes.items[i].name.text, attrib_name)) + return @field(node.attributes.items[i], @tagName(key)).location; + } + return null; + } + fn get_attributes(sema: *SemanticAnalyzer, node: Parser.Node, comptime Attrs: type) error{ OutOfMemory, BadAttributes }!Attrs { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; @@ -964,6 +985,22 @@ pub const SemanticAnalyzer = struct { return switch (T) { []const u8 => value, + Reference => { + const stripped = std.mem.trim(u8, value, whitespace_chars); + if (stripped.len != value.len) { + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + } + return .init(stripped); + }, + + Uri => { + const stripped = std.mem.trim(u8, value, whitespace_chars); + if (stripped.len != value.len) { + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + } + return .init(stripped); + }, + Version => Version.parse(value) catch return error.InvalidValue, DateTime => DateTime.parse(value) catch return error.InvalidValue, Date => Date.parse(value) catch return error.InvalidValue, @@ -1711,9 +1748,8 @@ pub const Diagnostic = struct { verbatim_missing_space, trailing_whitespace, empty_inline_body, - unpadded_verbatim_line, - trailing_whitespace_in_verbatim_line, redundant_inline: InlineUsageError, + attribute_leading_trailing_whitespace, pub fn severity(code: Code) Severity { return switch (code) { @@ -1741,9 +1777,8 @@ pub const Diagnostic = struct { 
.verbatim_missing_space, .trailing_whitespace, .empty_inline_body, - .unpadded_verbatim_line, - .trailing_whitespace_in_verbatim_line, .redundant_inline, + .attribute_leading_trailing_whitespace, => .warning, }; } @@ -1778,13 +1813,12 @@ pub const Diagnostic = struct { .empty_inline_body => try w.writeAll("Inline body is empty."), - .unpadded_verbatim_line => try w.writeAll("Verbatim line is not properly padded with a space character at the start."), - .trailing_whitespace_in_verbatim_line => try w.writeAll("Trailing whitespace at end of verbatim line."), - .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), .link_not_nestable => try w.writeAll("Links are not nestable"), .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), + + .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), } } }; diff --git a/src/main.zig b/src/main.zig index 3791de7..2f9b602 100644 --- a/src/main.zig +++ b/src/main.zig @@ -126,11 +126,11 @@ fn writeSpanAttributes(writer: anytype, span: hdoc.Span) !void { .none => {}, .ref => |value| { try writeAttrSeparator(writer, &first); - try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value)}); + try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); }, .uri => |value| { try writeAttrSeparator(writer, &first); - try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value)}); + try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); }, } if (span.attribs.lang.len != 0) { @@ -381,7 +381,7 @@ fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: [ } } -fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, values: []?[]const u8) !void { +fn dumpOptionalStringListField(writer: anytype, indent: usize, key: 
[]const u8, values: []?hdoc.Reference) !void { try writeIndent(writer, indent); if (values.len == 0) { try writer.print("{s}: []\n", .{key}); @@ -391,7 +391,7 @@ fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, for (values) |value| { try writeIndent(writer, indent + indent_step); try writer.writeAll("- "); - try writeOptionalStringValue(writer, value); + try writeOptionalStringValue(writer, if (value) |val| val.text else null); try writer.writeByte('\n'); } } @@ -420,8 +420,8 @@ fn dumpDocument(writer: anytype, doc: *const hdoc.Document) !void { test "dumpDocument escapes string values" { const title = "Doc \"Title\"\n"; const span_text = "Hello \"world\"\n"; - const link_ref = "section \"A\""; - const id_value = "id:1\n"; + const link_ref: hdoc.Reference = .init("section \"A\""); + const id_value: hdoc.Reference = .init("id:1\n"); var doc: hdoc.Document = .{ .arena = std.heap.ArenaAllocator.init(std.testing.allocator), @@ -453,7 +453,7 @@ test "dumpDocument escapes string values" { }; doc.contents = blocks; - const ids = try arena_alloc.alloc(?[]const u8, 1); + const ids = try arena_alloc.alloc(?hdoc.Reference, 1); ids[0] = id_value; doc.ids = ids; @@ -470,12 +470,12 @@ test "dumpDocument escapes string values" { const expected_span = try std.fmt.allocPrint( std.testing.allocator, "- [link=\"ref:{f}\"] \"{f}\"\n", - .{ std.zig.fmtString(link_ref), std.zig.fmtString(span_text) }, + .{ std.zig.fmtString(link_ref.text), std.zig.fmtString(span_text) }, ); defer std.testing.allocator.free(expected_span); try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); - const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value)}); + const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value.text)}); defer std.testing.allocator.free(expected_id); try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); } From 
088c51eda3fb46aa851f54f7d958b08a28b61cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 15:03:22 +0100 Subject: [PATCH 025/116] Prepares most of the date/time inline parsing, except for the span to string conversion --- src/hyperdoc.zig | 73 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index dc2fe0b..2977cb9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -773,8 +773,35 @@ pub const SemanticAnalyzer = struct { .@"\\time", .@"\\datetime", => { - // TODO: Implement date/time translation - std.log.err("TODO: Implement {t}", .{node.type}); + const props = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + fmt: []const u8 = "", + }); + + var content_spans: std.ArrayList(Span) = .empty; + defer content_spans.deinit(sema.arena); + + // TODO: Implement automatic space insertion. + // This must be done when two consecutive nodes are separated by a space + + try sema.translate_inline_body(&content_spans, node.body, .{}); + + // TODO: Convert the content_spans into a "rendered string". 
+ const content_text = ""; + + const content: Span.Content = switch (node.type) { + .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), + .@"\\time" => try sema.parse_date_body(node, .time, Time, content_text, props.fmt), + .@"\\datetime" => try sema.parse_date_body(node, .datetime, DateTime, content_text, props.fmt), + else => unreachable, + }; + + try spans.append(sema.arena, .{ + .content = content, + .attribs = try sema.derive_attribute(node.location, attribs, .{ + .lang = attribs.lang, + }), + }); }, .hdoc, @@ -804,6 +831,40 @@ pub const SemanticAnalyzer = struct { } } + fn parse_date_body( + sema: *SemanticAnalyzer, + node: Parser.Node, + comptime body: enum { date, time, datetime }, + comptime DTValue: type, + value_str: []const u8, + format_str: []const u8, + ) !Span.Content { + const Format: type = DTValue.Format; + + const value: DTValue = if (DTValue.parse(value_str)) |value| + value + else |_| blk: { + // TODO: Report error for invalid value + try sema.emit_diagnostic(.invalid_date_time, node.location.offset); + break :blk std.mem.zeroes(DTValue); + }; + + const format: Format = if (format_str.len == 0) + .default + else if (std.meta.stringToEnum(Format, format_str)) |format| + format + else blk: { + // TODO: Report error about invalid format + try sema.emit_diagnostic(.invalid_date_time_fmt, (get_attribute_location(node, "fmt", .value) orelse node.location).offset); + break :blk .default; + }; + + return @unionInit(Span.Content, @tagName(body), .{ + .format = format, + .value = value, + }); + } + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { @@ -1739,6 +1800,8 @@ pub const Diagnostic = struct { invalid_inline_combination: InlineCombinationError, link_not_nestable, invalid_link, + invalid_date_time, + invalid_date_time_fmt, // warnings: unknown_attribute: 
NodeAttributeError, @@ -1768,6 +1831,8 @@ pub const Diagnostic = struct { .invalid_inline_combination, .link_not_nestable, .invalid_link, + .invalid_date_time, + .invalid_date_time_fmt, => .@"error", .unknown_attribute, @@ -1819,6 +1884,10 @@ pub const Diagnostic = struct { .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), + + .invalid_date_time => try w.writeAll("Invalid date/time value."), + + .invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' for date/time value."), } } }; From fa5d31ab26bee94f8ff864221abe94f7fbb18a07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 15:06:57 +0100 Subject: [PATCH 026/116] Simplifies sema.emit_diagnostic by taking a Parser.Location --- src/hyperdoc.zig | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2977cb9..09ca70f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -452,7 +452,7 @@ pub const SemanticAnalyzer = struct { switch (node.type) { .hdoc => { if (sema.header != null) { - try sema.emit_diagnostic(.duplicate_hdoc_header, node.location.offset); + try sema.emit_diagnostic(.duplicate_hdoc_header, node.location); } sema.header = sema.translate_header_node(node) catch |err| switch (err) { error.OutOfMemory => |e| return e, @@ -467,7 +467,7 @@ pub const SemanticAnalyzer = struct { // This can only happen exactly once, as we either: // - have already set a header block when the first non-header nodes arrives. // - we have processed another block already, so the previous block would've emitted the warning already. 
- try sema.emit_diagnostic(.missing_hdoc_header, node.location.offset); + try sema.emit_diagnostic(.missing_hdoc_header, node.location); } } @@ -544,7 +544,7 @@ pub const SemanticAnalyzer = struct { }, .unknown_block, .unknown_inline => { - try sema.emit_diagnostic(.{ .unknown_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location.offset); + try sema.emit_diagnostic(.{ .unknown_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location); return error.InvalidNodeType; }, @@ -564,7 +564,7 @@ pub const SemanticAnalyzer = struct { .td, .li, => { - try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location.offset); + try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. node.location.offset + node.location.length] } }, node.location); return error.InvalidNodeType; }, } @@ -626,7 +626,7 @@ pub const SemanticAnalyzer = struct { if (overlay.em) |v| { if (old.em) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .em } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .em } }, location); } new.em = v; } @@ -634,7 +634,7 @@ pub const SemanticAnalyzer = struct { if (overlay.mono) |mono| { if (old.mono) { if (std.mem.eql(u8, old.syntax, new.syntax)) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .mono } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .mono } }, location); } } new.mono = mono; @@ -646,7 +646,7 @@ pub const SemanticAnalyzer = struct { if (overlay.strike) |strike| { if (old.strike) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .strike } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .strike } }, location); } new.strike = 
strike; } @@ -654,7 +654,7 @@ pub const SemanticAnalyzer = struct { if (overlay.position) |new_pos| { std.debug.assert(new_pos != .baseline); // we can never return to baseline script. if (old.position == new_pos) { - try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .sub } }, location.offset); + try sema.emit_diagnostic(.{ .redundant_inline = .{ .attribute = .sub } }, location); } else if (old.position != .baseline) { try sema.emit_diagnostic(.{ .invalid_inline_combination = .{ .first = switch (old.position) { @@ -667,14 +667,14 @@ pub const SemanticAnalyzer = struct { .subscript => .sub, .baseline => unreachable, }, - } }, location.offset); + } }, location); } new.position = new_pos; } if (overlay.link) |link| { if (old.link != .none) { - try sema.emit_diagnostic(.link_not_nestable, location.offset); + try sema.emit_diagnostic(.link_not_nestable, location); } new.link = link; } @@ -740,7 +740,7 @@ pub const SemanticAnalyzer = struct { }); if (props.uri != null and props.ref != null) { - try sema.emit_diagnostic(.invalid_link, node.location.offset); + try sema.emit_diagnostic(.invalid_link, node.location); } const link: Link = if (props.uri) |uri| blk: { @@ -748,7 +748,7 @@ pub const SemanticAnalyzer = struct { } else if (props.ref) |ref| blk: { break :blk .{ .ref = ref }; } else blk: { - try sema.emit_diagnostic(.invalid_link, node.location.offset); + try sema.emit_diagnostic(.invalid_link, node.location); break :blk .none; }; @@ -845,7 +845,7 @@ pub const SemanticAnalyzer = struct { value else |_| blk: { // TODO: Report error for invalid value - try sema.emit_diagnostic(.invalid_date_time, node.location.offset); + try sema.emit_diagnostic(.invalid_date_time, node.location); break :blk std.mem.zeroes(DTValue); }; @@ -855,7 +855,7 @@ pub const SemanticAnalyzer = struct { format else blk: { // TODO: Report error about invalid format - try sema.emit_diagnostic(.invalid_date_time_fmt, (get_attribute_location(node, "fmt", .value) orelse 
node.location).offset); + try sema.emit_diagnostic(.invalid_date_time_fmt, get_attribute_location(node, "fmt", .value) orelse node.location); break :blk .default; }; @@ -868,7 +868,7 @@ pub const SemanticAnalyzer = struct { fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { - try sema.emit_diagnostic(.empty_inline_body, location.offset); + try sema.emit_diagnostic(.empty_inline_body, location); }, .string => |string_body| { @@ -996,11 +996,11 @@ pub const SemanticAnalyzer = struct { const key = attrib.name.text; const fld = std.meta.stringToEnum(Fields, key) orelse { - try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, attrib.name.location.offset); + try sema.emit_diagnostic(.{ .unknown_attribute = .{ .type = node.type, .name = key } }, attrib.name.location); continue; }; if (found.contains(fld)) { - try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, attrib.name.location.offset); + try sema.emit_diagnostic(.{ .duplicate_attribute = .{ .name = key } }, attrib.name.location); } found.insert(fld); @@ -1011,7 +1011,7 @@ pub const SemanticAnalyzer = struct { else => { any_invalid = true; - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, attrib.value.location.offset); + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = key } }, attrib.value.location); continue; }, @@ -1025,7 +1025,7 @@ pub const SemanticAnalyzer = struct { var iter = required.iterator(); while (iter.next()) |req_field| { if (!found.contains(req_field)) { - try sema.emit_diagnostic(.{ .missing_attribute = .{ .type = node.type, .name = @tagName(req_field) } }, node.location.offset); + try sema.emit_diagnostic(.{ .missing_attribute = .{ .type = node.type, .name = @tagName(req_field) } }, node.location); any_missing = true; } } 
@@ -1049,7 +1049,7 @@ pub const SemanticAnalyzer = struct { Reference => { const stripped = std.mem.trim(u8, value, whitespace_chars); if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); } return .init(stripped); }, @@ -1057,7 +1057,7 @@ pub const SemanticAnalyzer = struct { Uri => { const stripped = std.mem.trim(u8, value, whitespace_chars); if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location.offset); + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); } return .init(stripped); }, @@ -1071,9 +1071,9 @@ pub const SemanticAnalyzer = struct { }; } - fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, offset: usize) !void { + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { - try diag.add(code, sema.make_location(offset)); + try diag.add(code, sema.make_location(location.offset)); } } From 67f9a10e1c0a8781702200d76a4717ea5e283113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 27 Dec 2025 15:07:52 +0100 Subject: [PATCH 027/116] Moves code around --- src/hyperdoc.zig | 72 ++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 09ca70f..e2c197d 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -592,6 +592,42 @@ pub const SemanticAnalyzer = struct { return .{ heading, attrs.id }; } + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, 
?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { + _ = sema; + _ = node; + return error.Unimplemented; // TODO: Implement this node type + } + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); @@ -928,42 +964,6 @@ pub const SemanticAnalyzer = struct { } } - fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this 
node type - } - - fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - - fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type - } - fn get_attribute_location(node: Parser.Node, attrib_name: []const u8, comptime key: enum { name, value }) ?Parser.Location { var i = node.attributes.items.len; while (i > 0) { From 98ee9090aa11864f085d4d896ac6499a40920205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 19:52:06 +0100 Subject: [PATCH 028/116] Implements SemanticAnalyzer.unescape_string --- AGENTS.md | 5 + src/hyperdoc.zig | 233 ++++++++++++++++++++++++++++++++-- src/testsuite.zig | 312 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 484 insertions(+), 66 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0bb6695..2579445 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,3 +10,8 @@ - Run `zig build` to validate the main application still compiles - Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. - Avoid editing documentation unless the request explicitly asks for it. + +## Zig Programming Style + +- Do not use "inline functions" like `const func = struct { fn func(…) {} }.func;` +- Zig has no methods. Functions used by "method like" functions can still be placed next to them, no need to put them into global scope nor into local scope. 
diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index e2c197d..b226fc1 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -370,16 +370,18 @@ pub const Reference = struct { pub fn parse( allocator: std.mem.Allocator, /// The source code to be parsed - plain_text: []const u8, + raw_plain_text: []const u8, /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) error{ OutOfMemory, SyntaxError, MalformedDocument }!Document { +) error{ OutOfMemory, SyntaxError, MalformedDocument, InvalidUtf8 }!Document { + const source_text = try remove_byte_order_mark(diagnostics, raw_plain_text); + var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); var parser: Parser = .{ - .code = plain_text, + .code = source_text, .arena = arena.allocator(), .diagnostics = diagnostics, }; @@ -387,7 +389,7 @@ pub fn parse( var sema: SemanticAnalyzer = .{ .arena = arena.allocator(), .diagnostics = diagnostics, - .code = plain_text, + .code = source_text, }; while (true) { @@ -429,6 +431,27 @@ pub fn parse( }; } +pub fn remove_byte_order_mark(diagnostics: ?*Diagnostics, plain_text: []const u8) error{ OutOfMemory, InvalidUtf8 }![]const u8 { + // First check if all of our code is valid UTF-8 + // and if it potentially starts with a BOM. 
+ var view = std.unicode.Utf8View.init(plain_text) catch { + return error.InvalidUtf8; + }; + + var iter = view.iterator(); + + if (iter.nextCodepointSlice()) |slice| { + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + if (codepoint == 0xFEFF) { + if (diagnostics) |diag| { + try diag.add(.document_starts_with_bom, .{ .column = 1, .line = 1 }); + } + return plain_text[slice.len..]; + } + } + return plain_text; +} + pub const SemanticAnalyzer = struct { const whitespace_chars = " \t"; @@ -1099,11 +1122,184 @@ pub const SemanticAnalyzer = struct { std.debug.assert(token.text.len >= 2); std.debug.assert(token.text[0] == '"' and token.text[token.text.len - 1] == '"'); - _ = sema; - // TODO: Implement unescaping logic here. + const base_offset = token.location.offset + 1; // skip leading quote + const content = token.text[1 .. token.text.len - 1]; + + const Source = struct { + char: u8, + location: Parser.Location, + }; + + var output_buffer: std.MultiArrayList(Source) = .empty; + defer output_buffer.deinit(sema.arena); + + try output_buffer.ensureTotalCapacity(sema.arena, content.len); + + { + var out_chars_buffer: [4]u8 = undefined; + + var i: usize = 0; + while (i < content.len) { + const start = i; + + // We process bytes, even thought the input is UTF-8. + // This is fine as we only process ASCII-range escape sequences + const in_char = content[i]; + + // We process our in_char into 1..4 bytes, depending + // on the escape sequence. Worst input is \u{10FFFF}, which is + // encoded as {F4 8F BF BF}, so 4 bytes. 
+ const out_chars: []const u8 = blk: { + i += 1; + if (in_char != '\\') { + // Just return the actual character + break :blk content[start..i]; + } + + // This would mean an uinterminated escape sequence, and + // must be processed by the parser already: + std.debug.assert(i < content.len); + + const esc_char = content[i]; + + switch (esc_char) { + '"' => { + i += 1; + break :blk "\""; + }, + '\\' => { + i += 1; + break :blk "\\"; + }, + 'n' => { + i += 1; + break :blk "\n"; + }, + 'r' => { + i += 1; + break :blk "\r"; + }, + + 'u' => { + while (content[i] != '}') { + i += 1; + if (i >= content.len) { + try sema.emit_diagnostic(.invalid_unicode_string_escape, .{ .offset = start, .length = i - start }); + break :blk content[start..i]; + } + } + i += 1; + const escape_part = content[start..i]; + std.debug.assert(escape_part.len > 2); + std.debug.assert(escape_part[0] == '\\'); + std.debug.assert(escape_part[1] == 'u'); + std.debug.assert(escape_part[escape_part.len - 1] == '}'); + + const location: Parser.Location = .{ .offset = start, .length = escape_part.len }; + + if (escape_part[2] != '{') { + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + } + + const codepoint = std.fmt.parseInt(u21, escape_part[3 .. escape_part.len - 1], 16) catch { + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + }; + + const out_len = std.unicode.utf8Encode(codepoint, &out_chars_buffer) catch |err| switch (err) { + error.Utf8CannotEncodeSurrogateHalf => { + try sema.emit_diagnostic(.{ .illegal_character = .{ .codepoint = codepoint } }, location); + break :blk "???"; + }, + error.CodepointTooLarge => { + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + }, + }; + break :blk out_chars_buffer[0..out_len]; + }, + + else => { + // Unknown escape sequence, emit escaped char verbatim: + // TODO: How to handle something like "\😭", which is + // definitly valid and in-scope. 
+ + const len = std.unicode.utf8ByteSequenceLength(esc_char) catch unreachable; + + const esc_codepoint = std.unicode.utf8Decode(content[i .. i + len]) catch unreachable; + + i += len; - // For now, we just return the raw text. - return token.text[1 .. token.text.len - 1]; + try sema.emit_diagnostic(.{ + .invalid_string_escape = .{ .codepoint = esc_codepoint }, + }, .{ .offset = start, .length = i - start + 1 }); + + break :blk content[start..i]; + }, + } + @compileError("The switch above must be exhaustive and break to :blk for each code path."); + }; + + const loc: Parser.Location = .{ + .offset = base_offset + start, + .length = i - start + 1, + }; + for (out_chars) |out_char| { + output_buffer.appendAssumeCapacity(.{ + .char = out_char, + .location = loc, + }); + } + } + } + + var output = output_buffer.toOwnedSlice(); + errdefer output.deinit(sema.arena); + + const view = std.unicode.Utf8View.init(output.items(.char)) catch { + std.log.err("invalid utf-8 input: \"{f}\"", .{std.zig.fmtString(output.items(.char))}); + @panic("String unescape produced invalid UTF-8 sequence. This should not be possible."); + }; + + var iter = view.iterator(); + while (iter.nextCodepointSlice()) |slice| { + const start = iter.i - slice.len; + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + + if (is_illegal_character(codepoint)) { + try sema.emit_diagnostic( + .{ .illegal_character = .{ .codepoint = codepoint } }, + output.get(start).location, + ); + } + } + + return view.bytes; + } + + // TODO: Also validate the whole document against this rules. + fn is_illegal_character(codepoint: u21) bool { + // Surrogate codepoints are illegal, we're only ever using UTF-8 which doesn't need them. 
+ if (std.unicode.isSurrogateCodepoint(codepoint)) + return true; + + // CR and LF are the only allowed control characters: + if (codepoint == std.ascii.control_code.cr) + return false; + if (codepoint == std.ascii.control_code.lf) + return false; + + // Disallow characters from the "Control" category: + // + if (codepoint <= 0x1F) // C0 control characters + return true; + if (codepoint == 0x7F) // DEL + return true; + if (codepoint >= 0x80 and codepoint <= 0x9F) // C1 control characters + return true; + + // All other characters are fine + return false; } }; @@ -1782,6 +1978,8 @@ pub const Diagnostic = struct { pub const InvalidBlockError = struct { name: []const u8 }; pub const InlineUsageError = struct { attribute: InlineAttribute }; pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; + pub const InvalidStringEscape = struct { codepoint: u21 }; + pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const Code = union(enum) { // errors: @@ -1802,8 +2000,12 @@ pub const Diagnostic = struct { invalid_link, invalid_date_time, invalid_date_time_fmt, + invalid_unicode_string_escape, + invalid_string_escape: InvalidStringEscape, + illegal_character: ForbiddenControlCharacter, // warnings: + document_starts_with_bom, unknown_attribute: NodeAttributeError, duplicate_attribute: DuplicateAttribute, empty_verbatim_block, @@ -1833,6 +2035,9 @@ pub const Diagnostic = struct { .invalid_link, .invalid_date_time, .invalid_date_time_fmt, + .invalid_string_escape, + .illegal_character, + .invalid_unicode_string_escape, => .@"error", .unknown_attribute, @@ -1844,12 +2049,15 @@ pub const Diagnostic = struct { .empty_inline_body, .redundant_inline, .attribute_leading_trailing_whitespace, + .document_starts_with_bom, => .warning, }; } pub fn format(code: Code, w: anytype) !void { switch (code) { + .document_starts_with_bom => try w.writeAll("Document starts with BOM (U+FEFF). 
HyperDoc recommends not using the BOM with UTF-8."), + .unterminated_inline_list => try w.writeAll("Inline list body is unterminated (missing '}' before end of file)."), .unexpected_eof => |ctx| { if (ctx.expected_char) |ch| { @@ -1888,6 +2096,15 @@ pub const Diagnostic = struct { .invalid_date_time => try w.writeAll("Invalid date/time value."), .invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' for date/time value."), + + .invalid_string_escape => |ctx| if (ctx.codepoint > 0x20 and ctx.codepoint <= 0x7F) + try w.print("\\{u} is not a valid escape sequence.", .{ctx.codepoint}) + else + try w.print("U+{X:0>2} is not a valid escape sequence.", .{ctx.codepoint}), + + .invalid_unicode_string_escape => try w.writeAll("Invalid unicode escape sequence"), + + .illegal_character => |ctx| try w.print("Forbidden control character U+{X:0>4}.", .{ctx.codepoint}), } } }; diff --git a/src/testsuite.zig b/src/testsuite.zig index 6a79530..034501b 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -86,6 +86,96 @@ test "parser accept string literals and unescape" { try std.testing.expectEqualStrings("\"hello\\\\n\"", token.text); } +test "semantic analyzer unescapes string literals" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"line\\\\break\\nquote \\\" unicode \\u{1F600}\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code = source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + + const text = try sema.unescape_string(token); + try std.testing.expectEqualStrings("line\\break\nquote \" unicode 😀", text); + try std.testing.expect(!diagnostics.has_error()); +} + +test "semantic analyzer reports invalid string escapes" { + var arena = 
std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"oops\\q\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code = source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + + const text = try sema.unescape_string(token); + try std.testing.expectEqualStrings("oops\\q", text); + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .invalid_string_escape = .{ .codepoint = 'q' } })); +} + +test "semantic analyzer flags forbidden control characters" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"tab\\u{9}\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code = source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + + const text = try sema.unescape_string(token); + try std.testing.expectEqualStrings("tab\t", text); + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .illegal_character = .{ .codepoint = 0x9 } })); +} + +test "semantic analyzer forbids raw control characters" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const source = "\"bad\tvalue\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: hdoc.SemanticAnalyzer = .{ + .arena = arena.allocator(), + .diagnostics = &diagnostics, + .code 
= source, + }; + + const token: hdoc.Parser.Token = .{ .text = source, .location = .{ .offset = 0, .length = source.len } }; + _ = try sema.unescape_string(token); + + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .illegal_character = .{ .codepoint = 0x9 } })); +} + test "parser reports unterminated string literals" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); @@ -245,81 +335,187 @@ test "parser handles unknown node types" { } } -fn diagnosticsContain(diag: *const hdoc.Diagnostics, expected: hdoc.Diagnostic.Code) bool { +fn diagnosticCodesEqual(a: hdoc.Diagnostic.Code, b: hdoc.Diagnostic.Code) bool { + if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false; + + return switch (a) { + .document_starts_with_bom, + .unterminated_inline_list, + .unterminated_string, + .unterminated_block_list, + .missing_hdoc_header, + .duplicate_hdoc_header, + .link_not_nestable, + .invalid_link, + .invalid_date_time, + .invalid_date_time_fmt, + .empty_verbatim_block, + .verbatim_missing_trailing_newline, + .verbatim_missing_space, + .trailing_whitespace, + .empty_inline_body, + .attribute_leading_trailing_whitespace, + .invalid_unicode_string_escape, + => true, + + .unexpected_eof => |ctx| blk: { + const other = b.unexpected_eof; + break :blk ctx.expected_char == other.expected_char and std.mem.eql(u8, ctx.context, other.context); + }, + + .unexpected_character => |ctx| blk: { + const other = b.unexpected_character; + break :blk ctx.expected == other.expected and ctx.found == other.found; + }, + + .invalid_identifier_start => |ctx| blk: { + const other = b.invalid_identifier_start; + break :blk ctx.char == other.char; + }, + + .missing_attribute => |ctx| blk: { + const other = b.missing_attribute; + break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); + }, + + .invalid_attribute => |ctx| blk: { + 
const other = b.invalid_attribute; + break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); + }, + + .unknown_block_type => |ctx| blk: { + const other = b.unknown_block_type; + break :blk std.mem.eql(u8, ctx.name, other.name); + }, + + .invalid_block_type => |ctx| blk: { + const other = b.invalid_block_type; + break :blk std.mem.eql(u8, ctx.name, other.name); + }, + + .invalid_inline_combination => |ctx| blk: { + const other = b.invalid_inline_combination; + break :blk ctx.first == other.first and ctx.second == other.second; + }, + + .duplicate_attribute => |ctx| blk: { + const other = b.duplicate_attribute; + break :blk std.mem.eql(u8, ctx.name, other.name); + }, + + .unknown_attribute => |ctx| blk: { + const other = b.unknown_attribute; + break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); + }, + + .redundant_inline => |ctx| blk: { + const other = b.redundant_inline; + break :blk ctx.attribute == other.attribute; + }, + + .invalid_string_escape => |ctx| blk: { + break :blk b.invalid_string_escape.codepoint == ctx.codepoint; + }, + + .illegal_character => |ctx| blk: { + const other = b.illegal_character; + break :blk ctx.codepoint == other.codepoint; + }, + }; +} + +fn logDiagnostics(diag: *const hdoc.Diagnostics) void { for (diag.items.items) |item| { - if (std.meta.activeTag(item.code) == std.meta.activeTag(expected)) { - return true; + var buf: [256]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + item.code.format(stream.writer()) catch {}; + std.log.err("Diagnostic {d}:{d}: {s}", .{ item.location.line, item.location.column, stream.getWritten() }); + } +} + +fn validateDiagnostics(code: []const u8, expected: []const hdoc.Diagnostic.Code) !void { + try std.testing.expect(expected.len > 0); + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const maybe_doc = hdoc.parse(std.testing.allocator, code, &diagnostics) catch |err| switch (err) { + 
error.OutOfMemory => return err, + else => null, + }; + if (maybe_doc) |doc| { + var owned = doc; + defer owned.deinit(); + } + + if (diagnostics.items.items.len != expected.len) { + logDiagnostics(&diagnostics); + } + try std.testing.expectEqual(expected.len, diagnostics.items.items.len); + for (expected, 0..) |exp, idx| { + const actual = diagnostics.items.items[idx].code; + if (!diagnosticCodesEqual(actual, exp)) { + logDiagnostics(&diagnostics); + return error.MissingDiagnosticCode; } } - return false; } -test "parsing valid document yields empty diagnostics" { +fn expectParseOk(code: []const u8) !void { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); - var doc = try hdoc.parse(std.testing.allocator, "hdoc(version=\"2.0\");", &diagnostics); + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); defer doc.deinit(); - try std.testing.expect(!diagnostics.has_error()); - try std.testing.expect(!diagnostics.has_warning()); - try std.testing.expectEqual(@as(usize, 0), diagnostics.items.items.len); + if (diagnostics.has_error() or diagnostics.has_warning()) { + logDiagnostics(&diagnostics); + return error.TestExpectedEqual; + } } -test "diagnostic codes are emitted for expected samples" { - const Case = struct { - code: hdoc.Diagnostic.Code, - samples: []const []const u8, - }; +fn expectParseNoFail(code: []const u8) !void { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); - const cases = [_]Case{ - .{ .code = .{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }, .samples = &.{"hdoc(version=\"2.0\"); h1("} }, - .{ .code = .{ .unexpected_character = .{ .expected = '{', .found = '1' } }, .samples = &.{"hdoc(version=\"2.0\"); h1 123"} }, - .{ .code = .unterminated_string, .samples = &.{"hdoc(version=\"2.0\"); h1 \"unterminated"} }, - .{ .code = .{ .invalid_identifier_start = .{ .char = '-' } }, .samples = &.{"hdoc(version=\"2.0\"); -abc"} }, 
- .{ .code = .unterminated_block_list, .samples = &.{"hdoc{h1 \"x\""} }, - .{ .code = .unterminated_inline_list, .samples = &.{"hdoc(version=\"2.0\"); p {hello"} }, - .{ .code = .{ .duplicate_attribute = .{ .name = "title" } }, .samples = &.{"hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");"} }, - .{ .code = .empty_verbatim_block, .samples = &.{"hdoc(version=\"2.0\"); pre:\n"} }, - .{ .code = .verbatim_missing_trailing_newline, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|line"} }, - .{ .code = .verbatim_missing_space, .samples = &.{"hdoc(version=\"2.0\"); pre:\n|nospace\n"} }, - .{ .code = .trailing_whitespace, .samples = &.{"hdoc(version=\"2.0\"); pre:\n| trailing \n"} }, - .{ .code = .missing_hdoc_header, .samples = &.{"h1 \"Title\""} }, - .{ .code = .duplicate_hdoc_header, .samples = &.{"hdoc(version=\"2.0\"); hdoc(version=\"2.0\");"} }, + var doc = hdoc.parse(std.testing.allocator, code, &diagnostics) catch |err| switch (err) { + error.OutOfMemory => return err, + else => { + logDiagnostics(&diagnostics); + return error.TestExpectedEqual; + }, }; + defer doc.deinit(); - inline for (cases) |case| { - for (case.samples) |sample| { - var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const maybe_doc = hdoc.parse(std.testing.allocator, sample, &diagnostics) catch |err| switch (err) { - error.OutOfMemory => return err, - else => null, - }; - - if (maybe_doc) |doc| { - var owned_doc = doc; - defer owned_doc.deinit(); - } + if (diagnostics.has_error()) { + logDiagnostics(&diagnostics); + return error.TestExpectedEqual; + } +} - if (!diagnosticsContain(&diagnostics, case.code)) { - std.log.err("Diagnostics did not contain expected code: '{t}'", .{case.code}); - for (diagnostics.items.items) |item| { - std.log.err(" Emitted diagnostic: {f}", .{item.code}); - } - return error.MissingDiagnosticCode; - } +test "parsing valid document yields empty diagnostics" { + try expectParseOk("hdoc(version=\"2.0\");"); +} - const 
expected_severity = case.code.severity(); - if (expected_severity == .@"error") { - try std.testing.expect(diagnostics.has_error()); - } else { - try std.testing.expect(!diagnostics.has_error()); - try std.testing.expect(diagnostics.has_warning()); - } - } - } +test "diagnostic codes are emitted for expected samples" { + try validateDiagnostics("hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); + try validateDiagnostics("hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); + try validateDiagnostics("hdoc{h1 \"x\"", &.{.unterminated_block_list}); + try validateDiagnostics("hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); + try validateDiagnostics( + "hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");", + &.{ .{ .duplicate_attribute = .{ .name = "lang" } }, .empty_inline_body }, + ); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); + try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); + try validateDiagnostics("h1 \"Title\"", &.{.missing_hdoc_header}); + try validateDiagnostics("hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); + try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } test "parser reports unterminated inline lists" { From 
01e548c576829457af9431bbe4118b28b1f4f1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 20:00:31 +0100 Subject: [PATCH 029/116] Simplifies diagnosticCodesEqual with metaprogramming. --- src/testsuite.zig | 117 +++++++++++++--------------------------------- 1 file changed, 32 insertions(+), 85 deletions(-) diff --git a/src/testsuite.zig b/src/testsuite.zig index 034501b..cc5ad4a 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -335,93 +335,40 @@ test "parser handles unknown node types" { } } -fn diagnosticCodesEqual(a: hdoc.Diagnostic.Code, b: hdoc.Diagnostic.Code) bool { - if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false; - - return switch (a) { - .document_starts_with_bom, - .unterminated_inline_list, - .unterminated_string, - .unterminated_block_list, - .missing_hdoc_header, - .duplicate_hdoc_header, - .link_not_nestable, - .invalid_link, - .invalid_date_time, - .invalid_date_time_fmt, - .empty_verbatim_block, - .verbatim_missing_trailing_newline, - .verbatim_missing_space, - .trailing_whitespace, - .empty_inline_body, - .attribute_leading_trailing_whitespace, - .invalid_unicode_string_escape, - => true, - - .unexpected_eof => |ctx| blk: { - const other = b.unexpected_eof; - break :blk ctx.expected_char == other.expected_char and std.mem.eql(u8, ctx.context, other.context); - }, - - .unexpected_character => |ctx| blk: { - const other = b.unexpected_character; - break :blk ctx.expected == other.expected and ctx.found == other.found; - }, - - .invalid_identifier_start => |ctx| blk: { - const other = b.invalid_identifier_start; - break :blk ctx.char == other.char; - }, - - .missing_attribute => |ctx| blk: { - const other = b.missing_attribute; - break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); - }, - - .invalid_attribute => |ctx| blk: { - const other = b.invalid_attribute; - break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); - }, - - 
.unknown_block_type => |ctx| blk: { - const other = b.unknown_block_type; - break :blk std.mem.eql(u8, ctx.name, other.name); - }, - - .invalid_block_type => |ctx| blk: { - const other = b.invalid_block_type; - break :blk std.mem.eql(u8, ctx.name, other.name); - }, - - .invalid_inline_combination => |ctx| blk: { - const other = b.invalid_inline_combination; - break :blk ctx.first == other.first and ctx.second == other.second; - }, - - .duplicate_attribute => |ctx| blk: { - const other = b.duplicate_attribute; - break :blk std.mem.eql(u8, ctx.name, other.name); - }, - - .unknown_attribute => |ctx| blk: { - const other = b.unknown_attribute; - break :blk ctx.type == other.type and std.mem.eql(u8, ctx.name, other.name); - }, - - .redundant_inline => |ctx| blk: { - const other = b.redundant_inline; - break :blk ctx.attribute == other.attribute; - }, - - .invalid_string_escape => |ctx| blk: { - break :blk b.invalid_string_escape.codepoint == ctx.codepoint; - }, +fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bool { + if (std.meta.activeTag(lhs) != std.meta.activeTag(rhs)) + return false; + + switch (lhs) { + inline else => |_, tag_value| { + const tag = @tagName(tag_value); + const a_struct = @field(lhs, tag); + const b_struct = @field(rhs, tag); + + const TagField = @FieldType(hdoc.Diagnostic.Code, tag); + const info = @typeInfo(TagField); + + switch (info) { + .void => return true, + + .@"struct" => |struct_info| { + inline for (struct_info.fields) |fld| { + const a = @field(a_struct, fld.name); + const b = @field(b_struct, fld.name); + const eql = switch (fld.type) { + []const u8 => std.mem.eql(u8, a, b), + else => (a == b), + }; + if (!eql) + return false; + } + return true; + }, - .illegal_character => |ctx| blk: { - const other = b.illegal_character; - break :blk ctx.codepoint == other.codepoint; + else => @compileError("Unsupported type: " ++ @typeName(TagField)), + } }, - }; + } } fn logDiagnostics(diag: *const hdoc.Diagnostics) 
void { From 02379dbfc40a0b79ce53fb09c6e3249df6eb8e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 20:33:09 +0100 Subject: [PATCH 030/116] Fixes an edge case in unescape_string. Adds fuzzing for string unescaper. --- build.zig | 2 +- src/hyperdoc.zig | 139 ++++++++++++++++++++++++++- src/testsuite.zig | 24 ++--- test/{parser => accept}/stress.hdoc | 0 test/{parser => accept}/workset.hdoc | 0 5 files changed, 147 insertions(+), 18 deletions(-) rename test/{parser => accept}/stress.hdoc (100%) rename test/{parser => accept}/workset.hdoc (100%) diff --git a/build.zig b/build.zig index 5018027..1d265f5 100644 --- a/build.zig +++ b/build.zig @@ -45,7 +45,7 @@ pub fn build(b: *std.Build) void { rawFileMod(b, "examples/featurematrix.hdoc"), rawFileMod(b, "examples/demo.hdoc"), rawFileMod(b, "examples/guide.hdoc"), - rawFileMod(b, "test/parser/stress.hdoc"), + rawFileMod(b, "test/accept/stress.hdoc"), }, }), .use_llvm = true, diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b226fc1..378d971 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1190,7 +1190,7 @@ pub const SemanticAnalyzer = struct { } i += 1; const escape_part = content[start..i]; - std.debug.assert(escape_part.len > 2); + std.debug.assert(escape_part.len >= 3); std.debug.assert(escape_part[0] == '\\'); std.debug.assert(escape_part[1] == 'u'); std.debug.assert(escape_part[escape_part.len - 1] == '}'); @@ -1199,6 +1199,14 @@ pub const SemanticAnalyzer = struct { if (escape_part[2] != '{') { try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + } + + if (escape_part.len == 4) { + // Empty escape: \u{} + std.debug.assert(std.mem.eql(u8, escape_part, "\\u{}")); + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; } const codepoint = std.fmt.parseInt(u21, escape_part[3 .. 
escape_part.len - 1], 16) catch { @@ -2185,7 +2193,134 @@ test "fuzz parser" { @embedFile("examples/featurematrix.hdoc"), @embedFile("examples/demo.hdoc"), @embedFile("examples/guide.hdoc"), - @embedFile("test/parser/stress.hdoc"), + @embedFile("test/accept/stress.hdoc"), + }, + }); +} + +test "fuzz string unescape" { + const Impl = struct { + fn testOne(impl: @This(), string_literal: []const u8) !void { + // Don't test if the string doesn't follow our rules: + if (string_literal.len < 2) + return; + if (string_literal[0] != '"' or string_literal[string_literal.len - 1] != '"') + return; + if (string_literal.len >= 3 and string_literal[string_literal.len - 2] == '\\') + return; + + // Check for valid UTF-8 + _ = std.unicode.utf8CountCodepoints(string_literal) catch return; + + _ = impl; + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var sema: SemanticAnalyzer = .{ + .arena = arena.allocator(), + .code = string_literal, + .diagnostics = &diagnostics, + }; + + const output = try sema.unescape_string(.{ + .location = .{ .offset = 0, .length = string_literal.len }, + .text = string_literal, + }); + + _ = output; + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + \\"" + , + \\"hello" + , + \\"simple ASCII 123" + , + \\"quote: \"inside\" ok" + , + \\"backslash: \\ path" + , + \\"mixed: \"a\" and \\b\\" + , + \\"line1\nline2" + , + \\"windows\r\nnew line" + , + \\"unicode snowman: \u{2603} yay" + , + \\"emoji: \u{1F642} smile" + , + \\"CJK: \u{65E5}\u{672C}\u{8A9E}" + , + \\"math: \u{221E} infinity" + , + \\"euro: \u{20AC} symbol" + , + \\"accented: café" + , + \\"escaped braces: \u{7B} \u{7D}" + , + \\"leading zeros: \u{000041} is A" + , + \\"json-ish: {\"k\":\"v\"}" + , + \\"literal sequence: \\\" done" + , + \\"multiple lines:\n- one\n- two" + , + \\"CR only:\rreturn" + , + \\"mix: \u{1F4A1} idea 
\"quoted\" \\slash" + , + // + // Adversarial ones: + // + \\"tab escape: \t is not allowed" + , + \\"backspace: \b not allowed" + , + \\"null: \0 not allowed" + , + \\"hex escape: \x20 not allowed" + , + \\"octal-ish: \123 not allowed" + , + \\"single quote escape: \' not allowed" + , + \\"unicode short form: \u0041 not allowed" + , + \\"empty unicode: \u{} not allowed" + , + \\"missing closing brace: \u{41 not closed" + , + \\"missing opening brace: \u41} not opened" + , + \\"non-hex digit: \u{12G} invalid" + , + \\"too many digits: \u{1234567} invalid" + , + \\"out of range: \u{110000} invalid" + , + \\"surrogate: \u{D800} invalid" + , + \\"forbidden NUL via unicode: \u{0} invalid" + , + \\"forbidden TAB via unicode: \u{9} invalid" + , + \\"forbidden C1 control: \u{80} invalid" + , + \\"unknown escape: \q invalid" + , + \\"backslash-space escape: \ a invalid" + , + \\"bad hex tail: \u{1F60Z} invalid" }, }); } diff --git a/src/testsuite.zig b/src/testsuite.zig index cc5ad4a..7a5d640 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -1,15 +1,12 @@ const std = @import("std"); const hdoc = @import("./hyperdoc.zig"); -fn testAcceptDocument(document: []const u8) !void { - var doc = try hdoc.parse(std.testing.allocator, document, null); - defer doc.deinit(); +test "validate examples directory" { + try parseDirectoryTree("examples"); } -fn parseFile(path: []const u8) !void { - const source = try std.fs.cwd().readFileAlloc(std.testing.allocator, path, 10 * 1024 * 1024); - defer std.testing.allocator.free(source); - try testAcceptDocument(source); +test "validate tests directory" { + try parseDirectoryTree("test/accept"); } fn parseDirectoryTree(path: []const u8) !void { @@ -25,16 +22,13 @@ fn parseDirectoryTree(path: []const u8) !void { if (!std.mem.endsWith(u8, entry.path, ".hdoc")) continue; - const full_path = try std.fs.path.join(std.testing.allocator, &.{ path, entry.path }); - defer std.testing.allocator.free(full_path); + errdefer std.log.err("failed to 
process \"{f}/{f}\"", .{ std.zig.fmtString(entry.path), std.zig.fmtString(entry.basename) }); - try parseFile(full_path); - } -} + const source = try entry.dir.readFileAlloc(std.testing.allocator, entry.basename, 10 * 1024 * 1024); + defer std.testing.allocator.free(source); -test "parser accepts examples and test documents" { - try parseDirectoryTree("examples"); - try parseDirectoryTree("test"); + try expectParseOk(source); + } } test "parser accept identifier and word tokens" { diff --git a/test/parser/stress.hdoc b/test/accept/stress.hdoc similarity index 100% rename from test/parser/stress.hdoc rename to test/accept/stress.hdoc diff --git a/test/parser/workset.hdoc b/test/accept/workset.hdoc similarity index 100% rename from test/parser/workset.hdoc rename to test/accept/workset.hdoc From f993c3e95653b4dea01d16f9ce828725d514143b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 20:51:08 +0100 Subject: [PATCH 031/116] Implements translation of paragraph nodes. 
--- src/hyperdoc.zig | 24 ++++++++++++++--- src/testsuite.zig | 65 ++++++++++++++++++++++++++++------------------- 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 378d971..97ef573 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -616,9 +616,27 @@ pub const SemanticAnalyzer = struct { } fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + }); + + const heading: Block.Paragraph = .{ + .kind = switch (node.type) { + .p => .p, + .note => .note, + .warning => .warning, + .danger => .danger, + .tip => .tip, + .quote => .quote, + .spoiler => .spoiler, + else => unreachable, + }, + .lang = attrs.lang, + .content = try sema.translate_inline(node), + }; + + return .{ heading, attrs.id }; } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { diff --git a/src/testsuite.zig b/src/testsuite.zig index 7a5d640..d7d985e 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -16,18 +16,26 @@ fn parseDirectoryTree(path: []const u8) !void { var walker = try dir.walk(std.testing.allocator); defer walker.deinit(); + var path_buffer: std.array_list.Managed(u8) = .init(std.testing.allocator); + defer path_buffer.deinit(); + while (try walker.next()) |entry| { if (entry.kind != .file) continue; if (!std.mem.endsWith(u8, entry.path, ".hdoc")) continue; - errdefer std.log.err("failed to process \"{f}/{f}\"", .{ std.zig.fmtString(entry.path), std.zig.fmtString(entry.basename) }); + errdefer std.log.err("failed to process \"{f}/{f}\"", .{ std.zig.fmtString(path), std.zig.fmtString(entry.path) }); const source = try entry.dir.readFileAlloc(std.testing.allocator, entry.basename, 10 * 1024 * 1024); defer 
std.testing.allocator.free(source); - try expectParseOk(source); + path_buffer.clearRetainingCapacity(); + try path_buffer.appendSlice(path); + try path_buffer.append('/'); + try path_buffer.appendSlice(entry.path); + + try expectParseOk(.{ .file_path = path_buffer.items }, source); } } @@ -365,16 +373,20 @@ fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bo } } -fn logDiagnostics(diag: *const hdoc.Diagnostics) void { +const LogDiagOptions = struct { + file_path: []const u8 = "", +}; + +fn logDiagnostics(diag: *const hdoc.Diagnostics, opts: LogDiagOptions) void { for (diag.items.items) |item| { var buf: [256]u8 = undefined; var stream = std.io.fixedBufferStream(&buf); item.code.format(stream.writer()) catch {}; - std.log.err("Diagnostic {d}:{d}: {s}", .{ item.location.line, item.location.column, stream.getWritten() }); + std.log.err("Diagnostic {s}:{d}:{d}: {s}", .{ opts.file_path, item.location.line, item.location.column, stream.getWritten() }); } } -fn validateDiagnostics(code: []const u8, expected: []const hdoc.Diagnostic.Code) !void { +fn validateDiagnostics(opts: LogDiagOptions, code: []const u8, expected: []const hdoc.Diagnostic.Code) !void { try std.testing.expect(expected.len > 0); var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); @@ -390,19 +402,19 @@ fn validateDiagnostics(code: []const u8, expected: []const hdoc.Diagnostic.Code) } if (diagnostics.items.items.len != expected.len) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); } try std.testing.expectEqual(expected.len, diagnostics.items.items.len); for (expected, 0..) 
|exp, idx| { const actual = diagnostics.items.items[idx].code; if (!diagnosticCodesEqual(actual, exp)) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); return error.MissingDiagnosticCode; } } } -fn expectParseOk(code: []const u8) !void { +fn expectParseOk(opts: LogDiagOptions, code: []const u8) !void { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); @@ -410,12 +422,12 @@ fn expectParseOk(code: []const u8) !void { defer doc.deinit(); if (diagnostics.has_error() or diagnostics.has_warning()) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); return error.TestExpectedEqual; } } -fn expectParseNoFail(code: []const u8) !void { +fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); @@ -429,34 +441,35 @@ fn expectParseNoFail(code: []const u8) !void { defer doc.deinit(); if (diagnostics.has_error()) { - logDiagnostics(&diagnostics); + logDiagnostics(&diagnostics, opts); return error.TestExpectedEqual; } } test "parsing valid document yields empty diagnostics" { - try expectParseOk("hdoc(version=\"2.0\");"); + try expectParseOk(.{}, "hdoc(version=\"2.0\");"); } test "diagnostic codes are emitted for expected samples" { - try validateDiagnostics("hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); - try validateDiagnostics("hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); - try validateDiagnostics("hdoc{h1 \"x\"", &.{.unterminated_block_list}); - try validateDiagnostics("hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); + try 
validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); + try validateDiagnostics(.{}, "hdoc{h1 \"x\"", &.{.unterminated_block_list}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); try validateDiagnostics( + .{}, "hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");", &.{ .{ .duplicate_attribute = .{ .name = "lang" } }, .empty_inline_body }, ); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); - try validateDiagnostics("hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); - try validateDiagnostics("h1 \"Title\"", &.{.missing_hdoc_header}); - try validateDiagnostics("hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); - try validateDiagnostics("hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); + try validateDiagnostics(.{}, 
"hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); + try validateDiagnostics(.{}, "h1 \"Title\"", &.{.missing_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } test "parser reports unterminated inline lists" { From 709423cf0dd1f3f8a6efb68b1218857fc4c60fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 21:02:46 +0100 Subject: [PATCH 032/116] Implements join_spans to enable date/time parsing, fixes bad example. --- examples/guide.hdoc | 2 +- src/hyperdoc.zig | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 50f7b64..3f939f4 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -72,7 +72,7 @@ img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/d h2(id="dates") { Dates and Times } p { - The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00}. + The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00Z}. A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 97ef573..01655f7 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -863,8 +863,8 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(&content_spans, node.body, .{}); - // TODO: Convert the content_spans into a "rendered string". - const content_text = ""; + // Convert the content_spans into a "rendered string". 
+ const content_text = try sema.join_spans(content_spans.items, .no_space); const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -942,6 +942,40 @@ pub const SemanticAnalyzer = struct { }); } + const JoinStyle = enum { no_space, one_space }; + fn join_spans(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { + var len: usize = switch (style) { + .no_space => 0, + .one_space => (source_spans.len -| 1), + }; + for (source_spans) |span| { + len += switch (span.content) { + .text => |str| str.len, + .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + }; + } + + var output_str: std.ArrayList(u8) = .empty; + defer output_str.deinit(sema.arena); + + try output_str.ensureTotalCapacityPrecise(sema.arena, len); + + for (source_spans, 0..) |span, index| { + switch (style) { + .no_space => {}, + .one_space => if (index > 0) + output_str.appendAssumeCapacity(' '), + } + + switch (span.content) { + .text => |str| output_str.appendSliceAssumeCapacity(str), + .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + } + } + + return try output_str.toOwnedSlice(sema.arena); + } + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| { From 535c4119d468eb2ea960d4e37d524a50be4e9762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 21:42:56 +0100 Subject: [PATCH 033/116] Implements new attribute hdoc(tz) which provides a timezone hint for all time/datetime values --- docs/specification.md | 93 ++++++++++++++++++++-------------------- src/hyperdoc.zig | 99 ++++++++++++++++++++++++++++++------------- src/main.zig | 1 + src/testsuite.zig | 33 ++++++++++----- 4 files changed, 139 insertions(+), 87 deletions(-) diff --git 
a/docs/specification.md b/docs/specification.md index d3c0959..ce20dd7 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -431,32 +431,32 @@ This separation is intentional: it allows autoformatters to parse and rewrite do ## Element Overview -| Element | Element Type | Allowed Children | Attributes | -| ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------- | -| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | -| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | -| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | -| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | -| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | -| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | -| `toc` | Block | - | `lang`, \[`id`\], `depth` | -| `table` | Block | Table Rows | `lang`, \[`id`\] | -| *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date` | -| `li` | List Item | Blocks, String, Verbatim | `lang` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `columns` | Table Row | `td` ≥ 1 | `lang` | -| `group` | Table Row | Text Body | `lang`, | -| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `\em` | Text Body | Text Body | `lang` | -| `\mono` | Text Body | Text Body | `lang`, `syntax` | -| `\strike` | Text Body | Text Body | `lang` | -| `\sub`, `\sup` | Text Body | Text Body | `lang` | -| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | -| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | -| *Plain Text* | Text Body | - | | -| *String* | Text Body | - | | -| *Verbatim* | Text Body | - | | +| Element | Element Type | Allowed Children | Attributes | +| ----------------------------------------------------------- | ------------ | ---------------------------- | 
-------------------------------------------------- | +| *Document* | Document | `hdoc`, Blocks | | +| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date`, `tz` | +| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | +| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | +| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | +| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | +| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | +| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | +| `toc` | Block | - | `lang`, \[`id`\], `depth` | +| `table` | Block | Table Rows | `lang`, \[`id`\] | +| `li` | List Item | Blocks, String, Verbatim | `lang` | +| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | +| `columns` | Table Row | `td` ≥ 1 | `lang` | +| `group` | Table Row | Text Body | `lang`, | +| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | +| `\em` | Text Body | Text Body | `lang` | +| `\mono` | Text Body | Text Body | `lang`, `syntax` | +| `\strike` | Text Body | Text Body | `lang` | +| `\sub`, `\sup` | Text Body | Text Body | `lang` | +| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | +| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim | `lang`, `fmt` | +| *Plain Text* | Text Body | - | | +| *String* | Text Body | - | | +| *Verbatim* | Text Body | - | | Notes: @@ -466,23 +466,24 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | A date-time value using the format specified below (a conservative intersection of [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), compatible with both) | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | | +| Attribute | Required | Allowed Values | Description | +| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. 
| +| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | +| `title` | No | *Any* | Sets the title of the document or the table row. | +| `author` | No | *Any* | Sets the author of the document. | +| `date` | No | A date-time value using the format specified below | Sets the authoring date of the document. | +| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | +| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | No | *See element documentation* | Defines how the date/time value shall be displayed. | +| `tz` | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | ## Semantic Structure @@ -732,11 +733,11 @@ Time strings MUST follow `hh:mm:ss` with a required time zone. - An optional fractional seconds component MAY follow the seconds field as `.` plus 1, 2, 3, 6, or 9 digits. - The fractional separator MUST be `.`. Comma is not allowed. 
-- A time zone is required and MUST be either `Z` (UTC) or a numeric offset - in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. +- A time zone is required when no `tz` attribute is present on the header node and + MUST be either `Z` (UTC) or a numeric offset in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. - Offset hours MUST be in `00` to `23`, offset minutes MUST be in `00` to `59`. -Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`. +Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`, `22:30:46` (only with `tz` attribute). ### Date/Time Format diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 01655f7..2ca670a 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -17,6 +17,7 @@ pub const Document = struct { title: ?[]const u8, author: ?[]const u8, date: ?DateTime, + timezone: ?[]const u8, pub fn deinit(doc: *Document) void { doc.arena.deinit(); @@ -189,7 +190,7 @@ pub const DateTime = struct { date: Date, time: Time, - pub fn parse(text: []const u8) !DateTime { + pub fn parse(text: []const u8, default_timezone: ?[]const u8) !DateTime { const split_index = std.mem.indexOfScalar(u8, text, 'T') orelse return error.InvalidValue; const head = text[0..split_index]; @@ -197,7 +198,7 @@ pub const DateTime = struct { return .{ .date = try Date.parse(head), - .time = try Time.parse(tail), + .time = try Time.parse(tail, default_timezone), }; } }; @@ -265,8 +266,9 @@ pub const Time = struct { microsecond: u20, // 0-999999 zone_offset: i32, // in minutes - pub fn parse(text: []const u8) !Time { - if (text.len < 9) return error.InvalidValue; + pub fn parse(text: []const u8, default_timezone: ?[]const u8) !Time { + if (text.len < 8) // "HH:MM:SS" + return error.InvalidValue; const hour = std.fmt.parseInt(u8, text[0..2], 10) catch return error.InvalidValue; if (text[2] != ':') return error.InvalidValue; @@ -279,23 +281,30 @@ pub const Time = struct { var index: usize = 8; var microsecond: u20 
= 0; - if (index >= text.len) return error.InvalidValue; + if (index < text.len) { + if (text[index] == '.') { + const start = index + 1; + var end = start; + while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {} + if (end == start) return error.InvalidValue; - if (text[index] == '.') { - const start = index + 1; - var end = start; - while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {} - if (end == start) return error.InvalidValue; - - const fraction_value = std.fmt.parseInt(u64, text[start..end], 10) catch return error.InvalidValue; - microsecond = fractionToMicrosecond(end - start, fraction_value) orelse return error.InvalidValue; - index = end; + const fraction_value = std.fmt.parseInt(u64, text[start..end], 10) catch return error.InvalidValue; + microsecond = fractionToMicrosecond(end - start, fraction_value) orelse return error.InvalidValue; + index = end; + } } - if (index >= text.len) return error.InvalidValue; + const timezone = if (index == text.len) + default_timezone orelse return error.MissingTimezone + else + text[index..]; + + if (timezone.len != 1 and timezone.len != 6) // "Z" or "±HH:MM" + return error.InvalidValue; - if (text[index] == 'Z') { - if (index + 1 != text.len) return error.InvalidValue; + if (timezone.len == 1) { + if (timezone[0] != 'Z') + return error.InvalidValue; return .{ .hour = @intCast(hour), .minute = @intCast(minute), @@ -304,15 +313,19 @@ pub const Time = struct { .zone_offset = 0, }; } + std.debug.assert(timezone.len == 6); - const sign_char = text[index]; - if (sign_char != '+' and sign_char != '-') return error.InvalidValue; - const sign: i32 = if (sign_char == '+') 1 else -1; + const sign_char = timezone[0]; + const sign: i32 = switch (sign_char) { + '+' => 1, + '-' => -1, + else => return error.InvalidValue, + }; + if (timezone[3] != ':') + return error.InvalidValue; - if (text.len - index != 6) return error.InvalidValue; - const zone_hour = std.fmt.parseInt(u8, text[index + 1 .. 
index + 3], 10) catch return error.InvalidValue; - if (text[index + 3] != ':') return error.InvalidValue; - const zone_minute = std.fmt.parseInt(u8, text[index + 4 .. index + 6], 10) catch return error.InvalidValue; + const zone_hour = std.fmt.parseInt(u8, timezone[1..3], 10) catch return error.InvalidValue; + const zone_minute = std.fmt.parseInt(u8, timezone[4..6], 10) catch return error.InvalidValue; if (zone_hour > 23 or zone_minute > 59) return error.InvalidValue; @@ -428,6 +441,7 @@ pub fn parse( .version = header.version, .author = header.author, .date = header.date, + .timezone = header.timezone, }; } @@ -460,6 +474,7 @@ pub const SemanticAnalyzer = struct { lang: ?[]const u8, title: ?[]const u8, author: ?[]const u8, + timezone: ?[]const u8, date: ?DateTime, }; @@ -520,6 +535,7 @@ pub const SemanticAnalyzer = struct { author: ?[]const u8 = null, date: ?DateTime = null, lang: ?[]const u8 = null, + tz: ?[]const u8 = null, }); return .{ @@ -528,6 +544,7 @@ pub const SemanticAnalyzer = struct { .title = attrs.title, .author = attrs.author, .date = attrs.date, + .timezone = attrs.tz, }; } @@ -918,11 +935,28 @@ pub const SemanticAnalyzer = struct { ) !Span.Content { const Format: type = DTValue.Format; - const value: DTValue = if (DTValue.parse(value_str)) |value| + const timezone_hint: ?[]const u8 = if (sema.header) |header| header.timezone else null; + + const value_or_err: error{ InvalidValue, MissingTimezone }!DTValue = switch (DTValue) { + Date => Date.parse(value_str), + Time => Time.parse(value_str, timezone_hint), + DateTime => DateTime.parse(value_str, timezone_hint), + else => unreachable, + }; + + const value: DTValue = if (value_or_err) |value| value - else |_| blk: { - // TODO: Report error for invalid value - try sema.emit_diagnostic(.invalid_date_time, node.location); + else |err| blk: { + switch (err) { + error.InvalidValue => { + try sema.emit_diagnostic(.invalid_date_time, node.location); + }, + error.MissingTimezone => { + // TODO: Use 
(timezone_hint != null) to emit diagnostic for hint with + // adding `tz` attribute when all date/time values share a common base. + try sema.emit_diagnostic(.invalid_date_time, node.location); + }, + } break :blk std.mem.zeroes(DTValue); }; @@ -1118,6 +1152,11 @@ pub const SemanticAnalyzer = struct { const value = try sema.unescape_string(attrib); + const timezone_hint = if (sema.header) |header| + header.timezone + else + null; + return switch (T) { []const u8 => value, @@ -1138,9 +1177,9 @@ pub const SemanticAnalyzer = struct { }, Version => Version.parse(value) catch return error.InvalidValue, - DateTime => DateTime.parse(value) catch return error.InvalidValue, Date => Date.parse(value) catch return error.InvalidValue, - Time => Time.parse(value) catch return error.InvalidValue, + Time => Time.parse(value, timezone_hint) catch return error.InvalidValue, + DateTime => DateTime.parse(value, timezone_hint) catch return error.InvalidValue, else => @compileError("Unsupported attribute type: " ++ @typeName(T)), }; diff --git a/src/main.zig b/src/main.zig index 2f9b602..645041c 100644 --- a/src/main.zig +++ b/src/main.zig @@ -432,6 +432,7 @@ test "dumpDocument escapes string values" { .title = title, .author = null, .date = null, + .timezone = null, }; defer doc.deinit(); diff --git a/src/testsuite.zig b/src/testsuite.zig index d7d985e..111649f 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -529,31 +529,42 @@ test "Date.parse accepts ISO dates" { } test "Time.parse accepts ISO times with zones" { - const utc = try hdoc.Time.parse("22:30:46Z"); + const utc = try hdoc.Time.parse("22:30:46Z", null); try std.testing.expectEqual(@as(u5, 22), utc.hour); try std.testing.expectEqual(@as(u6, 30), utc.minute); try std.testing.expectEqual(@as(u6, 46), utc.second); try std.testing.expectEqual(@as(u20, 0), utc.microsecond); try std.testing.expectEqual(@as(i32, 0), utc.zone_offset); - const fractional = try hdoc.Time.parse("22:30:46.136+01:00"); + const utc_hint = try 
hdoc.Time.parse("22:30:46", "Z"); + try std.testing.expectEqual(@as(u5, 22), utc_hint.hour); + try std.testing.expectEqual(@as(u6, 30), utc_hint.minute); + try std.testing.expectEqual(@as(u6, 46), utc_hint.second); + try std.testing.expectEqual(@as(u20, 0), utc_hint.microsecond); + try std.testing.expectEqual(@as(i32, 0), utc_hint.zone_offset); + + const fractional = try hdoc.Time.parse("22:30:46.136+01:00", null); try std.testing.expectEqual(@as(u20, 136_000), fractional.microsecond); try std.testing.expectEqual(@as(i32, 60), fractional.zone_offset); - const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30"); + const fractional_hint = try hdoc.Time.parse("22:30:46.136", "+01:30"); + try std.testing.expectEqual(@as(u20, 136_000), fractional_hint.microsecond); + try std.testing.expectEqual(@as(i32, 90), fractional_hint.zone_offset); + + const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30", null); try std.testing.expectEqual(@as(u20, 136_797), nanos.microsecond); try std.testing.expectEqual(@as(i32, -330), nanos.zone_offset); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("21:30:46,1Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("22:30:46")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("24:00:00Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:60:00Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:60Z")); - try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:59.1234Z")); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("21:30:46,1Z", null)); + try std.testing.expectError(error.MissingTimezone, hdoc.Time.parse("22:30:46", null)); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("24:00:00Z", null)); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:60:00Z", null)); + try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:60Z", null)); + try 
std.testing.expectError(error.InvalidValue, hdoc.Time.parse("23:59:59.1234Z", null)); } test "DateTime.parse accepts ISO date-time" { - const datetime = try hdoc.DateTime.parse("2025-12-25T22:31:50.13+01:00"); + const datetime = try hdoc.DateTime.parse("2025-12-25T22:31:50.13+01:00", null); try std.testing.expectEqual(@as(i32, 2025), datetime.date.year); try std.testing.expectEqual(@as(u4, 12), datetime.date.month); try std.testing.expectEqual(@as(u5, 25), datetime.date.day); @@ -563,5 +574,5 @@ test "DateTime.parse accepts ISO date-time" { try std.testing.expectEqual(@as(u20, 130_000), datetime.time.microsecond); try std.testing.expectEqual(@as(i32, 60), datetime.time.zone_offset); - try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z")); + try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z", null)); } From 93fc34b2cefd83f1e5691bd9326072bbf98b8b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 22:01:00 +0100 Subject: [PATCH 034/116] Adds tests for Date/Time/DateTime.parse --- src/hyperdoc.zig | 179 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2ca670a..0715b6d 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -221,6 +221,8 @@ pub const Date = struct { day: u5, // 1-31 pub fn parse(text: []const u8) !Date { + if (text.len < 7) // "Y-MM-DD" + return error.InvalidValue; const first_dash = std.mem.indexOfScalar(u8, text, '-') orelse return error.InvalidValue; const tail = text[first_dash + 1 ..]; const second_dash_rel = std.mem.indexOfScalar(u8, tail, '-') orelse return error.InvalidValue; @@ -2415,3 +2417,180 @@ test "fuzz string unescape" { }, }); } + +test "fuzz Date.parse" { + const Impl = struct { + fn testOne(impl: @This(), string_literal: []const u8) !void { + _ = impl; + _ = Date.parse(string_literal) catch return; + } + }; + + const 
corpus: []const []const u8 = &.{ + "", + // good input: + "2025-12-25", + "1-01-01", + "0-01-01", + "1999-11-30", + "2024-02-29", + "2025-02-31", + "9999-12-31", + "10000-01-01", + "123456-07-04", + "42-03-15", + "2025-01-31", + "2025-04-30", + "2025-06-01", + "2025-10-10", + "2025-09-09", + "2025-08-08", + "2025-07-07", + "2025-05-05", + "2025-12-01", + "2025-11-11", + // bad input: + "2025-1-01", + "2025-01-1", + "2025/01/01", + "2025-00-10", + "2025-13-10", + "2025-12-00", + "2025-12-32", + "2025-12-3a", + "20a5-12-25", + "-2025-12-25", + "+2025-12-25", + "20251225", + "2025--12-25", + "2025-12-25 ", + " 2025-12-25", + "٢٠٢٥-١٢-٢٥", + "2025-12", + "2025-12-250", + "2025-12-25T00:00:00Z", + "2025-12-25\n", + }; + + for (corpus) |item| { + try Impl.testOne(.{}, item); + } + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = corpus, + }); +} + +test "fuzz Time.parse" { + const Impl = struct { + fn testOne(impl: @This(), string_literal: []const u8) !void { + _ = impl; + _ = Time.parse(string_literal, null) catch return; + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + "", + // good input: + "00:00:00Z", + "23:59:59Z", + "12:34:56Z", + "01:02:03+00:00", + "22:30:46+01:00", + "22:30:46-05:30", + "08:15:00+14:00", + "19:45:30-00:45", + "05:06:07.1Z", + "05:06:07.12Z", + "05:06:07.123Z", + "05:06:07.123456Z", + "05:06:07.123456789Z", + "23:59:59.000+02:00", + "10:20:30.000000-03:00", + "10:20:30.000000000+03:00", + "00:00:00.9-12:34", + "14:00:00+23:59", + "09:09:09.6+09:00", + "16:17:18.136+01:00", + // bad input: + "24:00:00Z", + "23:60:00Z", + "23:59:60Z", + "9:00:00Z", + "09:0:00Z", + "09:00:0Z", + "09:00Z", + "09:00:00", + "09:00:00z", + "09:00:00+1:00", + "09:00:00+01:0", + "09:00:00+0100", + "09:00:00+25:00", + "09:00:00+01:60", + "09:00:00,+01:00", + "09:00:00,123Z", + "09:00:00.1234Z", + "09:00:00.12345Z", + "09:00:00.1234567Z", + "٠٩:٠٠:٠٠Z", + }, + }); +} + +test "fuzz DateTime.parse" { + const Impl = struct { + fn 
testOne(impl: @This(), string_literal: []const u8) !void { + _ = impl; + _ = DateTime.parse(string_literal, null) catch return; + } + }; + + try std.testing.fuzz(Impl{}, Impl.testOne, .{ + .corpus = &.{ + "", + // good input: + "2025-12-25T22:31:50Z", + "2025-12-25T22:31:50.1Z", + "2025-12-25T22:31:50.12+01:00", + "2025-12-25T22:31:50.123-05:30", + "1-01-01T00:00:00Z", + "0-01-01T00:00:00+00:00", + "1999-11-30T23:59:59-00:45", + "2024-02-29T12:00:00Z", + "2025-02-31T08:15:00+14:00", + "9999-12-31T23:59:59.123456Z", + "10000-01-01T00:00:00.123456789+03:00", + "42-03-15T01:02:03+23:59", + "2025-01-31T10:20:30.000000-03:00", + "2025-04-30T10:20:30.000+02:00", + "2025-06-01T16:17:18.136+01:00", + "2025-10-10T09:09:09.6+09:00", + "2025-09-09T19:45:30-00:45", + "2025-08-08T05:06:07.123Z", + "2025-07-07T05:06:07.123456789Z", + "123456-07-04T14:00:00Z", + // bad input: + "2025-12-25 22:31:50Z", + "2025-12-25t22:31:50Z", + "2025-12-25T22:31:50", + "2025-12-25T22:31Z", + "2025-12-25T24:00:00Z", + "2025-12-25T23:60:00Z", + "2025-12-25T23:59:60Z", + "2025-12-25T23:59:59.1234Z", + "2025-12-25T23:59:59,123Z", + "2025-12-25T23:59:59+0100", + "2025-12-25T23:59:59+01:60", + "2025-12-25T23:59:59+25:00", + "2025-00-25T23:59:59Z", + "2025-13-25T23:59:59Z", + "2025-12-00T23:59:59Z", + "2025-12-32T23:59:59Z", + "2025-12-25TT23:59:59Z", + "2025-12-25T23:59:59Z ", + "٢٠٢٥-١٢-٢٥T٢٢:٣١:٥٠Z", + "2025-12-25T23:59:59+01", + }, + }); +} From 103bdecf53ff4c3e9e24ee1637cd9b74672d5562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 22:32:50 +0100 Subject: [PATCH 035/116] Adds parsing of lists and list nodes. 
--- src/hyperdoc.zig | 127 +++++++++++++++++++++++++++++++++++++-- src/main.zig | 6 +- test/accept/workset.hdoc | 7 +++ 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 0715b6d..c45cd29 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -63,7 +63,7 @@ pub const Block = union(enum) { pub const ListItem = struct { lang: ?[]const u8, - content: []Span, + content: []Block, }; pub const Image = struct { @@ -114,7 +114,7 @@ pub const Block = union(enum) { pub const TableCell = struct { lang: ?[]const u8, colspan: ?u32, - content: []Span, + content: []Block, }; }; @@ -550,6 +550,7 @@ pub const SemanticAnalyzer = struct { }; } + /// Translates a top-level block node. fn translate_block_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, InvalidNodeType, BadAttributes, Unimplemented }!struct { Block, ?Reference } { std.debug.assert(node.type != .hdoc); @@ -659,9 +660,46 @@ pub const SemanticAnalyzer = struct { } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + first: ?u32 = null, + }); + + if (attrs.first != null and node.type != .ol) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "first" } }, get_attribute_location(node, "first", .name).?); + } + + var children: std.ArrayList(Block.ListItem) = .empty; + defer children.deinit(sema.arena); + + switch (node.body) { + .list => |child_nodes| { + try children.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + for (child_nodes) |child_node| { + const list_item = sema.translate_list_item_node(child_node) catch |err| switch (err) { + error.InvalidNodeType => { + try sema.emit_diagnostic(.illegal_child_item, node.location); + continue; + }, + else => |e| return e, 
+ }; + children.appendAssumeCapacity(list_item); + } + }, + + .empty, .string, .text_span, .verbatim => { + try sema.emit_diagnostic(.list_body_required, node.location); + }, + } + + const list: Block.List = .{ + .first = attrs.first, + .lang = attrs.lang, + .items = try children.toOwnedSlice(sema.arena), + }; + + return .{ list, attrs.id }; } fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { @@ -688,6 +726,67 @@ pub const SemanticAnalyzer = struct { return error.Unimplemented; // TODO: Implement this node type } + fn translate_list_item_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.ListItem { + switch (node.type) { + .li => {}, + else => return error.InvalidNodeType, + } + + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + }); + + return .{ + .lang = attrs.lang, + .content = try sema.translate_block_list(node, .text_to_p), + }; + } + + const BlockTextUpgrade = enum { no_upgrade, text_to_p }; + + fn translate_block_list(sema: *SemanticAnalyzer, node: Parser.Node, upgrade: BlockTextUpgrade) error{ Unimplemented, InvalidNodeType, OutOfMemory, BadAttributes }![]Block { + switch (node.body) { + .list => |child_nodes| { + var blocks: std.ArrayList(Block) = .empty; + defer blocks.deinit(sema.arena); + + try blocks.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + + for (child_nodes) |child_node| { + const block, const id = try sema.translate_block_node(child_node); + if (id != null) { + try sema.emit_diagnostic(.illegal_id_attribute, get_attribute_location(child_node, "id", .name).?); + } + blocks.appendAssumeCapacity(block); + } + + return try blocks.toOwnedSlice(sema.arena); + }, + + .empty, .string, .verbatim, .text_span => switch (upgrade) { + .no_upgrade => { + try sema.emit_diagnostic(.list_body_required, node.location); // TODO: Use better diagnostic + return &.{}; + }, + .text_to_p => { + const spans = try sema.translate_inline(node); + + const blocks = 
try sema.arena.alloc(Block, 1); + blocks[0] = .{ + .paragraph = .{ + .kind = .p, + .lang = null, + .content = spans, + }, + }; + + return blocks; + }, + }, + } + } + + /// Translates a node into a sequence of inline spans. fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); @@ -923,7 +1022,10 @@ pub const SemanticAnalyzer = struct { .td, .li, .unknown_block, - => @panic("PARSER ERROR: The parser emitted a block node inside an inline context"), + => { + std.log.err("type: {t} location: {}", .{ node.type, node.location }); + @panic("PARSER ERROR: The parser emitted a block node inside an inline context"); + }, } } @@ -1162,6 +1264,8 @@ pub const SemanticAnalyzer = struct { return switch (T) { []const u8 => value, + u32 => std.fmt.parseInt(u32, value, 10) catch return error.InvalidValue, + Reference => { const stripped = std.mem.trim(u8, value, whitespace_chars); if (stripped.len != value.len) { @@ -2104,6 +2208,9 @@ pub const Diagnostic = struct { invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, illegal_character: ForbiddenControlCharacter, + illegal_child_item, + list_body_required, + illegal_id_attribute, // warnings: document_starts_with_bom, @@ -2139,6 +2246,9 @@ pub const Diagnostic = struct { .invalid_string_escape, .illegal_character, .invalid_unicode_string_escape, + .illegal_child_item, + .list_body_required, + .illegal_id_attribute, => .@"error", .unknown_attribute, @@ -2206,6 +2316,11 @@ pub const Diagnostic = struct { .invalid_unicode_string_escape => try w.writeAll("Invalid unicode escape sequence"), .illegal_character => |ctx| try w.print("Forbidden control character U+{X:0>4}.", .{ctx.codepoint}), + + .list_body_required => try w.writeAll("Node requires list body."), + .illegal_child_item => try w.writeAll("Node not allowed here."), + + .illegal_id_attribute => try w.writeAll("Attribute 'id' not 
allowed here."), } } }; diff --git a/src/main.zig b/src/main.zig index 645041c..e7e83a5 100644 --- a/src/main.zig +++ b/src/main.zig @@ -234,7 +234,7 @@ fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []c fn dumpListItem(writer: anytype, indent: usize, item: hdoc.Block.ListItem) !void { try dumpOptionalStringField(writer, indent, "lang", item.lang); - try dumpSpanListField(writer, indent, "content", item.content); + try dumpBlockListField(writer, indent, "content", item.content); } fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) !void { @@ -254,7 +254,7 @@ fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: [] fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { try dumpOptionalStringField(writer, indent, "lang", cell.lang); try dumpOptionalNumberField(writer, indent, "colspan", cell.colspan); - try dumpSpanListField(writer, indent, "content", cell.content); + try dumpBlockListField(writer, indent, "content", cell.content); } fn dumpTableCellsField(writer: anytype, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) !void { @@ -367,7 +367,7 @@ fn dumpBlockInline(writer: anytype, indent: usize, block: hdoc.Block) !void { } } -fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) !void { +fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) @TypeOf(writer).Error!void { try writeIndent(writer, indent); if (blocks.len == 0) { try writer.print("{s}: []\n", .{key}); diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index fbfaf77..26561fb 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -4,3 +4,10 @@ h1: | Hello, World! h2{Hello \em{World}!} + +p { This is a span. 
} + +ul { + li "Item 1" + li { p { Item 2 } } +} \ No newline at end of file From fa37e8b2e3d56068574cc6a76dc28b8e73994cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 22:34:14 +0100 Subject: [PATCH 036/116] Fixes missing fallback for ol having first==1 --- src/hyperdoc.zig | 2 +- test/accept/workset.hdoc | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c45cd29..2257d0b 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -694,7 +694,7 @@ pub const SemanticAnalyzer = struct { } const list: Block.List = .{ - .first = attrs.first, + .first = attrs.first orelse if (node.type == .ol) 1 else null, .lang = attrs.lang, .items = try children.toOwnedSlice(sema.arena), }; diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 26561fb..696b944 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -10,4 +10,8 @@ p { This is a span. } ul { li "Item 1" li { p { Item 2 } } +} +ol { + li "Item 1" + li { p { Item 2 } } } \ No newline at end of file From 035ac7c37d10d11ca5dbbe1a9625b499bd1c6afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 28 Dec 2025 23:25:33 +0100 Subject: [PATCH 037/116] Vibecoded: Implements rest of the node types, adds TODO comments for further tasks --- AGENTS.md | 3 + examples/featurematrix.hdoc | 18 +-- examples/tables.hdoc | 2 +- src/hyperdoc.zig | 221 ++++++++++++++++++++++++++++++++---- src/main.zig | 2 +- test/accept/workset.hdoc | 1 + 6 files changed, 216 insertions(+), 31 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 2579445..2ab16dd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,6 +10,9 @@ - Run `zig build` to validate the main application still compiles - Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. - Avoid editing documentation unless the request explicitly asks for it. 
+- Treat `docs/specification.md` as the authoritative source of behavior; examples may be outdated or incorrect. +- If the spec is unclear or conflicts with code/tests, ask before changing behavior. +- Do not implement "just make it work" fallbacks that alter semantics to satisfy examples. ## Zig Programming Style diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index 3600e02..d6dd2a3 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -5,10 +5,10 @@ h1 { Small Computer Feature Matrix } table { columns { td "Ashet Home Computer" - td { \link(uri="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } - td { \link(uri="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } - td { \link(uri="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } - td { \link(uri="https://www.codycomputer.org/") "Cody Computer" } + td { p { \link(uri="https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/") "Raspberry Pi 4" } } + td { p { \link(uri="https://store.arduino.cc/products/arduino-uno-rev3") "Arduino Uno REV3" } } + td { p { \link(uri="https://github.com/neotron-Compute/neotron-Pico") "Neotron Pico" } } + td { p { \link(uri="https://www.codycomputer.org/") "Cody Computer" } } } row(title="CPU Bus Width") { td "32 bit" @@ -25,11 +25,11 @@ table { td "6502" } row(title="CPU Model") { - td { \link(uri="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } - td { \link(uri="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } - td { \link(uri="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } - td { \link(uri="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } - td { \link(uri="https://wdc65xx.com/integrated-circuit") "W65C02S" } + td { p { \link(uri="https://www.raspberrypi.com/products/rp2350/") "Raspberry Pi RP2350" } } + td { p { 
\link(uri="https://www.raspberrypi.com/documentation/computers/processors.html#bcm2711") "Broadcom BCM2711" } } + td { p { \link(uri="https://www.microchip.com/en-us/product/atmega328p") "ATmega328P" } } + td { p { \link(uri="https://www.raspberrypi.com/products/rp2040/") "Raspberry Pi RP2040" } } + td { p { \link(uri="https://wdc65xx.com/integrated-circuit") "W65C02S" } } } row(title="CPU Cores") { td "2" diff --git a/examples/tables.hdoc b/examples/tables.hdoc index 28f73f9..70d0e8a 100644 --- a/examples/tables.hdoc +++ b/examples/tables.hdoc @@ -21,7 +21,7 @@ table(id="inventory") { row(title="Vegetables") { td "Carrots" td "7" - td { p { Store at \time(fmt="rough"){08:00:00}. } } + td { p { Store at \time(fmt="rough"){08:00:00Z}. } } } group { Pantry } row(title="Dry Goods") { diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2257d0b..370c357 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -113,7 +113,7 @@ pub const Block = union(enum) { pub const TableCell = struct { lang: ?[]const u8, - colspan: ?u32, + colspan: u32, content: []Block, }; }; @@ -364,7 +364,7 @@ pub const Uri = struct { text: []const u8, pub fn init(text: []const u8) Uri { - // TODO: Add correctness validation here + // TODO: Add correctness validation here (IRI syntax, non-empty). return .{ .text = text }; } }; @@ -376,7 +376,7 @@ pub const Reference = struct { text: []const u8, pub fn init(text: []const u8) Reference { - // TODO: Add correctness validation here + // TODO: Add correctness validation here (non-empty, allowed characters). return .{ .text = text }; } }; @@ -433,6 +433,7 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; + // TODO: Validate document-level semantic constraints (unique ids, ref resolution, table shape). 
return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), @@ -703,27 +704,205 @@ pub const SemanticAnalyzer = struct { } fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + alt: ?[]const u8 = null, + path: []const u8, + }); + + // TODO: Enforce non-empty "path" (required) and "alt" (if provided). + const content = switch (node.body) { + .empty => @constCast(&[_]Span{}), + else => try sema.translate_inline(node), + }; + + const image: Block.Image = .{ + .lang = attrs.lang, + .alt = attrs.alt, + .path = attrs.path, + .content = content, + }; + + return .{ image, attrs.id }; } fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + syntax: ?[]const u8 = null, + }); + + const preformatted: Block.Preformatted = .{ + .lang = attrs.lang, + .syntax = attrs.syntax, + .content = try sema.translate_inline(node), + }; + + return .{ preformatted, attrs.id }; } fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + depth: ?u32 = null, + }); + + var depth: ?u8 = null; + if (attrs.depth) |depth_value| { + if (depth_value < 1 or depth_value > 3) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "depth" } }, get_attribute_location(node, "depth", .value) orelse 
node.location); + } else { + depth = @intCast(depth_value); + } + } + + switch (node.body) { + .empty => {}, + .list => |child_nodes| { + for (child_nodes) |child_node| { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + } + }, + .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.illegal_child_item, node.location); + }, + } + + const toc: Block.TableOfContents = .{ + .lang = attrs.lang, + .depth = depth, + }; + + return .{ toc, attrs.id }; } fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { - _ = sema; - _ = node; - return error.Unimplemented; // TODO: Implement this node type + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + id: ?Reference = null, + }); + + var rows: std.ArrayList(Block.TableRow) = .empty; + defer rows.deinit(sema.arena); + + switch (node.body) { + .list => |child_nodes| { + try rows.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + for (child_nodes) |child_node| { + switch (child_node.type) { + .columns => { + const row_attrs = try sema.get_attributes(child_node, struct { + lang: ?[]const u8 = null, + }); + + const cells = try sema.translate_table_cells(child_node); + + rows.appendAssumeCapacity(.{ + .columns = .{ + .lang = row_attrs.lang, + .cells = cells, + }, + }); + }, + .row => { + const row_attrs = try sema.get_attributes(child_node, struct { + lang: ?[]const u8 = null, + title: ?[]const u8 = null, + }); + + const cells = try sema.translate_table_cells(child_node); + + rows.appendAssumeCapacity(.{ + .row = .{ + .lang = row_attrs.lang, + .title = row_attrs.title, + .cells = cells, + }, + }); + }, + .group => { + const row_attrs = try sema.get_attributes(child_node, struct { + lang: ?[]const u8 = null, + }); + + rows.appendAssumeCapacity(.{ + .group = .{ + .lang = row_attrs.lang, + .content = try sema.translate_inline(child_node), + }, + }); + }, + else => { + try sema.emit_diagnostic(.illegal_child_item, 
child_node.location); + }, + } + } + }, + .empty, .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.list_body_required, node.location); + }, + } + + // TODO: Validate column counts after colspan and title/group leading column rules. + const table: Block.Table = .{ + .lang = attrs.lang, + .rows = try rows.toOwnedSlice(sema.arena), + }; + + return .{ table, attrs.id }; + } + + fn translate_table_cells(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, InvalidNodeType, Unimplemented }![]Block.TableCell { + var cells: std.ArrayList(Block.TableCell) = .empty; + defer cells.deinit(sema.arena); + + switch (node.body) { + .list => |child_nodes| { + try cells.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); + for (child_nodes) |child_node| { + const cell = sema.translate_table_cell_node(child_node) catch |err| switch (err) { + error.InvalidNodeType => { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + continue; + }, + else => |e| return e, + }; + cells.appendAssumeCapacity(cell); + } + }, + .empty, .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.list_body_required, node.location); + }, + } + + return try cells.toOwnedSlice(sema.arena); + } + + fn translate_table_cell_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, InvalidNodeType, Unimplemented }!Block.TableCell { + switch (node.type) { + .td => {}, + else => return error.InvalidNodeType, + } + + const attrs = try sema.get_attributes(node, struct { + lang: ?[]const u8 = null, + colspan: ?u32 = null, + }); + + var colspan = attrs.colspan orelse 1; + if (colspan < 1) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "colspan" } }, get_attribute_location(node, "colspan", .value) orelse node.location); + colspan = 1; + } + + return .{ + .lang = attrs.lang, + .colspan = colspan, + .content = try sema.translate_block_list(node, .text_to_p), + }; } fn 
translate_list_item_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.ListItem { @@ -979,10 +1158,11 @@ pub const SemanticAnalyzer = struct { // TODO: Implement automatic space insertion. // This must be done when two consecutive nodes are separated by a space + // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. try sema.translate_inline_body(&content_spans, node.body, .{}); // Convert the content_spans into a "rendered string". - const content_text = try sema.join_spans(content_spans.items, .no_space); + const content_text = try sema.render_spans_to_plaintext(content_spans.items, .no_space); const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1081,7 +1261,7 @@ pub const SemanticAnalyzer = struct { } const JoinStyle = enum { no_space, one_space }; - fn join_spans(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { + fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { var len: usize = switch (style) { .no_space => 0, .one_space => (source_spans.len -| 1), @@ -1191,6 +1371,7 @@ pub const SemanticAnalyzer = struct { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; + // TODO: Enforce per-attribute constraints from the spec (non-empty strings, lang tag format, etc). var required: std.EnumSet(Fields) = .initEmpty(); var attrs: Attrs = undefined; @@ -1425,9 +1606,9 @@ pub const SemanticAnalyzer = struct { }, else => { - // Unknown escape sequence, emit escaped char verbatim: - // TODO: How to handle something like "\😭", which is - // definitly valid and in-scope. + // Unknown escape sequence, emit escaped char verbatim. 
Use the full UTF-8 codepoint + // inside the error message, so we can tell that "\😢" is not a valid escape sequence + // instead of saying that "\{F0}" is not a valid escape sequence const len = std.unicode.utf8ByteSequenceLength(esc_char) catch unreachable; diff --git a/src/main.zig b/src/main.zig index e7e83a5..fb5fb83 100644 --- a/src/main.zig +++ b/src/main.zig @@ -253,7 +253,7 @@ fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: [] fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { try dumpOptionalStringField(writer, indent, "lang", cell.lang); - try dumpOptionalNumberField(writer, indent, "colspan", cell.colspan); + try dumpOptionalNumberField(writer, indent, "colspan", @as(?u32, cell.colspan)); try dumpBlockListField(writer, indent, "content", cell.content); } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 696b944..19ffba1 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -11,6 +11,7 @@ ul { li "Item 1" li { p { Item 2 } } } + ol { li "Item 1" li { p { Item 2 } } From 40d82be6047f9b29ef38ff8a9f233a51cbff0fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 14:18:07 +0100 Subject: [PATCH 038/116] Moves dump code into src/render/dump.zig --- src/hyperdoc.zig | 5 +- src/main.zig | 483 +----------------------------------------- src/render/dump.zig | 497 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 505 insertions(+), 480 deletions(-) create mode 100644 src/render/dump.zig diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 370c357..b752b72 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1,5 +1,8 @@ const std = @import("std"); -const parser_toolkit = @import("parser-toolkit"); + +pub const render = struct { + pub const yaml = @import("render/dump.zig").render; +}; /// A HyperDoc document. Contains both memory and /// tree structure of the document. 
diff --git a/src/main.zig b/src/main.zig index fb5fb83..19161d5 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,483 +4,6 @@ const hdoc = @import("hyperdoc"); var debug_allocator: std.heap.DebugAllocator(.{}) = .init; -const indent_step: usize = 2; - -fn writeIndent(writer: anytype, indent: usize) !void { - var i: usize = 0; - while (i < indent) : (i += 1) { - try writer.writeByte(' '); - } -} - -fn writeStringValue(writer: anytype, value: []const u8) !void { - try writer.print("\"{f}\"", .{std.zig.fmtString(value)}); -} - -fn writeOptionalStringValue(writer: anytype, value: ?[]const u8) !void { - if (value) |text| { - try writeStringValue(writer, text); - } else { - try writer.writeAll("null"); - } -} - -fn writeOptionalIntValue(writer: anytype, value: anytype) !void { - if (value) |number| { - try writer.print("{}", .{number}); - } else { - try writer.writeAll("null"); - } -} - -fn dumpOptionalStringField(writer: anytype, indent: usize, key: []const u8, value: ?[]const u8) !void { - try writeIndent(writer, indent); - try writer.print("{s}: ", .{key}); - try writeOptionalStringValue(writer, value); - try writer.writeByte('\n'); -} - -fn dumpOptionalNumberField(writer: anytype, indent: usize, key: []const u8, value: anytype) !void { - try writeIndent(writer, indent); - try writer.print("{s}: ", .{key}); - try writeOptionalIntValue(writer, value); - try writer.writeByte('\n'); -} - -fn dumpBoolField(writer: anytype, indent: usize, key: []const u8, value: bool) !void { - try writeIndent(writer, indent); - try writer.print("{s}: {}\n", .{ key, value }); -} - -fn dumpEnumField(writer: anytype, indent: usize, key: []const u8, value: anytype) !void { - try writeIndent(writer, indent); - try writer.print("{s}: {s}\n", .{ key, @tagName(value) }); -} - -fn dumpVersion(writer: anytype, indent: usize, version: hdoc.Version) !void { - try writeIndent(writer, indent); - try writer.writeAll("version:\n"); - try writeIndent(writer, indent + indent_step); - try 
writer.print("major: {}\n", .{version.major}); - try writeIndent(writer, indent + indent_step); - try writer.print("minor: {}\n", .{version.minor}); -} - -fn dumpDate(writer: anytype, indent: usize, date: hdoc.Date) !void { - try writeIndent(writer, indent); - try writer.print("year: {}\n", .{date.year}); - try writeIndent(writer, indent); - try writer.print("month: {}\n", .{date.month}); - try writeIndent(writer, indent); - try writer.print("day: {}\n", .{date.day}); -} - -fn dumpTime(writer: anytype, indent: usize, time: hdoc.Time) !void { - try writeIndent(writer, indent); - try writer.print("hour: {}\n", .{time.hour}); - try writeIndent(writer, indent); - try writer.print("minute: {}\n", .{time.minute}); - try writeIndent(writer, indent); - try writer.print("second: {}\n", .{time.second}); - try writeIndent(writer, indent); - try writer.print("microsecond: {}\n", .{time.microsecond}); -} - -fn dumpDateTime(writer: anytype, indent: usize, datetime: hdoc.DateTime) !void { - try writeIndent(writer, indent); - try writer.writeAll("date:\n"); - try dumpDate(writer, indent + indent_step, datetime.date); - try writeIndent(writer, indent); - try writer.writeAll("time:\n"); - try dumpTime(writer, indent + indent_step, datetime.time); -} - -fn writeAttrSeparator(writer: anytype, first: *bool) !void { - if (first.*) { - first.* = false; - } else { - try writer.writeByte(' '); - } -} - -fn writeSpanAttributes(writer: anytype, span: hdoc.Span) !void { - try writer.writeByte('['); - var first = true; - if (span.attribs.em) { - try writeAttrSeparator(writer, &first); - try writer.writeAll("em"); - } - if (span.attribs.mono) { - try writeAttrSeparator(writer, &first); - try writer.writeAll("mono"); - } - if (span.attribs.strike) { - try writeAttrSeparator(writer, &first); - try writer.writeAll("strike"); - } - if (span.attribs.position != .baseline) { - try writeAttrSeparator(writer, &first); - try writer.print("position=\"{s}\"", .{@tagName(span.attribs.position)}); - } - 
switch (span.attribs.link) { - .none => {}, - .ref => |value| { - try writeAttrSeparator(writer, &first); - try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); - }, - .uri => |value| { - try writeAttrSeparator(writer, &first); - try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); - }, - } - if (span.attribs.lang.len != 0) { - try writeAttrSeparator(writer, &first); - try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang)}); - } - if (span.attribs.syntax.len != 0) { - try writeAttrSeparator(writer, &first); - try writer.print("syntax=\"{f}\"", .{std.zig.fmtString(span.attribs.syntax)}); - } - try writer.writeByte(']'); -} - -fn writeDateValue(writer: anytype, date: hdoc.Date) !void { - try writer.print("{d:0>4}-{d:0>2}-{d:0>2}", .{ date.year, date.month, date.day }); -} - -fn writeTimeValue(writer: anytype, time: hdoc.Time) !void { - try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ time.hour, time.minute, time.second }); - if (time.microsecond != 0) { - try writer.print(".{d:0>6}", .{time.microsecond}); - } -} - -fn writeDateTimeValue(writer: anytype, datetime: hdoc.DateTime) !void { - try writeDateValue(writer, datetime.date); - try writer.writeByte('T'); - try writeTimeValue(writer, datetime.time); -} - -fn writeFormattedDateInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Date)) !void { - try writer.writeAll("date:"); - try writeDateValue(writer, formatted.value); - if (formatted.format != hdoc.Date.Format.default) { - try writer.writeByte('@'); - try writer.writeAll(@tagName(formatted.format)); - } -} - -fn writeFormattedTimeInline(writer: anytype, formatted: hdoc.FormattedDateTime(hdoc.Time)) !void { - try writer.writeAll("time:"); - try writeTimeValue(writer, formatted.value); - if (formatted.format != hdoc.Time.Format.default) { - try writer.writeByte('@'); - try writer.writeAll(@tagName(formatted.format)); - } -} - -fn writeFormattedDateTimeInline(writer: anytype, formatted: 
hdoc.FormattedDateTime(hdoc.DateTime)) !void { - try writer.writeAll("datetime:"); - try writeDateTimeValue(writer, formatted.value); - if (formatted.format != hdoc.DateTime.Format.default) { - try writer.writeByte('@'); - try writer.writeAll(@tagName(formatted.format)); - } -} - -fn writeSpanContentInline(writer: anytype, content: hdoc.Span.Content) !void { - switch (content) { - .text => |text| { - try writeStringValue(writer, text); - }, - .date => |date| { - try writer.writeByte('"'); - try writeFormattedDateInline(writer, date); - try writer.writeByte('"'); - }, - .time => |time| { - try writer.writeByte('"'); - try writeFormattedTimeInline(writer, time); - try writer.writeByte('"'); - }, - .datetime => |datetime| { - try writer.writeByte('"'); - try writeFormattedDateTimeInline(writer, datetime); - try writer.writeByte('"'); - }, - } -} - -fn dumpSpanInline(writer: anytype, span: hdoc.Span) !void { - try writeSpanAttributes(writer, span); - try writer.writeByte(' '); - try writeSpanContentInline(writer, span.content); -} - -fn dumpSpanListField(writer: anytype, indent: usize, key: []const u8, spans: []const hdoc.Span) !void { - try writeIndent(writer, indent); - if (spans.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (spans) |span| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("- "); - try dumpSpanInline(writer, span); - try writer.writeByte('\n'); - } -} - -fn dumpListItem(writer: anytype, indent: usize, item: hdoc.Block.ListItem) !void { - try dumpOptionalStringField(writer, indent, "lang", item.lang); - try dumpBlockListField(writer, indent, "content", item.content); -} - -fn dumpListItemsField(writer: anytype, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) !void { - try writeIndent(writer, indent); - if (items.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (items) |item| { - try 
writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpListItem(writer, indent + indent_step * 2, item); - } -} - -fn dumpTableCell(writer: anytype, indent: usize, cell: hdoc.Block.TableCell) !void { - try dumpOptionalStringField(writer, indent, "lang", cell.lang); - try dumpOptionalNumberField(writer, indent, "colspan", @as(?u32, cell.colspan)); - try dumpBlockListField(writer, indent, "content", cell.content); -} - -fn dumpTableCellsField(writer: anytype, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) !void { - try writeIndent(writer, indent); - if (cells.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (cells) |cell| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpTableCell(writer, indent + indent_step * 2, cell); - } -} - -fn dumpTableColumns(writer: anytype, indent: usize, columns: hdoc.Block.TableColumns) !void { - try dumpOptionalStringField(writer, indent, "lang", columns.lang); - try dumpTableCellsField(writer, indent, "cells", columns.cells); -} - -fn dumpTableDataRow(writer: anytype, indent: usize, row: hdoc.Block.TableDataRow) !void { - try dumpOptionalStringField(writer, indent, "lang", row.lang); - try dumpOptionalStringField(writer, indent, "title", row.title); - try dumpTableCellsField(writer, indent, "cells", row.cells); -} - -fn dumpTableGroup(writer: anytype, indent: usize, group: hdoc.Block.TableGroup) !void { - try dumpOptionalStringField(writer, indent, "lang", group.lang); - try dumpSpanListField(writer, indent, "content", group.content); -} - -fn dumpTableRow(writer: anytype, indent: usize, row: hdoc.Block.TableRow) !void { - switch (row) { - .columns => |columns| { - try writeIndent(writer, indent); - try writer.writeAll("columns:\n"); - try dumpTableColumns(writer, indent + indent_step, columns); - }, - .row => |data_row| { - try writeIndent(writer, indent); - try 
writer.writeAll("row:\n"); - try dumpTableDataRow(writer, indent + indent_step, data_row); - }, - .group => |group| { - try writeIndent(writer, indent); - try writer.writeAll("group:\n"); - try dumpTableGroup(writer, indent + indent_step, group); - }, - } -} - -fn dumpTableRowsField(writer: anytype, indent: usize, key: []const u8, rows: []const hdoc.Block.TableRow) !void { - try writeIndent(writer, indent); - if (rows.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (rows) |row| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("-\n"); - try dumpTableRow(writer, indent + indent_step * 2, row); - } -} - -fn dumpBlockInline(writer: anytype, indent: usize, block: hdoc.Block) !void { - switch (block) { - .heading => |heading| { - try writer.writeAll("heading:\n"); - try dumpEnumField(writer, indent + indent_step, "level", heading.level); - try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); - try dumpSpanListField(writer, indent + indent_step, "content", heading.content); - }, - .paragraph => |paragraph| { - try writer.writeAll("paragraph:\n"); - try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); - try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); - try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); - }, - .list => |list| { - try writer.writeAll("list:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); - try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); - try dumpListItemsField(writer, indent + indent_step, "items", list.items); - }, - .image => |image| { - try writer.writeAll("image:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); - try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); - try dumpOptionalStringField(writer, indent + indent_step, 
"path", image.path); - try dumpSpanListField(writer, indent + indent_step, "content", image.content); - }, - .preformatted => |preformatted| { - try writer.writeAll("preformatted:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); - try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); - try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); - }, - .toc => |toc| { - try writer.writeAll("toc:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); - try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); - }, - .table => |table| { - try writer.writeAll("table:\n"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); - try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); - }, - } -} - -fn dumpBlockListField(writer: anytype, indent: usize, key: []const u8, blocks: []const hdoc.Block) @TypeOf(writer).Error!void { - try writeIndent(writer, indent); - if (blocks.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (blocks) |block| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("- "); - try dumpBlockInline(writer, indent + indent_step, block); - } -} - -fn dumpOptionalStringListField(writer: anytype, indent: usize, key: []const u8, values: []?hdoc.Reference) !void { - try writeIndent(writer, indent); - if (values.len == 0) { - try writer.print("{s}: []\n", .{key}); - return; - } - try writer.print("{s}:\n", .{key}); - for (values) |value| { - try writeIndent(writer, indent + indent_step); - try writer.writeAll("- "); - try writeOptionalStringValue(writer, if (value) |val| val.text else null); - try writer.writeByte('\n'); - } -} - -fn dumpOptionalDateTimeField(writer: anytype, indent: usize, key: []const u8, value: ?hdoc.DateTime) !void { - try writeIndent(writer, indent); - if 
(value) |datetime| { - try writer.print("{s}:\n", .{key}); - try dumpDateTime(writer, indent + indent_step, datetime); - } else { - try writer.print("{s}: null\n", .{key}); - } -} - -fn dumpDocument(writer: anytype, doc: *const hdoc.Document) !void { - try writer.writeAll("document:\n"); - try dumpVersion(writer, indent_step, doc.version); - try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); - try dumpOptionalStringField(writer, indent_step, "title", doc.title); - try dumpOptionalStringField(writer, indent_step, "author", doc.author); - try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); - try dumpBlockListField(writer, indent_step, "contents", doc.contents); - try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); -} - -test "dumpDocument escapes string values" { - const title = "Doc \"Title\"\n"; - const span_text = "Hello \"world\"\n"; - const link_ref: hdoc.Reference = .init("section \"A\""); - const id_value: hdoc.Reference = .init("id:1\n"); - - var doc: hdoc.Document = .{ - .arena = std.heap.ArenaAllocator.init(std.testing.allocator), - .version = .{ .major = 1, .minor = 2 }, - .contents = &.{}, - .ids = &.{}, - .lang = null, - .title = title, - .author = null, - .date = null, - .timezone = null, - }; - defer doc.deinit(); - - const arena_alloc = doc.arena.allocator(); - - const spans = try arena_alloc.alloc(hdoc.Span, 1); - spans[0] = .{ - .content = .{ .text = span_text }, - .attribs = .{ .link = .{ .ref = link_ref } }, - }; - - const blocks = try arena_alloc.alloc(hdoc.Block, 1); - blocks[0] = .{ - .heading = .{ - .level = .h1, - .lang = null, - .content = spans, - }, - }; - doc.contents = blocks; - - const ids = try arena_alloc.alloc(?hdoc.Reference, 1); - ids[0] = id_value; - doc.ids = ids; - - var buffer: std.ArrayList(u8) = .empty; - defer buffer.deinit(std.testing.allocator); - - try dumpDocument(buffer.writer(std.testing.allocator), &doc); - const output = buffer.items; - - const expected_title = try 
std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); - defer std.testing.allocator.free(expected_title); - try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); - - const expected_span = try std.fmt.allocPrint( - std.testing.allocator, - "- [link=\"ref:{f}\"] \"{f}\"\n", - .{ std.zig.fmtString(link_ref.text), std.zig.fmtString(span_text) }, - ); - defer std.testing.allocator.free(expected_span); - try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); - - const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value.text)}); - defer std.testing.allocator.free(expected_id); - try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); -} - pub fn main() !u8 { defer if (builtin.mode == .Debug) { std.debug.assert(debug_allocator.deinit() == .ok); @@ -512,8 +35,10 @@ pub fn main() !u8 { if (diagnostics.has_error()) return 1; - const stdout = std.fs.File.stdout().deprecatedWriter(); - try dumpDocument(stdout, &parsed); + var stdout_buffer: [4096]u8 = undefined; + var stdout = std.fs.File.stdout().writer(&stdout_buffer); + + try hdoc.render.yaml(parsed, &stdout.interface); return 0; } diff --git a/src/render/dump.zig b/src/render/dump.zig new file mode 100644 index 0000000..347e90e --- /dev/null +++ b/src/render/dump.zig @@ -0,0 +1,497 @@ +const std = @import("std"); +const hdoc = @import("../hyperdoc.zig"); + +const Writer = std.Io.Writer; +const indent_step: usize = 2; + +fn writeIndent(writer: *Writer, indent: usize) Writer.Error!void { + var i: usize = 0; + while (i < indent) : (i += 1) { + try writer.writeByte(' '); + } +} + +fn writeStringValue(writer: *Writer, value: []const u8) Writer.Error!void { + try writer.print("\"{f}\"", .{std.zig.fmtString(value)}); +} + +fn writeOptionalStringValue(writer: *Writer, value: ?[]const u8) Writer.Error!void { + if (value) |text| { + try writeStringValue(writer, text); + } else { + try 
writer.writeAll("null"); + } +} + +fn writeOptionalIntValue(writer: *Writer, value: anytype) Writer.Error!void { + if (value) |number| { + try writer.print("{}", .{number}); + } else { + try writer.writeAll("null"); + } +} + +fn dumpOptionalStringField(writer: *Writer, indent: usize, key: []const u8, value: ?[]const u8) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpOptionalStringFieldInline(writer: *Writer, key: []const u8, value: ?[]const u8) Writer.Error!void { + try writer.print("{s}: ", .{key}); + try writeOptionalStringValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpOptionalStringFieldWithIndent(writer: *Writer, indent: usize, key: []const u8, value: ?[]const u8) Writer.Error!void { + try writeIndent(writer, indent); + try dumpOptionalStringFieldInline(writer, key, value); +} + +fn dumpOptionalNumberField(writer: *Writer, indent: usize, key: []const u8, value: anytype) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: ", .{key}); + try writeOptionalIntValue(writer, value); + try writer.writeByte('\n'); +} + +fn dumpBoolField(writer: *Writer, indent: usize, key: []const u8, value: bool) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: {}\n", .{ key, value }); +} + +fn dumpEnumField(writer: *Writer, indent: usize, key: []const u8, value: anytype) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("{s}: {s}\n", .{ key, @tagName(value) }); +} + +fn dumpVersion(writer: *Writer, indent: usize, version: hdoc.Version) Writer.Error!void { + try writeIndent(writer, indent); + try writer.writeAll("version:\n"); + try writeIndent(writer, indent + indent_step); + try writer.print("major: {}\n", .{version.major}); + try writeIndent(writer, indent + indent_step); + try writer.print("minor: {}\n", .{version.minor}); +} + +fn dumpDate(writer: 
*Writer, indent: usize, date: hdoc.Date) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("year: {}\n", .{date.year}); + try writeIndent(writer, indent); + try writer.print("month: {}\n", .{date.month}); + try writeIndent(writer, indent); + try writer.print("day: {}\n", .{date.day}); +} + +fn dumpTime(writer: *Writer, indent: usize, time: hdoc.Time) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("hour: {}\n", .{time.hour}); + try writeIndent(writer, indent); + try writer.print("minute: {}\n", .{time.minute}); + try writeIndent(writer, indent); + try writer.print("second: {}\n", .{time.second}); + try writeIndent(writer, indent); + try writer.print("microsecond: {}\n", .{time.microsecond}); +} + +fn dumpDateTime(writer: *Writer, indent: usize, datetime: hdoc.DateTime) Writer.Error!void { + try writeIndent(writer, indent); + try writer.writeAll("date:\n"); + try dumpDate(writer, indent + indent_step, datetime.date); + try writeIndent(writer, indent); + try writer.writeAll("time:\n"); + try dumpTime(writer, indent + indent_step, datetime.time); +} + +fn writeAttrSeparator(writer: *Writer, first: *bool) Writer.Error!void { + if (first.*) { + first.* = false; + } else { + try writer.writeByte(' '); + } +} + +fn writeSpanAttributes(writer: *Writer, span: hdoc.Span) Writer.Error!void { + try writer.writeByte('['); + var first = true; + if (span.attribs.em) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("em"); + } + if (span.attribs.mono) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("mono"); + } + if (span.attribs.strike) { + try writeAttrSeparator(writer, &first); + try writer.writeAll("strike"); + } + if (span.attribs.position != .baseline) { + try writeAttrSeparator(writer, &first); + try writer.print("position=\"{s}\"", .{@tagName(span.attribs.position)}); + } + switch (span.attribs.link) { + .none => {}, + .ref => |value| { + try writeAttrSeparator(writer, &first); + try 
writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); + }, + .uri => |value| { + try writeAttrSeparator(writer, &first); + try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); + }, + } + if (span.attribs.lang.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang)}); + } + if (span.attribs.syntax.len != 0) { + try writeAttrSeparator(writer, &first); + try writer.print("syntax=\"{f}\"", .{std.zig.fmtString(span.attribs.syntax)}); + } + try writer.writeByte(']'); +} + +fn writeDateValue(writer: *Writer, date: hdoc.Date) Writer.Error!void { + try writer.print("{d:0>4}-{d:0>2}-{d:0>2}", .{ date.year, date.month, date.day }); +} + +fn writeTimeValue(writer: *Writer, time: hdoc.Time) Writer.Error!void { + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ time.hour, time.minute, time.second }); + if (time.microsecond != 0) { + try writer.print(".{d:0>6}", .{time.microsecond}); + } +} + +fn writeDateTimeValue(writer: *Writer, datetime: hdoc.DateTime) Writer.Error!void { + try writeDateValue(writer, datetime.date); + try writer.writeByte('T'); + try writeTimeValue(writer, datetime.time); +} + +fn writeFormattedDateInline(writer: *Writer, formatted: hdoc.FormattedDateTime(hdoc.Date)) Writer.Error!void { + try writer.writeAll("date:"); + try writeDateValue(writer, formatted.value); + if (formatted.format != hdoc.Date.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeFormattedTimeInline(writer: *Writer, formatted: hdoc.FormattedDateTime(hdoc.Time)) Writer.Error!void { + try writer.writeAll("time:"); + try writeTimeValue(writer, formatted.value); + if (formatted.format != hdoc.Time.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeFormattedDateTimeInline(writer: *Writer, formatted: hdoc.FormattedDateTime(hdoc.DateTime)) Writer.Error!void { + try 
writer.writeAll("datetime:"); + try writeDateTimeValue(writer, formatted.value); + if (formatted.format != hdoc.DateTime.Format.default) { + try writer.writeByte('@'); + try writer.writeAll(@tagName(formatted.format)); + } +} + +fn writeSpanContentInline(writer: *Writer, content: hdoc.Span.Content) Writer.Error!void { + switch (content) { + .text => |text| { + try writeStringValue(writer, text); + }, + .date => |date| { + try writer.writeByte('"'); + try writeFormattedDateInline(writer, date); + try writer.writeByte('"'); + }, + .time => |time| { + try writer.writeByte('"'); + try writeFormattedTimeInline(writer, time); + try writer.writeByte('"'); + }, + .datetime => |datetime| { + try writer.writeByte('"'); + try writeFormattedDateTimeInline(writer, datetime); + try writer.writeByte('"'); + }, + } +} + +fn dumpSpanInline(writer: *Writer, span: hdoc.Span) Writer.Error!void { + try writeSpanAttributes(writer, span); + try writer.writeByte(' '); + try writeSpanContentInline(writer, span.content); +} + +fn writeTypeTag(writer: *Writer, tag: []const u8) Writer.Error!void { + try writer.print("{s}:\n", .{tag}); +} + +fn dumpSpanListField(writer: *Writer, indent: usize, key: []const u8, spans: []const hdoc.Span) Writer.Error!void { + try writeIndent(writer, indent); + if (spans.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (spans) |span| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpSpanInline(writer, span); + try writer.writeByte('\n'); + } +} + +fn dumpBlockListField(writer: *Writer, indent: usize, key: []const u8, blocks: []const hdoc.Block) Writer.Error!void { + try writeIndent(writer, indent); + if (blocks.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (blocks) |block| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpBlockInline(writer, indent + 
indent_step, block); + } +} + +fn dumpOptionalStringListField(writer: *Writer, indent: usize, key: []const u8, values: []?hdoc.Reference) Writer.Error!void { + try writeIndent(writer, indent); + if (values.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (values) |value| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try writeOptionalStringValue(writer, if (value) |val| val.text else null); + try writer.writeByte('\n'); + } +} + +fn dumpListItem(writer: *Writer, indent: usize, item: hdoc.Block.ListItem) Writer.Error!void { + try dumpOptionalStringFieldInline(writer, "lang", item.lang); + try dumpBlockListField(writer, indent + indent_step, "content", item.content); +} + +fn dumpListItemsField(writer: *Writer, indent: usize, key: []const u8, items: []const hdoc.Block.ListItem) Writer.Error!void { + try writeIndent(writer, indent); + if (items.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (items) |item| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpListItem(writer, indent + indent_step, item); + } +} + +fn dumpTableCell(writer: *Writer, indent: usize, cell: hdoc.Block.TableCell) Writer.Error!void { + try dumpOptionalStringFieldInline(writer, "lang", cell.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "colspan", @as(?u32, cell.colspan)); + try dumpBlockListField(writer, indent + indent_step, "content", cell.content); +} + +fn dumpTableCellsField(writer: *Writer, indent: usize, key: []const u8, cells: []const hdoc.Block.TableCell) Writer.Error!void { + try writeIndent(writer, indent); + if (cells.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (cells) |cell| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpTableCell(writer, indent + indent_step, 
cell); + } +} + +fn dumpTableColumns(writer: *Writer, indent: usize, columns: hdoc.Block.TableColumns) Writer.Error!void { + try dumpOptionalStringField(writer, indent, "lang", columns.lang); + try dumpTableCellsField(writer, indent, "cells", columns.cells); +} + +fn dumpTableDataRow(writer: *Writer, indent: usize, row: hdoc.Block.TableDataRow) Writer.Error!void { + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", row.lang); + try dumpOptionalStringField(writer, indent, "title", row.title); + try dumpTableCellsField(writer, indent, "cells", row.cells); +} + +fn dumpTableGroup(writer: *Writer, indent: usize, group: hdoc.Block.TableGroup) Writer.Error!void { + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", group.lang); + try dumpSpanListField(writer, indent, "content", group.content); +} + +fn dumpTableRow(writer: *Writer, indent: usize, row: hdoc.Block.TableRow) Writer.Error!void { + switch (row) { + .columns => |columns| { + try writeTypeTag(writer, "columns"); + try dumpTableColumns(writer, indent + indent_step, columns); + }, + .row => |data_row| { + try writeTypeTag(writer, "row"); + try dumpTableDataRow(writer, indent + indent_step, data_row); + }, + .group => |group| { + try writeTypeTag(writer, "group"); + try dumpTableGroup(writer, indent + indent_step, group); + }, + } +} + +fn dumpTableRowsField(writer: *Writer, indent: usize, key: []const u8, rows: []const hdoc.Block.TableRow) Writer.Error!void { + try writeIndent(writer, indent); + if (rows.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (rows) |row| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpTableRow(writer, indent + indent_step, row); + } +} + +fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Error!void { + switch (block) { + .heading => |heading| { + try writeTypeTag(writer, "heading"); + try dumpEnumField(writer, indent + indent_step, 
"level", heading.level); + try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); + try dumpSpanListField(writer, indent + indent_step, "content", heading.content); + }, + .paragraph => |paragraph| { + try writeTypeTag(writer, "paragraph"); + try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); + try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); + try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); + }, + .list => |list| { + try writeTypeTag(writer, "list"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); + try dumpListItemsField(writer, indent + indent_step, "items", list.items); + }, + .image => |image| { + try writeTypeTag(writer, "image"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); + try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); + try dumpOptionalStringField(writer, indent + indent_step, "path", image.path); + try dumpSpanListField(writer, indent + indent_step, "content", image.content); + }, + .preformatted => |preformatted| { + try writeTypeTag(writer, "preformatted"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); + try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); + try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); + }, + .toc => |toc| { + try writeTypeTag(writer, "toc"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); + try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); + }, + .table => |table| { + try writeTypeTag(writer, "table"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); + try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); + }, + } 
+} + +fn dumpOptionalDateTimeField(writer: *Writer, indent: usize, key: []const u8, value: ?hdoc.DateTime) Writer.Error!void { + try writeIndent(writer, indent); + if (value) |datetime| { + try writer.print("{s}:\n", .{key}); + try dumpDateTime(writer, indent + indent_step, datetime); + } else { + try writer.print("{s}: null\n", .{key}); + } +} + +fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { + try writer.writeAll("document:\n"); + try dumpVersion(writer, indent_step, doc.version); + try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); + try dumpOptionalStringField(writer, indent_step, "title", doc.title); + try dumpOptionalStringField(writer, indent_step, "author", doc.author); + try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); + try dumpBlockListField(writer, indent_step, "contents", doc.contents); + try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); +} + +pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { + try dumpDocument(writer, &doc); +} + +test "render escapes string values" { + const title = "Doc \"Title\"\n"; + const span_text = "Hello \"world\"\n"; + const link_ref: hdoc.Reference = .init("section \"A\""); + const id_value: hdoc.Reference = .init("id:1\n"); + + var doc: hdoc.Document = .{ + .arena = std.heap.ArenaAllocator.init(std.testing.allocator), + .version = .{ .major = 1, .minor = 2 }, + .contents = &.{}, + .ids = &.{}, + .lang = null, + .title = title, + .author = null, + .date = null, + .timezone = null, + }; + defer doc.deinit(); + + const arena_alloc = doc.arena.allocator(); + + const spans = try arena_alloc.alloc(hdoc.Span, 1); + spans[0] = .{ + .content = .{ .text = span_text }, + .attribs = .{ .link = .{ .ref = link_ref } }, + }; + + const blocks = try arena_alloc.alloc(hdoc.Block, 1); + blocks[0] = .{ + .heading = .{ + .level = .h1, + .lang = null, + .content = spans, + }, + }; + doc.contents = blocks; + + const ids = try 
arena_alloc.alloc(?hdoc.Reference, 1); + ids[0] = id_value; + doc.ids = ids; + + var buffer = Writer.Allocating.init(std.testing.allocator); + defer buffer.deinit(); + + try render(doc, &buffer.writer); + try buffer.writer.flush(); + const output = buffer.writer.buffered(); + + const expected_title = try std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); + defer std.testing.allocator.free(expected_title); + try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); + + const expected_span = try std.fmt.allocPrint( + std.testing.allocator, + "- [link=\"ref:{f}\"] \"{f}\"\n", + .{ std.zig.fmtString(link_ref.text), std.zig.fmtString(span_text) }, + ); + defer std.testing.allocator.free(expected_span); + try std.testing.expect(std.mem.indexOf(u8, output, expected_span) != null); + + const expected_id = try std.fmt.allocPrint(std.testing.allocator, "- \"{f}\"\n", .{std.zig.fmtString(id_value.text)}); + defer std.testing.allocator.free(expected_id); + try std.testing.expect(std.mem.indexOf(u8, output, expected_id) != null); +} From 98d386ed31781ae58a5b0f4868025dcea1005224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 14:31:06 +0100 Subject: [PATCH 039/116] Cleans up main a bit and implements diagnostic printing. 
--- src/main.zig | 52 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/src/main.zig b/src/main.zig index 19161d5..5b6cd6f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -13,32 +13,62 @@ pub fn main() !u8 { else std.heap.smp_allocator; + var stderr_buffer: [4096]u8 = undefined; + var stderr = std.fs.File.stderr().writer(&stderr_buffer); + + var stdout_buffer: [4096]u8 = undefined; + var stdout = std.fs.File.stdout().writer(&stdout_buffer); + const args = try std.process.argsAlloc(allocator); defer std.process.argsFree(allocator, args); if (args.len < 2) { - const stderr = std.fs.File.stderr().deprecatedWriter(); - try stderr.print("usage: {s} \n", .{args[0]}); + try stderr.interface.print("usage: {s} \n", .{args[0]}); + try stderr.interface.flush(); return 1; } const path = args[1]; - const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); - defer allocator.free(document); var diagnostics: hdoc.Diagnostics = .init(allocator); defer diagnostics.deinit(); - var parsed = try hdoc.parse(allocator, document, &diagnostics); - defer parsed.deinit(); + const parse_result = parse_and_process( + allocator, + &diagnostics, + &stdout.interface, + path, + ); - if (diagnostics.has_error()) - return 1; + for (diagnostics.items.items) |diag| { + try stderr.interface.print("{s}:{f}: {f}\n", .{ + path, + diag.location, + diag.code, + }); + } + try stderr.interface.flush(); - var stdout_buffer: [4096]u8 = undefined; - var stdout = std.fs.File.stdout().writer(&stdout_buffer); + parse_result catch |err| { + std.log.err("failed to parse \"{s}\": {t}", .{ path, err }); + return 1; + }; - try hdoc.render.yaml(parsed, &stdout.interface); + try stdout.interface.flush(); return 0; } + +fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostics, output_stream: *std.Io.Writer, path: []const u8) !void { + const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 
1024 * 10); + defer allocator.free(document); + + var parsed = try hdoc.parse(allocator, document, diagnostics); + defer parsed.deinit(); + + if (diagnostics.has_error()) { + return error.InvalidFile; + } + + try hdoc.render.yaml(parsed, output_stream); +} From 76d40851ba3d1847582e7e2c61ee22b574b82697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 14:40:27 +0100 Subject: [PATCH 040/116] Unifies handling for empty spans --- src/hyperdoc.zig | 55 ++++++++++++++++++---------------------- test/accept/workset.hdoc | 28 ++++++++++---------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index b752b72..85605b5 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -633,7 +633,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node), + .content = try sema.translate_inline(node, .emit_diagnostic), }; return .{ heading, attrs.id }; @@ -657,7 +657,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node), + .content = try sema.translate_inline(node, .emit_diagnostic), }; return .{ heading, attrs.id }; @@ -715,16 +715,12 @@ pub const SemanticAnalyzer = struct { }); // TODO: Enforce non-empty "path" (required) and "alt" (if provided). 
- const content = switch (node.body) { - .empty => @constCast(&[_]Span{}), - else => try sema.translate_inline(node), - }; const image: Block.Image = .{ .lang = attrs.lang, .alt = attrs.alt, .path = attrs.path, - .content = content, + .content = try sema.translate_inline(node, .allow_empty), }; return .{ image, attrs.id }; @@ -740,7 +736,7 @@ pub const SemanticAnalyzer = struct { const preformatted: Block.Preformatted = .{ .lang = attrs.lang, .syntax = attrs.syntax, - .content = try sema.translate_inline(node), + .content = try sema.translate_inline(node, .emit_diagnostic), }; return .{ preformatted, attrs.id }; @@ -834,7 +830,7 @@ pub const SemanticAnalyzer = struct { rows.appendAssumeCapacity(.{ .group = .{ .lang = row_attrs.lang, - .content = try sema.translate_inline(child_node), + .content = try sema.translate_inline(child_node, .emit_diagnostic), }, }); }, @@ -951,7 +947,7 @@ pub const SemanticAnalyzer = struct { return &.{}; }, .text_to_p => { - const spans = try sema.translate_inline(node); + const spans = try sema.translate_inline(node, .emit_diagnostic); const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ @@ -969,14 +965,14 @@ pub const SemanticAnalyzer = struct { } /// Translates a node into a sequence of inline spans. - fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }![]Span { + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; errdefer spans.deinit(sema.arena); // TODO: Implement automatic space insertion. 
// This must be done when two consecutive nodes are separated by a space - try sema.translate_inline_body(&spans, node.body, .{}); + try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); // TODO: Compact spans by joining spans with equal properties @@ -1063,7 +1059,7 @@ pub const SemanticAnalyzer = struct { switch (node.type) { .unknown_inline, .text, - => try sema.translate_inline_body(spans, node.body, attribs), + => try sema.translate_inline_body(spans, node.body, attribs, .emit_diagnostic), .@"\\em" => { const props = try sema.get_attributes(node, struct { @@ -1073,7 +1069,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .em = true, - })); + }), .emit_diagnostic); }, .@"\\strike" => { @@ -1084,7 +1080,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .strike = true, - })); + }), .emit_diagnostic); }, .@"\\sub" => { @@ -1095,7 +1091,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .superscript, - })); + }), .emit_diagnostic); }, .@"\\sup" => { @@ -1106,7 +1102,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .subscript, - })); + }), .emit_diagnostic); }, .@"\\link" => { @@ -1131,7 +1127,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .link = link, - })); + }), .emit_diagnostic); }, .@"\\mono" => { @@ -1143,7 +1139,7 @@ pub const SemanticAnalyzer = struct { .mono = true, .lang = props.lang, .syntax = props.syntax, - })); + }), .emit_diagnostic); }, .@"\\date", @@ -1155,17 
+1151,11 @@ pub const SemanticAnalyzer = struct { fmt: []const u8 = "", }); - var content_spans: std.ArrayList(Span) = .empty; - defer content_spans.deinit(sema.arena); - - // TODO: Implement automatic space insertion. - // This must be done when two consecutive nodes are separated by a space - // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. - try sema.translate_inline_body(&content_spans, node.body, .{}); + const content_spans = try sema.translate_inline(node, .emit_diagnostic); // Convert the content_spans into a "rendered string". - const content_text = try sema.render_spans_to_plaintext(content_spans.items, .no_space); + const content_text = try sema.render_spans_to_plaintext(content_spans, .no_space); const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1297,10 +1287,15 @@ pub const SemanticAnalyzer = struct { return try output_str.toOwnedSlice(sema.arena); } - fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes) error{ OutOfMemory, BadAttributes }!void { + const EmptyHandling = enum { + allow_empty, + emit_diagnostic, + }; + fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }!void { switch (body) { - .empty => |location| { - try sema.emit_diagnostic(.empty_inline_body, location); + .empty => |location| switch (empty_handling) { + .allow_empty => {}, + .emit_diagnostic => try sema.emit_diagnostic(.empty_inline_body, location), }, .string => |string_body| { diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 19ffba1..b8717d6 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,18 +1,18 @@ hdoc(version="2.0"); -h1: -| Hello, World! - -h2{Hello \em{World}!} - -p { This is a span. 
} - -ul { - li "Item 1" - li { p { Item 2 } } +p { + In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed + steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained + parentheses): "Use the staging key (not the production key (seriously))". A different person pasted a pseudo-path, + /var/tmp/builds/2025-12-23/, and then warned, "If you see `NULL` in the output, don't 'fix' it by replacing it with + '0'—that's how we broke reporting last time." } -ol { - li "Item 1" - li { p { Item 2 } } -} \ No newline at end of file +pre(syntax="zig") { + pub fn FormattedDateTime(comptime DT: type) type { + return struct { + value: DT, + format: DT.Format = .default, + }; + } +} From 0c77468e712ac6c9d532cdd38e036a2dd8887e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:03:52 +0100 Subject: [PATCH 041/116] Implements span compaction for regular 'single whitespace' join spans. The parser now also emits whitespace-only spans separately from the word spans. pre compaction is not implemented yet. 
--- src/hyperdoc.zig | 181 +++++++++++++++++++++++++++++++++++++-- src/testsuite.zig | 29 ++++--- test/accept/workset.hdoc | 2 +- 3 files changed, 196 insertions(+), 16 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 85605b5..9b3e528 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -144,10 +144,35 @@ pub const Span = struct { strike: bool = false, link: Link = .none, syntax: []const u8 = "", // empty is absence + + pub fn eql(lhs: Attributes, rhs: Attributes) bool { + // Trivial comparisons: + if (lhs.position != rhs.position) + return false; + if (lhs.em != rhs.em) + return false; + if (lhs.mono != rhs.mono) + return false; + if (lhs.strike != rhs.strike) + return false; + + // string comparison: + if (!std.mem.eql(u8, lhs.lang, rhs.lang)) + return false; + if (!std.mem.eql(u8, lhs.syntax, rhs.syntax)) + return false; + + // complex comparison + if (!lhs.link.eql(rhs.link)) + return false; + + return true; + } }; content: Content, attribs: Attributes, + location: Parser.Location, }; pub const ScriptPosition = enum { @@ -160,6 +185,14 @@ pub const Link = union(enum) { none, ref: Reference, uri: Uri, + + pub fn eql(lhs: Link, rhs: Link) bool { + return switch (lhs) { + .none => (rhs == .none), + .ref => (rhs == .ref) and std.mem.eql(u8, lhs.ref.text, rhs.ref.text), + .uri => (rhs == .uri) and std.mem.eql(u8, lhs.uri.text, rhs.uri.text), + }; + } }; /// HyperDoc Version Number @@ -473,7 +506,7 @@ pub fn remove_byte_order_mark(diagnostics: ?*Diagnostics, plain_text: []const u8 } pub const SemanticAnalyzer = struct { - const whitespace_chars = " \t"; + const whitespace_chars = " \t\r\n"; const Header = struct { version: Version, @@ -967,18 +1000,120 @@ pub const SemanticAnalyzer = struct { /// Translates a node into a sequence of inline spans. 
fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; - errdefer spans.deinit(sema.arena); + defer spans.deinit(sema.arena); // TODO: Implement automatic space insertion. // This must be done when two consecutive nodes are separated by a space try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); - // TODO: Compact spans by joining spans with equal properties + // TODO: Use different whitespace strategies here: + return try sema.compact_spans(spans.items, .one_space); + } + + const Whitespace = enum { + one_space, + keep_space, + }; + + /// Compacts and merges spans of equal attributes by `whitespace` ruling. + fn compact_spans(sema: *SemanticAnalyzer, input: []const Span, whitespace: Whitespace) ![]Span { + var merger: SpanMerger = .{ + .arena = sema.arena, + .whitespace = whitespace, + }; + + for (input) |span| { + try merger.push(span); + } + + try merger.flush(); - return try spans.toOwnedSlice(sema.arena); + return try merger.output.toOwnedSlice(sema.arena); } + /// Checks if only + fn is_only_whitespace(str: []const u8) bool { + return std.mem.indexOfNone(u8, str, whitespace_chars) == null; + } + + const SpanMerger = struct { + arena: std.mem.Allocator, + whitespace: Whitespace, + + output: std.ArrayList(Span) = .empty, + + span_start: usize = 0, + current_span: std.ArrayList(u8) = .empty, + attribs: Span.Attributes = .{}, + last_end: usize = std.math.maxInt(usize), + + fn push(merger: *SpanMerger, span: Span) !void { + if (merger.last_end == std.math.maxInt(usize)) { + merger.last_end = span.location.offset; + } + + if (!span.attribs.eql(merger.attribs)) { + try merger.flush_internal(.keep); + std.debug.assert(merger.current_span.items.len == 0); + merger.attribs = span.attribs; + std.debug.assert(span.attribs.eql(merger.attribs)); + } + switch (span.content) { + .date, .time, .datetime => { + // All 
date/time/datetime require to be passed verbatim into the output + try merger.flush_internal(.keep); + std.debug.assert(merger.current_span.items.len == 0); + + try merger.output.append(merger.arena, span); + }, + .text => |text_content| { + std.debug.assert(span.attribs.eql(merger.attribs)); + + const append_text, const skip_head = if (is_only_whitespace(text_content)) + switch (merger.whitespace) { + .one_space => .{ " ", true }, + .keep_space => .{ text_content, false }, + } + else + .{ text_content, false }; + + // check if we already have text, and if not, if we should keep the whitespace + if (merger.current_span.items.len > 0 or !skip_head) { + try merger.current_span.appendSlice(merger.arena, append_text); + } + }, + } + merger.last_end = span.location.offset_one_after(); + } + + pub fn flush(merger: *SpanMerger) !void { + return merger.flush_internal(.strip); + } + + fn flush_internal(merger: *SpanMerger, mode: enum { strip, keep }) !void { + if (merger.current_span.items.len == 0) + return; + + const raw_string = try merger.current_span.toOwnedSlice(merger.arena); + + const string = switch (mode) { + .strip => std.mem.trimRight(u8, raw_string, whitespace_chars), + .keep => raw_string, + }; + + try merger.output.append(merger.arena, .{ + .attribs = merger.attribs, + .content = .{ .text = string }, + .location = .{ + .offset = merger.span_start, + .length = merger.last_end - merger.span_start, + }, + }); + merger.span_start = merger.last_end; + } + }; + pub const AttribOverrides = struct { lang: ?[]const u8 = null, em: ?bool = null, @@ -1169,6 +1304,7 @@ pub const SemanticAnalyzer = struct { .attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = attribs.lang, }), + .location = node.location, }); }, @@ -1224,11 +1360,13 @@ pub const SemanticAnalyzer = struct { const value: DTValue = if (value_or_err) |value| value else |err| blk: { + std.log.warn("failed to parse {t}: \"{s}\"", .{ body, value_str }); switch (err) { error.InvalidValue => { 
try sema.emit_diagnostic(.invalid_date_time, node.location); }, error.MissingTimezone => { + std.log.err("emit missing timezone for {}", .{node.location}); // TODO: Use (timezone_hint != null) to emit diagnostic for hint with // adding `tz` attribute when all date/time values share a common base. try sema.emit_diagnostic(.invalid_date_time, node.location); @@ -1304,6 +1442,7 @@ pub const SemanticAnalyzer = struct { try spans.append(sema.arena, .{ .content = .{ .text = text }, .attribs = attribs, + .location = string_body.location, }); }, @@ -1334,9 +1473,19 @@ pub const SemanticAnalyzer = struct { text_buffer.appendSliceAssumeCapacity(stripped); } + const location: Parser.Location = if (verbatim_lines.len > 0) blk: { + const head = verbatim_lines[0].location.offset; + const tail = verbatim_lines[verbatim_lines.len - 1].location.offset_one_after(); + break :blk .{ + .offset = head, + .length = tail - head, + }; + } else .{ .offset = 0, .length = 0 }; + try spans.append(sema.arena, .{ .content = .{ .text = try text_buffer.toOwnedSlice(sema.arena) }, .attribs = attribs, + .location = location, }); }, @@ -1350,6 +1499,7 @@ pub const SemanticAnalyzer = struct { try spans.append(sema.arena, .{ .content = .{ .text = text_span.text }, .attribs = attribs, + .location = text_span.location, }); }, } @@ -1854,7 +2004,24 @@ pub const Parser = struct { var nesting: usize = 0; while (true) { - parser.skip_whitespace(); + // If necessary, emit a whitespace span: + { + const before = parser.offset; + parser.skip_whitespace(); + const after = parser.offset; + std.debug.assert(after >= before); + if (after > before) { + // We've skipped over whitespace, so we emit a "whitespace" node here: + const whitespace = parser.slice(before, after); + try children.append(parser.arena, .{ + .location = whitespace.location, + .type = .text, + .body = .{ + .text_span = whitespace, + }, + }); + } + } const head = parser.peek_char() orelse { emitDiagnostic(parser, .unterminated_inline_list, 
parser.make_diagnostic_location(parser.offset)); @@ -2187,6 +2354,10 @@ pub const Parser = struct { pub const Location = struct { offset: usize, length: usize, + + pub fn offset_one_after(loc: Location) usize { + return loc.offset + loc.length; + } }; pub const NodeType = enum { diff --git a/src/testsuite.zig b/src/testsuite.zig index 111649f..d366816 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -294,16 +294,25 @@ test "parser handles inline node lists" { try std.testing.expectEqual(hdoc.Parser.NodeType.p, node.type); switch (node.body) { .list => |children| { - try std.testing.expectEqual(@as(usize, 2), children.len); - try std.testing.expectEqual(hdoc.Parser.NodeType.text, children[0].type); - try std.testing.expectEqual(@as(usize, 5), children[0].location.length); + try std.testing.expectEqual(@as(usize, 5), children.len); + + try std.testing.expectEqual(.text, children[0].type); + try std.testing.expectEqual(.text, children[1].type); + try std.testing.expectEqual(.text, children[2].type); + try std.testing.expectEqual(.@"\\em", children[3].type); + try std.testing.expectEqual(.text, children[4].type); + + try std.testing.expectEqual(" ".len, children[0].location.length); + try std.testing.expectEqual("Hello".len, children[1].location.length); + try std.testing.expectEqual(" ".len, children[2].location.length); + try std.testing.expectEqual("\\em{world}".len, children[3].location.length); + try std.testing.expectEqual(" ".len, children[4].location.length); - try std.testing.expectEqual(hdoc.Parser.NodeType.@"\\em", children[1].type); - switch (children[1].body) { + switch (children[3].body) { .list => |inline_children| { - try std.testing.expectEqual(@as(usize, 1), inline_children.len); - try std.testing.expectEqual(hdoc.Parser.NodeType.text, inline_children[0].type); - try std.testing.expectEqual(@as(usize, 5), inline_children[0].location.length); + try std.testing.expectEqual(1, inline_children.len); + try std.testing.expectEqual(.text, 
inline_children[0].type); + try std.testing.expectEqual("world".len, inline_children[0].location.length); }, else => return error.TestExpectedEqual, } @@ -423,7 +432,7 @@ fn expectParseOk(opts: LogDiagOptions, code: []const u8) !void { if (diagnostics.has_error() or diagnostics.has_warning()) { logDiagnostics(&diagnostics, opts); - return error.TestExpectedEqual; + return error.TestExpectedNoDiagnostics; } } @@ -442,7 +451,7 @@ fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { if (diagnostics.has_error()) { logDiagnostics(&diagnostics, opts); - return error.TestExpectedEqual; + return error.TestExpectedNoErrors; } } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index b8717d6..65ffc31 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -4,7 +4,7 @@ p { In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained parentheses): "Use the staging key (not the production key (seriously))". A different person pasted a pseudo-path, - /var/tmp/builds/2025-12-23/, and then warned, "If you see `NULL` in the output, don't 'fix' it by replacing it with + /var/tmp/builds/\date(fmt="iso"){2025-12-23}/, and then warned, "If you see \mono{NULL} in the output, don't 'fix' it by replacing it with '0'—that's how we broke reporting last time." 
} From 923d01f1dc9a8440e175aca75e7992f70a150623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:09:08 +0100 Subject: [PATCH 042/116] Implements p/pre split for whitespace compaction --- src/hyperdoc.zig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 9b3e528..bc92d96 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -666,7 +666,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), }; return .{ heading, attrs.id }; @@ -690,7 +690,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), }; return .{ heading, attrs.id }; @@ -753,7 +753,7 @@ pub const SemanticAnalyzer = struct { .lang = attrs.lang, .alt = attrs.alt, .path = attrs.path, - .content = try sema.translate_inline(node, .allow_empty), + .content = try sema.translate_inline(node, .allow_empty, .one_space), }; return .{ image, attrs.id }; @@ -769,7 +769,7 @@ pub const SemanticAnalyzer = struct { const preformatted: Block.Preformatted = .{ .lang = attrs.lang, .syntax = attrs.syntax, - .content = try sema.translate_inline(node, .emit_diagnostic), + .content = try sema.translate_inline(node, .emit_diagnostic, .keep_space), }; return .{ preformatted, attrs.id }; @@ -863,7 +863,7 @@ pub const SemanticAnalyzer = struct { rows.appendAssumeCapacity(.{ .group = .{ .lang = row_attrs.lang, - .content = try sema.translate_inline(child_node, .emit_diagnostic), + .content = try sema.translate_inline(child_node, .emit_diagnostic, .one_space), }, }); }, @@ -980,7 +980,7 @@ pub const SemanticAnalyzer = struct { return &.{}; }, .text_to_p => { - const 
spans = try sema.translate_inline(node, .emit_diagnostic); + const spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ @@ -998,7 +998,7 @@ pub const SemanticAnalyzer = struct { } /// Translates a node into a sequence of inline spans. - fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }![]Span { + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling, whitespace_handling: Whitespace) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; defer spans.deinit(sema.arena); @@ -1008,7 +1008,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); // TODO: Use different whitespace strategies here: - return try sema.compact_spans(spans.items, .one_space); + return try sema.compact_spans(spans.items, whitespace_handling); } const Whitespace = enum { @@ -1287,7 +1287,7 @@ pub const SemanticAnalyzer = struct { }); // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. - const content_spans = try sema.translate_inline(node, .emit_diagnostic); + const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); // Convert the content_spans into a "rendered string". const content_text = try sema.render_spans_to_plaintext(content_spans, .no_space); From 73763b7cd2f61a7429ee9e2f8149db1bb3ce6ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:29:48 +0100 Subject: [PATCH 043/116] Resolves several TODOs: Table shape (column count) validation, date/time/datetime nesting detection, image attribute validation. 
--- src/hyperdoc.zig | 118 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 31 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index bc92d96..07440ca 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -71,8 +71,8 @@ pub const Block = union(enum) { pub const Image = struct { lang: ?[]const u8, - alt: ?[]const u8, - path: ?[]const u8, + alt: []const u8, // empty means none + path: []const u8, content: []Span, }; @@ -469,7 +469,7 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; - // TODO: Validate document-level semantic constraints (unique ids, ref resolution, table shape). + // TODO: Validate document-level semantic constraints (unique ids, ref resolution). return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), @@ -747,12 +747,30 @@ pub const SemanticAnalyzer = struct { path: []const u8, }); - // TODO: Enforce non-empty "path" (required) and "alt" (if provided). + const alt = if (attrs.alt) |alt| + std.mem.trim(u8, alt, whitespace_chars) + else + ""; + + const path = std.mem.trim(u8, attrs.path, whitespace_chars); + if (path.len == 0) { + // The path must be non-empty. + + // TODO: Implement better diagnostic message + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "path" } }, get_attribute_location(node, "path", .value).?); + } + + if (attrs.alt != null and alt.len == 0) { + // If alt is present, it must be non-empty, and not fully whitespace. 
+ + // TODO: Implement better diagnostic message + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "alt" } }, get_attribute_location(node, "alt", .value).?); + } const image: Block.Image = .{ .lang = attrs.lang, - .alt = attrs.alt, - .path = attrs.path, + .alt = alt, + .path = path, .content = try sema.translate_inline(node, .allow_empty, .one_space), }; @@ -820,6 +838,8 @@ pub const SemanticAnalyzer = struct { var rows: std.ArrayList(Block.TableRow) = .empty; defer rows.deinit(sema.arena); + var column_count: ?usize = null; + switch (node.body) { .list => |child_nodes| { try rows.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); @@ -831,13 +851,26 @@ pub const SemanticAnalyzer = struct { }); const cells = try sema.translate_table_cells(child_node); - rows.appendAssumeCapacity(.{ .columns = .{ .lang = row_attrs.lang, .cells = cells, }, }); + + var width: usize = 0; + for (cells) |cell| { + std.debug.assert(cell.colspan > 0); + width += cell.colspan; + } + + column_count = column_count orelse width; + if (width != column_count) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.?, + .actual = width, + } }, child_node.location); + } }, .row => { const row_attrs = try sema.get_attributes(child_node, struct { @@ -854,6 +887,20 @@ pub const SemanticAnalyzer = struct { .cells = cells, }, }); + + var width: usize = 0; + for (cells) |cell| { + std.debug.assert(cell.colspan > 0); + width += cell.colspan; + } + + column_count = column_count orelse width; + if (width != column_count) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.?, + .actual = width, + } }, child_node.location); + } }, .group => { const row_attrs = try sema.get_attributes(child_node, struct { @@ -878,7 +925,6 @@ pub const SemanticAnalyzer = struct { }, } - // TODO: Validate column counts after colspan and title/group leading column rules. 
const table: Block.Table = .{ .lang = attrs.lang, .rows = try rows.toOwnedSlice(sema.arena), @@ -1002,12 +1048,8 @@ pub const SemanticAnalyzer = struct { var spans: std.ArrayList(Span) = .empty; defer spans.deinit(sema.arena); - // TODO: Implement automatic space insertion. - // This must be done when two consecutive nodes are separated by a space - try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); - // TODO: Use different whitespace strategies here: return try sema.compact_spans(spans.items, whitespace_handling); } @@ -1280,17 +1322,33 @@ pub const SemanticAnalyzer = struct { .@"\\date", .@"\\time", .@"\\datetime", - => { + => blk: { const props = try sema.get_attributes(node, struct { lang: ?[]const u8 = null, fmt: []const u8 = "", }); - // TODO: Enforce that date/time bodies only contain plain text/string/verbatim. const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + // Enforce that date/time bodies only contain plain text/string/verbatim. + // HyperDoc cannot format date/time values on it's own so we can't render + // \date, \time and \datetime into a string. It also doesn't make any sense + // to nest them. + for (content_spans) |span| { + switch (span.content) { + .text => {}, + .date, .time, .datetime => { + try sema.emit_diagnostic(.nested_date_time, span.location); + break :blk; + }, + } + } + // Convert the content_spans into a "rendered string". 
- const content_text = try sema.render_spans_to_plaintext(content_spans, .no_space); + const content_text = sema.render_spans_to_plaintext(content_spans) catch |err| switch (err) { + error.DateTimeRenderingUnsupported => unreachable, + else => |e| return e, + }; const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1391,16 +1449,12 @@ pub const SemanticAnalyzer = struct { }); } - const JoinStyle = enum { no_space, one_space }; - fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span, style: JoinStyle) ![]const u8 { - var len: usize = switch (style) { - .no_space => 0, - .one_space => (source_spans.len -| 1), - }; + fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span) error{ OutOfMemory, DateTimeRenderingUnsupported }![]const u8 { + var len: usize = 0; for (source_spans) |span| { len += switch (span.content) { .text => |str| str.len, - .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + .date, .time, .datetime => return error.DateTimeRenderingUnsupported, }; } @@ -1409,16 +1463,10 @@ pub const SemanticAnalyzer = struct { try output_str.ensureTotalCapacityPrecise(sema.arena, len); - for (source_spans, 0..) |span, index| { - switch (style) { - .no_space => {}, - .one_space => if (index > 0) - output_str.appendAssumeCapacity(' '), - } - + for (source_spans) |span| { switch (span.content) { .text => |str| output_str.appendSliceAssumeCapacity(str), - .date, .time, .datetime => @panic("TODO: Implement date-to-text conversion!"), + .date, .time, .datetime => unreachable, } } @@ -1519,7 +1567,6 @@ pub const SemanticAnalyzer = struct { const Fields = std.meta.FieldEnum(Attrs); const fields = @typeInfo(Attrs).@"struct".fields; - // TODO: Enforce per-attribute constraints from the spec (non-empty strings, lang tag format, etc). 
var required: std.EnumSet(Fields) = .initEmpty(); var attrs: Attrs = undefined; @@ -2535,6 +2582,7 @@ pub const Diagnostic = struct { pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; + pub const TableShapeError = struct { actual: usize, expected: usize }; pub const Code = union(enum) { // errors: @@ -2554,6 +2602,7 @@ pub const Diagnostic = struct { link_not_nestable, invalid_link, invalid_date_time, + nested_date_time, invalid_date_time_fmt, invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, @@ -2561,6 +2610,7 @@ pub const Diagnostic = struct { illegal_child_item, list_body_required, illegal_id_attribute, + column_count_mismatch: TableShapeError, // warnings: document_starts_with_bom, @@ -2599,6 +2649,8 @@ pub const Diagnostic = struct { .illegal_child_item, .list_body_required, .illegal_id_attribute, + .nested_date_time, + .column_count_mismatch, => .@"error", .unknown_attribute, @@ -2671,6 +2723,10 @@ pub const Diagnostic = struct { .illegal_child_item => try w.writeAll("Node not allowed here."), .illegal_id_attribute => try w.writeAll("Attribute 'id' not allowed here."), + + .nested_date_time => try w.writeAll("Nesting \\date, \\time and \\datetime is not allowed."), + + .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), } } }; From 68eb43e039c77446037d9e9fe4c3abfac9afcc16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 16:36:30 +0100 Subject: [PATCH 044/116] Implements id uniqueness check. 
--- src/hyperdoc.zig | 38 +++++++++++++++++++++++++++++++++++--- src/render/dump.zig | 3 ++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 07440ca..441ba35 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -13,7 +13,8 @@ pub const Document = struct { // document contents: contents: []Block, - ids: []?Reference, + content_ids: []?Reference, + id_map: std.StringArrayHashMapUnmanaged(usize), // id -> index // header information lang: ?[]const u8, @@ -469,11 +470,34 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; - // TODO: Validate document-level semantic constraints (unique ids, ref resolution). + const content_ids = try sema.ids.toOwnedSlice(arena.allocator()); + + var id_map: std.StringArrayHashMapUnmanaged(usize) = .empty; + errdefer id_map.deinit(arena.allocator()); + + try id_map.ensureTotalCapacity(arena.allocator(), content_ids.len); + + for (content_ids, 0..) |id_or_null, index| { + const id = id_or_null orelse continue; + + const gop = id_map.getOrPutAssumeCapacity(id.text); + if (gop.found_existing) { + try sema.emit_diagnostic( + .{ .duplicate_id = .{ .ref = id.text } }, + .{ .offset = 0, .length = 0 }, // TODO: Figure out proper node location + ); + continue; + } + gop.value_ptr.* = index; + } + + // TODO: Validate document-level semantic constraints (ref resolution). 
+ return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), - .ids = try sema.ids.toOwnedSlice(arena.allocator()), + .content_ids = content_ids, + .id_map = id_map, .lang = header.lang, .title = header.title, @@ -2583,6 +2607,7 @@ pub const Diagnostic = struct { pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; + pub const ReferenceError = struct { ref: []const u8 }; pub const Code = union(enum) { // errors: @@ -2611,6 +2636,8 @@ pub const Diagnostic = struct { list_body_required, illegal_id_attribute, column_count_mismatch: TableShapeError, + duplicate_id: ReferenceError, + unknown_id: ReferenceError, // warnings: document_starts_with_bom, @@ -2651,6 +2678,8 @@ pub const Diagnostic = struct { .illegal_id_attribute, .nested_date_time, .column_count_mismatch, + .duplicate_id, + .unknown_id, => .@"error", .unknown_attribute, @@ -2727,6 +2756,9 @@ pub const Diagnostic = struct { .nested_date_time => try w.writeAll("Nesting \\date, \\time and \\datetime is not allowed."), .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), + + .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), + .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index 347e90e..bedf742 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -424,7 +424,8 @@ fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); try dumpBlockListField(writer, indent_step, "contents", doc.contents); - try dumpOptionalStringListField(writer, indent_step, "ids", doc.ids); + 
try dumpOptionalStringListField(writer, indent_step, "ids", doc.content_ids); + // TODO: Dump ID map } pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { From cdd8245fb029d7a843d0d100501cc74ed8ce779c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 17:19:19 +0100 Subject: [PATCH 045/116] Spec/code alignment: Allows trailing commas in attribute lists, allows 'fmt=iso' for \date and \time as well, checks document version, also allows '-' and ':' inside identifiers, adds more TODO comments --- docs/specification.md | 6 +-- src/hyperdoc.zig | 95 ++++++++++++++++++++++++++++++++-------- src/testsuite.zig | 15 ++++++- test/accept/workset.hdoc | 2 +- 4 files changed, 93 insertions(+), 25 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index ce20dd7..4e0d0f7 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -133,7 +133,7 @@ attribute ::= attr_key , ws , "=" , ws , string_literal ; *) attr_key ::= attr_key_char , { attr_key_char } ; -attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | ":" | "\" ; +attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; (* ---------- Block-list content ---------- *) @@ -703,8 +703,8 @@ Adds a hyperlink to the contents. This allows a reader to navigate by typically | Element | Attribute | Function | | ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | -| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`. | -| `time` | `fmt` | `short`, `long`, `rough`, `relative`. | +| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` (raw ISO 8601). | +| `time` | `fmt` | `short`, `long`, `rough`, `relative`, `iso` (raw ISO 8601). | | `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). 
| Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 441ba35..12b8cee 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -426,8 +426,10 @@ pub fn parse( /// An optional diagnostics element that receives diagnostic messages like errors and warnings. /// If present, will be filled out by the parser. diagnostics: ?*Diagnostics, -) error{ OutOfMemory, SyntaxError, MalformedDocument, InvalidUtf8 }!Document { - const source_text = try remove_byte_order_mark(diagnostics, raw_plain_text); +) error{ OutOfMemory, SyntaxError, MalformedDocument, UnsupportedVersion, InvalidUtf8 }!Document { + const source_text = try clean_utf8_input(diagnostics, raw_plain_text); + + // We now know that the source code is 'fine' and var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); @@ -508,25 +510,39 @@ pub fn parse( }; } -pub fn remove_byte_order_mark(diagnostics: ?*Diagnostics, plain_text: []const u8) error{ OutOfMemory, InvalidUtf8 }![]const u8 { +pub fn clean_utf8_input(diagnostics: ?*Diagnostics, raw_plain_text: []const u8) error{ OutOfMemory, InvalidUtf8 }![]const u8 { + // First check if all of our code is valid UTF-8 // and if it potentially starts with a BOM. 
- var view = std.unicode.Utf8View.init(plain_text) catch { + var view = std.unicode.Utf8View.init(raw_plain_text) catch { return error.InvalidUtf8; }; var iter = view.iterator(); - if (iter.nextCodepointSlice()) |slice| { const codepoint = std.unicode.utf8Decode(slice) catch unreachable; if (codepoint == 0xFEFF) { if (diagnostics) |diag| { try diag.add(.document_starts_with_bom, .{ .column = 1, .line = 1 }); } - return plain_text[slice.len..]; + std.debug.assert(iter.i == slice.len); + } else { + iter.i = 0; // Reset iterator to start position } } - return plain_text; + const source_head = iter.i; + + while (iter.nextCodepointSlice()) |slice| { + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + + // TODO: Write codepoint validation which rejects the file if invalid codepoints are detected and + // emits warnings for TAB characters. + // Bare CR is forbidden, just CR LF or LF is allowed. + + _ = codepoint; + } + + return raw_plain_text[source_head..]; } pub const SemanticAnalyzer = struct { @@ -549,16 +565,26 @@ pub const SemanticAnalyzer = struct { blocks: std.ArrayList(Block) = .empty, ids: std.ArrayList(?Reference) = .empty, - fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{OutOfMemory}!void { + fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { switch (node.type) { .hdoc => { + const header = sema.translate_header_node(node) catch |err| switch (err) { + error.OutOfMemory, error.UnsupportedVersion => |e| return e, + error.BadAttributes => null, + }; if (sema.header != null) { try sema.emit_diagnostic(.duplicate_hdoc_header, node.location); + } else { + sema.header = header orelse .{ + .version = .{ .major = 2, .minor = 0 }, + .lang = null, + .title = null, + .author = null, + .timezone = null, + .date = null, + }; } - sema.header = sema.translate_header_node(node) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - error.BadAttributes => null, - }; + 
std.debug.assert(sema.header != null); }, else => { @@ -589,7 +615,7 @@ pub const SemanticAnalyzer = struct { } } - fn translate_header_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes }!Header { + fn translate_header_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, UnsupportedVersion }!Header { std.debug.assert(node.type == .hdoc); const attrs = try sema.get_attributes(node, struct { @@ -597,10 +623,17 @@ pub const SemanticAnalyzer = struct { title: ?[]const u8 = null, author: ?[]const u8 = null, date: ?DateTime = null, - lang: ?[]const u8 = null, + lang: ?[]const u8 = null, // TODO: Introduce with "LanguageTag" type for all "lang" attributes which performs proper validation tz: ?[]const u8 = null, }); + if (attrs.version.major != 2) + return error.UnsupportedVersion; + if (attrs.version.minor != 0) + return error.UnsupportedVersion; + + // TODO: Validate TZ format + return .{ .version = attrs.version, .lang = attrs.lang, @@ -754,6 +787,8 @@ pub const SemanticAnalyzer = struct { }, } + // TODO: Validate `children.items.len >= 1` + const list: Block.List = .{ .first = attrs.first orelse if (node.type == .ol) 1 else null, .lang = attrs.lang, @@ -980,6 +1015,8 @@ pub const SemanticAnalyzer = struct { }, } + // TODO: Validate `children.items.len >= 1` + return try cells.toOwnedSlice(sema.arena); } @@ -1291,7 +1328,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, - .position = .superscript, + .position = .subscript, }), .emit_diagnostic); }, @@ -1302,7 +1339,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, - .position = .subscript, + .position = .superscript, }), .emit_diagnostic); }, @@ -1327,6 +1364,7 @@ pub const SemanticAnalyzer = struct { }; try 
sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, .link = link, }), .emit_diagnostic); }, @@ -1799,13 +1837,22 @@ pub const SemanticAnalyzer = struct { break :blk "???"; } - if (escape_part.len == 4) { + const min_len = "\\u{}".len; + const max_len = "\\u{123456}".len; + + if (escape_part.len == min_len) { // Empty escape: \u{} std.debug.assert(std.mem.eql(u8, escape_part, "\\u{}")); try sema.emit_diagnostic(.invalid_unicode_string_escape, location); break :blk "???"; } + if (escape_part.len > max_len) { + // Escape sequence is more than 6 chars long + try sema.emit_diagnostic(.invalid_unicode_string_escape, location); + break :blk "???"; + } + const codepoint = std.fmt.parseInt(u21, escape_part[3 .. escape_part.len - 1], 16) catch { try sema.emit_diagnostic(.invalid_unicode_string_escape, location); break :blk "???"; @@ -1956,6 +2003,8 @@ pub const Parser = struct { // so we know that the next token must be the attribute name. 
while (true) { + if (parser.try_accept_char(')')) + break; const attr_name = try parser.accept_identifier(); _ = try parser.accept_char('='); const attr_value = try parser.accept_string(); @@ -1966,10 +2015,10 @@ pub const Parser = struct { }); if (!parser.try_accept_char(',')) { + try parser.accept_char(')'); break; } } - try parser.accept_char(')'); } } @@ -2285,6 +2334,8 @@ pub const Parser = struct { parser.offset += 1; switch (c) { + '\n' => return error.UnterminatedStringLiteral, + '"' => return parser.slice(start, parser.offset), '\\' => { @@ -2412,7 +2463,13 @@ pub const Parser = struct { pub fn is_ident_char(c: u8) bool { return switch (c) { - 'a'...'z', 'A'...'Z', '0'...'9', '_', '\\' => true, + 'a'...'z', + 'A'...'Z', + '0'...'9', + '_', + '-', + '\\', + => true, else => false, }; } diff --git a/src/testsuite.zig b/src/testsuite.zig index d366816..bfa3d17 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -1,6 +1,17 @@ const std = @import("std"); const hdoc = @import("./hyperdoc.zig"); +// TODO: Write unit test for trailing comma in attribute lists +// TODO: Write unit test for invalid escape sequence detection when more than 6 (hex) chars are used +// TODO: Write unit test for invalid version detection (must be 2.0) +// TODO: Write unit test for duplicate header recognition +// TODO: Write unit test for clean_utf8_input() passthrough +// TODO: Write unit test for clean_utf8_input() BOM detection +// TODO: Write unit test for clean_utf8_input() invalid UTF-8 detection +// TODO: Write unit test for clean_utf8_input() illegal codepoint detection (bare CR -> error) +// TODO: Write unit test for clean_utf8_input() illegal codepoint detection (TAB -> warning) +// TODO: Write unit test for clean_utf8_input() illegal codepoint detection (any other control character -> error) + test "validate examples directory" { try parseDirectoryTree("examples"); } @@ -66,7 +77,7 @@ test "parser rejects identifiers with invalid start characters" { defer arena.deinit(); 
var parser: hdoc.Parser = .{ - .code = "-abc", + .code = "*abc", .arena = arena.allocator(), .diagnostics = null, }; @@ -463,7 +474,7 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); -abc", &.{.{ .invalid_identifier_start = .{ .char = '-' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); *abc", &.{.{ .invalid_identifier_start = .{ .char = '*' } }}); try validateDiagnostics(.{}, "hdoc{h1 \"x\"", &.{.unterminated_block_list}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); try validateDiagnostics( diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 65ffc31..cf91f7b 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0"); +hdoc(version="2.0",); p { In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed From b8f36e2aaa17212b09df92d11a1ea4caf80e1189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 20:40:59 +0100 Subject: [PATCH 046/116] Refactored code to use LanguageTag and TimeZoneOffset instead of ?[]const u8/i32 to increase type safety. 
--- docs/specification.md | 2 +- src/hyperdoc.zig | 228 ++++++++++++++++++++++++------------------ src/render/dump.zig | 30 +++--- src/testsuite.zig | 18 ++-- 4 files changed, 158 insertions(+), 120 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 4e0d0f7..50c255d 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -129,7 +129,7 @@ attribute_list ::= "(" , ws , attribute ::= attr_key , ws , "=" , ws , string_literal ; (* - Attribute keys may include '-' and ':' in addition to node-name characters. + Attribute keys may include '-' in addition to node-name characters. *) attr_key ::= attr_key_char , { attr_key_char } ; diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 12b8cee..f6d4386 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -17,11 +17,11 @@ pub const Document = struct { id_map: std.StringArrayHashMapUnmanaged(usize), // id -> index // header information - lang: ?[]const u8, + lang: LanguageTag = .inherit, // inherit here means "unset" title: ?[]const u8, author: ?[]const u8, date: ?DateTime, - timezone: ?[]const u8, + timezone: ?TimeZoneOffset, pub fn deinit(doc: *Document) void { doc.arena.deinit(); @@ -45,7 +45,7 @@ pub const Block = union(enum) { pub const Heading = struct { level: HeadingLevel, - lang: ?[]const u8, + lang: LanguageTag, content: []Span, }; @@ -53,43 +53,43 @@ pub const Block = union(enum) { pub const Paragraph = struct { kind: ParagraphKind, - lang: ?[]const u8, + lang: LanguageTag, content: []Span, }; pub const ParagraphKind = enum { p, note, warning, danger, tip, quote, spoiler }; pub const List = struct { - lang: ?[]const u8, + lang: LanguageTag, first: ?u32, items: []ListItem, }; pub const ListItem = struct { - lang: ?[]const u8, + lang: LanguageTag, content: []Block, }; pub const Image = struct { - lang: ?[]const u8, + lang: LanguageTag, alt: []const u8, // empty means none path: []const u8, content: []Span, }; pub const Preformatted = struct { - lang: ?[]const u8, + lang: 
LanguageTag, syntax: ?[]const u8, content: []Span, }; pub const TableOfContents = struct { - lang: ?[]const u8, + lang: LanguageTag, depth: ?u8, }; pub const Table = struct { - lang: ?[]const u8, + lang: LanguageTag, rows: []TableRow, }; @@ -100,23 +100,23 @@ pub const Block = union(enum) { }; pub const TableColumns = struct { - lang: ?[]const u8, + lang: LanguageTag, cells: []TableCell, }; pub const TableDataRow = struct { - lang: ?[]const u8, + lang: LanguageTag, title: ?[]const u8, cells: []TableCell, }; pub const TableGroup = struct { - lang: ?[]const u8, + lang: LanguageTag, content: []Span, }; pub const TableCell = struct { - lang: ?[]const u8, + lang: LanguageTag, colspan: u32, content: []Block, }; @@ -138,7 +138,7 @@ pub const Span = struct { }; pub const Attributes = struct { - lang: []const u8 = "", // empty is absence + lang: LanguageTag = .inherit, position: ScriptPosition = .baseline, em: bool = false, mono: bool = false, @@ -158,12 +158,12 @@ pub const Span = struct { return false; // string comparison: - if (!std.mem.eql(u8, lhs.lang, rhs.lang)) - return false; if (!std.mem.eql(u8, lhs.syntax, rhs.syntax)) return false; // complex comparison + if (!lhs.lang.eql(rhs.lang)) + return false; if (!lhs.link.eql(rhs.link)) return false; @@ -227,7 +227,7 @@ pub const DateTime = struct { date: Date, time: Time, - pub fn parse(text: []const u8, default_timezone: ?[]const u8) !DateTime { + pub fn parse(text: []const u8, timezone_hint: ?TimeZoneOffset) !DateTime { const split_index = std.mem.indexOfScalar(u8, text, 'T') orelse return error.InvalidValue; const head = text[0..split_index]; @@ -235,7 +235,7 @@ pub const DateTime = struct { return .{ .date = try Date.parse(head), - .time = try Time.parse(tail, default_timezone), + .time = try Time.parse(tail, timezone_hint), }; } }; @@ -303,9 +303,9 @@ pub const Time = struct { minute: u6, // 0-59 second: u6, // 0-59 microsecond: u20, // 0-999999 - zone_offset: i32, // in minutes + timezone: TimeZoneOffset, - pub fn 
parse(text: []const u8, default_timezone: ?[]const u8) !Time { + pub fn parse(text: []const u8, timezone_hint: ?TimeZoneOffset) !Time { if (text.len < 8) // "HH:MM:SS" return error.InvalidValue; @@ -334,23 +334,59 @@ pub const Time = struct { } const timezone = if (index == text.len) - default_timezone orelse return error.MissingTimezone + timezone_hint orelse return error.MissingTimezone else - text[index..]; + try TimeZoneOffset.parse(text[index..]); + + return .{ + .hour = @intCast(hour), + .minute = @intCast(minute), + .second = @intCast(second), + .microsecond = microsecond, + .timezone = timezone, + }; + } + fn fractionToMicrosecond(len: usize, value: u64) ?u20 { + const micro: u64 = switch (len) { + 1 => value * 100_000, + 2 => value * 10_000, + 3 => value * 1_000, + 6 => value, + 9 => value / 1_000, + else => return null, + }; + if (micro > 999_999) return null; + return @intCast(micro); + } +}; + +/// A time offset to timezones in minutes. +pub const TimeZoneOffset = enum(i32) { + utc = 0, + + _, + + pub fn from_hhmm(hour: i8, minute: u8) error{InvalidValue}!TimeZoneOffset { + const hour_pos = @abs(hour); + const sign = std.math.sign(hour); + + if (hour < -23 and hour > 23) + return error.InvalidValue; + if (minute >= 60) + return error.InvalidValue; + + return @enumFromInt(@as(i32, sign) * (hour_pos * @as(i32, 60) + minute)); + } + + pub fn parse(timezone: []const u8) error{InvalidValue}!TimeZoneOffset { if (timezone.len != 1 and timezone.len != 6) // "Z" or "±HH:MM" return error.InvalidValue; if (timezone.len == 1) { if (timezone[0] != 'Z') return error.InvalidValue; - return .{ - .hour = @intCast(hour), - .minute = @intCast(minute), - .second = @intCast(second), - .microsecond = microsecond, - .zone_offset = 0, - }; + return .utc; } std.debug.assert(timezone.len == 6); @@ -371,26 +407,7 @@ pub const Time = struct { const zone_total: u16 = @as(u16, zone_hour) * 60 + zone_minute; const offset_minutes: i32 = sign * @as(i32, zone_total); - return .{ - .hour 
= @intCast(hour), - .minute = @intCast(minute), - .second = @intCast(second), - .microsecond = microsecond, - .zone_offset = offset_minutes, - }; - } - - fn fractionToMicrosecond(len: usize, value: u64) ?u20 { - const micro: u64 = switch (len) { - 1 => value * 100_000, - 2 => value * 10_000, - 3 => value * 1_000, - 6 => value, - 9 => value / 1_000, - else => return null, - }; - if (micro > 999_999) return null; - return @intCast(micro); + return @enumFromInt(offset_minutes); } }; @@ -412,10 +429,33 @@ pub const Reference = struct { text: []const u8, - pub fn init(text: []const u8) Reference { + pub fn parse(text: []const u8) !Reference { // TODO: Add correctness validation here (non-empty, allowed characters). return .{ .text = text }; } + + pub fn eql(lhs: Reference, rhs: Reference) bool { + return std.mem.eql(u8, lhs.text, rhs.text); + } +}; + +/// A BCP 47 language tag. +pub const LanguageTag = struct { + //! https://datatracker.ietf.org/doc/html/rfc5646 + + /// The empty language tag means that the language is inherited from the parent. + pub const inherit: LanguageTag = .{ .text = "" }; + + text: []const u8, + + pub fn parse(tag_str: []const u8) !LanguageTag { + // TODO: Implement proper BCP 47 tag verification + return .{ .text = tag_str }; + } + + pub fn eql(lhs: LanguageTag, rhs: LanguageTag) bool { + return std.mem.eql(u8, lhs.text, rhs.text); + } }; /// Parses a HyperDoc document. @@ -495,13 +535,18 @@ pub fn parse( // TODO: Validate document-level semantic constraints (ref resolution). + const doc_lang = header.lang orelse blk: { + // TODO: Emit diagnostic warning for missing document language. 
+ break :blk LanguageTag.inherit; + }; + return .{ .arena = arena, .contents = try sema.blocks.toOwnedSlice(arena.allocator()), .content_ids = content_ids, .id_map = id_map, - .lang = header.lang, + .lang = doc_lang, .title = header.title, .version = header.version, .author = header.author, @@ -550,10 +595,10 @@ pub const SemanticAnalyzer = struct { const Header = struct { version: Version, - lang: ?[]const u8, + lang: ?LanguageTag, title: ?[]const u8, author: ?[]const u8, - timezone: ?[]const u8, + timezone: ?TimeZoneOffset, date: ?DateTime, }; @@ -623,8 +668,8 @@ pub const SemanticAnalyzer = struct { title: ?[]const u8 = null, author: ?[]const u8 = null, date: ?DateTime = null, - lang: ?[]const u8 = null, // TODO: Introduce with "LanguageTag" type for all "lang" attributes which performs proper validation - tz: ?[]const u8 = null, + lang: LanguageTag = .inherit, + tz: ?TimeZoneOffset = null, }); if (attrs.version.major != 2) @@ -632,8 +677,6 @@ pub const SemanticAnalyzer = struct { if (attrs.version.minor != 0) return error.UnsupportedVersion; - // TODO: Validate TZ format - return .{ .version = attrs.version, .lang = attrs.lang, @@ -711,7 +754,7 @@ pub const SemanticAnalyzer = struct { fn translate_heading_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Heading, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, }); @@ -731,7 +774,7 @@ pub const SemanticAnalyzer = struct { fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, }); @@ -755,7 +798,7 @@ pub const SemanticAnalyzer = struct { fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: 
?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, first: ?u32 = null, }); @@ -800,7 +843,7 @@ pub const SemanticAnalyzer = struct { fn translate_image_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Image, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, alt: ?[]const u8 = null, path: []const u8, @@ -838,7 +881,7 @@ pub const SemanticAnalyzer = struct { fn translate_preformatted_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Preformatted, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, syntax: ?[]const u8 = null, }); @@ -854,7 +897,7 @@ pub const SemanticAnalyzer = struct { fn translate_toc_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.TableOfContents, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, depth: ?u32 = null, }); @@ -890,7 +933,7 @@ pub const SemanticAnalyzer = struct { fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, id: ?Reference = null, }); @@ -906,7 +949,7 @@ pub const SemanticAnalyzer = struct { switch (child_node.type) { .columns => { const row_attrs = try sema.get_attributes(child_node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); const cells = try sema.translate_table_cells(child_node); @@ -933,7 +976,7 @@ pub const SemanticAnalyzer = struct { }, .row => { const row_attrs = try sema.get_attributes(child_node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, title: ?[]const u8 = null, }); @@ -963,7 +1006,7 @@ pub const SemanticAnalyzer = struct { }, 
.group => { const row_attrs = try sema.get_attributes(child_node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); rows.appendAssumeCapacity(.{ @@ -1027,7 +1070,7 @@ pub const SemanticAnalyzer = struct { } const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, colspan: ?u32 = null, }); @@ -1051,7 +1094,7 @@ pub const SemanticAnalyzer = struct { } const attrs = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); return .{ @@ -1093,7 +1136,7 @@ pub const SemanticAnalyzer = struct { blocks[0] = .{ .paragraph = .{ .kind = .p, - .lang = null, + .lang = .inherit, .content = spans, }, }; @@ -1218,7 +1261,7 @@ pub const SemanticAnalyzer = struct { }; pub const AttribOverrides = struct { - lang: ?[]const u8 = null, + lang: ?LanguageTag = null, em: ?bool = null, mono: ?bool = null, strike: ?bool = null, @@ -1301,7 +1344,7 @@ pub const SemanticAnalyzer = struct { .@"\\em" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1312,7 +1355,7 @@ pub const SemanticAnalyzer = struct { .@"\\strike" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1323,7 +1366,7 @@ pub const SemanticAnalyzer = struct { .@"\\sub" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1334,7 +1377,7 @@ pub const SemanticAnalyzer = struct { .@"\\sup" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = 
.inherit, }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1345,7 +1388,7 @@ pub const SemanticAnalyzer = struct { .@"\\link" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, uri: ?Uri = null, ref: ?Reference = null, }); @@ -1371,7 +1414,7 @@ pub const SemanticAnalyzer = struct { .@"\\mono" => { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, syntax: []const u8 = "", }); try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ @@ -1386,7 +1429,7 @@ pub const SemanticAnalyzer = struct { .@"\\datetime", => blk: { const props = try sema.get_attributes(node, struct { - lang: ?[]const u8 = null, + lang: LanguageTag = .inherit, fmt: []const u8 = "", }); @@ -1468,7 +1511,7 @@ pub const SemanticAnalyzer = struct { ) !Span.Content { const Format: type = DTValue.Format; - const timezone_hint: ?[]const u8 = if (sema.header) |header| header.timezone else null; + const timezone_hint: ?TimeZoneOffset = if (sema.header) |header| header.timezone else null; const value_or_err: error{ InvalidValue, MissingTimezone }!DTValue = switch (DTValue) { Date => Date.parse(value_str), @@ -1692,7 +1735,12 @@ pub const SemanticAnalyzer = struct { return try sema.cast_value(attrib, @typeInfo(T).optional.child); } - const value = try sema.unescape_string(attrib); + const unstripped_value = try sema.unescape_string(attrib); + + const value = std.mem.trim(u8, unstripped_value, whitespace_chars); + if (value.len != unstripped_value.len) { + try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); + } const timezone_hint = if (sema.header) |header| header.timezone @@ -1704,26 +1752,16 @@ pub const SemanticAnalyzer = struct { u32 => std.fmt.parseInt(u32, value, 10) catch return error.InvalidValue, - Reference => { - const stripped = 
std.mem.trim(u8, value, whitespace_chars); - if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); - } - return .init(stripped); - }, + Reference => Reference.parse(value) catch return error.InvalidValue, - Uri => { - const stripped = std.mem.trim(u8, value, whitespace_chars); - if (stripped.len != value.len) { - try sema.emit_diagnostic(.attribute_leading_trailing_whitespace, attrib.location); - } - return .init(stripped); - }, + Uri => Uri.init(value), Version => Version.parse(value) catch return error.InvalidValue, Date => Date.parse(value) catch return error.InvalidValue, Time => Time.parse(value, timezone_hint) catch return error.InvalidValue, DateTime => DateTime.parse(value, timezone_hint) catch return error.InvalidValue, + LanguageTag => LanguageTag.parse(value) catch return error.InvalidValue, + TimeZoneOffset => TimeZoneOffset.parse(value) catch return error.InvalidValue, else => @compileError("Unsupported attribute type: " ++ @typeName(T)), }; diff --git a/src/render/dump.zig b/src/render/dump.zig index bedf742..94e25da 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -142,9 +142,9 @@ fn writeSpanAttributes(writer: *Writer, span: hdoc.Span) Writer.Error!void { try writer.print("link=\"uri:{f}\"", .{std.zig.fmtString(value.text)}); }, } - if (span.attribs.lang.len != 0) { + if (span.attribs.lang.text.len != 0) { try writeAttrSeparator(writer, &first); - try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang)}); + try writer.print("lang=\"{f}\"", .{std.zig.fmtString(span.attribs.lang.text)}); } if (span.attribs.syntax.len != 0) { try writeAttrSeparator(writer, &first); @@ -275,7 +275,7 @@ fn dumpOptionalStringListField(writer: *Writer, indent: usize, key: []const u8, } fn dumpListItem(writer: *Writer, indent: usize, item: hdoc.Block.ListItem) Writer.Error!void { - try dumpOptionalStringFieldInline(writer, "lang", item.lang); + try 
dumpOptionalStringFieldInline(writer, "lang", item.lang.text); try dumpBlockListField(writer, indent + indent_step, "content", item.content); } @@ -294,7 +294,7 @@ fn dumpListItemsField(writer: *Writer, indent: usize, key: []const u8, items: [] } fn dumpTableCell(writer: *Writer, indent: usize, cell: hdoc.Block.TableCell) Writer.Error!void { - try dumpOptionalStringFieldInline(writer, "lang", cell.lang); + try dumpOptionalStringFieldInline(writer, "lang", cell.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "colspan", @as(?u32, cell.colspan)); try dumpBlockListField(writer, indent + indent_step, "content", cell.content); } @@ -314,18 +314,18 @@ fn dumpTableCellsField(writer: *Writer, indent: usize, key: []const u8, cells: [ } fn dumpTableColumns(writer: *Writer, indent: usize, columns: hdoc.Block.TableColumns) Writer.Error!void { - try dumpOptionalStringField(writer, indent, "lang", columns.lang); + try dumpOptionalStringField(writer, indent, "lang", columns.lang.text); try dumpTableCellsField(writer, indent, "cells", columns.cells); } fn dumpTableDataRow(writer: *Writer, indent: usize, row: hdoc.Block.TableDataRow) Writer.Error!void { - try dumpOptionalStringFieldWithIndent(writer, indent, "lang", row.lang); + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", row.lang.text); try dumpOptionalStringField(writer, indent, "title", row.title); try dumpTableCellsField(writer, indent, "cells", row.cells); } fn dumpTableGroup(writer: *Writer, indent: usize, group: hdoc.Block.TableGroup) Writer.Error!void { - try dumpOptionalStringFieldWithIndent(writer, indent, "lang", group.lang); + try dumpOptionalStringFieldWithIndent(writer, indent, "lang", group.lang.text); try dumpSpanListField(writer, indent, "content", group.content); } @@ -365,42 +365,42 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err .heading => |heading| { try writeTypeTag(writer, "heading"); try dumpEnumField(writer, indent + indent_step, 
"level", heading.level); - try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", heading.content); }, .paragraph => |paragraph| { try writeTypeTag(writer, "paragraph"); try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); - try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); }, .list => |list| { try writeTypeTag(writer, "list"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", list.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "first", list.first); try dumpListItemsField(writer, indent + indent_step, "items", list.items); }, .image => |image| { try writeTypeTag(writer, "image"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", image.lang.text); try dumpOptionalStringField(writer, indent + indent_step, "alt", image.alt); try dumpOptionalStringField(writer, indent + indent_step, "path", image.path); try dumpSpanListField(writer, indent + indent_step, "content", image.content); }, .preformatted => |preformatted| { try writeTypeTag(writer, "preformatted"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", preformatted.lang.text); try dumpOptionalStringField(writer, indent + indent_step, "syntax", preformatted.syntax); try dumpSpanListField(writer, indent + indent_step, "content", preformatted.content); }, .toc => |toc| { try writeTypeTag(writer, 
"toc"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); }, .table => |table| { try writeTypeTag(writer, "table"); - try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang); + try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang.text); try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); }, } @@ -419,7 +419,7 @@ fn dumpOptionalDateTimeField(writer: *Writer, indent: usize, key: []const u8, va fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { try writer.writeAll("document:\n"); try dumpVersion(writer, indent_step, doc.version); - try dumpOptionalStringField(writer, indent_step, "lang", doc.lang); + try dumpOptionalStringField(writer, indent_step, "lang", doc.lang.text); try dumpOptionalStringField(writer, indent_step, "title", doc.title); try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); diff --git a/src/testsuite.zig b/src/testsuite.zig index bfa3d17..f1fbabd 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -554,26 +554,26 @@ test "Time.parse accepts ISO times with zones" { try std.testing.expectEqual(@as(u6, 30), utc.minute); try std.testing.expectEqual(@as(u6, 46), utc.second); try std.testing.expectEqual(@as(u20, 0), utc.microsecond); - try std.testing.expectEqual(@as(i32, 0), utc.zone_offset); + try std.testing.expectEqual(.utc, utc.timezone); - const utc_hint = try hdoc.Time.parse("22:30:46", "Z"); + const utc_hint = try hdoc.Time.parse("22:30:46", .utc); try std.testing.expectEqual(@as(u5, 22), utc_hint.hour); try std.testing.expectEqual(@as(u6, 30), utc_hint.minute); try std.testing.expectEqual(@as(u6, 46), utc_hint.second); try std.testing.expectEqual(@as(u20, 0), 
utc_hint.microsecond); - try std.testing.expectEqual(@as(i32, 0), utc_hint.zone_offset); + try std.testing.expectEqual(.utc, utc_hint.timezone); - const fractional = try hdoc.Time.parse("22:30:46.136+01:00", null); + const fractional = try hdoc.Time.parse("22:30:46.136-01:00", null); try std.testing.expectEqual(@as(u20, 136_000), fractional.microsecond); - try std.testing.expectEqual(@as(i32, 60), fractional.zone_offset); + try std.testing.expectEqual(try hdoc.TimeZoneOffset.from_hhmm(-1, 0), fractional.timezone); - const fractional_hint = try hdoc.Time.parse("22:30:46.136", "+01:30"); + const fractional_hint = try hdoc.Time.parse("22:30:46.136", try .parse("+01:30")); try std.testing.expectEqual(@as(u20, 136_000), fractional_hint.microsecond); - try std.testing.expectEqual(@as(i32, 90), fractional_hint.zone_offset); + try std.testing.expectEqual(@as(hdoc.TimeZoneOffset, @enumFromInt(90)), fractional_hint.timezone); const nanos = try hdoc.Time.parse("21:30:46.136797358-05:30", null); try std.testing.expectEqual(@as(u20, 136_797), nanos.microsecond); - try std.testing.expectEqual(@as(i32, -330), nanos.zone_offset); + try std.testing.expectEqual(@as(hdoc.TimeZoneOffset, @enumFromInt(-330)), nanos.timezone); try std.testing.expectError(error.InvalidValue, hdoc.Time.parse("21:30:46,1Z", null)); try std.testing.expectError(error.MissingTimezone, hdoc.Time.parse("22:30:46", null)); @@ -592,7 +592,7 @@ test "DateTime.parse accepts ISO date-time" { try std.testing.expectEqual(@as(u6, 31), datetime.time.minute); try std.testing.expectEqual(@as(u6, 50), datetime.time.second); try std.testing.expectEqual(@as(u20, 130_000), datetime.time.microsecond); - try std.testing.expectEqual(@as(i32, 60), datetime.time.zone_offset); + try std.testing.expectEqual(@as(hdoc.TimeZoneOffset, @enumFromInt(60)), datetime.time.timezone); try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z", null)); } From 075821cbf9665fd16012b076eda79477220e1379 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 21:59:16 +0100 Subject: [PATCH 047/116] Vibecoded: Resolves all diagnostic-related TODOs --- src/hyperdoc.zig | 271 +++++++++++++++++++++++++++++++++++---- src/testsuite.zig | 42 +++--- test/accept/workset.hdoc | 2 +- 3 files changed, 271 insertions(+), 44 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f6d4386..f7d810e 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -513,6 +513,8 @@ pub fn parse( const header = sema.header orelse return error.MalformedDocument; const content_ids = try sema.ids.toOwnedSlice(arena.allocator()); + const id_locations = sema.id_locations.items; + std.debug.assert(id_locations.len == content_ids.len); var id_map: std.StringArrayHashMapUnmanaged(usize) = .empty; errdefer id_map.deinit(arena.allocator()); @@ -521,24 +523,22 @@ pub fn parse( for (content_ids, 0..) |id_or_null, index| { const id = id_or_null orelse continue; + const id_location = id_locations[index] orelse Parser.Location{ .offset = 0, .length = 0 }; const gop = id_map.getOrPutAssumeCapacity(id.text); if (gop.found_existing) { try sema.emit_diagnostic( .{ .duplicate_id = .{ .ref = id.text } }, - .{ .offset = 0, .length = 0 }, // TODO: Figure out proper node location + id_location, ); continue; } gop.value_ptr.* = index; } - // TODO: Validate document-level semantic constraints (ref resolution). + try sema.validate_references(&id_map); - const doc_lang = header.lang orelse blk: { - // TODO: Emit diagnostic warning for missing document language. 
- break :blk LanguageTag.inherit; - }; + const doc_lang = header.lang orelse LanguageTag.inherit; return .{ .arena = arena, @@ -577,16 +577,69 @@ pub fn clean_utf8_input(diagnostics: ?*Diagnostics, raw_plain_text: []const u8) } const source_head = iter.i; + var line: u32 = 1; + var column: u32 = 1; + var saw_invalid = false; + + var prev_was_cr = false; + var prev_cr_location: Diagnostic.Location = undefined; + while (iter.nextCodepointSlice()) |slice| { const codepoint = std.unicode.utf8Decode(slice) catch unreachable; - // TODO: Write codepoint validation which rejects the file if invalid codepoints are detected and - // emits warnings for TAB characters. - // Bare CR is forbidden, just CR LF or LF is allowed. + const location: Diagnostic.Location = .{ .line = line, .column = column }; + + if (prev_was_cr) { + if (codepoint != '\n') { + if (diagnostics) |diag| { + try diag.add(.bare_carriage_return, prev_cr_location); + } + saw_invalid = true; + } + prev_was_cr = false; + if (codepoint == '\n') { + continue; + } + } + + if (codepoint == '\r') { + prev_was_cr = true; + prev_cr_location = location; + line += 1; + column = 1; + continue; + } + + if (codepoint == '\n') { + line += 1; + column = 1; + continue; + } - _ = codepoint; + if (codepoint == '\t') { + if (diagnostics) |diag| { + try diag.add(.tab_character, location); + } + } else if (SemanticAnalyzer.is_illegal_character(codepoint)) { + if (diagnostics) |diag| { + try diag.add(.{ .illegal_character = .{ .codepoint = codepoint } }, location); + } + saw_invalid = true; + } + + column += 1; } + if (prev_was_cr) { + if (diagnostics) |diag| { + try diag.add(.bare_carriage_return, prev_cr_location); + } + saw_invalid = true; + } + + if (saw_invalid) + return error.InvalidUtf8; + return raw_plain_text[source_head..]; } @@ -602,6 +655,11 @@ pub const SemanticAnalyzer = struct { date: ?DateTime, }; + const RefUse = struct { + ref: Reference, + location: Parser.Location, + }; + arena: std.mem.Allocator, diagnostics: 
?*Diagnostics, code: []const u8, @@ -609,6 +667,8 @@ pub const SemanticAnalyzer = struct { header: ?Header = null, blocks: std.ArrayList(Block) = .empty, ids: std.ArrayList(?Reference) = .empty, + id_locations: std.ArrayList(?Parser.Location) = .empty, + pending_refs: std.ArrayList(RefUse) = .empty, fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { switch (node.type) { @@ -654,8 +714,14 @@ pub const SemanticAnalyzer = struct { }, }; + const id_location = if (id != null) + get_attribute_location(node, "id", .value) orelse get_attribute_location(node, "id", .name) orelse node.location + else + null; + try sema.blocks.append(sema.arena, block); try sema.ids.append(sema.arena, id); + try sema.id_locations.append(sema.arena, id_location); }, } } @@ -672,6 +738,11 @@ pub const SemanticAnalyzer = struct { tz: ?TimeZoneOffset = null, }); + const lang_location = get_attribute_location(node, "lang", .name); + if (lang_location == null) { + try sema.emit_diagnostic(.missing_document_language, node.location); + } + if (attrs.version.major != 2) return error.UnsupportedVersion; if (attrs.version.minor != 0) @@ -679,7 +750,7 @@ pub const SemanticAnalyzer = struct { return .{ .version = attrs.version, - .lang = attrs.lang, + .lang = if (lang_location != null) attrs.lang else null, .title = attrs.title, .author = attrs.author, .date = attrs.date, @@ -810,8 +881,10 @@ pub const SemanticAnalyzer = struct { var children: std.ArrayList(Block.ListItem) = .empty; defer children.deinit(sema.arena); + var saw_list_body = false; switch (node.body) { .list => |child_nodes| { + saw_list_body = true; try children.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { const list_item = sema.translate_list_item_node(child_node) catch |err| switch (err) { @@ -830,7 +903,9 @@ pub const SemanticAnalyzer = struct { }, } - // TODO: Validate `children.items.len >= 1` + if (saw_list_body and children.items.len 
== 0) { + try sema.emit_diagnostic(.list_body_required, node.location); + } const list: Block.List = .{ .first = attrs.first orelse if (node.type == .ol) 1 else null, @@ -858,15 +933,13 @@ pub const SemanticAnalyzer = struct { if (path.len == 0) { // The path must be non-empty. - // TODO: Implement better diagnostic message - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "path" } }, get_attribute_location(node, "path", .value).?); + try sema.emit_diagnostic(.{ .empty_attribute = .{ .type = .img, .name = "path" } }, get_attribute_location(node, "path", .value).?); } if (attrs.alt != null and alt.len == 0) { // If alt is present, it must be non-empty, and not fully whitespace. - // TODO: Implement better diagnostic message - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = .img, .name = "alt" } }, get_attribute_location(node, "alt", .value).?); + try sema.emit_diagnostic(.{ .empty_attribute = .{ .type = .img, .name = "alt" } }, get_attribute_location(node, "alt", .value).?); } const image: Block.Image = .{ @@ -1039,8 +1112,10 @@ pub const SemanticAnalyzer = struct { var cells: std.ArrayList(Block.TableCell) = .empty; defer cells.deinit(sema.arena); + var saw_list_body = false; switch (node.body) { .list => |child_nodes| { + saw_list_body = true; try cells.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { const cell = sema.translate_table_cell_node(child_node) catch |err| switch (err) { @@ -1058,7 +1133,9 @@ pub const SemanticAnalyzer = struct { }, } - // TODO: Validate `children.items.len >= 1` + if (saw_list_body and cells.items.len == 0) { + try sema.emit_diagnostic(.list_body_required, node.location); + } return try cells.toOwnedSlice(sema.arena); } @@ -1126,7 +1203,7 @@ pub const SemanticAnalyzer = struct { .empty, .string, .verbatim, .text_span => switch (upgrade) { .no_upgrade => { - try sema.emit_diagnostic(.list_body_required, node.location); // TODO: Use better diagnostic + try 
sema.emit_diagnostic(.{ .block_list_required = .{ .type = node.type } }, node.location); return &.{}; }, .text_to_p => { @@ -1406,6 +1483,13 @@ pub const SemanticAnalyzer = struct { break :blk .none; }; + if (props.ref) |ref| { + if (props.uri == null) { + const ref_location = get_attribute_location(node, "ref", .value) orelse node.location; + try sema.pending_refs.append(sema.arena, .{ .ref = ref, .location = ref_location }); + } + } + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .link = link, @@ -1523,16 +1607,12 @@ pub const SemanticAnalyzer = struct { const value: DTValue = if (value_or_err) |value| value else |err| blk: { - std.log.warn("failed to parse {t}: \"{s}\"", .{ body, value_str }); switch (err) { error.InvalidValue => { try sema.emit_diagnostic(.invalid_date_time, node.location); }, error.MissingTimezone => { - std.log.err("emit missing timezone for {}", .{node.location}); - // TODO: Use (timezone_hint != null) to emit diagnostic for hint with - // adding `tz` attribute when all date/time values share a common base. 
- try sema.emit_diagnostic(.invalid_date_time, node.location); + try sema.emit_diagnostic(.missing_timezone, node.location); }, } break :blk std.mem.zeroes(DTValue); @@ -1543,8 +1623,7 @@ pub const SemanticAnalyzer = struct { else if (std.meta.stringToEnum(Format, format_str)) |format| format else blk: { - // TODO: Report error about invalid format - try sema.emit_diagnostic(.invalid_date_time_fmt, get_attribute_location(node, "fmt", .value) orelse node.location); + try sema.emit_diagnostic(.{ .invalid_date_time_fmt = .{ .fmt = format_str } }, get_attribute_location(node, "fmt", .value) orelse node.location); break :blk .default; }; @@ -1767,6 +1846,14 @@ pub const SemanticAnalyzer = struct { }; } + fn validate_references(sema: *SemanticAnalyzer, id_map: *const std.StringArrayHashMapUnmanaged(usize)) !void { + for (sema.pending_refs.items) |ref_use| { + if (!id_map.contains(ref_use.ref.text)) { + try sema.emit_diagnostic(.{ .unknown_id = .{ .ref = ref_use.ref.text } }, ref_use.location); + } + } + } + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { try diag.add(code, sema.make_location(location.offset)); @@ -1967,7 +2054,6 @@ pub const SemanticAnalyzer = struct { return view.bytes; } - // TODO: Also validate the whole document against this rules. fn is_illegal_character(codepoint: u21) bool { // Surrogate codepoints are illegal, we're only ever using UTF-8 which doesn't need them. 
if (std.unicode.isSurrogateCodepoint(codepoint)) @@ -2694,11 +2780,13 @@ pub const Diagnostic = struct { pub const InvalidIdentifierStart = struct { char: u8 }; pub const DuplicateAttribute = struct { name: []const u8 }; pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; + pub const NodeBodyError = struct { type: Parser.NodeType }; pub const MissingHdocHeader = struct {}; pub const DuplicateHdocHeader = struct {}; pub const InvalidBlockError = struct { name: []const u8 }; pub const InlineUsageError = struct { attribute: InlineAttribute }; pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; + pub const DateTimeFormatError = struct { fmt: []const u8 }; pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; @@ -2716,17 +2804,21 @@ pub const Diagnostic = struct { duplicate_hdoc_header: DuplicateHdocHeader, missing_attribute: NodeAttributeError, invalid_attribute: NodeAttributeError, + empty_attribute: NodeAttributeError, unknown_block_type: InvalidBlockError, invalid_block_type: InvalidBlockError, + block_list_required: NodeBodyError, invalid_inline_combination: InlineCombinationError, link_not_nestable, invalid_link, invalid_date_time, nested_date_time, - invalid_date_time_fmt, + invalid_date_time_fmt: DateTimeFormatError, + missing_timezone, invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, illegal_character: ForbiddenControlCharacter, + bare_carriage_return, illegal_child_item, list_body_required, illegal_id_attribute, @@ -2736,6 +2828,7 @@ pub const Diagnostic = struct { // warnings: document_starts_with_bom, + missing_document_language, unknown_attribute: NodeAttributeError, duplicate_attribute: DuplicateAttribute, empty_verbatim_block, @@ -2745,6 +2838,7 @@ pub const Diagnostic = struct { empty_inline_body, redundant_inline: 
InlineUsageError, attribute_leading_trailing_whitespace, + tab_character, pub fn severity(code: Code) Severity { return switch (code) { @@ -2758,15 +2852,19 @@ pub const Diagnostic = struct { .duplicate_hdoc_header, .invalid_attribute, .missing_attribute, + .empty_attribute, .unknown_block_type, .invalid_block_type, + .block_list_required, .invalid_inline_combination, .link_not_nestable, .invalid_link, .invalid_date_time, .invalid_date_time_fmt, + .missing_timezone, .invalid_string_escape, .illegal_character, + .bare_carriage_return, .invalid_unicode_string_escape, .illegal_child_item, .list_body_required, @@ -2777,6 +2875,7 @@ pub const Diagnostic = struct { .unknown_id, => .@"error", + .missing_document_language, .unknown_attribute, .duplicate_attribute, .empty_verbatim_block, @@ -2786,6 +2885,7 @@ pub const Diagnostic = struct { .empty_inline_body, .redundant_inline, .attribute_leading_trailing_whitespace, + .tab_character, .document_starts_with_bom, => .warning, }; @@ -2817,9 +2917,11 @@ pub const Diagnostic = struct { .missing_attribute => |ctx| try w.print("Missing required attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), .invalid_attribute => |ctx| try w.print("Invalid value for attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), + .empty_attribute => |ctx| try w.print("Attribute '{s}' for node type '{t}' must be non-empty.", .{ ctx.name, ctx.type }), .unknown_attribute => |ctx| try w.print("Unknown attribute '{s}' for node type '{t}'.", .{ ctx.name, ctx.type }), .unknown_block_type => |ctx| try w.print("Unknown block type '{s}'.", .{ctx.name}), .invalid_block_type => |ctx| try w.print("Invalid block type '{s}' in this context.", .{ctx.name}), + .block_list_required => |ctx| try w.print("Node type '{t}' requires a block list body.", .{ctx.type}), .empty_inline_body => try w.writeAll("Inline body is empty."), @@ -2832,7 +2934,9 @@ pub const Diagnostic = struct { .invalid_date_time => try w.writeAll("Invalid date/time value."), - 
.invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' for date/time value."), + .missing_timezone => try w.writeAll("Missing timezone offset; add a 'tz' header attribute or include a timezone in the value."), + + .invalid_date_time_fmt => |ctx| try w.print("Invalid 'fmt' value '{s}' for date/time.", .{ctx.fmt}), .invalid_string_escape => |ctx| if (ctx.codepoint > 0x20 and ctx.codepoint <= 0x7F) try w.print("\\{u} is not a valid escape sequence.", .{ctx.codepoint}) @@ -2842,6 +2946,7 @@ pub const Diagnostic = struct { .invalid_unicode_string_escape => try w.writeAll("Invalid unicode escape sequence"), .illegal_character => |ctx| try w.print("Forbidden control character U+{X:0>4}.", .{ctx.codepoint}), + .bare_carriage_return => try w.writeAll("Bare carriage return (CR) is not allowed; use LF or CRLF."), .list_body_required => try w.writeAll("Node requires list body."), .illegal_child_item => try w.writeAll("Node not allowed here."), @@ -2854,6 +2959,9 @@ pub const Diagnostic = struct { .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), + + .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), + .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), } } }; @@ -2911,6 +3019,113 @@ pub const InlineAttribute = enum { syntax, }; +test "diagnostics for missing language and empty image attributes" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\img(path="", alt=""); + ; + + var doc = try parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_missing_lang = false; + var saw_empty_path = false; + var saw_empty_alt = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_document_language => 
saw_missing_lang = true, + .empty_attribute => |ctx| { + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "path")) { + saw_empty_path = true; + } + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "alt")) { + saw_empty_alt = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_lang); + try std.testing.expect(saw_empty_path); + try std.testing.expect(saw_empty_alt); +} + +test "diagnostics for missing timezone and unknown id" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\p{ \time"12:00:00" \link(ref="missing"){missing} } + ; + + var doc = try parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_missing_timezone = false; + var saw_unknown_id = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_timezone => saw_missing_timezone = true, + .unknown_id => |ctx| { + if (std.mem.eql(u8, ctx.ref, "missing")) { + saw_unknown_id = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_timezone); + try std.testing.expect(saw_unknown_id); +} + +test "diagnostics for tab characters" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\n\tp{ ok }"; + + var doc = try parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_tab = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .tab_character => saw_tab = true, + else => {}, + } + } + + try std.testing.expect(saw_tab); +} + +test "diagnostics for bare carriage return" { + var diagnostics: Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\r"; + + try std.testing.expectError(error.InvalidUtf8, parse(std.testing.allocator, source, &diagnostics)); + + var saw_bare_cr = false; + for (diagnostics.items.items) |item| { + switch 
(item.code) { + .bare_carriage_return => saw_bare_cr = true, + else => {}, + } + } + + try std.testing.expect(saw_bare_cr); +} + test "fuzz parser" { const Impl = struct { fn testOne(impl: @This(), data: []const u8) !void { diff --git a/src/testsuite.zig b/src/testsuite.zig index f1fbabd..cf048f0 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -441,10 +441,22 @@ fn expectParseOk(opts: LogDiagOptions, code: []const u8) !void { var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); defer doc.deinit(); - if (diagnostics.has_error() or diagnostics.has_warning()) { + if (diagnostics.has_error()) { logDiagnostics(&diagnostics, opts); return error.TestExpectedNoDiagnostics; } + + for (diagnostics.items.items) |item| { + if (item.code.severity() != .warning) + continue; + switch (item.code) { + .missing_document_language => {}, + else => { + logDiagnostics(&diagnostics, opts); + return error.TestExpectedNoDiagnostics; + }, + } + } } fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { @@ -467,29 +479,29 @@ fn expectParseNoFail(opts: LogDiagOptions, code: []const u8) !void { } test "parsing valid document yields empty diagnostics" { - try expectParseOk(.{}, "hdoc(version=\"2.0\");"); + try expectParseOk(.{}, "hdoc(version=\"2.0\",lang=\"en\");"); } test "diagnostic codes are emitted for expected samples" { - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = null } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"unterminated", &.{.unterminated_string}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); *abc", &.{.{ .invalid_identifier_start = .{ .char = '*' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1(", &.{.{ .unexpected_eof = .{ .context = "identifier", .expected_char = 
null } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 123", &.{.{ .unexpected_character = .{ .expected = '{', .found = '1' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"unterminated", &.{.unterminated_string}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); *abc", &.{.{ .invalid_identifier_start = .{ .char = '*' } }}); try validateDiagnostics(.{}, "hdoc{h1 \"x\"", &.{.unterminated_block_list}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); p {hello", &.{.unterminated_inline_list}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); p {hello", &.{.unterminated_inline_list}); try validateDiagnostics( .{}, - "hdoc(version=\"2.0\"); h1(lang=\"a\",lang=\"b\");", + "hdoc(version=\"2.0\",lang=\"en\"); h1(lang=\"a\",lang=\"b\");", &.{ .{ .duplicate_attribute = .{ .name = "lang" } }, .empty_inline_body }, ); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n", &.{.empty_verbatim_block}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); pre:\n| trailing \n", &.{.trailing_whitespace}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n", &.{.empty_verbatim_block}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n| line", &.{.verbatim_missing_trailing_newline}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n| trailing \n", &.{.trailing_whitespace}); try validateDiagnostics(.{}, "h1 \"Title\"", &.{.missing_hdoc_header}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); hdoc(version=\"2.0\");", &.{.duplicate_hdoc_header}); - try validateDiagnostics(.{}, 
"hdoc(version=\"2.0\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } test "parser reports unterminated inline lists" { diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index cf91f7b..efb6bf4 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,4 +1,4 @@ -hdoc(version="2.0",); +hdoc(version="2.0", lang="en"); p { In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. 
They listed From 1a61967a3079e2934ecd8f937433f2ac5905bfe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 29 Dec 2025 22:35:03 +0100 Subject: [PATCH 048/116] Moves some tests from hyperdoc.zig to testsuite.zig --- src/hyperdoc.zig | 107 ---------------------------------------------- src/testsuite.zig | 107 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 107 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f7d810e..2bf170c 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -3019,113 +3019,6 @@ pub const InlineAttribute = enum { syntax, }; -test "diagnostics for missing language and empty image attributes" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = - \\hdoc(version="2.0"); - \\img(path="", alt=""); - ; - - var doc = try parse(std.testing.allocator, source, &diagnostics); - defer doc.deinit(); - - var saw_missing_lang = false; - var saw_empty_path = false; - var saw_empty_alt = false; - - for (diagnostics.items.items) |item| { - switch (item.code) { - .missing_document_language => saw_missing_lang = true, - .empty_attribute => |ctx| { - if (ctx.type == .img and std.mem.eql(u8, ctx.name, "path")) { - saw_empty_path = true; - } - if (ctx.type == .img and std.mem.eql(u8, ctx.name, "alt")) { - saw_empty_alt = true; - } - }, - else => {}, - } - } - - try std.testing.expect(saw_missing_lang); - try std.testing.expect(saw_empty_path); - try std.testing.expect(saw_empty_alt); -} - -test "diagnostics for missing timezone and unknown id" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = - \\hdoc(version="2.0"); - \\p{ \time"12:00:00" \link(ref="missing"){missing} } - ; - - var doc = try parse(std.testing.allocator, source, &diagnostics); - defer doc.deinit(); - - var saw_missing_timezone = false; - var saw_unknown_id = false; - - for 
(diagnostics.items.items) |item| { - switch (item.code) { - .missing_timezone => saw_missing_timezone = true, - .unknown_id => |ctx| { - if (std.mem.eql(u8, ctx.ref, "missing")) { - saw_unknown_id = true; - } - }, - else => {}, - } - } - - try std.testing.expect(saw_missing_timezone); - try std.testing.expect(saw_unknown_id); -} - -test "diagnostics for tab characters" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = "hdoc(version=\"2.0\");\n\tp{ ok }"; - - var doc = try parse(std.testing.allocator, source, &diagnostics); - defer doc.deinit(); - - var saw_tab = false; - - for (diagnostics.items.items) |item| { - switch (item.code) { - .tab_character => saw_tab = true, - else => {}, - } - } - - try std.testing.expect(saw_tab); -} - -test "diagnostics for bare carriage return" { - var diagnostics: Diagnostics = .init(std.testing.allocator); - defer diagnostics.deinit(); - - const source = "hdoc(version=\"2.0\");\r"; - - try std.testing.expectError(error.InvalidUtf8, parse(std.testing.allocator, source, &diagnostics)); - - var saw_bare_cr = false; - for (diagnostics.items.items) |item| { - switch (item.code) { - .bare_carriage_return => saw_bare_cr = true, - else => {}, - } - } - - try std.testing.expect(saw_bare_cr); -} - test "fuzz parser" { const Impl = struct { fn testOne(impl: @This(), data: []const u8) !void { diff --git a/src/testsuite.zig b/src/testsuite.zig index cf048f0..aa26072 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -608,3 +608,110 @@ test "DateTime.parse accepts ISO date-time" { try std.testing.expectError(error.InvalidValue, hdoc.DateTime.parse("2025-12-25 22:31:50Z", null)); } + +test "diagnostics for missing language and empty image attributes" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\img(path="", alt=""); + ; + + var doc = try hdoc.parse(std.testing.allocator, 
source, &diagnostics); + defer doc.deinit(); + + var saw_missing_lang = false; + var saw_empty_path = false; + var saw_empty_alt = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_document_language => saw_missing_lang = true, + .empty_attribute => |ctx| { + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "path")) { + saw_empty_path = true; + } + if (ctx.type == .img and std.mem.eql(u8, ctx.name, "alt")) { + saw_empty_alt = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_lang); + try std.testing.expect(saw_empty_path); + try std.testing.expect(saw_empty_alt); +} + +test "diagnostics for missing timezone and unknown id" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0"); + \\p{ \time"12:00:00" \link(ref="missing"){missing} } + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_missing_timezone = false; + var saw_unknown_id = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .missing_timezone => saw_missing_timezone = true, + .unknown_id => |ctx| { + if (std.mem.eql(u8, ctx.ref, "missing")) { + saw_unknown_id = true; + } + }, + else => {}, + } + } + + try std.testing.expect(saw_missing_timezone); + try std.testing.expect(saw_unknown_id); +} + +test "diagnostics for tab characters" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\n\tp{ ok }"; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_tab = false; + + for (diagnostics.items.items) |item| { + switch (item.code) { + .tab_character => saw_tab = true, + else => {}, + } + } + + try std.testing.expect(saw_tab); +} + +test "diagnostics for bare carriage return" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); 
+ defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\");\r"; + + try std.testing.expectError(error.InvalidUtf8, hdoc.parse(std.testing.allocator, source, &diagnostics)); + + var saw_bare_cr = false; + for (diagnostics.items.items) |item| { + switch (item.code) { + .bare_carriage_return => saw_bare_cr = true, + else => {}, + } + } + + try std.testing.expect(saw_bare_cr); +} From 274263709314fe6ecf8603120cf80a23aaabab27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 21:43:01 +0100 Subject: [PATCH 049/116] Improves specification and fixes some minor mismatches in implementation --- docs/specification.md | 58 +++++++++++++++++++++++++++---------------- src/hyperdoc.zig | 22 +++++++++++++--- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 50c255d..3b6420b 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -34,7 +34,7 @@ This section defines the required byte-level encoding and line structure of Hype **Byte Order Mark (BOM):** -- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as U+FEFF at the beginning of the document. +- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as whitespace at the beginning of the document. ### Line endings @@ -466,24 +466,40 @@ Notes: ## Attribute Overview -| Attribute | Required | Allowed Values | Description | -| --------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Yes | `2.0` | Describes the version of this HyperDoc document. | -| `lang` | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. 
| -| `title` | No | *Any* | Sets the title of the document or the table row. | -| `author` | No | *Any* | Sets the author of the document. | -| `date` | No | A date-time value using the format specified below | Sets the authoring date of the document. | -| `id` | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | -| `colspan` | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | No | *See element documentation* | Defines how the date/time value shall be displayed. | -| `tz` | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | +| Attribute | Type | Required | Allowed Values | Description | +| --------- | --------------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| `version` | Version | Yes | `2.0` | Describes the version of this HyperDoc document. | +| `lang` | Language Tag | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. 
| +| `title` | String | No | *Any* | Sets the title of the document or the table row. | +| `author` | String | No | *Any* | Sets the author of the document. | +| `date` | Date | No | A date-time value using the format specified below | Sets the authoring date of the document. | +| `id` | Reference | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | +| `first` | Integer | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | +| `alt` | String | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | +| `path` | String | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | +| `syntax` | String | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | +| `depth` | Integer | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. | +| `colspan` | Integer | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | +| `ref` | Reference | No | Any value present in an `id` attribute. | References any `id` inside this document. | +| `uri` | URI | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | +| `fmt` | Enum | No | *See element documentation* | Defines how the date/time value shall be displayed. | +| `tz` | Timezone Offset | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | + +NOTE: All attribute values allow leading and trailing whitespace, but it's heavily discouraged and should yield a non-fatal diagnostic or hint in implementations. 
+ +## Attribute Types + +| Type | Example | Syntax | Notes | +| ----------------- | ----------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| `Date` | `2025-12-31` | `\d+-{00..12}-{00..31}` | A date value as specified below. | +| `Enum` | `auto` | `\w+` | | +| `Integer` | `10` | `\d+` | Leading zeroes are allowed, but discouraged. | +| `Language Tag` | `de-DE` | *See [RFC 5646](https://datatracker.ietf.org/doc/html/rfc5646)* | | +| `Reference` | `attribute-types` | *No control characters or whitespace* | | +| `String` | `This image shows a cat and a dog.` | *Any Value* | Any textual value. | +| `Timezone Offset` | `+13:30` | `Z\|[+-]{00..23}:{00..59}` | Expresses the UTC timezone with `Z` or a relative offset in hours + minutes | +| `URI` | `www://example.com` | *See [RFC 3987](https://datatracker.ietf.org/doc/html/rfc3987) | The type actually allows IRIs (unicode-enabled URIs), but is called URI to use the core common term. | +| `Version` | `2.0` | `\d+\.\d+` | Has no semantic meaning yet, and is forced to be `2.0`. All other values are reserved for future use. | ## Semantic Structure @@ -589,7 +605,7 @@ These elements wrap a sequence of blocks that will be rendered for this list ite It also allows a string to be used as it's content directly, this will be equivalent to having a nested paragraph with that strings content: -``` +```hdoc ul { li { p { This is a normal item. } } li "This is a normal item." 
@@ -688,7 +704,7 @@ Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to ### Linking: `link` -**Nesting:** Yes +**Nesting:** No | Attribute | Function | | --------- | -------------------------------------------------------------------------------------------------------- | diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2bf170c..a22376c 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -430,7 +430,20 @@ pub const Reference = struct { text: []const u8, pub fn parse(text: []const u8) !Reference { - // TODO: Add correctness validation here (non-empty, allowed characters). + if (text.len == 0) + return error.InvalidValue; + + var view: std.unicode.Utf8View = try .init(text); + var iter = view.iterator(); + while (iter.nextCodepoint()) |codepoint| { + if (SemanticAnalyzer.is_illegal_character(codepoint)) + return error.InvalidValue; + switch (codepoint) { + '\t', '\r', '\n', ' ' => return error.InvalidValue, + else => {}, + } + } + return .{ .text = text }; } @@ -733,7 +746,7 @@ pub const SemanticAnalyzer = struct { version: Version, title: ?[]const u8 = null, author: ?[]const u8 = null, - date: ?DateTime = null, + date: ?Date = null, lang: LanguageTag = .inherit, tz: ?TimeZoneOffset = null, }); @@ -1321,7 +1334,10 @@ pub const SemanticAnalyzer = struct { const raw_string = try merger.current_span.toOwnedSlice(merger.arena); const string = switch (mode) { - .strip => std.mem.trimRight(u8, raw_string, whitespace_chars), + .strip => switch (merger.whitespace) { + .one_space => std.mem.trimRight(u8, raw_string, whitespace_chars), + .keep_space => raw_string, + }, .keep => raw_string, }; From 734c49969500c0b16e4ce07d9494fd87ccce6da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 22:05:30 +0100 Subject: [PATCH 050/116] Updates spec to have hdoc(date=...) be a DateTime value, so the author can include the timestamp. 
--- docs/specification.md | 4 ++-- src/hyperdoc.zig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 3b6420b..a4ecd99 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -472,7 +472,7 @@ Notes: | `lang` | Language Tag | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | | `title` | String | No | *Any* | Sets the title of the document or the table row. | | `author` | String | No | *Any* | Sets the author of the document. | -| `date` | Date | No | A date-time value using the format specified below | Sets the authoring date of the document. | +| `date` | DateTime | No | A date-time value using the format specified below | Sets the authoring date of the document. | | `id` | Reference | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | | `first` | Integer | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | | `alt` | String | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | @@ -491,7 +491,7 @@ NOTE: All attribute values allow leading and trailing whitespace, but it's heavi | Type | Example | Syntax | Notes | | ----------------- | ----------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | -| `Date` | `2025-12-31` | `\d+-{00..12}-{00..31}` | A date value as specified below. | +| `Date` | `2025-12-31T13:37:42` | *See below* | A date value as specified below. | | `Enum` | `auto` | `\w+` | | | `Integer` | `10` | `\d+` | Leading zeroes are allowed, but discouraged. 
| | `Language Tag` | `de-DE` | *See [RFC 5646](https://datatracker.ietf.org/doc/html/rfc5646)* | | diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index a22376c..1f5225f 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -746,7 +746,7 @@ pub const SemanticAnalyzer = struct { version: Version, title: ?[]const u8 = null, author: ?[]const u8 = null, - date: ?Date = null, + date: ?DateTime = null, // TODO: Allow skipping TZ value! lang: LanguageTag = .inherit, tz: ?TimeZoneOffset = null, }); From 949ec597fc03f631d5b48d7aadf7fffaff3c80a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 22:42:15 +0100 Subject: [PATCH 051/116] Prepares HTML5 rendering task. --- src/hyperdoc.zig | 1 + src/main.zig | 4 +- src/render/html5.zig | 101 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/render/html5.zig diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 1f5225f..8afc0fe 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -2,6 +2,7 @@ const std = @import("std"); pub const render = struct { pub const yaml = @import("render/dump.zig").render; + pub const html5 = @import("render/html5.zig").render; }; /// A HyperDoc document. Contains both memory and diff --git a/src/main.zig b/src/main.zig index 5b6cd6f..d7807f3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -70,5 +70,7 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic return error.InvalidFile; } - try hdoc.render.yaml(parsed, output_stream); + // TODO: Make render format selectable via CLI: + // try hdoc.render.yaml(parsed, output_stream); + try hdoc.render.html5(parsed, output_stream); } diff --git a/src/render/html5.zig b/src/render/html5.zig new file mode 100644 index 0000000..f46a3b8 --- /dev/null +++ b/src/render/html5.zig @@ -0,0 +1,101 @@ +//! +//! This file implements a HTML content renderer for HyperDoc. +//! 
+const std = @import("std");
+const hdoc = @import("../hyperdoc.zig");
+
+const Writer = std.Io.Writer;
+const indent_step: usize = 2;
+
+// TODO: Implementation hints:
+// - Use writeStartTag, writeEndTag to construct the document
+// - Use and expand writeEscapedHtml to suit the needs of HyperDoc.
+// - Implement a custom formatter for string attribute values so they have proper escaping applied.
+// - Use semantic HTML. Never use `div` or `span`. If necessary, ask back when you encounter the need for a "custom tag".
+// - For the different paragraph types, use a class="hdoc-${kind}", so for example class="hdoc-warning" to distinguish the special paragraphs from regular

ones. +// - The TOC element must be unrolled manually and should auto-link to the h1,h2,h3 elements. + +/// This function emits the body-only part of a HyperDoc document as +/// valid HTML5. +pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { + _ = doc; + + // TODO: Implement this proper + + try writeStartTag(writer, "p", .regular, .{ + .style = "font-weight: bold", + }); + try writeEscapedHtml(writer, "Hello, World!"); + try writeEndTag(writer, "p"); + try writer.writeAll("\n"); +} + +fn writeEscapedHtml(writer: *Writer, text: []const u8) !void { + var view = std.unicode.Utf8View.init(text) catch @panic("invalid utf-8 passed"); + var iter = view.iterator(); + while (iter.nextCodepointSlice()) |slice| { + const codepoint = std.unicode.utf8Decode(slice) catch unreachable; + switch (codepoint) { + '<' => try writer.writeAll("<"), + '>' => try writer.writeAll(">"), + '&' => try writer.writeAll("&"), + '"' => try writer.writeAll("""), + '\'' => try writer.writeAll("'"), + + 0xA0 => try writer.writeAll(" "), + + // TODO: Fill out other required codes. 
+ + else => try writer.writeAll(slice), + } + } +} + +fn writeStartTag(writer: *Writer, tag: []const u8, style: enum { regular, auto_close }, attribs: anytype) !void { + try writer.print("<{s}", .{tag}); + + const Attribs = @TypeOf(attribs); + inline for (@typeInfo(Attribs).@"struct".fields) |fld| { + const value = @field(attribs, fld.name); + + if (fld.type == bool) { + if (value) { + try writer.print(" {s}", .{fld.name}); + } + } else { + try writer.print(" {s}=", .{fld.name}); + + switch (@typeInfo(fld.type)) { + .int, .comptime_int => try writer.print("\"{}\"", .{value}), + .float, .comptime_float => try writer.print("\"{d}\"", .{value}), + + .pointer => |info| if (info.size == .one) { + const child = @typeInfo(info.child); + + if (child != .array) + @compileError("unsupported pointer type " ++ @typeName(fld.type)); + if (child.array.child != u8) + @compileError("unsupported pointer type " ++ @typeName(fld.type)); + + try writer.print("\"{s}\"", .{value}); // TODO: Implement proper HTML escaping! + }, + + else => switch (fld.type) { + bool => unreachable, + + []u8, []const u8 => try writer.print("\"{s}\"", .{value}), // TODO: Implement proper HTML escaping! 
+ + else => @compileError("unsupported tag type " ++ @typeName(fld.type) ++ ", implement support above."), + }, + } + } + } + switch (style) { + .auto_close => try writer.writeAll("/>"), + .regular => try writer.writeAll(">"), + } +} + +fn writeEndTag(writer: *Writer, tag: []const u8) !void { + try writer.print("", .{tag}); +} From 27b658369d3ebf771f546a6e2ac256883b9d1b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 22:46:13 +0100 Subject: [PATCH 052/116] Adds new spec draft --- docs/specification-proper-draft.md | 538 +++++++++++++++++++++++++++++ 1 file changed, 538 insertions(+) create mode 100644 docs/specification-proper-draft.md diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md new file mode 100644 index 0000000..e331d96 --- /dev/null +++ b/docs/specification-proper-draft.md @@ -0,0 +1,538 @@ +# HyperDoc 2.0 + +**Status:** Cleaned-up draft. + +--- + +## 1. Introduction + +HyperDoc 2.0 ("HyperDoc") is a plain-text markup language for hypertext documents. + +Design goals: + +- Deterministic, unambiguous parsing. +- Convenient authoring in plain text. +- Round-trippable formatting (tooling can rewrite without losing information). + +## 2. Conformance and terminology + +The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are to be interpreted as described in RFC 2119. + +A document can be: + +- **Syntactically valid**: conforms to the grammar and additional syntax rules. +- **Semantically valid**: syntactically valid **and** conforms to semantic rules (elements, attributes, escape decoding, IDs/refs, etc.). + +Unless explicitly stated, rules in chapters 3–5 are **syntax** rules; rules in chapters 6–9 are **semantic** rules. + +## 3. Document encoding (byte- and line-level) + +### 3.1 Character encoding + +- A HyperDoc document **MUST** be encoded as UTF-8. +- A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences. 
+ +**UTF-8 BOM** + +- A UTF-8 BOM (`EF BB BF`) **SHOULD NOT** be used. +- Tooling **MAY** accept a BOM and treat it as whitespace at the beginning of the document. + +### 3.2 Line endings + +- Lines **MUST** be terminated by either: + - `` (U+000A), or + - `` (U+000D U+000A). +- A bare `` **MUST NOT** appear except as part of ``. + +A document **MAY** mix `` and `` line endings, but tooling **SHOULD** normalize to a single convention when rewriting documents. + +The canonical line ending emitted by tooling **SHOULD** be ``. + +### 3.3 Control characters in source text + +- A syntactically valid document **MAY** contain `` (U+0009). +- Other Unicode control characters (General Category `Cc`) **MUST NOT** appear in source text, except: + - U+000A (LF) and + - U+000D (CR) as part of a valid line ending. + +A semantic validator **MAY** reject TABs in source text (see §6.2). + +### 3.4 Unicode text + +Apart from the restrictions above, arbitrary Unicode scalar values are allowed. + +### 3.5 Recommendations for directionality (non-normative) + +HyperDoc does not define special handling for right-to-left scripts or bidirectional layout. + +Authors **SHOULD** keep each paragraph primarily in a single writing system/directionality where practical. Tooling **MAY** warn when paragraphs contain bidi override/formatting characters. + +## 4. Syntactic model + +A HyperDoc document is a sequence of **nodes**. + +Each node has: + +- a **name** (identifier), +- an optional **attribute list** `(key="value", ...)`, +- and a mandatory **body**. + +### 4.1 Bodies + +A body is one of: + +- `;` — empty body +- `"..."` — string literal body +- `:` — verbatim body (one or more `|` lines) +- `{ ... }` — list body + +### 4.2 List bodies and modes + +A list body `{ ... }` is parsed in one of two modes: + +- **Block-list mode**: contains nested nodes. +- **Inline-list mode**: contains an inline token stream of text items and inline nodes. 
+ +The grammar is intentionally ambiguous; a deterministic external rule selects a mode (see §5.2). + +### 4.3 Attributes (syntax) + +- Attribute lists are comma-separated `(key="value", ...)`. +- Trailing commas are allowed. +- Attribute values are **string literals** (see §5.5). +- Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §9.1). + +## 5. Grammar and additional syntax rules + +### 5.1 Grammar (EBNF) + +The grammar below is syntax-only. + +```ebnf +document ::= ws , { node , ws } , EOF ; + +node ::= node_name , ws , [ attribute_list , ws ] , body ; + +body ::= ";" | string_literal | verbatim_body | list_body ; + +list_body ::= "{" , list_content , "}" ; +list_content ::= inline_content | block_content ; + +attribute_list ::= "(" , ws , + [ attribute , { ws , "," , ws , attribute } , [ ws , "," ] ] , + ws , ")" ; +attribute ::= attr_key , ws , "=" , ws , string_literal ; + +block_content ::= ws , { node , ws } ; + +inline_content ::= ws , { inline_item , ws } ; +inline_item ::= word | escape_text | inline_node | inline_group ; +inline_group ::= "{" , inline_content , "}" ; + +escape_text ::= "\\" , ( "\\" | "{" | "}" ) ; +inline_node ::= inline_name , ws , [ attribute_list , ws ] , body ; + +(* Identifiers *) +node_name ::= [ "\\" ] , ident_char , { ident_char } ; +inline_name ::= "\\" , ident_char , { ident_char } ; +attr_key ::= key_seg , { "-" , key_seg } ; + +ident_char ::= "A".."Z" | "a".."z" | "0".."9" | "_" ; +key_seg ::= ident_char , { ident_char } ; + +string_literal ::= '"' , { string_unit } , '"' ; + +(* verbatim_body and ws productions match the source spec. *) +``` + +### 5.2 Deterministic list-mode disambiguation + +Before parsing the contents of any `{ ... }` list body, the parser **MUST** choose exactly one list mode. + +The mode is determined solely from the **node name token**: + +1. If the node name begins with `\`, the parser **MUST** choose **Inline-list mode**. +2. 
Else, if the node name is a recognized built-in with a specified list mode, the parser **MUST** choose that mode. +3. Otherwise (unknown node name), the parser **MUST** choose **Inline-list mode**. + +Built-in elements and their list modes are defined in §8.1. + + +### 5.3 Maximal munch + +When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the longest possible sequence of allowed identifier characters. + +### 5.4 Inline-list brace balancing and backslash dispatch + +In Inline-list mode: + +- Literal braces are structural (`inline_group`) and therefore **must be balanced**. +- If braces cannot be balanced, they **must** be written as escape-text tokens `\\{` and `\\}`. +- A backslash in inline content is interpreted as: + - one of the three escape-text tokens `\\\\`, `\\{`, `\\}`, or + - the start of an inline node otherwise. + +### 5.5 String literals (syntax) + +String literals are delimiter-based and do **not** validate escape *meaning*. + +Syntactically invalid inside `"..."`: + +- raw LF or CR +- a backslash immediately followed by a control character (Unicode `Cc`) — **note:** this includes TAB. + +## 6. Escape processing (semantic) + +### 6.1 Scope + +Escape sequences are recognized only in: + +1. String literals (node bodies of the `"..."` form and attribute values). +2. Inline escape-text tokens emitted by the parser: `\\\\`, `\\{`, `\\}`. + +No other syntax performs escape decoding. + +### 6.2 Control character policy (semantic) + +- A semantic validator **MAY** reject TAB (U+0009) in source text. +- Regardless of whether TAB is accepted in source text, TAB **MUST** be rejected in the **resolved value of any string literal** (quoted node bodies and attribute values). This includes TAB that appears literally between quotes and TAB produced via `\u{...}`. 
+ +Apart from LF/CR line terminators and TAB (U+0009) in source text, a semantically valid document **MUST NOT** contain other Unicode control characters (General Category `Cc`). Resolved string-literal values are restricted by the rules above (TAB is always forbidden there). + +### 6.3 Supported escapes in string literals + +A semantic validator/decoder **MUST** accept exactly: + +| Escape | Decodes to | +| ----------- | --------------------------- | +| `\\\\` | U+005C (`\\`) | +| `\\"` | U+0022 (`"`) | +| `\\n` | U+000A (LF) | +| `\\r` | U+000D (CR) | +| `\\u{H...}` | Unicode scalar value U+H... | + +#### 6.3.1 Unicode escape `\\u{H...}` + +- 1–6 hex digits +- value in `0x0..0x10FFFF` +- not in `0xD800..0xDFFF` (surrogates) +- must not decode to a forbidden control character (§6.2) + +### 6.4 Invalid escapes + +A semantic validator/decoder **MUST** reject a string literal that contains: + +- any other escape (`\\t`, `\\xHH`, `\\0`, etc.) +- an unterminated escape (string ends after `\\`) +- malformed `\\u{...}` (missing braces, empty, non-hex, >6 digits) +- out-of-range or surrogate code points +- forbidden control characters produced by `\\u{...}` + +### 6.5 Inline escape-text tokens + +In inline-list bodies, the parser emits three special text tokens: + +- `\\\\` +- `\\{` +- `\\}` + +During semantic text construction, implementations **MAY** decode these to literal `\\`, `{`, `}`. + +Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens. + +## 7. Semantic document model + +### 7.1 Document structure + +- A semantically valid document **MUST** contain exactly one `hdoc` header node. +- The `hdoc` node **MUST** be the first node in the document. +- The `hdoc` node **MUST NOT** appear anywhere else. +- The `hdoc` node **MUST** have an empty body (`;`). + +### 7.2 Inline text construction and normalization + +Many elements (e.g. 
`p`, headings, and inline elements) produce **inline text** for rendering. Inline text is constructed from one of: + +- a string body (`"..."`), +- a verbatim body (`:`), or +- an inline-list body (`{ ... }` in Inline-list mode). + +Semantic processing **MUST** construct inline text as a sequence of **spans**, where each span has: + +- a Unicode string, and +- an attribute set (e.g. emphasis/monospace/link, language overrides, etc.). + +Processing rules: + +1. **Parse → tree:** Parsing preserves `ws` and yields an inline tree (text items, inline nodes, and inline groups). +2. **Tree → spans:** Convert the inline tree into a sequence of spans. +3. **Span merging:** Adjacent spans with identical attribute sets **MUST** be merged. +4. **Whitespace normalization (non-`pre` only):** For elements other than `pre`, the resulting text (across all spans) **MUST** be normalized so that: + - any run of whitespace is collapsed to a single U+0020 SPACE, and + - leading and trailing whitespace is removed. + +The renderer **MUST** see the post-normalization result. + +**String and verbatim bodies:** When a string body or verbatim body is converted into spans, it is treated as a single text source (no nested inline nodes) and then processed using the same rules above, including whitespace normalization for non-`pre` elements. + +### 7.3 Attribute uniqueness + +- Within a node, attribute keys **MUST** be unique (case-sensitive). + +### 7.4 Attribute validity + +- Attributes **MUST** be allowed on the element they appear on. +- Required attributes **MUST** be present. +- Attributes not defined for an element **MUST** be rejected. + +### 7.5 IDs and references + +- `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). +- `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. +- `\link(ref="...")` **MUST** reference an existing `id`. 
+ +### 7.6 Built-in element recognition + +- Built-in element names are defined in §8. +- Unknown elements are syntactically valid (parseable), but semantically invalid. + +## 8. Elements and attributes + +### 8.1 Built-in elements and list mode + +#### 8.1.1 Inline vs block + +- Any element name starting with `\` is an **inline element**. +- Any element name not starting with `\` is a **block element**. + +#### 8.1.2 List-body mode per built-in element + +When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: + +- **Inline-list mode:** `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\date`, `\time`, `\datetime`, ...). +- **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`. + +- Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. +- Text blocks (`p`, headings, etc.) contain inline text streams. +- `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. 
+ +### 8.2 Element catalog (normative) + +#### 8.2.1 `hdoc` (header) + +- **Role:** document header +- **Body:** `;` (empty) +- **Attributes:** + - `version` (required): must be `"2.0"` + - `lang` (optional) + - `title` (optional) + - `author` (optional) + - `date` (optional): datetime lexical format (§9.2.3) + - `tz` (optional): default timezone for time/datetime values (§9.2) + +#### 8.2.2 Headings: `h1`, `h2`, `h3` + +- **Role:** block heading levels 1–3 +- **Body:** inline text (string body or inline-list body) +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +#### 8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` + +- **Role:** paragraph-like block with semantic hint +- **Body:** inline text (string body or inline-list body) +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +#### 8.2.4 Lists: `ul`, `ol` + +- **Body:** block-list containing `li` (at least one) +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +`ol` additional attribute: + +- `first` (optional Integer ≥ 0; default 1): number of the first list item + +#### 8.2.5 List item: `li` + +- **Body:** either + - a block-list of block elements, or + - a single string body, or + - a verbatim body +- **Attributes:** `lang` (optional) + +#### 8.2.6 Figure: `img` + +- **Body:** inline text caption/description (may be empty) +- **Attributes:** + - `path` (required, non-empty) + - `alt` (optional, non-empty recommended) + - `lang` (optional) + - `id` (optional; top-level only) + +#### 8.2.7 Preformatted: `pre` + +- **Body:** either + - verbatim body (`:`) for literal lines (**recommended**), or + - inline text body (string or inline-list); whitespace is preserved (no trimming/collapse) +- **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) + +#### 8.2.8 Table of contents: `toc` + +- **Body:** `;` (empty) +- **Attributes:** `depth` (optional Integer in {1,2,3}; default 3), 
`lang` (optional), `id` (optional; top-level only) + +#### 8.2.9 Tables: `table` + +- **Body:** block-list containing: + - optional `columns`, then + - zero or more `row` and `group` nodes +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +Table layout rules: + +- `columns` defines header labels and the column count. +- Each `row` defines a data row. +- Each `group` acts as a section heading for subsequent rows. +- After applying `td.colspan`, all `row` and `columns` entries **MUST** resolve to the same effective column count. +- If any `row` has a `title` attribute **or** any `group` is present, renderers **MUST** reserve a leading title column. + - In that case, `columns` **SHOULD** include an empty leading header cell. + +#### 8.2.10 `columns` (table header row) + +- **Body:** block-list containing `td` (at least one) +- **Attributes:** `lang` (optional) + +#### 8.2.11 `row` (table data row) + +- **Body:** block-list containing `td` (at least one) +- **Attributes:** `title` (optional string), `lang` (optional) + +#### 8.2.12 `group` (table row group) + +- **Body:** inline text +- **Attributes:** `lang` (optional) + +#### 8.2.13 `td` (table cell) + +- **Body:** either + - a block-list of block elements, or + - a single string body, or + - a verbatim body +- **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) + +### 8.3 Inline elements + +Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). 
+ +#### 8.3.1 `\\em` + +- **Role:** emphasis +- **Body:** inline text +- **Attributes:** `lang` (optional) + +#### 8.3.2 `\\mono` + +- **Role:** monospaced span +- **Body:** inline text +- **Attributes:** `syntax` (optional), `lang` (optional) + +#### 8.3.3 `\\strike`, `\\sub`, `\\sup` + +- **Role:** strike-through / subscript / superscript +- **Body:** inline text +- **Attributes:** `lang` (optional) + +#### 8.3.4 `\\link` + +- **Role:** hyperlink +- **Body:** inline text +- **Attributes:** + - `ref` or `uri` (**exactly one required**) + - `lang` (optional) + +#### 8.3.5 `\\date`, `\\time`, `\\datetime` + +- **Role:** localized date/time rendering +- **Body:** must be plain text, a single string, or verbatim (no nested inline elements) +- **Attributes:** `fmt` (optional; per element), `lang` (optional) + +## 9. Attribute types and date/time formats + +### 9.1 Common attribute types + +- **Version:** must be `2.0`. +- **Integer:** ASCII decimal digits; leading zeros allowed but discouraged. +- **Reference:** non-empty; must not contain whitespace or control characters. +- **Language tag:** BCP 47 (RFC 5646). +- **Timezone offset:** `Z` or `±HH:MM`. +- **URI/IRI:** per RFC 3987. + +### 9.2 Date / time lexical formats (normative) + +These formats are a conservative intersection of RFC 3339 and ISO 8601. + +#### 9.2.1 Date + +`YYYY-MM-DD` + +- `YYYY`: one or more digits +- `MM`: `01`–`12` +- `DD`: `01`–`31` + +#### 9.2.2 Time + +`hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`. + +- `hh`: `00`–`23` +- `mm`: `00`–`59` +- `ss`: `00`–`59` +- optional fraction: `.` followed by 1,2,3,6, or 9 digits +- zone: + - `Z`, or + - `+hh:mm` / `-hh:mm` (two-digit hour/minute) + +If `hdoc(tz="...")` is present, a time value **MAY** omit the zone. + +#### 9.2.3 Datetime + +`YYYY-MM-DD` `T` `hh:mm:ss` (with optional fraction and required zone, unless `hdoc.tz` is present) + +If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. 
This is permitted specifically for `hdoc(date="...")` and for `\datetime` bodies. + +### 9.3 `fmt` values + + +- `\\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` +- `\\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` +- `\\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` + +Defaults when omitted: + +- `\date(fmt=...)`: default `short` +- `\time(fmt=...)`: default `long` +- `\datetime(fmt=...)`: default `short` + +## 10. Non-normative guidance for tooling + +- Formatters should normalize line endings to LF. +- Provide diagnostics for discouraged patterns (leading/trailing whitespace in attribute values, leading zeros, mixed directionality, etc.). +- For typo recovery, treat unknown nodes as inline-list mode (§5.2). + +--- + +## Appendix A. Example + +```hdoc +hdoc(version="2.0", title="Example", lang="en"); + +h1 "Introduction" + +p { This is my first HyperDoc 2.0 document! } + +pre(syntax="c"): +| #include +| int main(int argc, char *argv[]) { +| printf("Hello, World!"); +| return 0; +| } +``` + From 0891fcf80a55a1d891d411806d7e84c265df58f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Tue, 30 Dec 2025 23:51:04 +0100 Subject: [PATCH 053/116] Implement automatic table of contents generation --- src/hyperdoc.zig | 130 +++++++++++++++++++++++++++++++++++++++++++- src/render/dump.zig | 69 +++++++++++++++++++++-- src/testsuite.zig | 44 +++++++++++++++ 3 files changed, 238 insertions(+), 5 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 8afc0fe..dd2aa16 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -16,6 +16,7 @@ pub const Document = struct { contents: []Block, content_ids: []?Reference, id_map: std.StringArrayHashMapUnmanaged(usize), // id -> index + toc: TableOfContents, // header information lang: LanguageTag = .inherit, // inherit here means "unset" @@ -24,6 +25,12 @@ pub const Document = struct { date: ?DateTime, timezone: ?TimeZoneOffset, + pub const 
TableOfContents = struct { + level: Block.HeadingLevel, + headings: []usize, + children: []TableOfContents, + }; + pub fn deinit(doc: *Document) void { doc.arena.deinit(); doc.* = undefined; @@ -553,12 +560,16 @@ pub fn parse( try sema.validate_references(&id_map); const doc_lang = header.lang orelse LanguageTag.inherit; + const contents = try sema.blocks.toOwnedSlice(arena.allocator()); + const block_locations = try sema.block_locations.toOwnedSlice(arena.allocator()); + const toc = try sema.build_toc(contents, block_locations); return .{ .arena = arena, - .contents = try sema.blocks.toOwnedSlice(arena.allocator()), + .contents = contents, .content_ids = content_ids, .id_map = id_map, + .toc = toc, .lang = doc_lang, .title = header.title, @@ -674,12 +685,27 @@ pub const SemanticAnalyzer = struct { location: Parser.Location, }; + const TocBuilder = struct { + level: Block.HeadingLevel, + headings: std.ArrayList(usize), + children: std.ArrayList(*TocBuilder), + + fn init(level: Block.HeadingLevel) @This() { + return .{ + .level = level, + .headings = .empty, + .children = .empty, + }; + } + }; + arena: std.mem.Allocator, diagnostics: ?*Diagnostics, code: []const u8, header: ?Header = null, blocks: std.ArrayList(Block) = .empty, + block_locations: std.ArrayList(Parser.Location) = .empty, ids: std.ArrayList(?Reference) = .empty, id_locations: std.ArrayList(?Parser.Location) = .empty, pending_refs: std.ArrayList(RefUse) = .empty, @@ -734,6 +760,7 @@ pub const SemanticAnalyzer = struct { null; try sema.blocks.append(sema.arena, block); + try sema.block_locations.append(sema.arena, node.location); try sema.ids.append(sema.arena, id); try sema.id_locations.append(sema.arena, id_location); }, @@ -1871,6 +1898,102 @@ pub const SemanticAnalyzer = struct { } } + fn build_toc(sema: *SemanticAnalyzer, contents: []const Block, block_locations: []const Parser.Location) !Document.TableOfContents { + std.debug.assert(contents.len == block_locations.len); + + var root_builder = 
TocBuilder.init(.h1); + defer root_builder.headings.deinit(sema.arena); + defer root_builder.children.deinit(sema.arena); + + var stack: std.ArrayList(*TocBuilder) = .empty; + defer stack.deinit(sema.arena); + + try stack.append(sema.arena, &root_builder); + + for (contents, 0..) |block, block_index| { + const heading = switch (block) { + .heading => |value| value, + else => continue, + }; + + const target_depth = heading_level_index(heading.level); + + while (stack.items.len > target_depth) { + _ = stack.pop(); + } + + while (stack.items.len < target_depth) { + const parent = stack.items[stack.items.len - 1]; + try sema.append_toc_entry(&stack, parent, block_index, block_locations, .automatic); + } + + const parent = stack.items[stack.items.len - 1]; + try sema.append_toc_entry(&stack, parent, block_index, block_locations, .real); + } + + return sema.materialize_toc(&root_builder); + } + + fn append_toc_entry( + sema: *SemanticAnalyzer, + stack: *std.ArrayList(*TocBuilder), + parent: *TocBuilder, + heading_index: usize, + block_locations: []const Parser.Location, + kind: enum { automatic, real }, + ) !void { + if (kind == .automatic) { + const heading_location = block_locations[heading_index]; + try sema.emit_diagnostic( + .{ .automatic_heading_insertion = .{ .level = parent.level } }, + heading_location, + ); + } + + try parent.headings.append(sema.arena, heading_index); + + const child_level = next_heading_level(parent.level); + if (child_level == parent.level) { + return; + } + + const child = try sema.arena.create(TocBuilder); + child.* = TocBuilder.init(child_level); + + try parent.children.append(sema.arena, child); + try stack.append(sema.arena, child); + } + + fn materialize_toc(sema: *SemanticAnalyzer, builder: *TocBuilder) !Document.TableOfContents { + var node: Document.TableOfContents = .{ + .level = builder.level, + .headings = try builder.headings.toOwnedSlice(sema.arena), + .children = try sema.arena.alloc(Document.TableOfContents, 
builder.children.items.len), + }; + + for (builder.children.items, 0..) |child_builder, index| { + node.children[index] = try sema.materialize_toc(child_builder); + } + + return node; + } + + fn heading_level_index(level: Block.HeadingLevel) usize { + return switch (level) { + .h1 => 1, + .h2 => 2, + .h3 => 3, + }; + } + + fn next_heading_level(level: Block.HeadingLevel) Block.HeadingLevel { + return switch (level) { + .h1 => .h2, + .h2 => .h3, + .h3 => .h3, + }; + } + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { try diag.add(code, sema.make_location(location.offset)); @@ -2808,6 +2931,7 @@ pub const Diagnostic = struct { pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; pub const ReferenceError = struct { ref: []const u8 }; + pub const AutomaticHeading = struct { level: Block.HeadingLevel }; pub const Code = union(enum) { // errors: @@ -2856,6 +2980,7 @@ pub const Diagnostic = struct { redundant_inline: InlineUsageError, attribute_leading_trailing_whitespace, tab_character, + automatic_heading_insertion: AutomaticHeading, pub fn severity(code: Code) Severity { return switch (code) { @@ -2904,6 +3029,7 @@ pub const Diagnostic = struct { .attribute_leading_trailing_whitespace, .tab_character, .document_starts_with_bom, + .automatic_heading_insertion, => .warning, }; } @@ -2979,6 +3105,8 @@ pub const Diagnostic = struct { .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), + + .automatic_heading_insertion => |ctx| try w.print("Inserted automatic {t} to fill heading level gap.", .{ctx.level}), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index 94e25da..1635df9 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -259,6 +259,19 @@ 
fn dumpBlockListField(writer: *Writer, indent: usize, key: []const u8, blocks: [ } } +fn dumpNumberListField(writer: *Writer, indent: usize, key: []const u8, values: []const usize) Writer.Error!void { + try writeIndent(writer, indent); + if (values.len == 0) { + try writer.print("{s}: []\n", .{key}); + return; + } + try writer.print("{s}:\n", .{key}); + for (values) |value| { + try writeIndent(writer, indent + indent_step); + try writer.print("- {}\n", .{value}); + } +} + fn dumpOptionalStringListField(writer: *Writer, indent: usize, key: []const u8, values: []?hdoc.Reference) Writer.Error!void { try writeIndent(writer, indent); if (values.len == 0) { @@ -360,6 +373,32 @@ fn dumpTableRowsField(writer: *Writer, indent: usize, key: []const u8, rows: []c } } +fn dumpTableOfContentsChildren(writer: *Writer, indent: usize, children: []const hdoc.Document.TableOfContents) Writer.Error!void { + try writeIndent(writer, indent); + if (children.len == 0) { + try writer.writeAll("children: []\n"); + return; + } + try writer.writeAll("children:\n"); + for (children) |child| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("-\n"); + try dumpTableOfContentsNode(writer, indent + 2 * indent_step, child); + } +} + +fn dumpTableOfContentsNode(writer: *Writer, indent: usize, toc: hdoc.Document.TableOfContents) Writer.Error!void { + try dumpEnumField(writer, indent, "level", toc.level); + try dumpNumberListField(writer, indent, "headings", toc.headings); + try dumpTableOfContentsChildren(writer, indent, toc.children); +} + +fn dumpTableOfContents(writer: *Writer, indent: usize, toc: hdoc.Document.TableOfContents) Writer.Error!void { + try writeIndent(writer, indent); + try writer.writeAll("toc:\n"); + try dumpTableOfContentsNode(writer, indent + indent_step, toc); +} + fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Error!void { switch (block) { .heading => |heading| { @@ -423,6 +462,7 @@ fn dumpDocument(writer: *Writer, doc: *const 
hdoc.Document) Writer.Error!void { try dumpOptionalStringField(writer, indent_step, "title", doc.title); try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); + try dumpTableOfContents(writer, indent_step, doc.toc); try dumpBlockListField(writer, indent_step, "contents", doc.contents); try dumpOptionalStringListField(writer, indent_step, "ids", doc.content_ids); // TODO: Dump ID map @@ -442,8 +482,10 @@ test "render escapes string values" { .arena = std.heap.ArenaAllocator.init(std.testing.allocator), .version = .{ .major = 1, .minor = 2 }, .contents = &.{}, - .ids = &.{}, - .lang = null, + .content_ids = &.{}, + .id_map = .{}, + .toc = undefined, + .lang = .inherit, .title = title, .author = null, .date = null, @@ -452,6 +494,13 @@ test "render escapes string values" { defer doc.deinit(); const arena_alloc = doc.arena.allocator(); + doc.contents = try arena_alloc.alloc(hdoc.Block, 0); + doc.content_ids = try arena_alloc.alloc(?hdoc.Reference, 0); + doc.toc = .{ + .level = .h1, + .headings = try arena_alloc.alloc(usize, 0), + .children = try arena_alloc.alloc(hdoc.Document.TableOfContents, 0), + }; const spans = try arena_alloc.alloc(hdoc.Span, 1); spans[0] = .{ @@ -463,7 +512,7 @@ test "render escapes string values" { blocks[0] = .{ .heading = .{ .level = .h1, - .lang = null, + .lang = .inherit, .content = spans, }, }; @@ -471,7 +520,19 @@ test "render escapes string values" { const ids = try arena_alloc.alloc(?hdoc.Reference, 1); ids[0] = id_value; - doc.ids = ids; + doc.content_ids = ids; + + const headings = try arena_alloc.alloc(usize, 1); + headings[0] = 0; + + const children = try arena_alloc.alloc(hdoc.Document.TableOfContents, 1); + children[0] = .{ .level = .h2, .headings = &.{}, .children = &.{} }; + + doc.toc = .{ + .level = .h1, + .headings = headings, + .children = children, + }; var buffer = Writer.Allocating.init(std.testing.allocator); defer buffer.deinit(); 
diff --git a/src/testsuite.zig b/src/testsuite.zig index aa26072..dd5ffd1 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -357,6 +357,50 @@ test "parser handles unknown node types" { } } +test "table of contents inserts automatic headings when skipping levels" { + const source = + \\hdoc(version="2.0"); + \\h3{Third} + \\h2{Second} + \\h1{First} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expectEqual(@as(usize, 3), diagnostics.items.items.len); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .missing_document_language)); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[1].code, .{ .automatic_heading_insertion = .{ .level = .h1 } })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); + + const toc = doc.toc; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h1, toc.level); + try std.testing.expectEqualSlices(usize, &.{ 0, 2 }, toc.headings); + try std.testing.expectEqual(@as(usize, 2), toc.children.len); + + const auto_h1 = toc.children[0]; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, auto_h1.level); + try std.testing.expectEqualSlices(usize, &.{ 0, 1 }, auto_h1.headings); + try std.testing.expectEqual(@as(usize, 2), auto_h1.children.len); + + const auto_h2 = auto_h1.children[0]; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, auto_h2.level); + try std.testing.expectEqualSlices(usize, &.{0}, auto_h2.headings); + + const h2_child = auto_h1.children[1]; + try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, h2_child.level); + try std.testing.expectEqual(@as(usize, 0), h2_child.headings.len); + try std.testing.expectEqual(@as(usize, 0), h2_child.children.len); + + const trailing_h1_child = toc.children[1]; + try 
std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, trailing_h1_child.level); + try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.headings.len); + try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.children.len); +} + fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bool { if (std.meta.activeTag(lhs) != std.meta.activeTag(rhs)) return false; From 6bd87f8eb0eacda91e235c28b399a2eaddc5e01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 14:58:25 +0100 Subject: [PATCH 054/116] Adjust golden fixtures and table rendering semantics --- src/render/html5.zig | 793 ++++++++++++++++++++++++++-- test/html5/AGENTS.md | 7 + test/html5/media_and_toc.hdoc | 21 + test/html5/media_and_toc.html | 23 + test/html5/nesting_and_inlines.hdoc | 21 + test/html5/nesting_and_inlines.html | 23 + test/html5/paragraph_styles.hdoc | 17 + test/html5/paragraph_styles.html | 8 + test/html5/tables.hdoc | 28 + test/html5/tables.html | 58 ++ 10 files changed, 951 insertions(+), 48 deletions(-) create mode 100644 test/html5/AGENTS.md create mode 100644 test/html5/media_and_toc.hdoc create mode 100644 test/html5/media_and_toc.html create mode 100644 test/html5/nesting_and_inlines.hdoc create mode 100644 test/html5/nesting_and_inlines.html create mode 100644 test/html5/paragraph_styles.hdoc create mode 100644 test/html5/paragraph_styles.html create mode 100644 test/html5/tables.hdoc create mode 100644 test/html5/tables.html diff --git a/src/render/html5.zig b/src/render/html5.zig index f46a3b8..275453a 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -5,32 +5,511 @@ const std = @import("std"); const hdoc = @import("../hyperdoc.zig"); const Writer = std.Io.Writer; +const RenderError = Writer.Error || error{NoSpaceLeft}; const indent_step: usize = 2; -// TODO: Implementation hints: -// - Use writeStartTag, writeEndTag to construct the document -// - Use and expand writeEscapedHtml to suite the needs of 
HyperDoc. -// - Implement a custom formatter for string attribute values so they have proper escaping applied. -// - Use semantic HTML. Never use `div` or `span`. If necessary, ask back when you encounter the need for a "custom tag". -// - For the different paragraph types, use a class="hdoc-${kind}", so for example class="hdoc-warning" to distinguish the special paragraphs from regular

ones. -// - The TOC element must be unrolled manually and should auto-link to the h1,h2,h3 elements. +pub fn render(doc: hdoc.Document, writer: *Writer) RenderError!void { + var ctx: RenderContext = .{ .doc = &doc, .writer = writer }; -/// This function emits the body-only part of a HyperDoc document as -/// valid HTML5. -pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { - _ = doc; + for (doc.contents, 0..) |block, index| { + try ctx.renderBlock(block, index, 0); + } +} + +const RenderContext = struct { + doc: *const hdoc.Document, + writer: *Writer, + + fn renderBlock(ctx: *RenderContext, block: hdoc.Block, block_index: ?usize, indent: usize) RenderError!void { + switch (block) { + .heading => |heading| try ctx.renderHeading(heading, block_index, indent), + .paragraph => |paragraph| try ctx.renderParagraph(paragraph, block_index, indent), + .list => |list| try ctx.renderList(list, block_index, indent), + .image => |image| try ctx.renderImage(image, block_index, indent), + .preformatted => |preformatted| try ctx.renderPreformatted(preformatted, block_index, indent), + .toc => |toc| try ctx.renderTableOfContents(toc, block_index, indent), + .table => |table| try ctx.renderTable(table, block_index, indent), + } + } + + fn renderBlocks(ctx: *RenderContext, blocks: []const hdoc.Block, indent: usize) RenderError!void { + for (blocks) |block| { + try ctx.renderBlock(block, null, indent); + } + } + + fn renderHeading(ctx: *RenderContext, heading: hdoc.Block.Heading, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(heading.lang); + + var id_buffer: [32]u8 = undefined; + const id_attr = if (block_index) |idx| + ctx.resolveHeadingId(idx, &id_buffer) + else + null; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, headingTag(heading.level), .regular, .{ + .id = id_attr, + .lang = lang_attr, + }); + try ctx.renderSpans(heading.content); + try writeEndTag(ctx.writer, headingTag(heading.level)); 
+ try ctx.writer.writeByte('\n'); + } + + fn renderParagraph(ctx: *RenderContext, paragraph: hdoc.Block.Paragraph, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(paragraph.lang); + const id_attr = ctx.resolveBlockId(block_index); + + var class_buffer: [32]u8 = undefined; + const class_attr: ?[]const u8 = switch (paragraph.kind) { + .p => null, + else => std.fmt.bufPrint(&class_buffer, "hdoc-{s}", .{@tagName(paragraph.kind)}) catch unreachable, + }; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "p", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .class = class_attr, + }); + try ctx.renderSpans(paragraph.content); + try writeEndTag(ctx.writer, "p"); + try ctx.writer.writeByte('\n'); + } + + fn renderList(ctx: *RenderContext, list: hdoc.Block.List, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(list.lang); + const id_attr = ctx.resolveBlockId(block_index); + + const tag = if (list.first != null) + "ol" + else + "ul"; + + try writeIndent(ctx.writer, indent); + if (std.mem.eql(u8, tag, "ol")) { + try writeStartTag(ctx.writer, tag, .regular, .{ + .id = id_attr, + .lang = lang_attr, + .start = list.first, + }); + } else { + try writeStartTag(ctx.writer, tag, .regular, .{ + .id = id_attr, + .lang = lang_attr, + }); + } + try ctx.writer.writeByte('\n'); + + for (list.items) |item| { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "li", .regular, .{ .lang = langAttribute(item.lang) }); + if (item.content.len > 0) { + try ctx.writer.writeByte('\n'); + try ctx.renderBlocks(item.content, indent + 2 * indent_step); + try writeIndent(ctx.writer, indent + indent_step); + } + try writeEndTag(ctx.writer, "li"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, tag); + try ctx.writer.writeByte('\n'); + } + + fn renderImage(ctx: *RenderContext, image: hdoc.Block.Image, 
block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(image.lang); + const id_attr = ctx.resolveBlockId(block_index); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "figure", .regular, .{ .id = id_attr, .lang = lang_attr }); + try ctx.writer.writeByte('\n'); + + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "img", .auto_close, .{ + .src = image.path, + .alt = image.alt, + }); + try ctx.writer.writeByte('\n'); + + if (image.content.len > 0) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "figcaption", .regular, .{}); + try ctx.renderSpans(image.content); + try writeEndTag(ctx.writer, "figcaption"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "figure"); + try ctx.writer.writeByte('\n'); + } + + fn renderPreformatted(ctx: *RenderContext, preformatted: hdoc.Block.Preformatted, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(preformatted.lang); + const id_attr = ctx.resolveBlockId(block_index); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "pre", .regular, .{ .id = id_attr, .lang = lang_attr }); + const class_attr = "hdoc-code"; + if (preformatted.syntax) |syntax| { + try writeStartTag(ctx.writer, "code", .regular, .{ .class = class_attr, .data_syntax = syntax }); + } else { + try writeStartTag(ctx.writer, "code", .regular, .{ .class = class_attr }); + } + try ctx.renderSpans(preformatted.content); + try writeEndTag(ctx.writer, "code"); + try writeEndTag(ctx.writer, "pre"); + try ctx.writer.writeByte('\n'); + } + + fn renderTableOfContents(ctx: *RenderContext, toc_block: hdoc.Block.TableOfContents, block_index: ?usize, indent: usize) RenderError!void { + const depth = toc_block.depth orelse 3; + const lang_attr = langAttribute(toc_block.lang); + const id_attr = ctx.resolveBlockId(block_index); + + if 
(!tocHasEntries(ctx.doc.toc)) { + return; + } + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "nav", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .aria_label = "Table of contents", + }); + try ctx.writer.writeByte('\n'); + + try ctx.renderTocList(ctx.doc.toc, indent + indent_step, depth, 1); + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "nav"); + try ctx.writer.writeByte('\n'); + } + + fn renderTocList(ctx: *RenderContext, node: hdoc.Document.TableOfContents, indent: usize, max_depth: u8, current_depth: u8) RenderError!void { + if (node.headings.len == 0) { + return; + } + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "ol", .regular, .{}); + try ctx.writer.writeByte('\n'); + + for (node.headings, 0..) |heading_index, child_index| { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "li", .regular, .{}); + + const heading_block = ctx.doc.contents[heading_index].heading; + var id_buffer: [32]u8 = undefined; + const target_id = ctx.resolveHeadingId(heading_index, &id_buffer); + + var href_buffer: [64]u8 = undefined; + const href = std.fmt.bufPrint(&href_buffer, "#{s}", .{target_id}) catch unreachable; + + try writeStartTag(ctx.writer, "a", .regular, .{ .href = href }); + try ctx.renderSpans(heading_block.content); + try writeEndTag(ctx.writer, "a"); + + const child_allowed = current_depth < max_depth and + child_index < node.children.len and + tocHasEntries(node.children[child_index]); + if (child_allowed) { + try ctx.writer.writeByte('\n'); + try ctx.renderTocList(node.children[child_index], indent + 2 * indent_step, max_depth, current_depth + 1); + try writeIndent(ctx.writer, indent + indent_step); + } + + try writeEndTag(ctx.writer, "li"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "ol"); + try ctx.writer.writeByte('\n'); + } + + fn renderTable(ctx: *RenderContext, table: 
hdoc.Block.Table, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(table.lang); + const id_attr = ctx.resolveBlockId(block_index); + + const column_count = inferColumnCount(table.rows) orelse 0; + const has_title_column = tableHasTitleColumn(table.rows); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "table", .regular, .{ .id = id_attr, .lang = lang_attr }); + try ctx.writer.writeByte('\n'); + + const header_index = findHeaderIndex(table.rows); + if (header_index) |index| { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "thead", .regular, .{}); + try ctx.writer.writeByte('\n'); + try ctx.renderHeaderRow(table.rows[index].columns, indent + 2 * indent_step, has_title_column); + try writeIndent(ctx.writer, indent + indent_step); + try writeEndTag(ctx.writer, "thead"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "tbody", .regular, .{}); + try ctx.writer.writeByte('\n'); + + for (table.rows, 0..) 
|row, index| { + if (header_index) |head_idx| { + if (index == head_idx) continue; + } + switch (row) { + .columns => |columns| try ctx.renderHeaderRow(columns, indent + 2 * indent_step, has_title_column), + .row => |data_row| try ctx.renderDataRow(data_row, indent + 2 * indent_step, has_title_column), + .group => |group| try ctx.renderGroupRow(group, indent + 2 * indent_step, column_count, has_title_column), + } + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeEndTag(ctx.writer, "tbody"); + try ctx.writer.writeByte('\n'); + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "table"); + try ctx.writer.writeByte('\n'); + } + + fn renderHeaderRow(ctx: *RenderContext, columns: hdoc.Block.TableColumns, indent: usize, has_title_column: bool) RenderError!void { + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(columns.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title_column) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "th", .regular, .{ .scope = "col" }); + try writeEndTag(ctx.writer, "th"); + try ctx.writer.writeByte('\n'); + } + + for (columns.cells) |cell| { + try ctx.renderTableCellWithScope(cell, indent + indent_step, true, "col"); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "tr"); + try ctx.writer.writeByte('\n'); + } + + fn renderDataRow(ctx: *RenderContext, row: hdoc.Block.TableDataRow, indent: usize, has_title_column: bool) RenderError!void { + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(row.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title_column) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "th", .regular, .{ .scope = "row" }); + if (row.title) |title| { + try writeEscapedHtml(ctx.writer, title); + } + try writeEndTag(ctx.writer, "th"); + try 
ctx.writer.writeByte('\n'); + } + + for (row.cells) |cell| { + try ctx.renderTableCell(cell, indent + indent_step, false); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "tr"); + try ctx.writer.writeByte('\n'); + } + + fn renderGroupRow(ctx: *RenderContext, group: hdoc.Block.TableGroup, indent: usize, column_count: usize, has_title_column: bool) RenderError!void { + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(group.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title_column) { + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "td", .regular, .{}); + try writeEndTag(ctx.writer, "td"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeStartTag(ctx.writer, "th", .regular, .{ + .scope = "colgroup", + .colspan = @as(u32, @intCast(@max(@as(usize, 1), column_count))), + }); + try ctx.renderSpans(group.content); + try writeEndTag(ctx.writer, "th"); + try ctx.writer.writeByte('\n'); + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "tr"); + try ctx.writer.writeByte('\n'); + } + + fn renderTableCell(ctx: *RenderContext, cell: hdoc.Block.TableCell, indent: usize, is_header: bool) RenderError!void { + try ctx.renderTableCellWithScope(cell, indent, is_header, null); + } + + fn renderTableCellWithScope(ctx: *RenderContext, cell: hdoc.Block.TableCell, indent: usize, is_header: bool, scope: ?[]const u8) RenderError!void { + const tag = if (is_header) "th" else "td"; + const lang_attr = langAttribute(cell.lang); + const colspan_attr: ?u32 = if (cell.colspan > 1) cell.colspan else null; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, tag, .regular, .{ .lang = lang_attr, .colspan = colspan_attr, .scope = scope }); + if (cell.content.len > 0) { + try ctx.writer.writeByte('\n'); + try ctx.renderBlocks(cell.content, indent + indent_step); + try 
writeIndent(ctx.writer, indent); + } + try writeEndTag(ctx.writer, tag); + try ctx.writer.writeByte('\n'); + } + + fn resolveHeadingId(ctx: *RenderContext, index: usize, buffer: *[32]u8) []const u8 { + if (index < ctx.doc.content_ids.len) { + if (ctx.doc.content_ids[index]) |value| { + return value.text; + } + } + + return std.fmt.bufPrint(buffer, "hdoc-auto-{d}", .{index}) catch unreachable; + } + + fn resolveBlockId(ctx: *RenderContext, block_index: ?usize) ?[]const u8 { + if (block_index) |idx| { + if (idx < ctx.doc.content_ids.len) { + if (ctx.doc.content_ids[idx]) |value| { + return value.text; + } + } + } + return null; + } + + fn renderSpans(ctx: *RenderContext, spans: []const hdoc.Span) RenderError!void { + for (spans) |span| { + try ctx.renderSpan(span); + } + } + + fn renderSpan(ctx: *RenderContext, span: hdoc.Span) RenderError!void { + var pending_lang = langAttribute(span.attribs.lang); + + var opened: [6][]const u8 = undefined; + var opened_len: usize = 0; + + const link_tag = span.attribs.link != .none; + if (link_tag) { + const href_value = switch (span.attribs.link) { + .none => unreachable, + .ref => |reference| blk: { + var href_buffer: [128]u8 = undefined; + break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{reference.text}) catch unreachable; + }, + .uri => |uri| uri.text, + }; + + try writeStartTag(ctx.writer, "a", .regular, .{ .href = href_value, .lang = takeLang(&pending_lang) }); + opened[opened_len] = "a"; + opened_len += 1; + } + + switch (span.attribs.position) { + .baseline => {}, + .subscript => { + try writeStartTag(ctx.writer, "sub", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] = "sub"; + opened_len += 1; + }, + .superscript => { + try writeStartTag(ctx.writer, "sup", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] = "sup"; + opened_len += 1; + }, + } + + if (span.attribs.strike) { + try writeStartTag(ctx.writer, "s", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] 
= "s"; + opened_len += 1; + } + + if (span.attribs.em) { + try writeStartTag(ctx.writer, "em", .regular, .{ .lang = takeLang(&pending_lang) }); + opened[opened_len] = "em"; + opened_len += 1; + } + + if (span.attribs.mono) { + const syntax_attr = if (span.attribs.syntax.len > 0) span.attribs.syntax else null; + try writeStartTag(ctx.writer, "code", .regular, .{ .lang = takeLang(&pending_lang), .class = "hdoc-code", .data_syntax = syntax_attr }); + opened[opened_len] = "code"; + opened_len += 1; + } + + const content_lang = takeLang(&pending_lang); + switch (span.content) { + .text => |text| { + if (content_lang) |lang| { + try writeStartTag(ctx.writer, "bdi", .regular, .{ .lang = lang }); + try writeEscapedHtml(ctx.writer, text); + try writeEndTag(ctx.writer, "bdi"); + } else { + try writeEscapedHtml(ctx.writer, text); + } + }, + .date => |date| try ctx.renderDateTimeValue(.date, date, content_lang), + .time => |time| try ctx.renderDateTimeValue(.time, time, content_lang), + .datetime => |datetime| try ctx.renderDateTimeValue(.datetime, datetime, content_lang), + } + + while (opened_len > 0) { + opened_len -= 1; + try writeEndTag(ctx.writer, opened[opened_len]); + } + } + + fn renderDateTimeValue(ctx: *RenderContext, comptime kind: enum { date, time, datetime }, value: anytype, lang_attr: ?[]const u8) RenderError!void { + var datetime_buffer: [128]u8 = undefined; + const datetime_value = switch (kind) { + .date => try formatIsoDate(value.value, &datetime_buffer), + .time => try formatIsoTime(value.value, &datetime_buffer), + .datetime => try formatIsoDateTime(value.value, &datetime_buffer), + }; - // TODO: Implement this proper + var display_buffer: [128]u8 = undefined; + const display_text = switch (kind) { + .date => try formatDateValue(value, &display_buffer), + .time => try formatTimeValue(value, &display_buffer), + .datetime => try formatDateTimeValue(value, &display_buffer), + }; + + try writeStartTag(ctx.writer, "time", .regular, .{ .datetime = 
datetime_value, .lang = lang_attr }); + try ctx.writer.writeAll(display_text); + try writeEndTag(ctx.writer, "time"); + } +}; - try writeStartTag(writer, "p", .regular, .{ - .style = "font-weight: bold", - }); - try writeEscapedHtml(writer, "Hello, World!"); - try writeEndTag(writer, "p"); - try writer.writeAll("\n"); +fn writeIndent(writer: *Writer, indent: usize) RenderError!void { + var i: usize = 0; + while (i < indent) : (i += 1) { + try writer.writeByte(' '); + } } -fn writeEscapedHtml(writer: *Writer, text: []const u8) !void { +fn writeAttributeName(writer: *Writer, name: []const u8) RenderError!void { + for (name) |char| { + if (char == '_') + try writer.writeByte('-') + else + try writer.writeByte(char); + } +} + +fn writeEscapedHtml(writer: *Writer, text: []const u8) RenderError!void { var view = std.unicode.Utf8View.init(text) catch @panic("invalid utf-8 passed"); var iter = view.iterator(); while (iter.nextCodepointSlice()) |slice| { @@ -44,58 +523,276 @@ fn writeEscapedHtml(writer: *Writer, text: []const u8) !void { 0xA0 => try writer.writeAll(" "), - // TODO: Fill out other required codes. 
- else => try writer.writeAll(slice), } } } -fn writeStartTag(writer: *Writer, tag: []const u8, style: enum { regular, auto_close }, attribs: anytype) !void { +fn writeStartTag(writer: *Writer, tag: []const u8, style: enum { regular, auto_close }, attribs: anytype) RenderError!void { try writer.print("<{s}", .{tag}); const Attribs = @TypeOf(attribs); inline for (@typeInfo(Attribs).@"struct".fields) |fld| { const value = @field(attribs, fld.name); + try writeAttribute(writer, fld.name, value); + } + + switch (style) { + .auto_close => try writer.writeAll("/>"), + .regular => try writer.writeAll(">"), + } +} - if (fld.type == bool) { +fn writeAttribute(writer: *Writer, name: []const u8, value: anytype) RenderError!void { + const T = @TypeOf(value); + switch (@typeInfo(T)) { + .bool => { if (value) { - try writer.print(" {s}", .{fld.name}); + try writer.writeByte(' '); + try writeAttributeName(writer, name); } - } else { - try writer.print(" {s}=", .{fld.name}); + }, + .optional => { + if (value) |inner| { + try writeAttribute(writer, name, inner); + } + }, + .int, .comptime_int => try writeNumericAttribute(writer, name, value), + .float, .comptime_float => try writeFloatAttribute(writer, name, value), + .@"enum" => try writeStringAttribute(writer, name, @tagName(value)), + .pointer => |info| switch (info.size) { + .slice => { + if (info.child != u8) @compileError("unsupported pointer type " ++ @typeName(T)); + try writeStringAttribute(writer, name, value); + }, + .one => { + const child = @typeInfo(info.child); + if (child != .array) @compileError("unsupported pointer type " ++ @typeName(T)); + if (child.array.child != u8) @compileError("unsupported pointer type " ++ @typeName(T)); + const slice: []const u8 = value[0..child.array.len]; + try writeStringAttribute(writer, name, slice); + }, + else => @compileError("unsupported pointer type " ++ @typeName(T)), + }, + .array => |info| { + if (info.child != u8) @compileError("unsupported array type " ++ @typeName(T)); + 
const slice: []const u8 = value[0..]; + try writeStringAttribute(writer, name, slice); + }, + else => switch (T) { + []u8, []const u8 => try writeStringAttribute(writer, name, value), + else => @compileError("unsupported tag type " ++ @typeName(T) ++ ", implement support above."), + }, + } +} - switch (@typeInfo(fld.type)) { - .int, .comptime_int => try writer.print("\"{}\"", .{value}), - .float, .comptime_float => try writer.print("\"{d}\"", .{value}), +fn writeStringAttribute(writer: *Writer, name: []const u8, value: []const u8) RenderError!void { + try writer.writeByte(' '); + try writeAttributeName(writer, name); + try writer.writeByte('='); + try writer.writeByte('"'); + try writeEscapedHtml(writer, value); + try writer.writeByte('"'); +} - .pointer => |info| if (info.size == .one) { - const child = @typeInfo(info.child); +fn writeNumericAttribute(writer: *Writer, name: []const u8, value: anytype) RenderError!void { + try writer.writeByte(' '); + try writeAttributeName(writer, name); + try writer.print("=\"{}\"", .{value}); +} - if (child != .array) - @compileError("unsupported pointer type " ++ @typeName(fld.type)); - if (child.array.child != u8) - @compileError("unsupported pointer type " ++ @typeName(fld.type)); +fn writeFloatAttribute(writer: *Writer, name: []const u8, value: anytype) RenderError!void { + try writer.writeByte(' '); + try writeAttributeName(writer, name); + try writer.print("=\"{d}\"", .{value}); +} - try writer.print("\"{s}\"", .{value}); // TODO: Implement proper HTML escaping! - }, +fn writeEndTag(writer: *Writer, tag: []const u8) RenderError!void { + try writer.print("", .{tag}); +} - else => switch (fld.type) { - bool => unreachable, +fn langAttribute(lang: hdoc.LanguageTag) ?[]const u8 { + if (lang.text.len == 0) + return null; + return lang.text; +} - []u8, []const u8 => try writer.print("\"{s}\"", .{value}), // TODO: Implement proper HTML escaping! 
+fn takeLang(lang: *?[]const u8) ?[]const u8 { + if (lang.*) |value| { + lang.* = null; + return value; + } + return null; +} - else => @compileError("unsupported tag type " ++ @typeName(fld.type) ++ ", implement support above."), - }, - } +fn headingTag(level: hdoc.Block.HeadingLevel) []const u8 { + return switch (level) { + .h1 => "h1", + .h2 => "h2", + .h3 => "h3", + }; +} + +fn tocHasEntries(node: hdoc.Document.TableOfContents) bool { + if (node.headings.len > 0) return true; + for (node.children) |child| { + if (tocHasEntries(child)) return true; + } + return false; +} + +fn inferColumnCount(rows: []const hdoc.Block.TableRow) ?usize { + for (rows) |row| { + switch (row) { + .columns => |columns| { + var width: usize = 0; + for (columns.cells) |cell| { + width += cell.colspan; + } + return width; + }, + .row => |data_row| { + var width: usize = 0; + for (data_row.cells) |cell| { + width += cell.colspan; + } + return width; + }, + .group => {}, } } - switch (style) { - .auto_close => try writer.writeAll("/>"), - .regular => try writer.writeAll(">"), + return null; +} + +fn tableHasTitleColumn(rows: []const hdoc.Block.TableRow) bool { + for (rows) |row| { + switch (row) { + .row => |data_row| if (data_row.title != null) return true, + .group => return true, + .columns => {}, + } } + return false; } -fn writeEndTag(writer: *Writer, tag: []const u8) !void { - try writer.print("", .{tag}); +fn findHeaderIndex(rows: []const hdoc.Block.TableRow) ?usize { + for (rows, 0..) 
|row, index| { + if (row == .columns) return index; + } + return null; +} + +fn formatIsoDate(value: hdoc.Date, buffer: []u8) RenderError![]const u8 { + return std.fmt.bufPrint(buffer, "{d:0>4}-{d:0>2}-{d:0>2}", .{ value.year, value.month, value.day }) catch unreachable; +} + +fn writeTimeZone(writer: anytype, timezone: hdoc.TimeZoneOffset) RenderError!void { + const minutes = @intFromEnum(timezone); + if (minutes == 0) { + try writer.writeByte('Z'); + return; + } + + const sign: u8 = if (minutes < 0) '-' else '+'; + const abs_minutes: u32 = @intCast(@abs(minutes)); + const hour: u32 = abs_minutes / 60; + const minute: u32 = abs_minutes % 60; + + try writer.print("{c}{d:0>2}:{d:0>2}", .{ sign, hour, minute }); +} + +fn formatIsoTime(value: hdoc.Time, buffer: []u8) RenderError![]const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.hour, value.minute, value.second }); + if (value.microsecond > 0) { + try writer.print(".{d:0>6}", .{value.microsecond}); + } + try writeTimeZone(writer, value.timezone); + + return stream.getWritten(); +} + +fn formatIsoDateTime(value: hdoc.DateTime, buffer: []u8) RenderError![]const u8 { + var date_buffer: [32]u8 = undefined; + var time_buffer: [64]u8 = undefined; + + const date_text = try formatIsoDate(value.date, &date_buffer); + const time_text = try formatIsoTime(value.time, &time_buffer); + + return std.fmt.bufPrint(buffer, "{s}T{s}", .{ date_text, time_text }) catch unreachable; +} + +fn formatDateValue(value: hdoc.FormattedDateTime(hdoc.Date), buffer: []u8) RenderError![]const u8 { + return switch (value.format) { + .year => std.fmt.bufPrint(buffer, "{d}", .{value.value.year}) catch unreachable, + .month => std.fmt.bufPrint(buffer, "{d:0>4}-{d:0>2}", .{ value.value.year, value.value.month }) catch unreachable, + .day => std.fmt.bufPrint(buffer, "{d:0>2}", .{value.value.day}) catch unreachable, + .weekday => std.fmt.bufPrint(buffer, 
"{s}", .{weekdayName(value.value)}) catch unreachable, + .short, .long, .relative, .iso => formatIsoDate(value.value, buffer), + }; +} + +fn formatTimeValue(value: hdoc.FormattedDateTime(hdoc.Time), buffer: []u8) RenderError![]const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + switch (value.format) { + .short, .rough => try writer.print("{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute }), + .long, .relative => { + try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute, value.value.second }); + if (value.value.microsecond > 0) { + try writer.print(".{d:0>6}", .{value.value.microsecond}); + } + }, + .iso => try writer.writeAll(try formatIsoTime(value.value, buffer)), + } + + if (value.format != .iso) { + try writer.writeByte(' '); + try writeTimeZone(writer, value.value.timezone); + } + + return stream.getWritten(); +} + +fn formatDateTimeValue(value: hdoc.FormattedDateTime(hdoc.DateTime), buffer: []u8) RenderError![]const u8 { + var date_buffer: [32]u8 = undefined; + var time_buffer: [64]u8 = undefined; + + const date_text = try formatIsoDate(value.value.date, &date_buffer); + + return switch (value.format) { + .short => std.fmt.bufPrint(buffer, "{s} {s}", .{ + date_text, + try formatTimeValue(.{ .format = .short, .value = value.value.time }, &time_buffer), + }) catch unreachable, + .long, .relative => std.fmt.bufPrint(buffer, "{s} {s}", .{ + date_text, + try formatTimeValue(.{ .format = .long, .value = value.value.time }, &time_buffer), + }) catch unreachable, + .iso => formatIsoDateTime(value.value, buffer), + }; +} + +fn weekdayName(date: hdoc.Date) []const u8 { + const y = if (date.month < 3) date.year - 1 else date.year; + const m = if (date.month < 3) date.month + 12 else date.month; + const k: i32 = @mod(y, 100); + const j: i32 = @divTrunc(y, 100); + + const day_component: i32 = @intCast(date.day); + const z: i32 = day_component + @divTrunc(13 * (m + 1), 5) + k + 
@divTrunc(k, 4) + @divTrunc(j, 4) + 5 * j; + const h: i32 = @mod(z, 7); + return switch (h) { + 0 => "Saturday", + 1 => "Sunday", + 2 => "Monday", + 3 => "Tuesday", + 4 => "Wednesday", + 5 => "Thursday", + 6 => "Friday", + else => "", + }; } diff --git a/test/html5/AGENTS.md b/test/html5/AGENTS.md new file mode 100644 index 0000000..b79d9ed --- /dev/null +++ b/test/html5/AGENTS.md @@ -0,0 +1,7 @@ +# AGENTS + +These files are HTML5 renderer golden tests. + +- Each `.hdoc` example here is paired with a `.html` file rendered by `./zig-out/bin/hyperdoc`. +- When changing the HTML5 renderer, update the corresponding `.html` outputs to match the new behavior. +- Keep scenarios focused: each example should target specific constructs (paragraph styles, nesting, tables, media/toc, etc.). diff --git a/test/html5/media_and_toc.hdoc b/test/html5/media_and_toc.hdoc new file mode 100644 index 0000000..a4f0cf4 --- /dev/null +++ b/test/html5/media_and_toc.hdoc @@ -0,0 +1,21 @@ +hdoc(version="2.0", title="Media and TOC", lang="en", tz="+00:00"); + +h1(id="intro") "Media and TOC" + +toc(depth="3"); + +h2(id="code") "Preformatted" + +pre(syntax="python") { print("hello world") } + +h2(id="figure") "Figure" + +img(id="fig-code",path="./example.png",alt="Example figure") { Figure caption text. } + +h2(id="dates") "Dates and Times" + +p { Today is \date(fmt="iso"){2024-03-01}. } + +p { The meeting is at \time(fmt="long"){14:30:45+00:00}. } + +p { Release happens on \datetime(fmt="short"){2024-04-15T08:00:00+00:00}. } diff --git a/test/html5/media_and_toc.html b/test/html5/media_and_toc.html new file mode 100644 index 0000000..563874e --- /dev/null +++ b/test/html5/media_and_toc.html @@ -0,0 +1,23 @@ +

Media and TOC

+ +

Preformatted

+
 print("hello world") 
+

Figure

+
+ Example figure +
Figure caption text.
+
+

Dates and Times

+

Today is .

+

The meeting is at .

+

Release happens on .

diff --git a/test/html5/nesting_and_inlines.hdoc b/test/html5/nesting_and_inlines.hdoc new file mode 100644 index 0000000..f1bd8a2 --- /dev/null +++ b/test/html5/nesting_and_inlines.hdoc @@ -0,0 +1,21 @@ +hdoc(version="2.0", title="Nesting and Inlines", lang="en"); + +h1(id="top") "Nesting and Inline Styling" + +p "This document exercises inline formatting and nested lists." + +p { We can mix \em{emphasis}, \strike{strike}, \mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } + +p { Links point to \link(ref="top"){local anchors} or \link(uri="https://example.com"){external sites}. } + +ul { + li { p "Top-level item one" } + li { + p "Top-level item two with nested list" + ol(first="1") { + li "Nested ordered item A" + li "Nested ordered item B" + } + } + li { p "Top-level item three" } +} diff --git a/test/html5/nesting_and_inlines.html b/test/html5/nesting_and_inlines.html new file mode 100644 index 0000000..5db4d36 --- /dev/null +++ b/test/html5/nesting_and_inlines.html @@ -0,0 +1,23 @@ +

Nesting and Inline Styling

+

This document exercises inline formatting and nested lists.

+

We can mix emphasis, strike, monospacetext. Superscript x2and subscript x2also appear.

+

Links point to local anchorsor external sites.

+
    +
  • +

    Top-level item one

    +
  • +
  • +

    Top-level item two with nested list

    +
      +
    1. +

      Nested ordered item A

      +
    2. +
    3. +

      Nested ordered item B

      +
    4. +
    +
  • +
  • +

    Top-level item three

    +
  • +
diff --git a/test/html5/paragraph_styles.hdoc b/test/html5/paragraph_styles.hdoc new file mode 100644 index 0000000..f5b3a96 --- /dev/null +++ b/test/html5/paragraph_styles.hdoc @@ -0,0 +1,17 @@ +hdoc(version="2.0", title="Paragraph Styles", lang="en"); + +h1 "Paragraph Styles" + +p "A standard paragraph introducing the styles below." + +note "Notes provide informational context without urgency." + +warning "Warnings highlight potential issues to watch for." + +danger "Danger blocks signal critical problems." + +tip "Tips offer helpful hints for readers." + +quote "Quoted material sits in its own paragraph style." + +spoiler "This is a spoiler; renderers may hide or blur this content." diff --git a/test/html5/paragraph_styles.html b/test/html5/paragraph_styles.html new file mode 100644 index 0000000..82e8555 --- /dev/null +++ b/test/html5/paragraph_styles.html @@ -0,0 +1,8 @@ +

Paragraph Styles

+

A standard paragraph introducing the styles below.

+

Notes provide informational context without urgency.

+

Warnings highlight potential issues to watch for.

+

Danger blocks signal critical problems.

+

Tips offer helpful hints for readers.

+

Quoted material sits in its own paragraph style.

+

This is a spoiler; renderers may hide or blur this content.

diff --git a/test/html5/tables.hdoc b/test/html5/tables.hdoc new file mode 100644 index 0000000..22e728f --- /dev/null +++ b/test/html5/tables.hdoc @@ -0,0 +1,28 @@ +hdoc(version="2.0", title="Tables", lang="en"); + +h1 "Table Coverage" + +p "This file covers header rows, data rows with titles, groups, and colspans." + +table { + columns { + td { p "Column A" } + td { p "Column B" } + td { p "Column C" } + } + group { "Section One" } + row(title="Row 1") { + td { p "A1" } + td(colspan="2") { p "B1-C1" } + } + row(title="Row 2") { + td(colspan="2") { p "A2-B2" } + td { p "C2" } + } + group { "Section Two" } + row(title="Row 3") { + td { p "A3" } + td { p "B3" } + td { p "C3" } + } +} diff --git a/test/html5/tables.html b/test/html5/tables.html new file mode 100644 index 0000000..bfce614 --- /dev/null +++ b/test/html5/tables.html @@ -0,0 +1,58 @@ +

Table Coverage

+

This file covers header rows, data rows with titles, groups, and colspans.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Column A

+
+

Column B

+
+

Column C

+
"Section One"
Row 1 +

A1

+
+

B1-C1

+
Row 2 +

A2-B2

+
+

C2

+
"Section Two"
Row 3 +

A3

+
+

B3

+
+

C3

+
From 71315b170451fc61991b996aa29ec8447823d8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 14:25:51 +0100 Subject: [PATCH 055/116] Overhauls docs/specification-proper-draft.md with an additional AGENTS.md and TODO.md to keep an overview over what is still open --- docs/AGENTS.md | 17 +++ docs/TODO.md | 10 ++ docs/specification-proper-draft.md | 200 +++++++++++++++++++++++++---- 3 files changed, 199 insertions(+), 28 deletions(-) create mode 100644 docs/AGENTS.md create mode 100644 docs/TODO.md diff --git a/docs/AGENTS.md b/docs/AGENTS.md new file mode 100644 index 0000000..238ae4e --- /dev/null +++ b/docs/AGENTS.md @@ -0,0 +1,17 @@ +# Specification Editing + +## General + +- `specification.md` is the current "status quo" specification. Do not edit unless explicitly asked. +- `docs/specification-proper-draft.md` is the new "shiny" specification. This is the one you should edit if only asked about the "specification". + - This file contains a chapter `0. Chapter Status`. This chapter marks each other chapter of the file as FROZEN, DONE, DRAFT or MISSING + - If a chapter is marked FROZEN, you are not permitted to change anything in it. + - If a chapter is marked DONE, you are only permitted to perform language changes, but not semantic changes. + - If a chapter is marked DRAFT, you are permitted to change its semantic meaning. + - If a chapter is marked MISSING, the chapter does not yet exist and shall be added eventually. You are permitted to do so. + - A block quote starting with `> TODO:` notes some tasks that shall be done. These lines can be removed if, and only if the task was fully completed. + +## Formatting + +- Do not use any dashes except for `-`. Do NOT use En-Dashes (`–`) or Em-Dashes (`—`). +- Stick to ASCII text as well as possible. If you require symbols from the unicode plane, use them, but inform the user about it. 
diff --git a/docs/TODO.md b/docs/TODO.md new file mode 100644 index 0000000..e55f3c7 --- /dev/null +++ b/docs/TODO.md @@ -0,0 +1,10 @@ +# Specification TODOs + +- Introduction of `\ref` vs. `\link` + - +- Introduction of `\footnote{body}`, `\footnote(id="foo"){body}` and `\footnote(ref="");` + - `id` namespace is separate from toplevel `id` namespace. requires better naming + - Equal to `\footnote{}` introduce a `\cite{}` inline for citations or use \footnote{style="citation") {} +- Assign semantics to node types, paragraph kinds, ... +- Specify "syntax" proper +- Add links to RFCs where possible \ No newline at end of file diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index e331d96..6966c2b 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -2,6 +2,75 @@ **Status:** Cleaned-up draft. +## 0. Chapter Status + +Chapters that are marked FROZEN must not be changed by AI agents. + +FROZEN: No changes allowed. +DONE: Semantics are correct, language might need improvement. +DRAFT: Current semantics are not finalized yet. +MISSING: Chapter needs to be added still. + +- "1. Introduction": DONE +- "2. Conformance and terminology": FROZEN +- "3. Document encoding (byte- and line-level)": DONE +- "4. Syntactic model": DONE +- "5. Grammar and additional syntax rules" + - "5.1 Grammar (EBNF)": DRAFT + - "5.2 Deterministic list-mode disambiguation": DONE + - "5.3 Maximal munch": FROZEN + - "5.4 Inline-list brace balancing and backslash dispatch": DONE + - "5.5 String literals (syntax)": DRAFT +- "6. Escape processing (semantic)": DRAFT + - "6.1 Scope": DRAFT + - "6.2 Control character policy (semantic)": DRAFT + - "6.3 Supported escapes in string literals": DRAFT + - "6.3.1 Unicode escape `\\u{H...}`": DRAFT + - "6.4 Invalid escapes": DRAFT + - "6.5 Inline escape-text tokens": DRAFT +- "7. 
Semantic document model": DRAFT + - "7.1 Document structure": DONE + - "7.2 Inline text construction and normalization": DONE + - "7.3 Attribute uniqueness": DONE + - "7.4 Attribute validity": DONE + - "7.5 IDs and references": DRAFT + - "7.6 Built-in element recognition": DONE +- "8. Elements and attributes" + - "8.1 Built-in elements and list mode" + - "8.1.1 Inline vs block": DONE + - "8.1.2 List-body mode per built-in element": TODO + - "8.2 Element catalog (normative)": DRAFT + - "8.2.1 `hdoc` (header)": DONE + - "8.2.2 Headings: `h1`, `h2`, `h3`": DRAFT + - "8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT + - "8.2.4 Lists: `ul`, `ol`": DRAFT + - "8.2.5 List item: `li`": DRAFT + - "8.2.6 Figure: `img`": DRAFT + - "8.2.7 Preformatted: `pre`": DRAFT + - "8.2.8 Table of contents: `toc`": DRAFT + - "8.2.9 Tables: `table`": DRAFT + - "8.2.10 `columns` (table header row)": DRAFT + - "8.2.11 `row` (table data row)": DRAFT + - "8.2.12 `group` (table row group)": DRAFT + - "8.2.13 `td` (table cell)": DRAFT + - "8.3 Inline elements" + - "8.3.1 `\\em`": DRAFT + - "8.3.2 `\\mono`": DRAFT + - "8.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "8.3.4 `\\link`": DRAFT + - "8.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT +- "9. Attribute types and date/time formats": DRAFT + - "9.1 Common attribute types": DRAFT + - "9.2 Date / time lexical formats (normative)": DRAFT + - "9.2.1 Date": DRAFT + - "9.2.2 Time": DRAFT + - "9.2.3 Datetime": DRAFT + - "9.3 `fmt` values": DRAFT +- "10. Non-normative guidance for tooling": DRAFT +- "Appendix A. Example": DRAFT +- "Appendix B. Element Overview": MISSING +- "Appendix C. Attribute Overview": MISSING + --- ## 1. Introduction @@ -23,7 +92,7 @@ A document can be: - **Syntactically valid**: conforms to the grammar and additional syntax rules. - **Semantically valid**: syntactically valid **and** conforms to semantic rules (elements, attributes, escape decoding, IDs/refs, etc.). 
-Unless explicitly stated, rules in chapters 3–5 are **syntax** rules; rules in chapters 6–9 are **semantic** rules. +Unless explicitly stated, rules in chapters 3-5 are **syntax** rules; rules in chapters 6-9 are **semantic** rules. ## 3. Document encoding (byte- and line-level) @@ -32,7 +101,7 @@ Unless explicitly stated, rules in chapters 3–5 are **syntax** rules; rules in - A HyperDoc document **MUST** be encoded as UTF-8. - A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences. -**UTF-8 BOM** +#### UTF-8 BOM - A UTF-8 BOM (`EF BB BF`) **SHOULD NOT** be used. - Tooling **MAY** accept a BOM and treat it as whitespace at the beginning of the document. @@ -54,6 +123,7 @@ The canonical line ending emitted by tooling **SHOULD** be ``. - Other Unicode control characters (General Category `Cc`) **MUST NOT** appear in source text, except: - U+000A (LF) and - U+000D (CR) as part of a valid line ending. +- Surrogate characters (Plane "unassigned", U+D800…U+DFFF) **MUST NOT** appear in the source text. A conforming parser **MUST** reject them. A semantic validator **MAY** reject TABs in source text (see §6.2). @@ -81,10 +151,10 @@ Each node has: A body is one of: -- `;` — empty body -- `"..."` — string literal body -- `:` — verbatim body (one or more `|` lines) -- `{ ... }` — list body +- `;` - empty body +- `"..."` - string literal body +- `:` - verbatim body (one or more `|` lines) +- `{ ... }` - list body ### 4.2 List bodies and modes @@ -157,7 +227,6 @@ The mode is determined solely from the **node name token**: Built-in elements and their list modes are defined in §8.1. - ### 5.3 Maximal munch When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the longest possible sequence of allowed identifier characters. @@ -167,22 +236,46 @@ When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consum In Inline-list mode: - Literal braces are structural (`inline_group`) and therefore **must be balanced**. 
-- If braces cannot be balanced, they **must** be written as escape-text tokens `\\{` and `\\}`. +- If braces cannot be balanced, they **must** be written as escape-text tokens `\{` and `\}`. - A backslash in inline content is interpreted as: - - one of the three escape-text tokens `\\\\`, `\\{`, `\\}`, or + - one of the three escape-text tokens `\\`, `\{`, `\}`, or - the start of an inline node otherwise. ### 5.5 String literals (syntax) +> TODO: This chapter requires improved wording. String literals are basically parsed by: +> +> ```pseudo +> assert next() == '"' +> while(not eof()): +> char = next() +> if char == '\\': +> _ = next() # skip character +> elif char == '"': +> break # end of string literal +> elif is_control(char): # includes CR, LF, TAB and all other control characters +> abort() # invalid character +> ``` + String literals are delimiter-based and do **not** validate escape *meaning*. Syntactically invalid inside `"..."`: - raw LF or CR -- a backslash immediately followed by a control character (Unicode `Cc`) — **note:** this includes TAB. +- a backslash in the last position of the string (`\"` never terminates the string literal) +- a control character (Unicode `Cc`) - **note:** this includes TAB. ## 6. Escape processing (semantic) +> TODO: This chapter must be split into two chapters: +> +> - "Inline Text Escape Processing" +> - "String Literal Escape Processing" +> +> This includes renumbering all chapters and their references for the markdown spec. +> +> Chapter "6.1 Scope" will be removed then. + ### 6.1 Scope Escape sequences are recognized only in: @@ -194,6 +287,8 @@ No other syntax performs escape decoding. ### 6.2 Control character policy (semantic) +> TODO: The same rules as in §3 are applied, except that `TAB` is also additionally forbidden after escaping. + - A semantic validator **MAY** reject TAB (U+0009) in source text. 
- Regardless of whether TAB is accepted in source text, TAB **MUST** be rejected in the **resolved value of any string literal** (quoted node bodies and attribute values). This includes TAB that appears literally between quotes and TAB produced via `\u{...}`. @@ -213,7 +308,7 @@ A semantic validator/decoder **MUST** accept exactly: #### 6.3.1 Unicode escape `\\u{H...}` -- 1–6 hex digits +- 1-6 hex digits - value in `0x0..0x10FFFF` - not in `0xD800..0xDFFF` (surrogates) - must not decode to a forbidden control character (§6.2) @@ -222,21 +317,25 @@ A semantic validator/decoder **MUST** accept exactly: A semantic validator/decoder **MUST** reject a string literal that contains: -- any other escape (`\\t`, `\\xHH`, `\\0`, etc.) -- an unterminated escape (string ends after `\\`) -- malformed `\\u{...}` (missing braces, empty, non-hex, >6 digits) +- any other escape (`\t`, `\\xHH`, `\0`, etc.) +- an unterminated escape (string ends after `\`) +- malformed `\u{...}` (missing braces, empty, non-hex, >6 digits) - out-of-range or surrogate code points -- forbidden control characters produced by `\\u{...}` +- forbidden control characters produced by `\u{...}` ### 6.5 Inline escape-text tokens +> TODO: Move to chapter "Inline Text Escape Processing" + In inline-list bodies, the parser emits three special text tokens: -- `\\\\` -- `\\{` -- `\\}` +- `\\` +- `\{` +- `\}` + +During semantic text construction, implementations **MUST** decode these to literal `\`, `{`, `}`. -During semantic text construction, implementations **MAY** decode these to literal `\\`, `{`, `}`. +> TODO: The following sentence is unclear. The intent is: "When parsing, tooling should not perform ad-hoc conversion of escape sequences, so the output can be rendered again as-is. The escape sequences must always be display their escaped variant." Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens. 
@@ -287,6 +386,8 @@ The renderer **MUST** see the post-normalization result. ### 7.5 IDs and references +> TODO: References must not contain control characters or whitespace. They can be any sequence of characters that are not spaces or control characters. + - `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). - `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. - `\link(ref="...")` **MUST** reference an existing `id`. @@ -307,6 +408,9 @@ The renderer **MUST** see the post-normalization result. #### 8.1.2 List-body mode per built-in element +> TODO: `li` and `td` have an auto-upgrade rule, which performs a conversion of string/verbatim body to `{ p { } }`. +> This means they auto-upgrade their body from literal to "paragraph with literal content" + When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: - **Inline-list mode:** `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\date`, `\time`, `\datetime`, ...). @@ -318,6 +422,14 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel ### 8.2 Element catalog (normative) +> TODO: "inline text" bodies are: +> +> - inline list body +> - string body +> - verbatim body +> +> So only an empty body is not "inline text" + #### 8.2.1 `hdoc` (header) - **Role:** document header @@ -332,7 +444,7 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel #### 8.2.2 Headings: `h1`, `h2`, `h3` -- **Role:** block heading levels 1–3 +- **Role:** block heading levels 1-3 - **Body:** inline text (string body or inline-list body) - **Attributes:** `lang` (optional), `id` (optional; top-level only) @@ -344,6 +456,8 @@ When a built-in element uses a `{ ... 
}` list body, it is parsed in the mode bel #### 8.2.4 Lists: `ul`, `ol` +> TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" + - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) @@ -353,6 +467,8 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel #### 8.2.5 List item: `li` +> TODO: Include correct body upgrade rules + - **Body:** either - a block-list of block elements, or - a single string body, or @@ -364,12 +480,14 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - **Body:** inline text caption/description (may be empty) - **Attributes:** - `path` (required, non-empty) - - `alt` (optional, non-empty recommended) + - `alt` (optional, non-empty) - `lang` (optional) - `id` (optional; top-level only) #### 8.2.7 Preformatted: `pre` +> TODO: Body is always just "inline text", as verbatim bodies are also always inline text. + - **Body:** either - verbatim body (`:`) for literal lines (**recommended**), or - inline text body (string or inline-list); whitespace is preserved (no trimming/collapse) @@ -389,6 +507,15 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel Table layout rules: +> TODO: `group` is not a "row with implicit title and no cells", but basically +> `group { }` is equivalent to `columns { td(colspan="") { } }`, +> so a regular row with a single cell spanning all columns. +> `group` never implies the existence of the "leading title column" + +> TODO: The `row(title="…")` does never affect the effective column count. +> It implies an additional untitled first column, which is blank in `columns` and `group` rows. +> The `title` row is designed to form matrices with an empty top-left field. + - `columns` defines header labels and the column count. - Each `row` defines a data row. - Each `group` acts as a section heading for subsequent rows. 
@@ -413,6 +540,8 @@ Table layout rules:
 
 #### 8.2.13 `td` (table cell)
 
+> TODO: Include correct body upgrade rules
+
 - **Body:** either
   - a block-list of block elements, or
   - a single string body, or
@@ -457,6 +586,13 @@ Inline elements appear only in inline-list bodies (or inside string/verbatim, de
 
 ## 9. Attribute types and date/time formats
 
+> TODO: Attributes should be documented well and not only be mentioned in the element catalog.
+> This chapter shall document attributes and their types, including detailed descriptions for both.
+
+> TODO: Specify that leading and trailing whitespace is allowed but discouraged.
+> Non-fatal diagnostics **MUST** be emitted for that.
+> Leading and trailing whitespace must be stripped.
+
 ### 9.1 Common attribute types
 
 - **Version:** must be `2.0`.
@@ -475,16 +611,16 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601.
 
 `YYYY-MM-DD`
 
 - `YYYY`: one or more digits
-- `MM`: `01`–`12`
-- `DD`: `01`–`31`
+- `MM`: `01`-`12`
+- `DD`: `01`-`31`
 
 #### 9.2.2 Time
 
 `hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`.
 
-- `hh`: `00`–`23`
-- `mm`: `00`–`59`
-- `ss`: `00`–`59`
+- `hh`: `00`-`23`
+- `mm`: `00`-`59`
+- `ss`: `00`-`59`
 - optional fraction: `.` followed by 1,2,3,6, or 9 digits
 - zone:
   - `Z`, or
@@ -500,6 +636,15 @@ If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is
 
 ### 9.3 `fmt` values
 
+> TODO: `fmt` values need a proper description of what the expected output is.
+> The output is using the `lang` context of the \date, \time, \datetime element and
+> we provide examples in German and English for each `fmt` option. 
+ +> TODO: This chapter shall be split into: +> +> - `fmt` for `\date` +> - `fmt` for `\time` +> - `fmt` for `\datetime` - `\\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` - `\\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` @@ -508,7 +653,7 @@ If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is Defaults when omitted: - `\date(fmt=...)`: default `short` -- `\time(fmt=...)`: default `long` +- `\time(fmt=...)`: default `short` - `\datetime(fmt=...)`: default `short` ## 10. Non-normative guidance for tooling @@ -535,4 +680,3 @@ pre(syntax="c"): | return 0; | } ``` - From d8e7388797a52191ce6314f8b7cc223b63811b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 14:37:48 +0100 Subject: [PATCH 056/116] Adds two new TODOs to the Table type --- src/hyperdoc.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index dd2aa16..2dd2ca9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -97,6 +97,8 @@ pub const Block = union(enum) { }; pub const Table = struct { + // TODO: column_count: usize, + // TODO: has_row_titles: bool, // not counted inside `Table.column_count`! 
lang: LanguageTag, rows: []TableRow, }; From 92eef4affb9727b1ec9b9e371acb0440fef66cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 15:36:21 +0100 Subject: [PATCH 057/116] Implements basic CLI parser --- src/main.zig | 74 ++++++++++++++++++++++++++++++++-------- test/accept/workset.hdoc | 16 +-------- 2 files changed, 60 insertions(+), 30 deletions(-) diff --git a/src/main.zig b/src/main.zig index d7807f3..693a2f1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -22,13 +22,7 @@ pub fn main() !u8 { const args = try std.process.argsAlloc(allocator); defer std.process.argsFree(allocator, args); - if (args.len < 2) { - try stderr.interface.print("usage: {s} \n", .{args[0]}); - try stderr.interface.flush(); - return 1; - } - - const path = args[1]; + const options = try parse_options(&stderr.interface, args); var diagnostics: hdoc.Diagnostics = .init(allocator); defer diagnostics.deinit(); @@ -37,12 +31,12 @@ pub fn main() !u8 { allocator, &diagnostics, &stdout.interface, - path, + options, ); for (diagnostics.items.items) |diag| { try stderr.interface.print("{s}:{f}: {f}\n", .{ - path, + options.file_path, diag.location, diag.code, }); @@ -50,7 +44,7 @@ pub fn main() !u8 { try stderr.interface.flush(); parse_result catch |err| { - std.log.err("failed to parse \"{s}\": {t}", .{ path, err }); + std.log.err("failed to parse \"{s}\": {t}", .{ options.file_path, err }); return 1; }; @@ -59,8 +53,8 @@ pub fn main() !u8 { return 0; } -fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostics, output_stream: *std.Io.Writer, path: []const u8) !void { - const document = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024 * 10); +fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostics, output_stream: *std.Io.Writer, options: CliOptions) !void { + const document = try std.fs.cwd().readFileAlloc(allocator, options.file_path, 1024 * 1024 * 10); defer 
allocator.free(document); var parsed = try hdoc.parse(allocator, document, diagnostics); @@ -70,7 +64,57 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic return error.InvalidFile; } - // TODO: Make render format selectable via CLI: - // try hdoc.render.yaml(parsed, output_stream); - try hdoc.render.html5(parsed, output_stream); + switch (options.format) { + .dump => try hdoc.render.yaml(parsed, output_stream), + .html => try hdoc.render.html5(parsed, output_stream), + } +} + +const CliOptions = struct { + format: RenderFormat = .html, + file_path: []const u8, +}; + +const RenderFormat = enum { + dump, + html, +}; + +fn parse_options(stderr: *std.Io.Writer, argv: []const []const u8) !CliOptions { + var options: CliOptions = .{ + .file_path = "", + }; + + const app_name = argv[0]; + + { + var i: usize = 1; + while (i < argv.len) { + const value = argv[i]; + if (std.mem.startsWith(u8, value, "--")) { + if (std.mem.eql(u8, value, "--format")) { + i += 1; + options.format = std.meta.stringToEnum(RenderFormat, argv[i]) orelse return error.InvalidCli; + i += 1; + continue; + } + return error.InvalidCli; + } + + if (options.file_path.len > 0) { + return error.InvalidCli; + } + options.file_path = value; + + i += 1; + } + } + + if (options.file_path.len == 0) { + try stderr.print("usage: {s} \n", .{app_name}); + try stderr.flush(); + return error.InvalidCli; + } + + return options; } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index efb6bf4..77cabaf 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,18 +1,4 @@ hdoc(version="2.0", lang="en"); -p { - In the meeting notes (version 3.2.1), someone wrote: The "simple" migration is no longer simple. They listed - steps like: export → transform → validate → import, then added a parenthetical aside (which itself contained - parentheses): "Use the staging key (not the production key (seriously))". 
A different person pasted a pseudo-path, - /var/tmp/builds/\date(fmt="iso"){2025-12-23}/, and then warned, "If you see \mono{NULL} in the output, don't 'fix' it by replacing it with - '0'—that's how we broke reporting last time." -} +p { We can mix \em{emphasis}, \strike{strike}, \mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } -pre(syntax="zig") { - pub fn FormattedDateTime(comptime DT: type) type { - return struct { - value: DT, - format: DT.Format = .default, - }; - } -} From 988520edcb8b8f253cd5e18d657fdd018d13580c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 15:59:11 +0100 Subject: [PATCH 058/116] Fixes bug in span merger that would prevent leading whitespace of elements be trimmed even if text was already emitted. --- src/hyperdoc.zig | 4 ++-- src/testsuite.zig | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 2dd2ca9..ee7ef20 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1344,8 +1344,8 @@ pub const SemanticAnalyzer = struct { else .{ text_content, false }; - // check if we already have text, and if not, if we should keep the whitespace - if (merger.current_span.items.len > 0 or !skip_head) { + // check if we already have any text collected, and if not, if we should keep the whitespace + if (merger.output.items.len > 0 or merger.current_span.items.len > 0 or !skip_head) { try merger.current_span.appendSlice(merger.arena, append_text); } }, diff --git a/src/testsuite.zig b/src/testsuite.zig index dd5ffd1..a4133fb 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -189,6 +189,41 @@ test "semantic analyzer forbids raw control characters" { try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .{ .illegal_character = .{ .codepoint = 0x9 } })); } +test "span merger preserves whitespace after inline mono" { + var diagnostics: hdoc.Diagnostics = 
.init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\p{ \mono{monospace} text. } + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + switch (doc.contents[0]) { + .paragraph => |para| { + try std.testing.expectEqual(@as(usize, 2), para.content.len); + try std.testing.expect(para.content[0].attribs.mono); + try std.testing.expect(!para.content[1].attribs.mono); + + switch (para.content[0].content) { + .text => |text| try std.testing.expectEqualStrings("monospace", text), + else => return error.TestExpectedEqual, + } + + switch (para.content[1].content) { + .text => |text| try std.testing.expectEqualStrings(" text.", text), + else => return error.TestExpectedEqual, + } + }, + else => return error.TestExpectedEqual, + } +} + test "parser reports unterminated string literals" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); From 8a6e302ca8f9251b1707be67670e2968faabc533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 16:20:00 +0100 Subject: [PATCH 059/116] Updates spec to include \footnote, footnotes{}, \ref and updated \link. --- docs/TODO.md | 3 +- docs/specification-proper-draft.md | 153 +++++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 10 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index e55f3c7..3bfde40 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -7,4 +7,5 @@ - Equal to `\footnote{}` introduce a `\cite{}` inline for citations or use \footnote{style="citation") {} - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper -- Add links to RFCs where possible \ No newline at end of file +- Add links to RFCs where possible +- Document `lang` inheritance. 
No `lang` attribute means that parent language is used. diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 6966c2b..2d94b41 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -348,6 +348,26 @@ Tooling that aims to preserve author intent **SHOULD** preserve whether braces w - The `hdoc` node **MUST NOT** appear anywhere else. - The `hdoc` node **MUST** have an empty body (`;`). +#### Document title + +- A document **MAY** contain one `title` node (document-level title). +- If present, `title` **MUST** be the second node in the document (i.e., the first node after `hdoc`). +- `title` **MUST** be a top-level block element (direct child of the document). +- `title` **MUST NOT** have an `id` attribute. + +`hdoc(title="...")` and `title { ... }` interact as follows: + +- If exactly one of `hdoc(title="...")` or `title { ... }` is present, implementations **SHOULD** treat the single value as both: + - the document metadata title, and + - the document display title. + If the single value is `title { ... }`, tooling **SHOULD** derive a plaintext title (via inline-text construction) for use as metadata where needed. + +- If both are present, tooling **SHOULD** compare their plaintext forms: + - If they match, tooling **SHOULD** emit a diagnostic hint that `hdoc(title)` is redundant. + +- If neither is present, tooling **MAY** emit a diagnostic hint that the document has no title. + + ### 7.2 Inline text construction and normalization Many elements (e.g. `p`, headings, and inline elements) produce **inline text** for rendering. Inline text is constructed from one of: @@ -390,7 +410,20 @@ The renderer **MUST** see the post-normalization result. - `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). - `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. 
-- `\link(ref="...")` **MUST** reference an existing `id`. + +#### Interior references (`ref`) + +- A `ref` attribute value **MUST** be a valid Reference value (§9.1). +- `\ref(ref="...")` **MUST** reference an existing top-level `id`. + +#### Footnote references (`key` / `ref`) + +Footnotes define a separate reference namespace from top-level `id`: + +- `\footnote(key="..."){...}` defines a footnote key in the **footnote namespace**. +- Footnote keys **MUST** be unique (case-sensitive) within the footnote namespace. +- `\footnote(ref="...");` **MUST** reference an existing footnote key. + ### 7.6 Built-in element recognition @@ -413,11 +446,11 @@ The renderer **MUST** see the post-normalization result. When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: -- **Inline-list mode:** `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\date`, `\time`, `\datetime`, ...). +- **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). - **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`. - Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. -- Text blocks (`p`, headings, etc.) contain inline text streams. +- Text blocks (`title`, `p`, headings, etc.) contain inline text streams. - `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. 
### 8.2 Element catalog (normative) @@ -548,6 +581,37 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) +#### 8.2.X `title` (document title) + +- **Role:** document-level display title +- **Body:** inline text (string body or inline-list body) +- **Attributes:** `lang` (optional) + +Semantic constraints: + +- `title` **MUST** be a top-level block element. +- `title` **MUST** appear at most once. +- If present, `title` **MUST** be the second node in the document (after `hdoc`). +- `title` **MUST NOT** have an `id` attribute. + +#### 8.2.X Footnote dump: `footnotes` + +- **Role:** collect and render accumulated footnotes +- **Body:** `;` (empty) +- **Attributes:** + - `kind` (optional; one of `footnote`, `citation`) + - `lang` (optional) + +Semantics: + +- `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). +- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. +- `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. +- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). +- `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. +- Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. + + ### 8.3 Inline elements Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). 
@@ -570,20 +634,86 @@ Inline elements appear only in inline-list bodies (or inside string/verbatim, de - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.3.4 `\\link` +#### 8.3.4 `\link` -- **Role:** hyperlink +- **Role:** foreign hyperlink (external or non-validated target) - **Body:** inline text - **Attributes:** - - `ref` or `uri` (**exactly one required**) + - `uri` (**required**) - `lang` (optional) +Notes: + +- `\link` is used for hyperlinks that are not validated as interior document references. +- Interior references use `\ref(ref="...")`. + + #### 8.3.5 `\\date`, `\\time`, `\\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) +#### 8.3.X `\ref` + +- **Role:** validated interior reference (to a top-level `id`) +- **Body:** inline text (optional; may be empty) +- **Attributes:** + - `ref` (**required**; must reference an existing `id`) + - `fmt` (optional; one of `full`, `name`, `index`; default `full`) + - `lang` (optional) + +Semantics: + +- `\ref(ref="X")` **MUST** resolve to a top-level element with `id="X"`, otherwise it is semantically invalid. +- If `\ref` has a non-empty body, the body **MUST** be used as the rendered link text. +- If `\ref` has an empty body (`;`), the renderer **MUST** synthesize link text from the referenced target and `fmt`: + + - `fmt="full"`: renders `" "` (default) + - `fmt="name"`: renders `""` + - `fmt="index"`: renders `""` + +Target-derived values: + +- For heading targets (`h1`, `h2`, `h3`), `` is the heading’s constructed plaintext inline text. +- For heading targets, `` is the heading’s hierarchical number within the document (e.g. `3.` / `3.2.` / `3.2.1.`). + +If the referenced target is not a heading: + +- `\ref(ref="X");` (implicit body) is semantically invalid and **MUST** be rejected. +- `\ref(ref="X"){...}` remains valid. 
+ +When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). + +#### 8.3.X `\footnote` + +- **Role:** footnote/citation marker and definition +- **Body:** inline text (required for defining form; empty for reference form) +- **Attributes:** + - `key` (optional; defines a named footnote) + - `ref` (optional; references a previously defined named footnote) + - `kind` (optional; one of `footnote`, `citation`; default `footnote`) + - `lang` (optional) + +Attribute rules: + +- `key` and `ref` are mutually exclusive. +- `kind` is only valid on the defining form (a `\footnote` with a non-empty body). A `\footnote(ref="...");` **MUST NOT** specify `kind`. + +Semantics: + +- `\footnote{...}` defines an anonymous footnote entry at the marker position. +- `\footnote(key="X"){...}` defines a named footnote entry in the footnote namespace and emits its marker at the marker position. +- `\footnote(ref="X");` emits a marker for the previously defined named footnote `X`. +- Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. +- A renderer **MAY** hyperlink markers and dumped entries back-and-forth. + +Marker rendering (normative): + +- A renderer **SHALL** render a regular footnote marker as `\sup{\link{\d+}}`. +- A renderer **SHALL** render a citation marker as `\sup{\link{[\d+]}}`. + + ## 9. Attribute types and date/time formats > TODO: Attributes should be documented well and not only be mentioned in the element catalog. @@ -646,21 +776,26 @@ If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. 
This is > - `fmt` for `\time` > - `fmt` for `\datetime` -- `\\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` -- `\\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` -- `\\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` +- `\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` +- `\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` +- `\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` +- `\ref(fmt=...)`: `full`, `name`, `index` Defaults when omitted: - `\date(fmt=...)`: default `short` - `\time(fmt=...)`: default `short` - `\datetime(fmt=...)`: default `short` +- `\ref(fmt=...)`: default `full` ## 10. Non-normative guidance for tooling - Formatters should normalize line endings to LF. - Provide diagnostics for discouraged patterns (leading/trailing whitespace in attribute values, leading zeros, mixed directionality, etc.). - For typo recovery, treat unknown nodes as inline-list mode (§5.2). +- Emit a warning when `\footnote(...)` occurs in a document but no `footnotes(...)` node appears. +- Emit a diagnostic hint when neither `hdoc(title="...")` nor `title { ... }` is present. +- Emit a diagnostic when both `hdoc(title="...")` and `title { ... }` are present but their plaintext forms differ. --- From effbb391b29176aab2e7cd8e4980d9d679d55105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 18:55:50 +0100 Subject: [PATCH 060/116] Vibecoded: Cleans up specification and fixes internal consistency issues. --- docs/specification-proper-draft.md | 120 +++++++++++++++++++---------- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 2d94b41..57e6b3e 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -11,13 +11,15 @@ DONE: Semantics are correct, language might need improvement. 
DRAFT: Current semantics are not finalized yet. MISSING: Chapter needs to be added still. +If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chapters unless a sub-chapter is explicitly listed with a different status. + - "1. Introduction": DONE - "2. Conformance and terminology": FROZEN - "3. Document encoding (byte- and line-level)": DONE - "4. Syntactic model": DONE - "5. Grammar and additional syntax rules" - "5.1 Grammar (EBNF)": DRAFT - - "5.2 Deterministic list-mode disambiguation: DONE + - "5.2 Deterministic list-mode disambiguation": DONE - "5.3 Maximal munch": FROZEN - "5.4 Inline-list brace balancing and backslash dispatch": DONE - "5.5 String literals (syntax)": DRAFT @@ -38,7 +40,7 @@ MISSING: Chapter needs to be added still. - "8. Elements and attributes" - "8.1 Built-in elements and list mode" - "8.1.1 Inline vs block": DONE - - "8.1.2 List-body mode per built-in element": TODO + - "8.1.2 List-body mode per built-in element": DRAFT - "8.2 Element catalog (normative)": DRAFT - "8.2.1 `hdoc` (header)": DONE - "8.2.2 Headings: `h1`, `h2`, `h3`": DRAFT @@ -53,12 +55,16 @@ MISSING: Chapter needs to be added still. - "8.2.11 `row` (table data row)": DRAFT - "8.2.12 `group` (table row group)": DRAFT - "8.2.13 `td` (table cell)": DRAFT + - "8.2.14 `title` (document title)": DRAFT + - "8.2.15 Footnote dump: `footnotes`": DRAFT - "8.3 Inline elements" - "8.3.1 `\\em`": DRAFT - "8.3.2 `\\mono`": DRAFT - "8.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT - - "8.3.4 `\\link`": DRAFT + - "8.3.4 `\link`": DRAFT - "8.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "8.3.6 `\ref`": DRAFT + - "8.3.7 `\footnote`": DRAFT - "9. 
Attribute types and date/time formats": DRAFT - "9.1 Common attribute types": DRAFT - "9.2 Date / time lexical formats (normative)": DRAFT @@ -153,7 +159,7 @@ A body is one of: - `;` - empty body - `"..."` - string literal body -- `:` - verbatim body (one or more `|` lines) +- `:` - verbatim body (zero or more `|` lines; empty verbatim bodies **MUST** emit a diagnostic) - `{ ... }` - list body ### 4.2 List bodies and modes @@ -212,7 +218,33 @@ key_seg ::= ident_char , { ident_char } ; string_literal ::= '"' , { string_unit } , '"' ; -(* verbatim_body and ws productions match the source spec. *) +(* Words *) +word ::= word_char , { word_char } ; + +(* word_char matches any Unicode scalar value except: + - whitespace + - '{' or '}' + - '\\' (because '\\' begins escape_text or inline_node) +*) +word_char ::= ? any scalar value except WS, "{", "}", "\\" ? ; + +(* String literals (syntax only; no escape validation here) *) +string_unit ::= string_char | "\\" , escaped_char ; +string_char ::= ? any scalar value except '"', "\\", control characters (Unicode category Cc) ? ; +escaped_char ::= ? any scalar value except control characters (Unicode category Cc) ? ; + +(* Verbatim lines *) +verbatim_body ::= ":" , { ws , piped_line } ; +(* An empty verbatim body (no piped_line) is syntactically valid, but tooling MUST emit a diagnostic. *) +piped_line ::= "|" , { not_line_end } , line_terminator ; +not_line_end ::= ? any scalar value except CR and LF ? ; +line_terminator ::= LF | ( CR , LF ) | EOF ; + +(* Whitespace *) +ws ::= { WS } ; +WS ::= " " | "\t" | LF | ( CR , LF ) ; +CR ::= "\r" ; +LF ::= "\n" ; ``` ### 5.2 Deterministic list-mode disambiguation @@ -243,27 +275,33 @@ In Inline-list mode: ### 5.5 String literals (syntax) -> TODO: This chapter requires improved wording. 
String literals are basically parsed by: -> -> ```pseudo -> assert next() == '"' -> while(not eof()): -> char = next() -> if char == '\\': -> _ = next() # skip character -> elif char == '"': -> break # end of string literal -> elif is_control(char): # includes CR, LF, TAB and all other control characters -> abort() # invalid character -> ``` - -String literals are delimiter-based and do **not** validate escape *meaning*. - -Syntactically invalid inside `"..."`: - -- raw LF or CR -- a backslash in the last position of the string (`\"` never terminates the string literal) -- a control character (Unicode `Cc`) - **note:** this includes TAB. +String literals are delimited by `"` and are parsed without interpreting escape *meaning*. + +Syntactic rules: + +- The literal starts with `"` and ends at the next `"` that is not consumed as the escaped character after a backslash. +- A string literal **MUST NOT** contain any Unicode control characters (General Category `Cc`), including TAB, LF, and CR. +- A backslash (`\`) **MUST NOT** be the last character before the closing `"` (unterminated escape). +- The closing `"` **MUST** appear before end-of-file. + +The following reference algorithm is authoritative: + +```pseudo +assert next() == '"' +while(not eof()): + char = next() + if char == '\\': + if eof(): abort() # backslash in last position + esc = next() # escaped character (meaning is not interpreted here) + if is_control(esc): abort() # includes CR, LF, TAB and all other control characters + elif char == '"': + return # end of string literal + elif is_control(char): # includes CR, LF, TAB and all other control characters + abort() # invalid character +abort() # eof before closing '"' +``` + +Semantic escape decoding and validation is specified in §6. ## 6. Escape processing (semantic) @@ -455,13 +493,13 @@ When a built-in element uses a `{ ... 
}` list body, it is parsed in the mode bel ### 8.2 Element catalog (normative) -> TODO: "inline text" bodies are: -> -> - inline list body -> - string body -> - verbatim body -> -> So only an empty body is not "inline text" +In this chapter, an "inline text" body is one of: + +- a string body (`"..."`) +- a verbatim body (`:`) +- an inline-list body (`{ ... }` parsed in Inline-list mode) + +Only an empty body (`;`) is not "inline text". #### 8.2.1 `hdoc` (header) @@ -478,13 +516,13 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel #### 8.2.2 Headings: `h1`, `h2`, `h3` - **Role:** block heading levels 1-3 -- **Body:** inline text (string body or inline-list body) +- **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) #### 8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** paragraph-like block with semantic hint -- **Body:** inline text (string body or inline-list body) +- **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) #### 8.2.4 Lists: `ul`, `ol` @@ -512,7 +550,7 @@ When a built-in element uses a `{ ... 
}` list body, it is parsed in the mode bel - **Body:** inline text caption/description (may be empty) - **Attributes:** - - `path` (required, non-empty) + - `path` (required, non-empty; relative to the current file location) - `alt` (optional, non-empty) - `lang` (optional) - `id` (optional; top-level only) @@ -581,10 +619,10 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) -#### 8.2.X `title` (document title) +#### 8.2.14 `title` (document title) - **Role:** document-level display title -- **Body:** inline text (string body or inline-list body) +- **Body:** inline text - **Attributes:** `lang` (optional) Semantic constraints: @@ -594,7 +632,7 @@ Semantic constraints: - If present, `title` **MUST** be the second node in the document (after `hdoc`). - `title` **MUST NOT** have an `id` attribute. -#### 8.2.X Footnote dump: `footnotes` +#### 8.2.15 Footnote dump: `footnotes` - **Role:** collect and render accumulated footnotes - **Body:** `;` (empty) @@ -654,7 +692,7 @@ Notes: - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) -#### 8.3.X `\ref` +#### 8.3.6 `\ref` - **Role:** validated interior reference (to a top-level `id`) - **Body:** inline text (optional; may be empty) @@ -685,7 +723,7 @@ If the referenced target is not a heading: When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). 
-#### 8.3.X `\footnote` +#### 8.3.7 `\footnote` - **Role:** footnote/citation marker and definition - **Body:** inline text (required for defining form; empty for reference form) From f57acfb751f0159e0656d50fe819eb62aa5e316d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 20:41:11 +0100 Subject: [PATCH 061/116] Add spec compliance TODOs --- SPEC_TODO.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 SPEC_TODO.md diff --git a/SPEC_TODO.md b/SPEC_TODO.md new file mode 100644 index 0000000..d0d5ef7 --- /dev/null +++ b/SPEC_TODO.md @@ -0,0 +1,9 @@ +# Spec compliance TODOs + +- Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 +- Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 +- Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 
【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 +- Preserve verbatim whitespace for `pre` content: verbatim bodies should not strip trailing spaces when used as inline text for `pre`, but `translate_inline_body` currently trims the right side of each verbatim line. 【F:docs/specification-proper-draft.md†L558-L565】【F:src/hyperdoc.zig†L1710-L1768】 +- Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 +- Implement table layout semantics: reserve a leading title column when any row has `title` or any `group` appears, and treat `group` as spanning all columns rather than leaving column accounting unaffected. The current translator only checks column-count consistency. 【F:docs/specification-proper-draft.md†L572-L596】【F:src/hyperdoc.zig†L1050-L1149】 +- Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 From 342439278aa5a287dc9ad79aae0a1103ea905e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 20:48:26 +0100 Subject: [PATCH 062/116] Update spec TODOs --- SPEC_TODO.md | 1 - 1 file changed, 1 deletion(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index d0d5ef7..06140f5 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -5,5 +5,4 @@ - Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. 
Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 - Preserve verbatim whitespace for `pre` content: verbatim bodies should not strip trailing spaces when used as inline text for `pre`, but `translate_inline_body` currently trims the right side of each verbatim line. 【F:docs/specification-proper-draft.md†L558-L565】【F:src/hyperdoc.zig†L1710-L1768】 - Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 -- Implement table layout semantics: reserve a leading title column when any row has `title` or any `group` appears, and treat `group` as spanning all columns rather than leaving column accounting unaffected. The current translator only checks column-count consistency. 【F:docs/specification-proper-draft.md†L572-L596】【F:src/hyperdoc.zig†L1050-L1149】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 
【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 From 8af92dbd4ab0ce4d9acdf78d8c7c78d28c51cdaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Wed, 31 Dec 2025 23:42:40 +0100 Subject: [PATCH 063/116] Revert inline-node clarification for string/verbatim bodies --- docs/specification-proper-draft.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 57e6b3e..d3eee13 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -490,6 +490,7 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. - Text blocks (`title`, `p`, headings, etc.) contain inline text streams. - `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. +- Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). ### 8.2 Element catalog (normative) @@ -591,8 +592,8 @@ Table layout rules: - Each `row` defines a data row. - Each `group` acts as a section heading for subsequent rows. - After applying `td.colspan`, all `row` and `columns` entries **MUST** resolve to the same effective column count. -- If any `row` has a `title` attribute **or** any `group` is present, renderers **MUST** reserve a leading title column. - - In that case, `columns` **SHOULD** include an empty leading header cell. +- If any `row` has a `title` attribute, renderers **MUST** reserve a leading title column. + - The leading column’s header cell is implicit (empty/invisible) and **MUST NOT** be authored inside `columns`. 
#### 8.2.10 `columns` (table header row) From ee8504b5ceaef411b299846a5879ee55248e8dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 12:10:11 +0100 Subject: [PATCH 064/116] Stop trimming verbatim lines --- SPEC_TODO.md | 1 - src/hyperdoc.zig | 4 +--- src/testsuite.zig | 23 +++++++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 06140f5..cf3d56f 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -3,6 +3,5 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 - Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 -- Preserve verbatim whitespace for `pre` content: verbatim bodies should not strip trailing spaces when used as inline text for `pre`, but `translate_inline_body` currently trims the right side of each verbatim line. 
【F:docs/specification-proper-draft.md†L558-L565】【F:src/hyperdoc.zig†L1710-L1768】 - Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index ee7ef20..cfccc88 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1746,9 +1746,7 @@ pub const SemanticAnalyzer = struct { else line.text[1..]; - const stripped = std.mem.trimRight(u8, text, whitespace_chars); - - text_buffer.appendSliceAssumeCapacity(stripped); + text_buffer.appendSliceAssumeCapacity(text); } const location: Parser.Location = if (verbatim_lines.len > 0) blk: { diff --git a/src/testsuite.zig b/src/testsuite.zig index a4133fb..8b72758 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -224,6 +224,29 @@ test "span merger preserves whitespace after inline mono" { } } +test "pre verbatim preserves trailing whitespace" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + "hdoc(version=\"2.0\",lang=\"en\");\n" ++ "pre:\n" ++ "| line with trailing spaces \n" ++ "| indented line \n"; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + const preformatted = doc.contents[0].preformatted; + try std.testing.expectEqual(@as(usize, 1), preformatted.content.len); + + const 
expected = "line with trailing spaces \n indented line "; + switch (preformatted.content[0].content) { + .text => |text| try std.testing.expectEqualStrings(expected, text), + else => return error.TestExpectedEqual, + } +} + test "parser reports unterminated string literals" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); From f4cde949dbd2ff1a33cb0aaccecc4321036a8804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 16:46:33 +0100 Subject: [PATCH 065/116] Implements hdoc(date) respecting hdoc(tz) default. --- src/hyperdoc.zig | 12 ++++++++++-- src/testsuite.zig | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index cfccc88..59a58ba 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -776,7 +776,7 @@ pub const SemanticAnalyzer = struct { version: Version, title: ?[]const u8 = null, author: ?[]const u8 = null, - date: ?DateTime = null, // TODO: Allow skipping TZ value! 
+ date: ?[]const u8 = null, lang: LanguageTag = .inherit, tz: ?TimeZoneOffset = null, }); @@ -791,12 +791,20 @@ pub const SemanticAnalyzer = struct { if (attrs.version.minor != 0) return error.UnsupportedVersion; + const date = if (attrs.date) |date_str| + DateTime.parse(date_str, attrs.tz) catch blk: { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "date" } }, get_attribute_location(node, "date", .value).?); + break :blk null; + } + else + null; + return .{ .version = attrs.version, .lang = if (lang_location != null) attrs.lang else null, .title = attrs.title, .author = attrs.author, - .date = attrs.date, + .date = date, .timezone = attrs.tz, }; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 8b72758..6e141ed 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -817,3 +817,23 @@ test "diagnostics for bare carriage return" { try std.testing.expect(saw_bare_cr); } + +test "hdoc header date uses timezone hint for missing zone" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = "hdoc(version=\"2.0\",lang=\"en\",tz=\"-01:30\",date=\"2026-01-01T12:00:00\");"; + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + const parsed = doc.date orelse return error.TestExpectedEqual; + try std.testing.expectEqual(@as(i32, 2026), parsed.date.year); + try std.testing.expectEqual(@as(u4, 1), parsed.date.month); + try std.testing.expectEqual(@as(u5, 1), parsed.date.day); + try std.testing.expectEqual(@as(u5, 12), parsed.time.hour); + try std.testing.expectEqual(@as(u6, 0), parsed.time.minute); + try std.testing.expectEqual(@as(u6, 0), parsed.time.second); + try std.testing.expectEqual(@as(u20, 0), parsed.time.microsecond); + try std.testing.expectEqual(try hdoc.TimeZoneOffset.parse("-01:30"), parsed.time.timezone); +} From d1af43fe1db1c3c7b60678935f4768294c9b3b5d Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 16:52:41 +0100 Subject: [PATCH 066/116] Implements \date, \time and \datetime rejecting anything except bare text bodies. --- SPEC_TODO.md | 1 - src/hyperdoc.zig | 35 ++++++++++++++++++----------------- src/testsuite.zig | 9 +++++++++ 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index cf3d56f..128cd1b 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -2,6 +2,5 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 -- Align date/time parsing with spec: enforce that `\date`/`\time`/`\datetime` bodies are plain text (no nested inline nodes), and honor `hdoc(tz=...)` as the default zone when parsing header and inline date/time values that omit a timezone. Today only nested date/time spans are rejected and header parsing cannot use the header’s own `tz` hint. 【F:docs/specification-proper-draft.md†L689-L804】【F:src/hyperdoc.zig†L1560-L1645】 - Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null.
【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 59a58ba..a42ae8b 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1571,22 +1571,23 @@ pub const SemanticAnalyzer = struct { fmt: []const u8 = "", }); - const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); - - // Enforce that date/time bodies only contain plain text/string/verbatim. - // HyperDoc cannot format date/time values on it's own so we can't render - // \date, \time and \datetime into a string. It also doesn't make any sense - // to nest them. - for (content_spans) |span| { - switch (span.content) { - .text => {}, - .date, .time, .datetime => { - try sema.emit_diagnostic(.nested_date_time, span.location); - break :blk; - }, - } + // Enforce the body is only plain text. + const ok = switch (node.body) { + .empty => false, + .string, .verbatim, .text_span => true, // always ok + .list => |list| for (list) |item| { + if (item.type != .text) { + break false; + } + } else true, + }; + if (!ok) { + try sema.emit_diagnostic(.invalid_date_time_body, node.location); + break :blk; } + const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + // Convert the content_spans into a "rendered string". 
const content_text = sema.render_spans_to_plaintext(content_spans) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, @@ -2961,7 +2962,7 @@ pub const Diagnostic = struct { link_not_nestable, invalid_link, invalid_date_time, - nested_date_time, + invalid_date_time_body, invalid_date_time_fmt: DateTimeFormatError, missing_timezone, invalid_unicode_string_escape, @@ -3019,7 +3020,7 @@ .illegal_child_item, .list_body_required, .illegal_id_attribute, - .nested_date_time, + .invalid_date_time_body, .column_count_mismatch, .duplicate_id, .unknown_id, @@ -3104,7 +3105,7 @@ .illegal_id_attribute => try w.writeAll("Attribute 'id' not allowed here."), - .nested_date_time => try w.writeAll("Nesting \\date, \\time and \\datetime is not allowed."), + .invalid_date_time_body => try w.writeAll("\\date, \\time and \\datetime do not allow any inlines inside their body."), .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), diff --git a/src/testsuite.zig b/src/testsuite.zig index 6e141ed..5949fe3 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -837,3 +837,12 @@ try std.testing.expectEqual(@as(u20, 0), parsed.time.microsecond); try std.testing.expectEqual(try hdoc.TimeZoneOffset.parse("-01:30"), parsed.time.timezone); } + +test "\\date rejects bad body" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); p { \\date; }", &.{ .invalid_date_time_body, }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); p { \\date{start \\em{inner}} }", &.{ .invalid_date_time_body, }); } From 1da78c3612b20dff60b0c7c235f338fb693f027d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 17:02:39 +0100 Subject: [PATCH 067/116] Refactors Block.TableOfContents.depth to non-optional value between 1 and
3. --- SPEC_TODO.md | 1 - src/hyperdoc.zig | 18 +++++++++--------- src/render/dump.zig | 2 +- src/render/html5.zig | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 128cd1b..4a92f37 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -2,5 +2,4 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 -- Apply the `toc` default depth of 3 when `depth` is omitted instead of leaving it null. 【F:docs/specification-proper-draft.md†L567-L571】【F:src/hyperdoc.zig†L1014-L1045】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 
【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index a42ae8b..61e37e6 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -93,7 +93,7 @@ pub const Block = union(enum) { pub const TableOfContents = struct { lang: LanguageTag, - depth: ?u8, + depth: u8, }; pub const Table = struct { @@ -1023,16 +1023,15 @@ pub const SemanticAnalyzer = struct { const attrs = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, id: ?Reference = null, - depth: ?u32 = null, + depth: ?u8 = null, }); - var depth: ?u8 = null; - if (attrs.depth) |depth_value| { - if (depth_value < 1 or depth_value > 3) { - try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "depth" } }, get_attribute_location(node, "depth", .value) orelse node.location); - } else { - depth = @intCast(depth_value); - } + const max_depth: comptime_int = @typeInfo(Block.HeadingLevel).@"enum".fields.len; + + var depth = attrs.depth orelse max_depth; + if (depth < 1 or depth > max_depth) { + try sema.emit_diagnostic(.{ .invalid_attribute = .{ .type = node.type, .name = "depth" } }, get_attribute_location(node, "depth", .value) orelse node.location); + depth = @max(1, @min(max_depth, depth)); } switch (node.body) { @@ -1882,6 +1881,7 @@ pub const SemanticAnalyzer = struct { return switch (T) { []const u8 => value, + u8 => std.fmt.parseInt(u8, value, 10) catch return error.InvalidValue, u32 => std.fmt.parseInt(u32, value, 10) catch return error.InvalidValue, Reference => Reference.parse(value) catch return error.InvalidValue, diff --git a/src/render/dump.zig b/src/render/dump.zig index 1635df9..e731a96 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -435,7 +435,7 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err .toc => |toc| { try writeTypeTag(writer, "toc"); try 
dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang.text); - try dumpOptionalNumberField(writer, indent + indent_step, "depth", toc.depth); + try dumpOptionalNumberField(writer, indent + indent_step, "depth", @as(?u8, toc.depth)); }, .table => |table| { try writeTypeTag(writer, "table"); diff --git a/src/render/html5.zig b/src/render/html5.zig index 275453a..1eb76bc 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -166,7 +166,7 @@ const RenderContext = struct { } fn renderTableOfContents(ctx: *RenderContext, toc_block: hdoc.Block.TableOfContents, block_index: ?usize, indent: usize) RenderError!void { - const depth = toc_block.depth orelse 3; + const depth = toc_block.depth; const lang_attr = langAttribute(toc_block.lang); const id_attr = ctx.resolveBlockId(block_index); From 59cbae272eac1ca04d3431557f9397dcbee76e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 22:51:35 +0100 Subject: [PATCH 068/116] =?UTF-8?q?Rewrites=20=C2=A77.5=20Identifiers=20an?= =?UTF-8?q?d=20References?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/TODO.md | 23 +++++++++++++++----- docs/specification-proper-draft.md | 35 +++++++++++++++++------------- justfile | 10 +++++++++ 3 files changed, 48 insertions(+), 20 deletions(-) create mode 100644 justfile diff --git a/docs/TODO.md b/docs/TODO.md index 3bfde40..f703537 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -1,11 +1,24 @@ # Specification TODOs -- Introduction of `\ref` vs. `\link` - - -- Introduction of `\footnote{body}`, `\footnote(id="foo"){body}` and `\footnote(ref="");` - - `id` namespace is separate from toplevel `id` namespace. requires better naming - - Equal to `\footnote{}` introduce a `\cite{}` inline for citations or use \footnote{style="citation") {} - Assign semantics to node types, paragraph kinds, ... 
- Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. + + +- Special-style blocks become block containers + - The “special paragraph” family (e.g. note, info, warning, danger, tip, spoiler, quote, …) are block containers. + - Their { ... } list body is always Block-list mode (i.e., they contain blocks like p, ul, ol, pre, etc.). + - They do not accept inline-list bodies directly. Inline markup requires an explicit paragraph: + note { p { text with \link(...) { inline } nodes } } +- General implicit-paragraph shorthand (removes special cases) + - You’re removing the element-specific special casing (like the old quote/li/td convenience rules) and replacing it with one general semantic rule: + - Rule: If a block element’s list body would allow “regular top-level blocks” (e.g. p, pre, ol, ul, …), then that element’s body MAY be written as a string or verbatim literal. + - Equivalence: A string/verbatim body is equivalent to a block-list body containing a single paragraph with the same content as plain text. + Concretely: + X "TEXT" ≡ X { p "TEXT" } + X: | TEXT ≡ X { p: | TEXT } + - Notes: + - This shorthand produces plain text and therefore follows your normal inline text construction rules (including whitespace normalization). + - This shorthand should apply to “flow containers” like quote, note, and also fixes li / td ergonomics cleanly. + - It should not be used for structural containers where a string would be misleading (e.g. ul/ol/table/columns/row), because those don’t “allow regular blocks” as direct children in the first place. diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index d3eee13..c13936b 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -442,26 +442,33 @@ The renderer **MUST** see the post-normalization result. - Required attributes **MUST** be present. 
- Attributes not defined for an element **MUST** be rejected. -### 7.5 IDs and references +### 7.5 Identifiers and References -> TODO: References must not contain control characters or whitespace. They can be any sequence of characters that are not spaces or control characters. +HyperDoc defines two separate namespaces for identifiers to allow cross-referencing within a document: the **Block Namespace** and the **Footnote Namespace**. -- `id` is allowed only on **top-level block elements** (direct children of the document; not inside another node). -- `id` values **MUST** be non-empty and **MUST** be unique (case-sensitive) across the document. +Identifiers in both namespaces are case-sensitive and share the same syntax: they **MUST** be a non-empty sequence of one or more characters, and **MUST NOT** contain any whitespace or Unicode control characters (General Category `Cc`). -#### Interior references (`ref`) +#### 7.5.1 Block Namespace (`id` and `\ref(ref)`) -- A `ref` attribute value **MUST** be a valid Reference value (§9.1). -- `\ref(ref="...")` **MUST** reference an existing top-level `id`. +The Block Namespace is used for referencing top-level block elements like headings, figures, or tables. -#### Footnote references (`key` / `ref`) +- **Definition**: An identifier is added to the Block Namespace using the `id` attribute. + - The `id` attribute is only allowed on **top-level block elements** (direct children of the document, not nested inside another node). + - `id` values **MUST** be unique across the document's Block Namespace. -Footnotes define a separate reference namespace from top-level `id`: +- **Reference**: An identifier in the Block Namespace is referenced using the `\ref` inline element. + - `\ref(ref="...")` **MUST** reference an `id` that exists in the Block Namespace. -- `\footnote(key="..."){...}` defines a footnote key in the **footnote namespace**. -- Footnote keys **MUST** be unique (case-sensitive) within the footnote namespace. 
-- `\footnote(ref="...");` **MUST** reference an existing footnote key. +#### 7.5.2 Footnote Namespace (`\footnote(key)` and `\footnote(ref)`) +The Footnote Namespace is used for defining and referencing reusable footnotes. + +- **Definition**: An identifier is added to the Footnote Namespace using the `key` attribute on a `\footnote` element that has a body. + - `\footnote(key="..."){...}` defines a footnote and associates it with an identifier. + - `key` values **MUST** be unique across the document's Footnote Namespace. + +- **Reference**: An identifier in the Footnote Namespace is referenced using a `\footnote` element that has no body. + - `\footnote(ref="...");` **MUST** reference a `key` that has been defined in the Footnote Namespace. ### 7.6 Built-in element recognition @@ -560,9 +567,7 @@ Only an empty body (`;`) is not "inline text". > TODO: Body is always just "inline text", as verbatim bodies are also always inline text. -- **Body:** either - - verbatim body (`:`) for literal lines (**recommended**), or - - inline text body (string or inline-list); whitespace is preserved (no trimming/collapse) +- **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) #### 8.2.8 Table of contents: `toc` diff --git a/justfile b/justfile new file mode 100644 index 0000000..3c1c2ee --- /dev/null +++ b/justfile @@ -0,0 +1,10 @@ +default: build test + +build: + zig-0.15.2 build -freference-trace=11 --prominent-compile-errors + +test: + zig-0.15.2 build -freference-trace=11 --prominent-compile-errors test + +dump: build + ./zig-out/bin/hyperdoc --format dump "test/accept/workset.hdoc" From fe9218b11b097da26fc8c824a5b5ee7a09c6c6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:13:59 +0100 Subject: [PATCH 069/116] Restructures chapter 8 (Elements and attributes) into a more well-formed structure, and refines rules for block promotion rules --- docs/TODO.md | 11 -- 
docs/specification-proper-draft.md | 170 ++++++++++++++++------------- 2 files changed, 94 insertions(+), 87 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index f703537..f6e284e 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -11,14 +11,3 @@ - Their { ... } list body is always Block-list mode (i.e., they contain blocks like p, ul, ol, pre, etc.). - They do not accept inline-list bodies directly. Inline markup requires an explicit paragraph: note { p { text with \link(...) { inline } nodes } } -- General implicit-paragraph shorthand (removes special cases) - - You’re removing the element-specific special casing (like the old quote/li/td convenience rules) and replacing it with one general semantic rule: - - Rule: If a block element’s list body would allow “regular top-level blocks” (e.g. p, pre, ol, ul, …), then that element’s body MAY be written as a string or verbatim literal. - - Equivalence: A string/verbatim body is equivalent to a block-list body containing a single paragraph with the same content as plain text. - Concretely: - X "TEXT" ≡ X { p "TEXT" } - X: | TEXT ≡ X { p: | TEXT } - - Notes: - - This shorthand produces plain text and therefore follows your normal inline text construction rules (including whitespace normalization). - - This shorthand should apply to “flow containers” like quote, note, and also fixes li / td ergonomics cleanly. - - It should not be used for structural containers where a string would be misleading (e.g. ul/ol/table/columns/row), because those don’t “allow regular blocks” as direct children in the first place. 
diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index c13936b..09413ca 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -35,7 +35,7 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "7.2 Inline text construction and normalization": DONE - "7.3 Attribute uniqueness": DONE - "7.4 Attribute validity": DONE - - "7.5 IDs and references": DRAFT + - "7.5 Identifiers and References": DONE - "7.6 Built-in element recognition": DONE - "8. Elements and attributes" - "8.1 Built-in elements and list mode" @@ -486,9 +486,6 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. #### 8.1.2 List-body mode per built-in element -> TODO: `li` and `td` have an auto-upgrade rule, which performs a conversion of string/verbatim body to `{ p { } }`. -> This means they auto-upgrade their body from literal to "paragraph with literal content" - When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: - **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). @@ -499,15 +496,25 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. - Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). 
-### 8.2 Element catalog (normative) +#### 8.1.3 Shorthand Body Promotion -In this chapter, an "inline text" body is one of: +If a block element's list body can contain general text block elements (such as `p`, `pre`, `ol`, `ul`, etc.), its body **MAY** instead be written as a shorthand string or verbatim literal. -- a string body (`"..."`) -- a verbatim body (`:`) -- an inline-list body (`{ ... }` parsed in Inline-list mode) +When a shorthand body is used, it is semantically equivalent to a block-list body containing a single `p` (paragraph) node whose own body is the original string or verbatim content. -Only an empty body (`;`) is not "inline text". +For example, `li "some text"` is semantically identical to: + +```hdoc +li { + p "some text" +} +``` + +This promotion is a feature for convenience and applies only to the following elements: +- `li` +- `td` + +### 8.2 Top-Level Block Elements #### 8.2.1 `hdoc` (header) @@ -521,19 +528,68 @@ Only an empty body (`;`) is not "inline text". - `date` (optional): datetime lexical format (§9.2.3) - `tz` (optional): default timezone for time/datetime values (§9.2) -#### 8.2.2 Headings: `h1`, `h2`, `h3` +#### 8.2.2 `title` (document title) + +- **Role:** document-level display title +- **Body:** inline text +- **Attributes:** `lang` (optional) + +Semantic constraints: + +- `title` **MUST** be a top-level block element. +- `title` **MUST** appear at most once. +- If present, `title` **MUST** be the second node in the document (after `hdoc`). +- `title` **MUST NOT** have an `id` attribute. + +#### 8.2.3 Table of contents: `toc` + +- **Role:** Generates a table of contents. +- **Body:** `;` (empty) +- **Attributes:** `depth` (optional Integer in {1,2,3}; default 3), `lang` (optional), `id` (optional) + +Semantic constraints: +- `toc` **MUST** be a top-level block element (a direct child of the document). 
+ +#### 8.2.4 Footnote dump: `footnotes` + +- **Role:** collect and render accumulated footnotes +- **Body:** `;` (empty) +- **Attributes:** + - `kind` (optional; one of `footnote`, `citation`) + - `lang` (optional) + +Semantics: + +- `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). +- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. +- `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. +- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). +- `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. +- Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. + +### 8.3 General Text Block Elements + +In this chapter, an "inline text" body is one of: + +- a string body (`"..."`) +- a verbatim body (`:`) +- an inline-list body (`{ ... }` parsed in Inline-list mode) + +Only an empty body (`;`) is not "inline text". 
+ +#### 8.3.1 Headings: `h1`, `h2`, `h3` - **Role:** block heading levels 1-3 - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 8.3.2 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** paragraph-like block with semantic hint - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.2.4 Lists: `ul`, `ol` +#### 8.3.3 Lists: `ul`, `ol` > TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" @@ -544,17 +600,7 @@ Only an empty body (`;`) is not "inline text". - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.2.5 List item: `li` - -> TODO: Include correct body upgrade rules - -- **Body:** either - - a block-list of block elements, or - - a single string body, or - - a verbatim body -- **Attributes:** `lang` (optional) - -#### 8.2.6 Figure: `img` +#### 8.3.4 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -563,19 +609,14 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 8.2.7 Preformatted: `pre` +#### 8.3.5 Preformatted: `pre` > TODO: Body is always just "inline text", as verbatim bodies are also always inline text. - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.2.8 Table of contents: `toc` - -- **Body:** `;` (empty) -- **Attributes:** `depth` (optional Integer in {1,2,3}; default 3), `lang` (optional), `id` (optional; top-level only) - -#### 8.2.9 Tables: `table` +#### 8.3.6 Tables: `table` - **Body:** block-list containing: - optional `columns`, then @@ -600,24 +641,32 @@ Table layout rules: - If any `row` has a `title` attribute, renderers **MUST** reserve a leading title column. 
- The leading column’s header cell is implicit (empty/invisible) and **MUST NOT** be authored inside `columns`. -#### 8.2.10 `columns` (table header row) +### 8.4 Structural Elements + +#### 8.4.1 List item: `li` + +- **Body:** either + - a block-list of block elements, or + - a single string body, or + - a verbatim body +- **Attributes:** `lang` (optional) + +#### 8.4.2 `columns` (table header row) - **Body:** block-list containing `td` (at least one) - **Attributes:** `lang` (optional) -#### 8.2.11 `row` (table data row) +#### 8.4.3 `row` (table data row) - **Body:** block-list containing `td` (at least one) - **Attributes:** `title` (optional string), `lang` (optional) -#### 8.2.12 `group` (table row group) +#### 8.4.4 `group` (table row group) - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.2.13 `td` (table cell) - -> TODO: Include correct body upgrade rules +#### 8.4.5 `td` (table cell) - **Body:** either - a block-list of block elements, or @@ -625,60 +674,29 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) -#### 8.2.14 `title` (document title) - -- **Role:** document-level display title -- **Body:** inline text -- **Attributes:** `lang` (optional) - -Semantic constraints: - -- `title` **MUST** be a top-level block element. -- `title` **MUST** appear at most once. -- If present, `title` **MUST** be the second node in the document (after `hdoc`). -- `title` **MUST NOT** have an `id` attribute. - -#### 8.2.15 Footnote dump: `footnotes` - -- **Role:** collect and render accumulated footnotes -- **Body:** `;` (empty) -- **Attributes:** - - `kind` (optional; one of `footnote`, `citation`) - - `lang` (optional) - -Semantics: - -- `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). 
-- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. -- `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. -- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). -- `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. -- Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. - - -### 8.3 Inline elements +### 8.5 Inline elements Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). -#### 8.3.1 `\\em` +#### 8.5.1 `\\em` - **Role:** emphasis - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.3.2 `\\mono` +#### 8.5.2 `\\mono` - **Role:** monospaced span - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional) -#### 8.3.3 `\\strike`, `\\sub`, `\\sup` +#### 8.5.3 `\\strike`, `\\sub`, `\\sup` - **Role:** strike-through / subscript / superscript - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.3.4 `\link` +#### 8.5.4 `\link` - **Role:** foreign hyperlink (external or non-validated target) - **Body:** inline text @@ -692,13 +710,13 @@ Notes: - Interior references use `\ref(ref="...")`. 
-#### 8.3.5 `\\date`, `\\time`, `\\datetime` +#### 8.5.5 `\\date`, `\\time`, `\\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) -#### 8.3.6 `\ref` +#### 8.5.6 `\ref` - **Role:** validated interior reference (to a top-level `id`) - **Body:** inline text (optional; may be empty) @@ -729,7 +747,7 @@ If the referenced target is not a heading: When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). -#### 8.3.7 `\footnote` +#### 8.5.7 `\footnote` - **Role:** footnote/citation marker and definition - **Body:** inline text (required for defining form; empty for reference form) From eadc76e21eef902ba4189ad96a99a473d5cf5180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:30:07 +0100 Subject: [PATCH 070/116] Changes note/warning/... from paragraphs into admonition blocks, which wrap other blocks. --- docs/specification-proper-draft.md | 30 +- docs/specification.md | 764 ----------------------------- 2 files changed, 21 insertions(+), 773 deletions(-) delete mode 100644 docs/specification.md diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 09413ca..2a0f168 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -488,12 +488,12 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: -- **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). -- **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`. 
+- **Inline-list mode:** `title`, `h1`, `h2`, `h3`, `p`, `img`, `pre`, `group`, and all inline elements (`\em`, `\mono`, `\link`, `\ref`, `\footnote`, `\date`, `\time`, `\datetime`, ...). +- **Block-list mode:** `ul`, `ol`, `li`, `table`, `columns`, `row`, `td`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`. - Containers (`ul`, `ol`, `table`, `row`, `columns`) naturally contain nested nodes. - Text blocks (`title`, `p`, headings, etc.) contain inline text streams. -- `li` and `td` contain either blocks or a single string/verbatim; representing blocks implies block-list mode. +- `li`, `td`, and admonition blocks contain either blocks or a single string/verbatim body; representing blocks implies block-list mode. - Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). #### 8.1.3 Shorthand Body Promotion @@ -513,6 +513,12 @@ li { This promotion is a feature for convenience and applies only to the following elements: - `li` - `td` +- `note` +- `warning` +- `danger` +- `tip` +- `quote` +- `spoiler` ### 8.2 Top-Level Block Elements @@ -583,13 +589,19 @@ Only an empty body (`;`) is not "inline text". - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.2 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 8.3.2 Paragraph: `p` -- **Role:** paragraph-like block with semantic hint +- **Role:** A standard paragraph of text. - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.3 Lists: `ul`, `ol` +#### 8.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` + +- **Role:** A block that renders with a distinct style to draw the reader's attention. +- **Body:** A block-list containing zero or more General Text Block Elements. 
Per the Shorthand Body Promotion rule (§ 8.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. +- **Attributes:** `lang` (optional), `id` (optional; top-level only) + +#### 8.3.4 Lists: `ul`, `ol` > TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" @@ -600,7 +612,7 @@ Only an empty body (`;`) is not "inline text". - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.3.4 Figure: `img` +#### 8.3.5 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -609,14 +621,14 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 8.3.5 Preformatted: `pre` +#### 8.3.6 Preformatted: `pre` > TODO: Body is always just "inline text", as verbatim bodies are also always inline text. - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.3.6 Tables: `table` +#### 8.3.7 Tables: `table` - **Body:** block-list containing: - optional `columns`, then diff --git a/docs/specification.md b/docs/specification.md deleted file mode 100644 index a4ecd99..0000000 --- a/docs/specification.md +++ /dev/null @@ -1,764 +0,0 @@ -# HyperDoc 2.0 - -This specification describes the document markup language "HyperDoc 2.0", that tries to be a simple-to-parse, easy-to-write markup language for hypertext documents. - -It sits in a space where it's unambiguous to parse, but still relatively convenient to write. - -## Syntax Overview - -```hdoc -hdoc(version="2.0"); - -h1 "Introduction" - -p { - This is my first HyperDoc 2.0 document! -} - -pre(syntax="c"): -| #include -| int main(int argc, char *argv[]) { -| printf("Hello, World!"); -| return 0; -| } -``` - -## Document encoding - -This section defines the required byte-level encoding and line structure of HyperDoc documents.
- -### Character encoding - -- A HyperDoc document **MUST** be encoded as **UTF-8**. -- A HyperDoc document **MUST NOT** contain invalid UTF-8 byte sequences. - -**Byte Order Mark (BOM):** - -- A UTF-8 BOM (the byte sequence `EF BB BF`) **SHOULD NOT** be used. Tooling **MAY** accept it and treat it as whitespace at the beginning of the document. - -### Line endings - -- Lines **MUST** be terminated by either: - - `` (U+000A), or - - `` (U+000D U+000A). -- A bare `` **MUST NOT** appear except as part of a `` sequence. - -A document **MAY** mix `` and `` line endings, but tooling **SHOULD** normalize to a single convention when rewriting documents. - -The canonical line ending emitted by tooling **SHOULD** be ``. - -### Control characters - -- The only permitted control character **within a line** is: - - `` (U+0009). -- Apart from line terminators (`` and `` only as part of ``), all other Unicode control characters (General Category `Cc`) **MUST NOT** appear anywhere in a HyperDoc document. - -### Unicode text - -- Apart from the restrictions above, arbitrary Unicode text is allowed. - -### Recommendations for writing systems and directionality (non-normative) - -HyperDoc does not define special handling for right-to-left scripts, bidirectional layout, or writing system segmentation. For readability and to reduce ambiguity across renderers and editors: - -- Authors **SHOULD** keep each paragraph primarily in a **single writing system/directionality** where practical. -- Tooling **MAY** warn when a paragraph mixes strongly different directional scripts or contains invisible bidirectional formatting characters (e.g., bidi overrides/isolates), since these can be confusing in editors and reviews. - -## Syntax - -This chapter defines the **syntactic structure** of HyperDoc documents: how characters form tokens, how tokens form **nodes**, and how nodes nest. 
It intentionally does **not** define meaning (required elements, allowed attributes per node type, ID/refs, allowed escape sequences, etc.). Those are handled in later chapters as **semantic validity** rules. - -A HyperDoc document is a sequence of **nodes**. Each node has: - -- a **node name** (identifier), -- an optional **attribute list** `(key="value", ...)`, -- and a mandatory **body**, which is one of: - - `;` empty body, - - `"..."` string literal body, - - `:` verbatim body (one or more `|` lines), - - `{ ... }` list body. - -A list body `{ ... }` is parsed in one of two modes: - -- **Block-list mode**: the list contains nested nodes. -- **Inline-list mode**: the list contains a token stream of text items, escape tokens, inline nodes, and balanced brace groups. - -The grammar below is syntax-only and intentionally leaves the choice between block-list and inline-list content to an **external disambiguation rule**. - -### Grammar (EBNF) - -```ebnf -(* ---------- Top level ---------- *) - -document ::= ws , { node , ws } , EOF ; - -(* ---------- Nodes ---------- *) - -node ::= node_name , ws , [ attribute_list , ws ] , body ; - -body ::= empty_body - | string_body - | verbatim_body - | list_body ; - -empty_body ::= ";" ; - -string_body ::= string_literal ; - -verbatim_body ::= ":" , { ws , piped_line } ; - -list_body ::= "{" , list_content , "}" ; - -(* - IMPORTANT: list_content is intentionally ambiguous. - A conforming parser chooses either inline_content or block_content by an - EXTERNAL rule (see “Disambiguation for list bodies”). -*) -list_content ::= inline_content | block_content ; - - -(* ---------- Attributes ---------- *) - -attribute_list ::= "(" , ws , - [ attribute , - { ws , "," , ws , attribute } , - [ ws , "," ] (* trailing comma allowed *) - ] , - ws , ")" ; - -attribute ::= attr_key , ws , "=" , ws , string_literal ; - -(* - Attribute keys may include '-' in addition to node-name characters. 
-*) -attr_key ::= attr_key_char , { attr_key_char } ; - -attr_key_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; - - -(* ---------- Block-list content ---------- *) - -block_content ::= ws , { node , ws } ; - - -(* ---------- Inline-list content ---------- *) - -inline_content ::= ws , { inline_item , ws } ; - -inline_item ::= word - | escape_text - | inline_node - | inline_group ; - -(* - Balanced braces in inline content are represented as inline_group. - If braces cannot be balanced, they must be written as \{ and \}. -*) -inline_group ::= "{" , inline_content , "}" ; - -(* - Backslash dispatch inside inline content: - - If next char is one of '\', '{', '}', emit escape_text. - - Otherwise begin an inline_node. -*) -escape_text ::= "\" , ( "\" | "{" | "}" ) ; - -inline_node ::= inline_name , ws , [ attribute_list , ws ] , body ; - -(* - Inline node names start with '\' and then continue with node-name characters. -*) -inline_name ::= "\" , node_name_char_no_backslash , { node_name_char } ; - - -(* ---------- Words / node names ---------- *) - -(* - Node names intentionally do NOT include ':' because ':' is also a body marker - (e.g. 'p:' for verbatim body) and adjacency is allowed. -*) -node_name ::= node_name_char , { node_name_char } ; - -node_name_char ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" | "\" ; - -node_name_char_no_backslash - ::= "A"…"Z" | "a"…"z" | "0"…"9" | "_" | "-" ; - -word ::= word_char , { word_char } ; - -(* - word_char matches any Unicode scalar value except: - - whitespace - - '{' or '}' - - '\' (because '\' begins escape_text or inline_node) -*) -word_char ::= ? any scalar value except WS, "{", "}", "\" ? ; - - -(* ---------- String literals (syntax only; no escape validation here) ---------- *) - -string_literal ::= "\"" , { string_unit } , "\"" ; - -(* - string_unit is permissive enough that malformed escapes remain parsable, - BUT forbids escaping control characters (including LF/CR/TAB). 
- Raw TAB is allowed as a normal string_char. -*) -string_unit ::= string_char | "\" , escaped_noncontrol ; - -string_char ::= ? any scalar value except '"', '\', LF, CR ? ; - -escaped_noncontrol - ::= ? any scalar value except control chars (Unicode category Cc) ? ; - - -(* ---------- Verbatim lines ---------- *) - -piped_line ::= "|" , { not_line_end } , line_terminator ; - -not_line_end ::= ? any scalar value except CR and LF ? ; - -line_terminator ::= LF | CR , LF | EOF ; - - -(* ---------- Whitespace ---------- *) - -ws ::= { WS } ; - -WS ::= " " | "\t" | CR | LF ; - -CR ::= "\r" ; -LF ::= "\n" ; -``` - -### Additional syntax rules and notes (normative) - -#### 1) Maximal-munch for identifiers - -When reading `node_name`, `inline_name`, and `attr_key`, parsers **MUST** consume the **longest possible** sequence of allowed identifier characters (maximal munch). This is required because `\` is a legal identifier character and must not be arbitrarily split. - -#### 2) Disambiguation for list bodies (external chooser) - -The production `list_content ::= inline_content | block_content` is resolved by a deterministic, non-backtracking rule: - -1. Before parsing the content of a `{ ... }` list body, the parser **MUST** choose exactly one list mode: **Inline-list mode** or **Block-list mode**. -2. The mode is determined solely from the syntactic **node name token** (not attributes, not body contents, not document state). -3. Required behavior (recovery-friendly): - - If the node name begins with `\`, the parser **MUST** choose **Inline-list mode**. - - If the node name is recognized as a built-in name with a specified list mode, the parser **MUST** choose that mode. - - Otherwise (unknown / misspelled / unsupported node name), the parser **MUST** choose **Inline-list mode**. - -This rule ensures unknown nodes accept rich inline content for typo recovery (e.g. `prre { ... }`). 
- -#### 3) Inline-list mode: brace balancing and escape-text tokens - -In **Inline-list mode**: - -- `{` and `}` that appear as literal characters in the inline stream are represented structurally as `inline_group` and therefore **must be balanced**. -- If braces cannot be balanced, they **must** be written using the escape-text tokens `\{` and `\}`. -- A backslash in inline content is interpreted as: - - one of the three **escape-text tokens** `\\`, `\{`, `\}`, or - - the start of an `inline_node` otherwise. - -The escape-text tokens exist primarily so the three characters `\`, `{`, `}` can be represented literally within inline content without always starting an inline node. - -#### 4) String literals are syntax-only at this stage - -String literals are delimited by `"` and parsed without interpreting escape meanings. This is intentional: documents with malformed or unknown escape sequences remain **syntactically valid**, allowing formatters and other tooling to round-trip source reliably. - -However, the following are **syntactically invalid** inside string literals: - -- raw LF or CR characters (line breaks are not allowed within `"..."`), -- a backslash immediately followed by a **control character** (Unicode General Category `Cc`), which includes TAB. - -(Separately: which escape sequences are *semantically* valid is defined later.) - -#### 5) Verbatim bodies are line-oriented - -In a verbatim body (`:`): - -- The body consists of zero or more `piped_line` entries. -- Each `piped_line` starts with `|` after optional whitespace skipping. -- The content of a verbatim line is everything up to the line terminator; it is not tokenized into nodes. - -A file ending without a final newline is syntactically allowed (`EOF` as a line terminator), though tooling may warn. 
- -#### 6) Syntactic validity vs semantic validity - -A document is **syntactically valid** if it matches the grammar and the additional syntax rules above (maximal munch, list-mode disambiguation, inline brace balancing, and the string-literal constraints). - -A syntactically valid document may still be **semantically invalid**. Semantic validation is defined later and may include rules such as required header nodes, attribute constraints, reference resolution, allowed escape sequences, encoding policy, and disallowed control characters in source text. - -## Escape encoding - -This chapter defines how **escape sequences are interpreted** to produce decoded Unicode text. Escape processing is part of **semantic validation**: a document may be syntactically valid even if it contains unknown or malformed escapes, but it is not semantically valid unless all escapes decode successfully under the rules below. - -HyperDoc documents are UTF-8 text. Unless explicitly stated otherwise, all “characters” in this chapter refer to Unicode scalar values. - -### Scope - -Escape sequences are recognized in two places: - -1. **STRING literals** (the `"..."` body form, and attribute values which are also STRING literals). -2. **Inline escape-text tokens** inside inline-list bodies: `\\`, `\{`, `\}` (these are emitted as text spans by the parser and can be decoded to literal characters during semantic processing). - -No other part of the syntax performs escape decoding (not node names, not verbatim bodies, not block-list structure). - -## Control character policy - -HyperDoc forbids control characters except **LF** and **CR**. - -- A semantically valid document **MUST NOT** contain any Unicode control characters (General Category `Cc`) anywhere **except**: - - U+000A LINE FEED (LF) - - U+000D CARRIAGE RETURN (CR) - -This rule applies both to: - -- the raw document text (source), and -- any decoded text produced from escapes. 
- -Implications: - -- TAB (U+0009) is forbidden, including if introduced via `\u{9}`. -- NUL (U+0000) is forbidden, including if introduced via `\u{0}`. - -(Structural line breaks in the file may be LF or CRLF or CR as allowed by the syntax rules; decoded strings may contain LF/CR only via escapes.) - -### String literal escape sequences - -#### Overview - -Within a STRING literal, a backslash (`\`) begins an escape sequence. The set of valid escapes is deliberately small. - -A semantic validator/decoder **MUST** accept exactly the escape forms listed below and **MUST** reject all others. - -#### Supported escapes (STRING literals) - -The following escapes are valid inside STRING literals: - -| Escape | Decodes to | -| ---------- | ---------------------------- | -| `\\` | U+005C REVERSE SOLIDUS (`\`) | -| `\"` | U+0022 QUOTATION MARK (`"`) | -| `\n` | U+000A LINE FEED (LF) | -| `\r` | U+000D CARRIAGE RETURN (CR) | -| `\u{H...}` | Unicode scalar value U+H... | - -No other escapes exist. In particular, `\0`, `\xHH`, `\e`, and similar are not part of HyperDoc. - -#### Unicode escape `\u{H...}` - -`H...` is a non-empty sequence of hexadecimal digits (`0–9`, `A–F`, `a–f`) representing a Unicode code point in hexadecimal. - -Rules: - -- The hex sequence **MUST** contain **1 to 6** hex digits. -- The value **MUST** be within `0x0 .. 0x10FFFF` inclusive. -- The value **MUST NOT** be in the surrogate range `0xD800 .. 0xDFFF`. -- The value **MUST NOT** decode to a forbidden control character (see Control character policy). The only allowed controls are LF and CR. - -Notes: - -- Leading zeros are allowed (`\u{000041}` is `A`). -- `\u{20}` is ASCII space. (`\u{032}` is U+0032, the digit `"2"`, because the digits are hexadecimal.) - -#### Invalid escapes (STRING literals) - -A semantic validator/decoder **MUST** reject a document (or at least reject that literal) if any STRING literal contains: - -- an unknown escape (e.g. 
`\q`, `\uFFFF`, `\x20`, `\t`, `\b`, …), -- an unterminated escape (string ends immediately after `\`), -- a malformed Unicode escape (`\u{}`, missing `{`/`}`, non-hex digits, more than 6 hex digits), -- a Unicode escape outside the valid scalar range or within the surrogate range, -- a Unicode escape that produces a forbidden control character. - -#### Canonical encoding recommendations (non-normative) - -For authors and formatters: - -- Prefer `\\` and `\"` for literal backslash and quote. -- Prefer `\n` and `\r` for LF/CR instead of `\u{A}` / `\u{D}`. -- Prefer the shortest hex form for `\u{...}` without leading zeros unless alignment/readability benefits. - -### Inline escape-text tokens in inline-list bodies - -Inside **inline-list bodies**, the syntax defines three special two-character text tokens: - -- `\\` -- `\{` -- `\}` - -These exist so that inline content can contain literal `\`, `{`, and `}` without always starting an inline node (`\name{...}`) or requiring brace balancing. - -#### Decoding rule - -During semantic text construction, an implementation **MAY** decode these tokens as: - -- `\\` → `\` -- `\{` → `{` -- `\}` → `}` - -This decoding is independent of STRING literal escapes: these tokens occur in inline text streams, not inside `"..."` literals. - -#### Round-tripping note (normative intent) - -A formatter or tooling that aims to preserve the author’s intent **SHOULD** preserve the distinction between: - -- a literal `{`/`}` that is part of a balanced inline group, and -- an escaped brace token `\{`/`\}` that was used to avoid imbalance. - -This distinction matters for reliable reconstruction and for edits that may reflow or restructure inline content. - -### Interaction with syntax - -- Escape decoding is performed **after** syntactic parsing. -- Syntactic parsing of STRING literals is delimiter-based and does not validate escape *meaning*. -- Semantic validation determines whether escapes are valid and produces the decoded Unicode text. 
- -This separation is intentional: it allows autoformatters to parse and rewrite documents that may contain malformed escapes without losing information, while still allowing strict validators to enforce the escape rules above. - -## Semantic Validity - -> TO BE DONE. -> -> - Attribute uniqueness -> - Attribute must be defined on a node -> - Non-optional attributes must be present -> - id is only valid on top-level nodes -> - id must be unique -> - id is case sensitive -> - ref must point to an existing id - -## Element Overview - -| Element | Element Type | Allowed Children | Attributes | -| ----------------------------------------------------------- | ------------ | ---------------------------- | -------------------------------------------------- | -| *Document* | Document | `hdoc`, Blocks | | -| `hdoc` | Header | - | `lang`, `title`, `version`, `author`, `date`, `tz` | -| `h1`, `h2`, `h3` | Block | Text Body | `lang`, \[`id`\] | -| `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` | Block | Text Body | `lang`, \[`id`\] | -| `ul` | Block | `li` ≥ 1 | `lang`, \[`id`\] | -| `ol` | Block | `li` ≥ 1 | `lang`, \[`id`\], `first` | -| `img` | Block | Text Body | `lang`, \[`id`\], `alt`, `path` | -| `pre` | Block | Text Body | `lang`, \[`id`\], `syntax` | -| `toc` | Block | - | `lang`, \[`id`\], `depth` | -| `table` | Block | Table Rows | `lang`, \[`id`\] | -| `li` | List Item | Blocks, String, Verbatim | `lang` | -| `td` | Table Cell | Blocks, String, Verbatim | `lang`, `colspan` | -| `columns` | Table Row | `td` ≥ 1 | `lang` | -| `group` | Table Row | Text Body | `lang`, | -| `row` | Table Row | `td` ≥ 1 | `lang`, `title` | -| `\em` | Text Body | Text Body | `lang` | -| `\mono` | Text Body | Text Body | `lang`, `syntax` | -| `\strike` | Text Body | Text Body | `lang` | -| `\sub`, `\sup` | Text Body | Text Body | `lang` | -| `\link` | Text Body | Text Body | `lang`, (`ref` \| `uri`) | -| `\date`, `\time`, `\datetime` | Text Body | Plain Text, String, Verbatim 
| `lang`, `fmt` | -| *Plain Text* | Text Body | - | | -| *String* | Text Body | - | | -| *Verbatim* | Text Body | - | | - -Notes: - -- The attribute `id` is only allowed when the element is a top-level element (direct child of the document) -- The attributes `ref` and `uri` on a `\link` are mutually exclusive -- `\date`, `\time` and `\datetime` cannot contain other text body items except for plain text, string or verbatim content. - -## Attribute Overview - -| Attribute | Type | Required | Allowed Values | Description | -| --------- | --------------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| `version` | Version | Yes | `2.0` | Describes the version of this HyperDoc document. | -| `lang` | Language Tag | No | [BCP 47 Language Tag](https://datatracker.ietf.org/doc/html/rfc5646) | Defines the language of the elements contents. | -| `title` | String | No | *Any* | Sets the title of the document or the table row. | -| `author` | String | No | *Any* | Sets the author of the document. | -| `date` | DateTime | No | A date-time value using the format specified below | Sets the authoring date of the document. | -| `id` | Reference | No | Non-empty | Sets a reference which can be linked to with `\link(ref="...")`. | -| `first` | Integer | No | Decimal integer numbers ≥ 0 | Sets the number of the first list item. | -| `alt` | String | No | Non-empty | Sets the alternative text shown when an image cannot be loaded. | -| `path` | String | Yes | Non-empty file path to an image file | Defines the file path where the image file can be found. | -| `syntax` | String | No | *See element documentation* | Hints the syntax highlighter how how the elements context shall be highlighted. | -| `depth` | Integer | No | `1`, `2` or `3` | Defines how many levels of headings shall be included. 
| -| `colspan` | Integer | No | Decimal integer numbers ≥ 1 | Sets how many columns the table cell spans. | -| `ref` | Reference | No | Any value present in an `id` attribute. | References any `id` inside this document. | -| `uri` | URI | No | [Internationalized Resource Identifier (IRI)](https://datatracker.ietf.org/doc/html/rfc3987) | Links to a foreign document with a URI. | -| `fmt` | Enum | No | *See element documentation* | Defines how the date/time value shall be displayed. | -| `tz` | Timezone Offset | No | `Z` for UTC or a `±HH:MM` timezone offset. | Defines the default timezone for time/datetime values. | - -NOTE: All attribute values allow leading and trailing whitespace, but it's heavily discouraged and should yield a non-fatal diagnostic or hint in implementations. - -## Attribute Types - -| Type | Example | Syntax | Notes | -| ----------------- | ----------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | -| `Date` | `2025-12-31T13:37:42` | *See below* | A date value as specified below. | -| `Enum` | `auto` | `\w+` | | -| `Integer` | `10` | `\d+` | Leading zeroes are allowed, but discouraged. | -| `Language Tag` | `de-DE` | *See [RFC 5646](https://datatracker.ietf.org/doc/html/rfc5646)* | | -| `Reference` | `attribute-types` | *No control characters or whitespace* | | -| `String` | `This image shows a cat and a dog.` | *Any Value* | Any textual value. | -| `Timezone Offset` | `+13:30` | `Z\|[+-]{00..23}:{00..59}` | Expresses the UTC timezone with `Z` or a relative offset in hours + minutes | -| `URI` | `www://example.com` | *See [RFC 3987](https://datatracker.ietf.org/doc/html/rfc3987) | The type actually allows IRIs (unicode-enabled URIs), but is called URI to use the core common term. | -| `Version` | `2.0` | `\d+\.\d+` | Has no semantic meaning yet, and is forced to be `2.0`. 
All other values are reserved for future use. | - -## Semantic Structure - -All elements have these attributes: - -| Attribute | Function | -| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `lang` | Marks the (human) language of the contents of that element. This must be an [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag). | - -## Top-Level / Block Elements - -All top-level elements have these attributes: - -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------- | -| `id` | Marks a target for a `\link(ref="...")`. Must be unique throughout the document. | - -### Headings: `h1`, `h2`, `h3` - -**Allowed Items:** Inline Text - -These elements are all rendered as headings of different levels. - -- `h1` is the top-level heading. -- `h2` is the level below `h1`. -- `h3` is the level below `h2`. - -### Paragraphs: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - -**Allowed Items:** Inline Text - -These elements are all rendered as paragraphs. - -The type of the paragraph includes a semantic hint: - -- `p`: A normal paragraph. -- `note`: A paragraph that informs the reader. This is typically rendered with a blue/white color hint. The associated icon is a white i in a blue box/circle. -- `warning`: A paragraph that warns the reader. This is typically rendered with a yellow/black color hint. The associated icon is a yellow triangle with a black exclamation mark. -- `danger`: A paragraph that warns the reader of danger. This is typically rendered with a red/white color hint. The associated icon is a red octagon with a white exclamation mark. -- `tip`: A paragraph that gives the reader a tip. The associated icon is a lightbulb. -- `quote`: A paragraph that quotes a foreign source. This is typically rendered with a small indentation and a distinct font.
-- `spoiler`: A paragraph that contains information about things the reader might not want to know. This is typically visually hidden/blurred so it's unreadable until a reader action is performed. - -### Lists: `ul`, `ol` - -**Allowed Items:** `li` - -- `ul` is an unordered list rendered with typically either dashes or dots as list enumerators. -- `ol` is an ordered list rendered with typically either roman or arabic numerals as list enumerators. - -#### Ordered List `ol` - -| Attribute | Function | -| --------- | --------------------------------------------------------------------------------------------------------------------------- | -| `first` | An integer string that is the number of the *first* item of the list. Allows paragraph breaks between a single joined list. | - -### Figures: `img` - -**Allowed Items:** Inline Text - -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | -| `alt` | A textual description of the image contents for vision-impaired users. Similar to the [HTML alt tag](https://en.wikipedia.org/wiki/Alt_attribute). | -| `path` | A path relative to the current file that points to an image file that should be shown. | - -This element shows a full-width image or figure. Its contents are the figure description. - -If the contents are empty, the figure may be rendered in a simpler form. - -### Preformatted: `pre` - -**Allowed Items:** Inline Text - -| Attribute | Function | -| --------- | ------------------------------------------------------------------------------------------------------- | -| `syntax` | If present, hints a syntax highlighter that this preformatted block contains programming language code. | - -In contrast to all other block types, a `pre` block retains whitespace and line-break information and lays out the text as-is. - -It does not allow automatic line break insertion or word-wrapping.
- -If a pre contains inline elements, these will still be parsed and apply their styles to the text spans. - -### Table Of Contents: `toc` - -**Allowed Items:** *none* - -| Attribute | Function | -| --------- | ------------------------------------------------------------------------------ | -| `depth` | String `1`, `2` or `3`. Defines how many levels of headings shall be included. | - -Renders a table of contents for the current document. - -This element allows no child items. - -## Lists - -### List Items `li` - -**Allowed Items:** Block Elements *or* String Content. - -These elements wrap a sequence of blocks that will be rendered for this list item. - -It also allows a string to be used as its content directly; this will be equivalent to having a nested paragraph with that string's content: - -```hdoc -ul { -    li { p { This is a normal item. } } -    li "This is a normal item." -} -``` - -will have two identical list items. - -### Tables: `table` - -**Allowed Items:** `columns`, `row`, `group` - -Tables are made up of an optional header row (`columns`) followed by a sequence of `row` and `group` elements. - -- `columns` defines the header labels and the column count. -- `row` defines a data row. -- `group` provides a section heading that applies to subsequent rows until the next group or the end of the table. - -All `row` and `columns` elements must resolve to the same number of columns after applying `colspan`. -If a `row` uses the `title` attribute or a `group` is present, renderers must reserve a leading title column. -In that case, the header row should have an empty leading cell before the column headers. - -## Table Elements - -### Column Headers: `columns` - -**Allowed Items:** `td` - -This element contains the header cells for each column. - -### Rows: `row` - -**Allowed Items:** `td` - -| Attribute | Function | -| --------- | ---------------------------------------------------------------------------- | -| `title` | A title caption for this row.
If present, will be shown left of all columns. | - -### Row Groups: `group` - -**Allowed Items:** Inline Text - -A *row group* is a row that contains a single heading-style cell that labels the rows below. - -### Cells: `td` - -**Allowed Items:** Block Elements *or* String Content. - -| Attribute | Function | -| --------- | --------------------------------------------------------- | -| `colspan` | Integer string defining how many columns this cell spans. | - -This element contains the contents of a table cell. - -Like `li`, a `td` can either contain a single string or a nested block sequence. - -## Inline Text - -These elements are all allowed inside a paragraph-like content and can typically be nested. - -*Inline Text* can either be a string literal, a literal block or a list. - -If the text is a list, it allows the use of inline elements like `\em` or `\mono`. - -### Plain Text - -This is normal plain text and has no special meaning. - -### Emphasis: `em` - -**Nesting:** Yes - -Formats the text as emphasised. This is typically bold or italic rendering. - -### Monospaced: `mono` - -**Nesting:** Yes - -| Attribute | Function | -| --------- | ----------------------------------------------------------------------------------------- | -| `syntax` | If present, hints a syntax highlighter that this span contains programming language code. | - -Formats the text in a monospaced font. This is useful for code-like structures. - -### Strike-through: `strike` - -**Nesting:** Yes - -Renders the text with a horizontal line through the text, striking it out. - -### Sub/Superscript: `sub`, `sup` - -**Nesting:** Yes - -Renders the text a bit smaller and moved upwards (`sup`) or downwards (`sub`) to allow sub- or superscript rendering. 
- -### Linking: `link` - -**Nesting:** No - -| Attribute | Function | -| --------- | -------------------------------------------------------------------------------------------------------- | -| `ref` | Points the link to a top-level block with the `id` of this `ref` attribute. Mutually exclusive to `uri`. | -| `uri` | Points the link to the resource inside the `uri`. Mutually exclusive to `ref`. | - -Adds a hyperlink to the contents. This allows a reader to navigate by typically clicking the link. - -### Localized Date/Time: `date`, `time`, `datetime` - -**Nesting:** No - -| Element | Attribute | Function | -| ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | -| `date` | `fmt` | `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` (raw ISO 8601). | -| `time` | `fmt` | `short`, `long`, `rough`, `relative`, `iso` (raw ISO 8601). | -| `datetime` | `fmt` | `short` (localized date+time), `long` (localized date+time with seconds), `relative`, `iso` (raw ISO 8601). | - -Renders a [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations) date, time or date+time in a localized manner. - -## Date/Time Formatting - -All date/time values MUST use the formats defined in this section. This is a conservative, interoperable intersection between [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), so values that conform here are valid under both specifications. Digits are ASCII decimal unless stated otherwise. - -### Date Format - -Date strings MUST follow `YYYY-MM-DD`. - -- `YYYY` is a year with one or more digits. -- `MM` is a two-digit month in the range `01` to `12`. -- `DD` is a two-digit day in the range `01` to `31`. -- The `-` separators are mandatory. - -Examples: `2025-12-25`, `1-01-01`. - -### Time Format - -Time strings MUST follow `hh:mm:ss` with a required time zone. 
- -- `hh`, `mm`, `ss` are two-digit hour, minute, second fields. -- Hour MUST be in `00` to `23`, minute and second MUST be in `00` to `59`. -- An optional fractional seconds component MAY follow the seconds field as `.` plus - 1, 2, 3, 6, or 9 digits. -- The fractional separator MUST be `.`. Comma is not allowed. -- A time zone is required when no `tz` attribute is present on the header node and - MUST be either `Z` (UTC) or a numeric offset in the form `+hh:mm` or `-hh:mm` with two-digit hour/minute fields. -- Offset hours MUST be in `00` to `23`, offset minutes MUST be in `00` to `59`. - -Examples: `22:30:46Z`, `22:30:46.136+01:00`, `21:30:46.136797358-05:30`, `22:30:46` (only with `tz` attribute). - -### Date/Time Format - -Date/time strings MUST combine a date and time with a literal `T`. - -- Format: `YYYY-MM-DD` + `T` + `hh:mm:ss` (with optional fraction and required zone). - -Examples: `2025-12-25T22:31:50.13+01:00`, `2025-12-25T21:31:43Z`. From 59afce3bfd2c1b66f76fe37a80d34f83e2f5252d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:34:59 +0100 Subject: [PATCH 071/116] Splits Lists chapter into two. --- docs/TODO.md | 7 ------- docs/specification-proper-draft.md | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index f6e284e..ae7b9a7 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -4,10 +4,3 @@ - Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. - - -- Special-style blocks become block containers - - The “special paragraph” family (e.g. note, info, warning, danger, tip, spoiler, quote, …) are block containers. - - Their { ... } list body is always Block-list mode (i.e., they contain blocks like p, ul, ol, pre, etc.). - - They do not accept inline-list bodies directly. 
Inline markup requires an explicit paragraph: - note { p { text with \link(...) { inline } nodes } } diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 2a0f168..7c8fe71 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -601,18 +601,20 @@ Only an empty body (`;`) is not "inline text". - **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§ 8.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.4 Lists: `ul`, `ol` - -> TODO: Split into two separate parts "Unordered Lists" and "Ordered Lists" +#### 8.3.4 Unordered List: `ul` - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) -`ol` additional attribute: +#### 8.3.5 Ordered List: `ol` -- `first` (optional Integer ≥ 0; default 1): number of the first list item +- **Body:** block-list containing `li` (at least one) +- **Attributes:** + - `lang` (optional) + - `id` (optional; top-level only) + - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.3.5 Figure: `img` +#### 8.3.6 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -621,14 +623,12 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 8.3.6 Preformatted: `pre` - -> TODO: Body is always just "inline text", as verbatim bodies are also always inline text. 
+#### 8.3.7 Preformatted: `pre` - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.3.7 Tables: `table` +#### 8.3.8 Tables: `table` - **Body:** block-list containing: - optional `columns`, then From a6681b5b72394d7d1890d75a08f416c6a27b1a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 1 Jan 2026 23:42:16 +0100 Subject: [PATCH 072/116] Clarifies table semantics. --- docs/specification-proper-draft.md | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/docs/specification-proper-draft.md b/docs/specification-proper-draft.md index 7c8fe71..37e7a9b 100644 --- a/docs/specification-proper-draft.md +++ b/docs/specification-proper-draft.md @@ -637,21 +637,11 @@ Only an empty body (`;`) is not "inline text". Table layout rules: -> TODO: `group` is not a "row with implicit title and no cells", but basically -> `group { }` is equivalent to `columns { td(colspan="") { } }`, -> so a regular row with a single cell spanning all columns. -> `group` never implies the existence of the "leading title column" - -> TODO: The `row(title="…")` does never affect the effective column count. -> It implies an additional untitled first column, which is blank in `columns` and `group` rows. -> The `title` row is designed to form matrices with an empty top-left field. - -- `columns` defines header labels and the column count. -- Each `row` defines a data row. -- Each `group` acts as a section heading for subsequent rows. -- After applying `td.colspan`, all `row` and `columns` entries **MUST** resolve to the same effective column count. -- If any `row` has a `title` attribute, renderers **MUST** reserve a leading title column. - - The leading column’s header cell is implicit (empty/invisible) and **MUST NOT** be authored inside `columns`. +- **Column Count:** The number of columns in a table is determined by the `columns` element. 
It is the sum of the `colspan` values of the `td` cells within the `columns` row. If `columns` is absent, the column count is determined by the first `row` element in the same way. All `columns` and `row` elements in a table **MUST** resolve to the same effective column count. + +- **Row Headers (`row(title)`):** A `row` element may have a `title` attribute, which creates a *row header*. This header is rendered as an implicit, additional first column for that row. This "row header column" does **not** contribute to the table's main column count. If any `row` in the table has a `title`, renderers **MUST** reserve space for a leading row header column throughout the table. This leading column will be blank for `columns`, `group`, and any `row` without a `title`. + +- **Group Headers (`group`):** A `group` element acts as a heading that spans all columns of the table. Semantically, `group { ... }` is equivalent to a `row` containing a single `td` with a `colspan` attribute equal to the table's column count. A `group` does not have a `title` and does not render a cell in the row header column. ### 8.4 Structural Elements @@ -665,21 +655,27 @@ Table layout rules: #### 8.4.2 `columns` (table header row) +- **Role:** Defines the labels for the columns of a table. The number of cells in this element (taking `colspan` into account) defines the table's column count. - **Body:** block-list containing `td` (at least one) - **Attributes:** `lang` (optional) #### 8.4.3 `row` (table data row) +- **Role:** Defines a row of data in a table. - **Body:** block-list containing `td` (at least one) -- **Attributes:** `title` (optional string), `lang` (optional) +- **Attributes:** + - `title` (optional string): If present, creates a header cell for the row in an implicit leading column. + - `lang` (optional) #### 8.4.4 `group` (table row group) +- **Role:** A heading row that spans all table columns. 
- **Body:** inline text - **Attributes:** `lang` (optional) #### 8.4.5 `td` (table cell) +- **Role:** A single cell within a table row. - **Body:** either - a block-list of block elements, or - a single string body, or From 1ab645820d671f85a9a736445656ef90f4a0a6d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 13:09:33 +0100 Subject: [PATCH 073/116] Renames docs/specification-proper-draft.md -> docs/specification.md --- docs/TODO.md | 3 +++ docs/{specification-proper-draft.md => specification.md} | 0 2 files changed, 3 insertions(+) rename docs/{specification-proper-draft.md => specification.md} (100%) diff --git a/docs/TODO.md b/docs/TODO.md index ae7b9a7..c2aa0ee 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -4,3 +4,6 @@ - Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. + + +> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328) \ No newline at end of file diff --git a/docs/specification-proper-draft.md b/docs/specification.md similarity index 100% rename from docs/specification-proper-draft.md rename to docs/specification.md From 061a7f9faa1744d5bb6a6755f6f1396bfb69d85f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 14:06:52 +0100 Subject: [PATCH 074/116] Refactory escapes semantics chapter 6 and splits it into 6 and 7 --- docs/TODO.md | 38 +++++- docs/specification.md | 293 ++++++++++++++++++++---------------------- 2 files changed, 176 insertions(+), 155 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index c2aa0ee..986c7e0 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -4,6 +4,42 @@ - Specify "syntax" proper - Add links to RFCs where possible - Document `lang` inheritance. No `lang` attribute means that parent language is used. 
+- Clarify that page layout is static and won't change except for context resize. +- \abbrev and \term might be good ideas. +> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328) -> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328) \ No newline at end of file + + +> §5.5 - String Literal Control Character Inconsistency + +§5.5 forbids "any Unicode control characters" in string literals +§6.3 allows \n (LF) and \r (CR) escape sequences +Problem: These decode to control characters (Cc), contradicting §6.2 which says "resolved string-literal values" must not contain control characters except line terminators. Need explicit carve-out. + +> Problem: How does this interact with inline \time and \datetime elements? Do they inherit it? §9.2.2 says "If hdoc(tz="...") is present, a time value MAY omit the zone," but doesn't specify how the default is applied during rendering. + +> Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. + +> Problem: What should synthesized text be for valid non-heading targets like table, img, pre? Spec says headings get but doesn't define fallback for figures ("Figure 3"), tables ("Table 2"), etc. + +States "A renderer SHALL render a regular footnote marker as \sup{\link{\d+}}" +Problem: This seems like implementation guidance, not semantic requirement. Different renderers (HTML, PDF, terminal) may render markers differently. Should be in §10 (non-normative) or relaxed to "SHOULD". + + +> Recommendation 3: Add Formal Whitespace Processing Algorithm + + +Recommendation 5: Add Appendix with Formal Schema +Rationale: Current spec requires reading entire document to understand element relationships. Machine-readable schema would enable automatic validation and tooling. 
+Provide RelaxNG Compact syntax schema defining: + + +Rationale: Technical documentation needs to emphasize specific code lines (tutorials, diffs, explanations). +pre(syntax="python", highlight="2,4-6"): +| def factorial(n): +| if n == 0: # Base case +| return 1 +| else: +| return n * factorial(n-1) # Recursive case +also: enable line numbers diff --git a/docs/specification.md b/docs/specification.md index 37e7a9b..96693eb 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -23,56 +23,56 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "5.3 Maximal munch": FROZEN - "5.4 Inline-list brace balancing and backslash dispatch": DONE - "5.5 String literals (syntax)": DRAFT -- "6. Escape processing (semantic)": DRAFT - - "6.1 Scope": DRAFT - - "6.2 Control character policy (semantic)": DRAFT - - "6.3 Supported escapes in string literals": DRAFT - - "6.3.1 Unicode escape `\\u{H...}`": DRAFT - - "6.4 Invalid escapes": DRAFT - - "6.5 Inline escape-text tokens": DRAFT -- "7. Semantic document model": DRAFT - - "7.1 Document structure": DONE - - "7.2 Inline text construction and normalization": DONE - - "7.3 Attribute uniqueness": DONE - - "7.4 Attribute validity": DONE - - "7.5 Identifiers and References": DONE - - "7.6 Built-in element recognition": DONE -- "8. 
Elements and attributes" - - "8.1 Built-in elements and list mode" - - "8.1.1 Inline vs block": DONE - - "8.1.2 List-body mode per built-in element": DRAFT - - "8.2 Element catalog (normative)": DRAFT - - "8.2.1 `hdoc` (header)": DONE - - "8.2.2 Headings: `h1`, `h2`, `h3`": DRAFT - - "8.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT - - "8.2.4 Lists: `ul`, `ol`": DRAFT - - "8.2.5 List item: `li`": DRAFT - - "8.2.6 Figure: `img`": DRAFT - - "8.2.7 Preformatted: `pre`": DRAFT - - "8.2.8 Table of contents: `toc`": DRAFT - - "8.2.9 Tables: `table`": DRAFT - - "8.2.10 `columns` (table header row)": DRAFT - - "8.2.11 `row` (table data row)": DRAFT - - "8.2.12 `group` (table row group)": DRAFT - - "8.2.13 `td` (table cell)": DRAFT - - "8.2.14 `title` (document title)": DRAFT - - "8.2.15 Footnote dump: `footnotes`": DRAFT - - "8.3 Inline elements" - - "8.3.1 `\\em`": DRAFT - - "8.3.2 `\\mono`": DRAFT - - "8.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT - - "8.3.4 `\link`": DRAFT - - "8.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT - - "8.3.6 `\ref`": DRAFT - - "8.3.7 `\footnote`": DRAFT -- "9. Attribute types and date/time formats": DRAFT - - "9.1 Common attribute types": DRAFT - - "9.2 Date / time lexical formats (normative)": DRAFT - - "9.2.1 Date": DRAFT - - "9.2.2 Time": DRAFT - - "9.2.3 Datetime": DRAFT - - "9.3 `fmt` values": DRAFT -- "10. Non-normative guidance for tooling": DRAFT +- "6. Inline Text Escape Processing (semantic)": DRAFT + - "6.1 Inline escape-text tokens": DRAFT +- "7. String Literal Escape Processing (semantic)": DRAFT + - "7.1 Control character policy (semantic)": DRAFT + - "7.2 Supported escapes in string literals": DRAFT + - "7.2.1 Unicode escape `\\u{H...}`": DRAFT + - "7.3 Invalid escapes": DRAFT +- "8. 
Semantic document model": DRAFT + - "8.1 Document structure": DONE + - "8.2 Inline text construction and normalization": DONE + - "8.3 Attribute uniqueness": DONE + - "8.4 Attribute validity": DONE + - "8.5 Identifiers and References": DONE + - "8.6 Built-in element recognition": DONE +- "9. Elements and attributes" + - "9.1 Built-in elements and list mode" + - "9.1.1 Inline vs block": DONE + - "9.1.2 List-body mode per built-in element": DRAFT + - "9.2 Element catalog (normative)": DRAFT + - "9.2.1 `hdoc` (header)": DONE + - "9.2.2 Headings: `h1`, `h2`, `h3`": DRAFT + - "9.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT + - "9.2.4 Lists: `ul`, `ol`": DRAFT + - "9.2.5 List item: `li`": DRAFT + - "9.2.6 Figure: `img`": DRAFT + - "9.2.7 Preformatted: `pre`": DRAFT + - "9.2.8 Table of contents: `toc`": DRAFT + - "9.2.9 Tables: `table`": DRAFT + - "9.2.10 `columns` (table header row)": DRAFT + - "9.2.11 `row` (table data row)": DRAFT + - "9.2.12 `group` (table row group)": DRAFT + - "9.2.13 `td` (table cell)": DRAFT + - "9.2.14 `title` (document title)": DRAFT + - "9.2.15 Footnote dump: `footnotes`": DRAFT + - "9.3 Inline elements" + - "9.3.1 `\\em`": DRAFT + - "9.3.2 `\\mono`": DRAFT + - "9.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "9.3.4 `\link`": DRAFT + - "9.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "9.3.6 `\ref`": DRAFT + - "9.3.7 `\footnote`": DRAFT +- "10. Attribute types and date/time formats": DRAFT + - "10.1 Common attribute types": DRAFT + - "10.2 Date / time lexical formats (normative)": DRAFT + - "10.2.1 Date": DRAFT + - "10.2.2 Time": DRAFT + - "10.2.3 Datetime": DRAFT + - "10.3 `fmt` values": DRAFT +- "11. Non-normative guidance for tooling": DRAFT - "Appendix A. Example": DRAFT - "Appendix B. Element Overview": MISSING - "Appendix C. Attribute Overview": MISSING @@ -98,7 +98,7 @@ A document can be: - **Syntactically valid**: conforms to the grammar and additional syntax rules. 
- **Semantically valid**: syntactically valid **and** conforms to semantic rules (elements, attributes, escape decoding, IDs/refs, etc.). -Unless explicitly stated, rules in chapters 3-5 are **syntax** rules; rules in chapters 6-9 are **semantic** rules. +Unless explicitly stated, rules in chapters 3-5 are **syntax** rules; rules in chapters 6-10 are **semantic** rules. ## 3. Document encoding (byte- and line-level) @@ -131,7 +131,7 @@ The canonical line ending emitted by tooling **SHOULD** be ``. - U+000D (CR) as part of a valid line ending. - Surrogate characters (Plane "unassigned", U+D800…U+DFFF) **MUST NOT** appear in the source text. A conforming parser **MUST** reject them. -A semantic validator **MAY** reject TABs in source text (see §6.2). +A semantic validator **MAY** reject TABs in source text (see §7.1). ### 3.4 Unicode text @@ -176,7 +176,7 @@ The grammar is intentionally ambiguous; a deterministic external rule selects a - Attribute lists are comma-separated `(key="value", ...)`. - Trailing commas are allowed. - Attribute values are **string literals** (see §5.5). -- Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §9.1). +- Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §10.1). ## 5. Grammar and additional syntax rules @@ -257,7 +257,7 @@ The mode is determined solely from the **node name token**: 2. Else, if the node name is a recognized built-in with a specified list mode, the parser **MUST** choose that mode. 3. Otherwise (unknown node name), the parser **MUST** choose **Inline-list mode**. -Built-in elements and their list modes are defined in §8.1. +Built-in elements and their list modes are defined in §9.1. ### 5.3 Maximal munch @@ -301,57 +301,58 @@ while(not eof()): abort() # eof before closing '"' ``` -Semantic escape decoding and validation is specified in §6. +Semantic escape decoding and validation is specified in §7. -## 6. Escape processing (semantic) +## 6. 
Inline Text Escape Processing (semantic) -> TODO: This chapter must be split into two chapters: -> -> - "Inline Text Escape Processing" -> - "String Literal Escape Processing" -> -> This includes renumbering all chapters and their references for the markdown spec. -> -> Chapter "6.1 Scope" will be removed then. +Escape decoding in inline-list bodies applies only to the three escape-text tokens produced by the parser (§5.4). + +### 6.1 Inline escape-text tokens + +In inline-list bodies, the parser emits three special text tokens: -### 6.1 Scope +- `\\` +- `\{` +- `\}` -Escape sequences are recognized only in: +During semantic inline-text construction (§8.2), implementations **MUST** decode these to literal `\`, `{`, `}`. -1. String literals (node bodies of the `"..."` form and attribute values). -2. Inline escape-text tokens emitted by the parser: `\\\\`, `\\{`, `\\}`. +Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens, because these spellings are not semantically equivalent in the inline parse tree. -No other syntax performs escape decoding. +## 7. String Literal Escape Processing (semantic) -### 6.2 Control character policy (semantic) +Escape sequences are recognized only in string literals (node bodies of the `"..."` form and attribute values). No other syntax performs string-literal escape decoding. -> TODO: The same rules as in §3 are applied, except that `TAB` is also additionally forbidden after escaping. +### 7.1 Control character policy (semantic) - A semantic validator **MAY** reject TAB (U+0009) in source text. -- Regardless of whether TAB is accepted in source text, TAB **MUST** be rejected in the **resolved value of any string literal** (quoted node bodies and attribute values). This includes TAB that appears literally between quotes and TAB produced via `\u{...}`. 
+- After decoding escapes in any string literal, the resolved value **MUST NOT** contain any Unicode control character (General Category `Cc`) except: + - LF (U+000A), and + - CR (U+000D) only when immediately followed by LF (U+000A) (i.e. as part of a CRLF sequence U+000D U+000A). +- TAB (U+0009) is always forbidden in resolved string-literal values, including when produced via `\u{...}`. -Apart from LF/CR line terminators and TAB (U+0009) in source text, a semantically valid document **MUST NOT** contain other Unicode control characters (General Category `Cc`). Resolved string-literal values are restricted by the rules above (TAB is always forbidden there). +String literals are syntactically forbidden from containing literal control characters (§5.5); therefore LF/CRLF can only appear in resolved values via `\n`, `\r`, or `\u{...}`. -### 6.3 Supported escapes in string literals +### 7.2 Supported escapes in string literals A semantic validator/decoder **MUST** accept exactly: -| Escape | Decodes to | -| ----------- | --------------------------- | -| `\\\\` | U+005C (`\\`) | -| `\\"` | U+0022 (`"`) | -| `\\n` | U+000A (LF) | -| `\\r` | U+000D (CR) | -| `\\u{H...}` | Unicode scalar value U+H... | +| Escape | Decodes to | +| ---------- | --------------------------- | +| `\\` | U+005C (`\`) | +| `\"` | U+0022 (`"`) | +| `\n` | U+000A (LF) | +| `\r` | U+000D (CR) | +| `\u{H...}` | Unicode scalar value U+H... 
| -#### 6.3.1 Unicode escape `\\u{H...}` +#### 7.2.1 Unicode escape `\u{H...}` - 1-6 hex digits - value in `0x0..0x10FFFF` - not in `0xD800..0xDFFF` (surrogates) -- must not decode to a forbidden control character (§6.2) +- must not decode to a forbidden control character (§7.1) -### 6.4 Invalid escapes +### 7.3 Invalid escapes A semantic validator/decoder **MUST** reject a string literal that contains: @@ -361,25 +362,9 @@ A semantic validator/decoder **MUST** reject a string literal that contains: - out-of-range or surrogate code points - forbidden control characters produced by `\u{...}` -### 6.5 Inline escape-text tokens - -> TODO: Move to chapter "Inline Text Escape Processing" - -In inline-list bodies, the parser emits three special text tokens: - -- `\\` -- `\{` -- `\}` - -During semantic text construction, implementations **MUST** decode these to literal `\`, `{`, `}`. - -> TODO: The following sentence is unclear. The intent is: "When parsing, tooling should not perform ad-hoc conversion of escape sequences, so the output can be rendered again as-is. The escape sequences must always be display their escaped variant." - -Tooling that aims to preserve author intent **SHOULD** preserve whether braces were written as balanced groups vs escaped brace tokens. - -## 7. Semantic document model +## 8. Semantic document model -### 7.1 Document structure +### 8.1 Document structure - A semantically valid document **MUST** contain exactly one `hdoc` header node. - The `hdoc` node **MUST** be the first node in the document. @@ -406,7 +391,7 @@ Tooling that aims to preserve author intent **SHOULD** preserve whether braces w - If neither is present, tooling **MAY** emit a diagnostic hint that the document has no title. -### 7.2 Inline text construction and normalization +### 8.2 Inline text construction and normalization Many elements (e.g. `p`, headings, and inline elements) produce **inline text** for rendering. 
Inline text is constructed from one of: @@ -432,23 +417,23 @@ The renderer **MUST** see the post-normalization result. **String and verbatim bodies:** When a string body or verbatim body is converted into spans, it is treated as a single text source (no nested inline nodes) and then processed using the same rules above, including whitespace normalization for non-`pre` elements. -### 7.3 Attribute uniqueness +### 8.3 Attribute uniqueness - Within a node, attribute keys **MUST** be unique (case-sensitive). -### 7.4 Attribute validity +### 8.4 Attribute validity - Attributes **MUST** be allowed on the element they appear on. - Required attributes **MUST** be present. - Attributes not defined for an element **MUST** be rejected. -### 7.5 Identifiers and References +### 8.5 Identifiers and References HyperDoc defines two separate namespaces for identifiers to allow cross-referencing within a document: the **Block Namespace** and the **Footnote Namespace**. Identifiers in both namespaces are case-sensitive and share the same syntax: they **MUST** be a non-empty sequence of one or more characters, and **MUST NOT** contain any whitespace or Unicode control characters (General Category `Cc`). -#### 7.5.1 Block Namespace (`id` and `\ref(ref)`) +#### 8.5.1 Block Namespace (`id` and `\ref(ref)`) The Block Namespace is used for referencing top-level block elements like headings, figures, or tables. @@ -459,7 +444,7 @@ The Block Namespace is used for referencing top-level block elements like headin - **Reference**: An identifier in the Block Namespace is referenced using the `\ref` inline element. - `\ref(ref="...")` **MUST** reference an `id` that exists in the Block Namespace. -#### 7.5.2 Footnote Namespace (`\footnote(key)` and `\footnote(ref)`) +#### 8.5.2 Footnote Namespace (`\footnote(key)` and `\footnote(ref)`) The Footnote Namespace is used for defining and referencing reusable footnotes. 
@@ -470,21 +455,21 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. - **Reference**: An identifier in the Footnote Namespace is referenced using a `\footnote` element that has no body. - `\footnote(ref="...");` **MUST** reference a `key` that has been defined in the Footnote Namespace. -### 7.6 Built-in element recognition +### 8.6 Built-in element recognition -- Built-in element names are defined in §8. +- Built-in element names are defined in §9. - Unknown elements are syntactically valid (parseable), but semantically invalid. -## 8. Elements and attributes +## 9. Elements and attributes -### 8.1 Built-in elements and list mode +### 9.1 Built-in elements and list mode -#### 8.1.1 Inline vs block +#### 9.1.1 Inline vs block - Any element name starting with `\` is an **inline element**. - Any element name not starting with `\` is a **block element**. -#### 8.1.2 List-body mode per built-in element +#### 9.1.2 List-body mode per built-in element When a built-in element uses a `{ ... }` list body, it is parsed in the mode below: @@ -496,7 +481,7 @@ When a built-in element uses a `{ ... }` list body, it is parsed in the mode bel - `li`, `td`, and admonition blocks contain either blocks or a single string/verbatim body; representing blocks implies block-list mode. - Built-in elements with empty bodies are also parsed in Inline-list mode so accidental `{ ... }` usage stays balanced and formatters can recover consistently (e.g., `toc;`, `footnotes;`). -#### 8.1.3 Shorthand Body Promotion +#### 9.1.3 Shorthand Body Promotion If a block element's list body can contain general text block elements (such as `p`, `pre`, `ol`, `ul`, etc.), its body **MAY** instead be written as a shorthand string or verbatim literal. 
@@ -520,9 +505,9 @@ This promotion is a feature for convenience and applies only to the following el - `quote` - `spoiler` -### 8.2 Top-Level Block Elements +### 9.2 Top-Level Block Elements -#### 8.2.1 `hdoc` (header) +#### 9.2.1 `hdoc` (header) - **Role:** document header - **Body:** `;` (empty) @@ -531,10 +516,10 @@ This promotion is a feature for convenience and applies only to the following el - `lang` (optional) - `title` (optional) - `author` (optional) - - `date` (optional): datetime lexical format (§9.2.3) - - `tz` (optional): default timezone for time/datetime values (§9.2) + - `date` (optional): datetime lexical format (§10.2.3) + - `tz` (optional): default timezone for time/datetime values (§10.2) -#### 8.2.2 `title` (document title) +#### 9.2.2 `title` (document title) - **Role:** document-level display title - **Body:** inline text @@ -547,7 +532,7 @@ Semantic constraints: - If present, `title` **MUST** be the second node in the document (after `hdoc`). - `title` **MUST NOT** have an `id` attribute. -#### 8.2.3 Table of contents: `toc` +#### 9.2.3 Table of contents: `toc` - **Role:** Generates a table of contents. - **Body:** `;` (empty) @@ -556,7 +541,7 @@ Semantic constraints: Semantic constraints: - `toc` **MUST** be a top-level block element (a direct child of the document). -#### 8.2.4 Footnote dump: `footnotes` +#### 9.2.4 Footnote dump: `footnotes` - **Role:** collect and render accumulated footnotes - **Body:** `;` (empty) @@ -573,7 +558,7 @@ Semantics: - `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. - Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. -### 8.3 General Text Block Elements +### 9.3 General Text Block Elements In this chapter, an "inline text" body is one of: @@ -583,30 +568,30 @@ In this chapter, an "inline text" body is one of: Only an empty body (`;`) is not "inline text". 
-#### 8.3.1 Headings: `h1`, `h2`, `h3` +#### 9.3.1 Headings: `h1`, `h2`, `h3` - **Role:** block heading levels 1-3 - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.2 Paragraph: `p` +#### 9.3.2 Paragraph: `p` - **Role:** A standard paragraph of text. - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 9.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** A block that renders with a distinct style to draw the reader's attention. -- **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§ 8.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. +- **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§9.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.4 Unordered List: `ul` +#### 9.3.4 Unordered List: `ul` - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 8.3.5 Ordered List: `ol` +#### 9.3.5 Ordered List: `ol` - **Body:** block-list containing `li` (at least one) - **Attributes:** @@ -614,7 +599,7 @@ Only an empty body (`;`) is not "inline text". - `id` (optional; top-level only) - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 8.3.6 Figure: `img` +#### 9.3.6 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -623,12 +608,12 @@ Only an empty body (`;`) is not "inline text". 
- `lang` (optional) - `id` (optional; top-level only) -#### 8.3.7 Preformatted: `pre` +#### 9.3.7 Preformatted: `pre` - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 8.3.8 Tables: `table` +#### 9.3.8 Tables: `table` - **Body:** block-list containing: - optional `columns`, then @@ -643,9 +628,9 @@ Table layout rules: - **Group Headers (`group`):** A `group` element acts as a heading that spans all columns of the table. Semantically, `group { ... }` is equivalent to a `row` containing a single `td` with a `colspan` attribute equal to the table's column count. A `group` does not have a `title` and does not render a cell in the row header column. -### 8.4 Structural Elements +### 9.4 Structural Elements -#### 8.4.1 List item: `li` +#### 9.4.1 List item: `li` - **Body:** either - a block-list of block elements, or @@ -653,13 +638,13 @@ Table layout rules: - a verbatim body - **Attributes:** `lang` (optional) -#### 8.4.2 `columns` (table header row) +#### 9.4.2 `columns` (table header row) - **Role:** Defines the labels for the columns of a table. The number of cells in this element (taking `colspan` into account) defines the table's column count. - **Body:** block-list containing `td` (at least one) - **Attributes:** `lang` (optional) -#### 8.4.3 `row` (table data row) +#### 9.4.3 `row` (table data row) - **Role:** Defines a row of data in a table. - **Body:** block-list containing `td` (at least one) @@ -667,13 +652,13 @@ Table layout rules: - `title` (optional string): If present, creates a header cell for the row in an implicit leading column. - `lang` (optional) -#### 8.4.4 `group` (table row group) +#### 9.4.4 `group` (table row group) - **Role:** A heading row that spans all table columns. - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.4.5 `td` (table cell) +#### 9.4.5 `td` (table cell) - **Role:** A single cell within a table row. 
- **Body:** either @@ -682,29 +667,29 @@ Table layout rules: - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) -### 8.5 Inline elements +### 9.5 Inline elements Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). -#### 8.5.1 `\\em` +#### 9.5.1 `\\em` - **Role:** emphasis - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.5.2 `\\mono` +#### 9.5.2 `\\mono` - **Role:** monospaced span - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional) -#### 8.5.3 `\\strike`, `\\sub`, `\\sup` +#### 9.5.3 `\\strike`, `\\sub`, `\\sup` - **Role:** strike-through / subscript / superscript - **Body:** inline text - **Attributes:** `lang` (optional) -#### 8.5.4 `\link` +#### 9.5.4 `\link` - **Role:** foreign hyperlink (external or non-validated target) - **Body:** inline text @@ -718,13 +703,13 @@ Notes: - Interior references use `\ref(ref="...")`. -#### 8.5.5 `\\date`, `\\time`, `\\datetime` +#### 9.5.5 `\\date`, `\\time`, `\\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) - **Attributes:** `fmt` (optional; per element), `lang` (optional) -#### 8.5.6 `\ref` +#### 9.5.6 `\ref` - **Role:** validated interior reference (to a top-level `id`) - **Body:** inline text (optional; may be empty) @@ -755,7 +740,7 @@ If the referenced target is not a heading: When computing `` for headings, inline footnote/citation markers **SHOULD NOT** contribute to the plaintext (i.e., their marker text is ignored). -#### 8.5.7 `\footnote` +#### 9.5.7 `\footnote` - **Role:** footnote/citation marker and definition - **Body:** inline text (required for defining form; empty for reference form) @@ -784,7 +769,7 @@ Marker rendering (normative): - A renderer **SHALL** render a citation marker as `\sup{\link{[\d+]}}`. -## 9. Attribute types and date/time formats +## 10. 
Attribute types and date/time formats > TODO: Attributes should be documented well and not only be mentioned in the element catalog. > This chapter shall document attributes and their types, including detailled descriptions for both. @@ -793,7 +778,7 @@ Marker rendering (normative): > Non-fatal diagnostics **MUST** be emitted for that. > Leading and trailing whitespace must be stripped. -### 9.1 Common attribute types +### 10.1 Common attribute types - **Version:** must be `2.0`. - **Integer:** ASCII decimal digits; leading zeros allowed but discouraged. @@ -802,11 +787,11 @@ Marker rendering (normative): - **Timezone offset:** `Z` or `±HH:MM`. - **URI/IRI:** per RFC 3987. -### 9.2 Date / time lexical formats (normative) +### 10.2 Date / time lexical formats (normative) These formats are a conservative intersection of RFC 3339 and ISO 8601. -#### 9.2.1 Date +#### 10.2.1 Date `YYYY-MM-DD` @@ -814,7 +799,7 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601. - `MM`: `01`-`12` - `DD`: `01`-`31` -#### 9.2.2 Time +#### 10.2.2 Time `hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`. @@ -828,13 +813,13 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601. If `hdoc(tz="...")` is present, a time value **MAY** omit the zone. -#### 9.2.3 Datetime +#### 10.2.3 Datetime `YYYY-MM-DD` `T` `hh:mm:ss` (with optional fraction and required zone, unless `hdoc.tz` is present) If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is permitted specifically for `hdoc(date="...")` and for `\datetime` bodies. -### 9.3 `fmt` values +### 10.3 `fmt` values > TODO: `fmt` values need a proper description of what the expected output is. > The output is using the `lang` context of the \date, \time, \datetime element and @@ -858,7 +843,7 @@ Defaults when omitted: - `\datetime(fmt=...)`: default `short` - `\ref(fmt=...)`: default `full` -## 10. Non-normative guidance for tooling +## 11. 
Non-normative guidance for tooling
 
 - Formatters should normalize line endings to LF.
 - Provide diagnostics for discouraged patterns (leading/trailing whitespace in attribute values, leading zeros, mixed directionality, etc.).

From 62f0f7643228486e118463890fb7d384af2a7539 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?=
Date: Fri, 2 Jan 2026 14:19:13 +0100
Subject: [PATCH 075/116] Improves \time and \datetime tz handling.

---
 docs/TODO.md          | 18 ++++++------------
 docs/specification.md | 18 ++++++++++++++----
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/docs/TODO.md b/docs/TODO.md
index 986c7e0..68ad3ab 100644
--- a/docs/TODO.md
+++ b/docs/TODO.md
@@ -6,18 +6,12 @@
 - Document `lang` inheritance. No `lang` attribute means that parent language is used.
 - Clarify that page layout is static and won't change except for context resize.
 - \abbrev and \term might be good ideas.
-
-> Okay, next task: Fix chapter 6 (escapes) by splitting into two chapters (described in 308-315), clarify how control characters are handled (L328)
-
-
-
-> §5.5 - String Literal Control Character Inconsistency
-
-§5.5 forbids "any Unicode control characters" in string literals
-§6.3 allows \n (LF) and \r (CR) escape sequences
-Problem: These decode to control characters (Cc), contradicting §6.2 which says "resolved string-literal values" must not contain control characters except line terminators. Need explicit carve-out.
-
-> Problem: How does this interact with inline \time and \datetime elements? Do they inherit it? §9.2.2 says "If hdoc(tz="...") is present, a time value MAY omit the zone," but doesn't specify how the default is applied during rendering.
+- Add more text to the introduction and underlying ideas of the format:
+  - Orthogonality: Semantic structure is not dependent on syntax. Verbatim lines are not preformatted, but `pre` blocks are.
+  - Strictness for ecosystem health: Prevent HTML uncontrolled growth disaster
+  - Allow tooling to work with semantically invalid documents
+  - Static layout: No surprises. Layout once, yield consistent rendering
+  - Accessibility: Everything is semantic, nothing is presentation-only.
 
 > Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns.
 
diff --git a/docs/specification.md b/docs/specification.md
index 96693eb..6437148 100644
--- a/docs/specification.md
+++ b/docs/specification.md
@@ -801,7 +801,7 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601.
 
 #### 10.2.2 Time
 
-`hh:mm:ss` with a required time zone unless a default `tz` is defined in `hdoc`.
+`hh:mm:ss` with an optional fraction and an optional zone.
 
 - `hh`: `00`-`23`
 - `mm`: `00`-`59`
@@ -811,13 +811,23 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601.
   - `Z`, or
   - `+hh:mm` / `-hh:mm` (two-digit hour/minute)
 
-If `hdoc(tz="...")` is present, a time value **MAY** omit the zone.
+Normative rules:
+
+- If `hdoc(tz="...")` is present, a time value **MAY** omit the zone; if omitted, the effective zone **MUST** be `hdoc.tz`.
+- If `hdoc(tz="...")` is not present, a time value **MUST** specify a zone.
+- If a time value specifies a zone, that zone **MUST** be used regardless of `hdoc.tz`.
 
 #### 10.2.3 Datetime
 
-`YYYY-MM-DD` `T` `hh:mm:ss` (with optional fraction and required zone, unless `hdoc.tz` is present)
+`YYYY-MM-DD` `T` `hh:mm:ss` with an optional fraction and an optional zone.
+
+The time component (including fraction and zone syntax) uses the same rules as §10.2.2.
+
+Normative rules:
 
-If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone. This is permitted specifically for `hdoc(date="...")` and for `\datetime` bodies.
+- If `hdoc(tz="...")` is present, a datetime value **MAY** omit the zone; if omitted, the effective zone **MUST** be `hdoc.tz`.
+- If `hdoc(tz="...")` is not present, a datetime value **MUST** specify a zone. +- If a datetime value specifies a zone, that zone **MUST** be used regardless of `hdoc.tz`. ### 10.3 `fmt` values From b853dfb16214ffc59a9a742c6511ed9d193e6ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 14:56:57 +0100 Subject: [PATCH 076/116] Implements automatic header number generation --- src/hyperdoc.zig | 69 ++++++++++++++++++++++++++++++++-------- src/render/dump.zig | 2 +- src/render/html5.zig | 23 ++++++++++++-- src/testsuite.zig | 10 +++--- test/accept/workset.hdoc | 18 +++++++++++ 5 files changed, 100 insertions(+), 22 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 61e37e6..423d258 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -26,7 +26,7 @@ pub const Document = struct { timezone: ?TimeZoneOffset, pub const TableOfContents = struct { - level: Block.HeadingLevel, + level: Block.Heading.Level, // TODO: Refactor to use `index` here as well. headings: []usize, children: []TableOfContents, }; @@ -52,12 +52,28 @@ pub const Block = union(enum) { table: Table, pub const Heading = struct { - level: HeadingLevel, + index: Index, lang: LanguageTag, content: []Span, - }; - pub const HeadingLevel = enum { h1, h2, h3 }; + pub const Level = enum(u2) { + pub const count: comptime_int = @typeInfo(@This()).@"enum".fields.len; + + h1 = 0, + h2 = 1, + h3 = 2, + }; + + /// Stores both heading level and the index number of that heading. 
+ /// h1 is §[0] + /// h2 is §[0].[1] + /// h3 is §[0].[1].[2] + pub const Index = union(Level) { + h1: [1]u16, + h2: [2]u16, + h3: [3]u16, + }; + }; pub const Paragraph = struct { kind: ParagraphKind, @@ -688,11 +704,11 @@ pub const SemanticAnalyzer = struct { }; const TocBuilder = struct { - level: Block.HeadingLevel, + level: Block.Heading.Level, headings: std.ArrayList(usize), children: std.ArrayList(*TocBuilder), - fn init(level: Block.HeadingLevel) @This() { + fn init(level: Block.Heading.Level) @This() { return .{ .level = level, .headings = .empty, @@ -712,6 +728,9 @@ pub const SemanticAnalyzer = struct { id_locations: std.ArrayList(?Parser.Location) = .empty, pending_refs: std.ArrayList(RefUse) = .empty, + current_heading_level: usize = 0, + heading_counters: [Block.Heading.Level.count]u16 = @splat(0), + fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { switch (node.type) { .hdoc => { @@ -881,12 +900,12 @@ pub const SemanticAnalyzer = struct { }); const heading: Block.Heading = .{ - .level = switch (node.type) { + .index = try sema.compute_next_heading(node, switch (node.type) { .h1 => .h1, .h2 => .h2, .h3 => .h3, else => unreachable, - }, + }), .lang = attrs.lang, .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), }; @@ -1026,7 +1045,7 @@ pub const SemanticAnalyzer = struct { depth: ?u8 = null, }); - const max_depth: comptime_int = @typeInfo(Block.HeadingLevel).@"enum".fields.len; + const max_depth = Block.Heading.Level.count; var depth = attrs.depth orelse max_depth; if (depth < 1 or depth > max_depth) { @@ -1925,7 +1944,7 @@ pub const SemanticAnalyzer = struct { else => continue, }; - const target_depth = heading_level_index(heading.level); + const target_depth = heading_level_index(heading.index); while (stack.items.len > target_depth) { _ = stack.pop(); @@ -1987,7 +2006,7 @@ pub const SemanticAnalyzer = struct { return node; } - fn heading_level_index(level: 
Block.HeadingLevel) usize { + fn heading_level_index(level: Block.Heading.Level) usize { return switch (level) { .h1 => 1, .h2 => 2, @@ -1995,7 +2014,7 @@ pub const SemanticAnalyzer = struct { }; } - fn next_heading_level(level: Block.HeadingLevel) Block.HeadingLevel { + fn next_heading_level(level: Block.Heading.Level) Block.Heading.Level { return switch (level) { .h1 => .h2, .h2 => .h3, @@ -2003,6 +2022,30 @@ pub const SemanticAnalyzer = struct { }; } + /// Computes the next index number for a heading of the given level: + fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { + const index = @intFromEnum(level); + + sema.heading_counters[index] += 1; + + if (index > sema.current_heading_level + 1) { + // TODO: Emit fatal diagnostic for invalid heading sequencing: "h3 after h1 is not legal" + } + sema.current_heading_level = index; + + // Reset all higher levels to 1: + for (sema.heading_counters[index + 1 ..]) |*val| { + val.* = 0; + } + _ = node; + + return switch (level) { + .h1 => .{ .h1 = sema.heading_counters[0..1].* }, + .h2 => .{ .h2 = sema.heading_counters[0..2].* }, + .h3 => .{ .h3 = sema.heading_counters[0..3].* }, + }; + } + fn emit_diagnostic(sema: *SemanticAnalyzer, code: Diagnostic.Code, location: Parser.Location) !void { if (sema.diagnostics) |diag| { try diag.add(code, sema.make_location(location.offset)); @@ -2940,7 +2983,7 @@ pub const Diagnostic = struct { pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; pub const ReferenceError = struct { ref: []const u8 }; - pub const AutomaticHeading = struct { level: Block.HeadingLevel }; + pub const AutomaticHeading = struct { level: Block.Heading.Level }; pub const Code = union(enum) { // errors: diff --git a/src/render/dump.zig b/src/render/dump.zig index e731a96..b617f0d 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -403,7 +403,7 @@ fn 
dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err switch (block) { .heading => |heading| { try writeTypeTag(writer, "heading"); - try dumpEnumField(writer, indent + indent_step, "level", heading.level); + try dumpEnumField(writer, indent + indent_step, "level", heading.index); // TODO: Also print the indices here try dumpOptionalStringField(writer, indent + indent_step, "lang", heading.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", heading.content); }, diff --git a/src/render/html5.zig b/src/render/html5.zig index 1eb76bc..3bbc4e9 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -48,12 +48,29 @@ const RenderContext = struct { null; try writeIndent(ctx.writer, indent); - try writeStartTag(ctx.writer, headingTag(heading.level), .regular, .{ + try writeStartTag(ctx.writer, headingTag(heading.index), .regular, .{ .id = id_attr, .lang = lang_attr, }); + + // TODO: Make stylable: + if (true) { + var buffer: [32]u8 = undefined; + try ctx.renderSpan(.{ + .content = .{ + .text = switch (heading.index) { + .h1 => |level| std.fmt.bufPrint(&buffer, "§{} ", .{level[0]}) catch unreachable, + .h2 => |level| std.fmt.bufPrint(&buffer, "§{}.{} ", .{ level[0], level[1] }) catch unreachable, + .h3 => |level| std.fmt.bufPrint(&buffer, "§{}.{}.{} ", .{ level[0], level[1], level[2] }) catch unreachable, + }, + }, + .attribs = .{}, + .location = undefined, + }); + } + try ctx.renderSpans(heading.content); - try writeEndTag(ctx.writer, headingTag(heading.level)); + try writeEndTag(ctx.writer, headingTag(heading.index)); try ctx.writer.writeByte('\n'); } @@ -625,7 +642,7 @@ fn takeLang(lang: *?[]const u8) ?[]const u8 { return null; } -fn headingTag(level: hdoc.Block.HeadingLevel) []const u8 { +fn headingTag(level: hdoc.Block.Heading.Level) []const u8 { return switch (level) { .h1 => "h1", .h2 => "h2", diff --git a/src/testsuite.zig b/src/testsuite.zig index 5949fe3..682aaae 100644 --- a/src/testsuite.zig +++ 
b/src/testsuite.zig @@ -435,26 +435,26 @@ test "table of contents inserts automatic headings when skipping levels" { try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); const toc = doc.toc; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h1, toc.level); + try std.testing.expectEqual(.h1, toc.level); try std.testing.expectEqualSlices(usize, &.{ 0, 2 }, toc.headings); try std.testing.expectEqual(@as(usize, 2), toc.children.len); const auto_h1 = toc.children[0]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, auto_h1.level); + try std.testing.expectEqual(.h2, auto_h1.level); try std.testing.expectEqualSlices(usize, &.{ 0, 1 }, auto_h1.headings); try std.testing.expectEqual(@as(usize, 2), auto_h1.children.len); const auto_h2 = auto_h1.children[0]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, auto_h2.level); + try std.testing.expectEqual(.h3, auto_h2.level); try std.testing.expectEqualSlices(usize, &.{0}, auto_h2.headings); const h2_child = auto_h1.children[1]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h3, h2_child.level); + try std.testing.expectEqual(.h3, h2_child.level); try std.testing.expectEqual(@as(usize, 0), h2_child.headings.len); try std.testing.expectEqual(@as(usize, 0), h2_child.children.len); const trailing_h1_child = toc.children[1]; - try std.testing.expectEqual(hdoc.Block.HeadingLevel.h2, trailing_h1_child.level); + try std.testing.expectEqual(.h2, trailing_h1_child.level); try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.headings.len); try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.children.len); } diff --git a/test/accept/workset.hdoc b/test/accept/workset.hdoc index 77cabaf..72d4f44 100644 --- a/test/accept/workset.hdoc +++ b/test/accept/workset.hdoc @@ -1,4 +1,22 @@ hdoc(version="2.0", lang="en"); +h1 "First" + +h2 "First.1" +h2 "First.2" +h2 "First.3" + p { We can mix \em{emphasis}, \strike{strike}, 
\mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } +h1 "Second" +h2 "Second.2" +h3 "Second.2.first" +h3 "Second.2.second" +h3 "Second.2.third" +h2 "Second.3" + +h1 "Third" +h2 "Third.1" +h3 "Third.1.first" +h3 "Third.1.second" +h3 "Third.1.third" From 89b290919162b771b286410f2d2d13695a503400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 15:46:40 +0100 Subject: [PATCH 077/116] Improves the specification around 'fmt' attribute. --- docs/TODO.md | 1 + docs/specification.md | 114 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 68ad3ab..48d62c0 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -12,6 +12,7 @@ - Allow tooling to work with semanticall yinvalid documents - Static layout: No surprises. Layout once, yield consistent rendering - Accessiblity: Everything is semantic, nothing is presentation-only. +- h3 after h1 is not legal > Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. diff --git a/docs/specification.md b/docs/specification.md index 6437148..b340351 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -831,27 +831,99 @@ Normative rules: ### 10.3 `fmt` values -> TODO: `fmt` values need a proper description of what the expected output is. -> The output is using the `lang` context of the \date, \time, \datetime element and -> we provide examples in german and english for each `fmt` option. 
- -> TODO: This chapter shall be split into: -> -> - `fmt` for `\date` -> - `fmt` for `\time` -> - `fmt` for `\datetime` - -- `\date(fmt=...)`: `year`, `month`, `day`, `weekday`, `short`, `long`, `relative`, `iso` -- `\time(fmt=...)`: `short`, `long`, `rough`, `relative`, `iso` -- `\datetime(fmt=...)`: `short`, `long`, `relative`, `iso` -- `\ref(fmt=...)`: `full`, `name`, `index` - -Defaults when omitted: - -- `\date(fmt=...)`: default `short` -- `\time(fmt=...)`: default `short` -- `\datetime(fmt=...)`: default `short` -- `\ref(fmt=...)`: default `full` +Some inline elements accept a `fmt` attribute that controls localized formatting of their value. + +The `fmt` value **MUST** be one of the values explicitly listed for the element; any other value **MUST** be rejected as semantically invalid. + +#### 10.3.1 Language context + +Formatting uses the element’s **language context**. + +The base language context is determined as follows: + +1. If the element has a `lang` attribute, that language tag **SHALL** be used. +2. Otherwise, if the document header has `hdoc(lang="...")`, that language tag **SHALL** be used. +3. Otherwise, there is no language context. + +Tooling **MAY** allow users to override the language context and/or localized formatting preferences (e.g. force 24-hour time, force a preferred date ordering). If such an override is active, it **SHALL** replace the base language context for the purpose of all formatting in this section. + +If there is no language context after applying user overrides, or if the implementation has no matching localized formatting data for the selected language context, then implementations **MUST** fall back to locale-independent formatting as follows: + +- For `\date`: + - `fmt="iso"` and `fmt="year"` proceed normally. + - `fmt="day"` **MUST** render the day-of-month as decimal digits (`DD`), without an ordinal suffix. + - `fmt="month"` **MUST** render the month as decimal digits (`MM`). 
+ - `fmt="weekday"` **MUST** render the ISO weekday number (`1`=Monday … `7`=Sunday). + - `fmt="short"`, `fmt="long"`, and `fmt="relative"` **MUST** behave as if `fmt="iso"` was specified. +- For `\time` and `\datetime`: + - if `fmt="iso"`, formatting proceeds normally, and + - otherwise, the implementation **MUST** behave as if `fmt="iso"` was specified. + +The examples below use `en-US` and `de-DE` language tags, but the exact output of localized formats (punctuation, capitalization, abbreviations, and choice of words) is implementation-defined. + +#### 10.3.2 Time zone context + +For `\time` and `\datetime`, formatting uses the value’s **effective zone**: + +- If the value explicitly specifies a zone, that zone **MUST** be the effective zone. +- Otherwise, the effective zone **MUST** be `hdoc.tz` (see §10.2.2 and §10.2.3). + +#### 10.3.3 `fmt` values for `\date` + +The body of `\date` **MUST** be a date in the lexical format of §10.2.1. + +Supported values: + +| Value | Meaning (normative) | Example output (`en-US`) | Example output (`de-DE`) | +| ----------------- | ---------------------------------------------------------------------------------------- | ------------------------ | ------------------------ | +| `iso` | Render the date in the lexical format of §10.2.1. | `2026-09-13` | `2026-09-13` | +| `short` (default) | Render the date in a numeric, locale-appropriate short form. | `9/13/2026` | `13.09.2026` | +| `long` | Render the date in a locale-appropriate long form (month name, full year). | `September 13, 2026` | `13. September 2026` | +| `relative` | Render a relative description of the date compared to “today”. | `in 3 days` | `in 3 Tagen` | +| `year` | Render only the year component. | `2026` | `2026` | +| `month` | Render only the month component in a locale-appropriate form (typically a month name). | `September` | `September` | +| `day` | Render only the day-of-month component in a locale-appropriate form (may be an ordinal). 
| `13th` | `13.` | +| `weekday` | Render the weekday name for that date. | `Saturday` | `Samstag` | + +The `relative` examples are non-normative and assume “today” is `2026-09-10` in the renderer’s date context. + +#### 10.3.4 `fmt` values for `\time` + +The body of `\time` **MUST** be a time in the lexical format of §10.2.2. + +Supported values: + +| Value | Meaning (normative) | Example output (`en-US`) | Example output (`de-DE`) | +| ----------------- | ------------------------------------------------------------------------------- | ------------------------ | ------------------------ | +| `iso` | Render the time in the lexical format of §10.2.2, including the effective zone. | `13:36:00+02:00` | `13:36:00+02:00` | +| `short` (default) | Render the time with minute precision in a locale-appropriate form. | `1:36 PM` | `13:36` | +| `long` | Render the time with second precision; include the fractional part if present. | `1:36:00 PM` | `13:36:00` | +| `rough` | Render a coarse day-period description (e.g. morning/afternoon/evening). | `afternoon` | `Nachmittag` | + +#### 10.3.5 `fmt` values for `\datetime` + +The body of `\datetime` **MUST** be a datetime in the lexical format of §10.2.3. The time component uses the same formatting rules as §10.3.4. + +Supported values: + +| Value | Meaning (normative) | Example output (`en-US`) | Example output (`de-DE`) | +| ----------------- | ----------------------------------------------------------------------------------- | -------------------------------- | ------------------------------ | +| `iso` | Render the datetime in the lexical format of §10.2.3, including the effective zone. | `2026-09-13T13:36:00+02:00` | `2026-09-13T13:36:00+02:00` | +| `short` (default) | Render date and time with minute precision in a locale-appropriate short form. | `9/13/2026, 1:36 PM` | `13.09.2026, 13:36` | +| `long` | Render date and time with second precision; include the fractional part if present. 
| `September 13, 2026, 1:36:00 PM` | `13. September 2026, 13:36:00` | +| `relative` | Render a relative description compared to the current datetime. | `20 minutes ago` | `vor 20 Minuten` | + +The `relative` examples are non-normative and assume the effective zone is `+02:00`, the value is `2026-09-13T13:36:00+02:00`, and “now” is `2026-09-13T13:56:00+02:00`. + +#### 10.3.6 `fmt` values for `\ref` + +The `fmt` attribute on `\ref` controls how synthesized link text is produced when the `\ref` body is empty (§9.5.6). It does not affect `\ref` nodes with a non-empty body. + +| Value | Meaning (normative) | Example | +| ---------------- | -------------------------- | ----------------------------- | +| `full` (default) | Render `" "`. | `§10.3.6 fmt values for \ref` | +| `name` | Render `""`. | `fmt values for \ref` | +| `index` | Render `""`. | `§10.3.6` | ## 11. Non-normative guidance for tooling From 29847449d54ceadb50b2db9793817424ef8e174f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 15:51:03 +0100 Subject: [PATCH 078/116] Removes rendering requirement for \footnote, removes resolved TODOs --- docs/TODO.md | 7 +------ docs/specification.md | 8 +++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 48d62c0..7e92bc5 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -13,15 +13,10 @@ - Static layout: No surprises. Layout once, yield consistent rendering - Accessiblity: Everything is semantic, nothing is presentation-only. - h3 after h1 is not legal +- Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3." > Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. -> Problem: What should synthesized text be for valid non-heading targets like table, img, pre? 
Spec says headings get but doesn't define fallback for figures ("Figure 3"), tables ("Table 2"), etc. - -States "A renderer SHALL render a regular footnote marker as \sup{\link{\d+}}" -Problem: This seems like implementation guidance, not semantic requirement. Different renderers (HTML, PDF, terminal) may render markers differently. Should be in §10 (non-normative) or relaxed to "SHOULD". - - > Recommendation 3: Add Formal Whitespace Processing Algorithm diff --git a/docs/specification.md b/docs/specification.md index b340351..3f3ca35 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -735,6 +735,9 @@ Target-derived values: If the referenced target is not a heading: +> TODO: Also add semantics for `ref(ref);` with `img` (Figure X.) and `table` (Table X.). +> This requires the introduction of counters for these tags, and allow auto-numbering. + - `\ref(ref="X");` (implicit body) is semantically invalid and **MUST** be rejected. - `\ref(ref="X"){...}` remains valid. @@ -763,11 +766,6 @@ Semantics: - Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. - A renderer **MAY** hyperlink markers and dumped entries back-and-forth. -Marker rendering (normative): - -- A renderer **SHALL** render a regular footnote marker as `\sup{\link{\d+}}`. -- A renderer **SHALL** render a citation marker as `\sup{\link{[\d+]}}`. - ## 10. Attribute types and date/time formats From f7e84ff786f403c03aa047590eda7d793e95429c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 16:07:54 +0100 Subject: [PATCH 079/116] Adds note about inheritance of lang attribute --- docs/TODO.md | 1 - docs/specification.md | 21 ++++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 7e92bc5..84159ff 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -3,7 +3,6 @@ - Assign semantics to node types, paragraph kinds, ... 
- Specify "syntax" proper - Add links to RFCs where possible -- Document `lang` inheritance. No `lang` attribute means that parent language is used. - Clarify that page layout is static and won't change except for context resize. - \abbrev and \term might be good ideas. - Add more text to the introduction and underlying ideas of the format: diff --git a/docs/specification.md b/docs/specification.md index 3f3ca35..95bbe1b 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -837,11 +837,7 @@ The `fmt` value **MUST** be one of the values explicitly listed for the element; Formatting uses the element’s **language context**. -The base language context is determined as follows: - -1. If the element has a `lang` attribute, that language tag **SHALL** be used. -2. Otherwise, if the document header has `hdoc(lang="...")`, that language tag **SHALL** be used. -3. Otherwise, there is no language context. +The base language context is the element’s **effective language tag** (§10.4.1). This means `lang` is inherited from parent elements, and top-level elements inherit their language tag from `hdoc(lang="...")`. Tooling **MAY** allow users to override the language context and/or localized formatting preferences (e.g. force 24-hour time, force a preferred date ordering). If such an override is active, it **SHALL** replace the base language context for the purpose of all formatting in this section. @@ -923,6 +919,21 @@ The `fmt` attribute on `\ref` controls how synthesized link text is produced whe | `name` | Render `""`. | `fmt values for \ref` | | `index` | Render `""`. | `§10.3.6` | +### 10.4 `lang` attribute + +The `lang` attribute assigns a BCP 47 language tag (§10.1) to an element. + +#### 10.4.1 Effective language tag + +Each element has an **effective language tag**, computed as follows: + +1. If the element has a `lang` attribute, its value **SHALL** be the effective language tag. +2. 
Otherwise, if the element has a parent element, the effective language tag **SHALL** be inherited from the parent element. +3. Otherwise (for top-level elements), if the document header has `hdoc(lang="...")`, that language tag **SHALL** be the effective language tag. +4. Otherwise, the element has no effective language tag. + +This inheritance allows documents to mix language contexts across nested elements (e.g. an English document that contains a German `quote` with an Italian paragraph inside), and keeps localized date/time values in their local context. + ## 11. Non-normative guidance for tooling - Formatters should normalize line endings to LF. From 9467f1f20f7e58eb3c7f81c341ccdf51c75376c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 17:02:08 +0100 Subject: [PATCH 080/116] Clarifies tables more --- docs/TODO.md | 3 --- docs/specification.md | 23 ++++++++++++++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 84159ff..4c8a047 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -3,7 +3,6 @@ - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible -- Clarify that page layout is static and won't change except for context resize. - \abbrev and \term might be good ideas. - Add more text to the introduction and underlying ideas of the format: - Orthogonality: Semantic structure is not dependend on syntax. Verbatim lines are not preformatted, but `pre` blocks are. @@ -14,8 +13,6 @@ - h3 after h1 is not legal - Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3." -> Problem: What if neither columns nor row exists (table with only group)? Spec should require at least one row or columns. 
- > Recommendation 3: Add Formal Whitespace Processing Algorithm diff --git a/docs/specification.md b/docs/specification.md index 95bbe1b..2d67edf 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -622,11 +622,24 @@ Only an empty body (`;`) is not "inline text". Table layout rules: -- **Column Count:** The number of columns in a table is determined by the `columns` element. It is the sum of the `colspan` values of the `td` cells within the `columns` row. If `columns` is absent, the column count is determined by the first `row` element in the same way. All `columns` and `row` elements in a table **MUST** resolve to the same effective column count. - -- **Row Headers (`row(title)`):** A `row` element may have a `title` attribute, which creates a *row header*. This header is rendered as an implicit, additional first column for that row. This "row header column" does **not** contribute to the table's main column count. If any `row` in the table has a `title`, renderers **MUST** reserve space for a leading row header column throughout the table. This leading column will be blank for `columns`, `group`, and any `row` without a `title`. - -- **Group Headers (`group`):** A `group` element acts as a heading that spans all columns of the table. Semantically, `group { ... }` is equivalent to a `row` containing a single `td` with a `colspan` attribute equal to the table's column count. A `group` does not have a `title` and does not render a cell in the row header column. +- **Column Count:** + - The **effective column count** in a table is determined by the `columns` element and is the sum of the `colspan` values of the `td` cells within the `columns` row. + - If `columns` is absent, the column count is determined by the first `row` element in the same way. + - A table with an effective column count of `0` **MUST** be rejected as semantically invalid. + - All `columns` and `row` elements in a table **MUST** resolve to the same effective column count. 
+- **Row Headers (`row(title)`):** + - A `row` element may have a `title` attribute, which creates a *row header*. + - If any *row header* is created, an *implicit*, additional first "row header column" is created. + - This header is rendered in that column. + - This "row header column" does **not** contribute to the table's main column count. + - If any `row` in the table has a `title`, renderers **MUST** reserve space for a leading row header column throughout the table. + - This leading column is blank for `columns`, `group`, and any `row` without a `title`. +- **Group Headers (`group`):** + - A `group` element starts a new group of rows with a shared semantic topic. + - The `group` body contains the caption for the topic of the following rows, until the next `group` element appears (or until the end of the table). + - Rows before the first `group` have no defined topic. + - A `group` element acts as a heading that spans all columns of the table. + - A `group` does not have a `title` and does not render a cell in the row header column. ### 9.4 Structural Elements From ef62482958c78a7baadb58bb8a1899e0294944f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 19:36:25 +0100 Subject: [PATCH 081/116] Add tasks from specification review --- SPEC_TODO.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 4a92f37..0695989 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -3,3 +3,7 @@ - Add support for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 
【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 +- Enforce `hdoc` placement and body rules by rejecting headers that are not the first node or that carry any non-empty body, instead of merely warning on the first non-header block and accepting later headers. 【F:docs/specification.md†L369-L373】【F:src/hyperdoc.zig†L734-L788】 +- Treat admonition blocks (`note`, `warning`, `danger`, `tip`, `quote`, `spoiler`) as block-list containers with shorthand promotion for string/verbatim bodies rather than forcing them into a single inline paragraph payload. 【F:docs/specification.md†L585-L588】【F:src/hyperdoc.zig†L916-L935】 +- Enforce table column structure: allow at most one optional leading `columns` row, derive a non-zero effective column count even when `columns` is absent, and reject tables where no row or column establishes width. The current implementation accepts multiple `columns` nodes anywhere and never validates missing/zero column counts. 
【F:docs/specification.md†L618-L629】【F:src/hyperdoc.zig†L1076-L1147】 +- Restrict `toc` to top-level usage as required by the specification; the current translator permits `toc` blocks inside nested block lists. 【F:docs/specification.md†L535-L543】【F:src/hyperdoc.zig†L1041-L1073】【F:src/hyperdoc.zig†L1254-L1270】 From b5634df6a0dab9fb73f21de7dea0d8aec2ce8c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 21:36:35 +0100 Subject: [PATCH 082/116] Clarify top-level scope for chapter 9.2 elements --- docs/specification.md | 71 +++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 2d67edf..9af418e 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -507,6 +507,8 @@ This promotion is a feature for convenience and applies only to the following el ### 9.2 Top-Level Block Elements +The elements in this chapter **MUST** appear only as top-level block elements (direct children of the document). They **MUST NOT** appear inside nested structures. + #### 9.2.1 `hdoc` (header) - **Role:** document header @@ -532,7 +534,24 @@ Semantic constraints: - If present, `title` **MUST** be the second node in the document (after `hdoc`). - `title` **MUST NOT** have an `id` attribute. -#### 9.2.3 Table of contents: `toc` +#### 9.2.3 Headings: `h1`, `h2`, `h3` + +- **Role:** block heading levels 1-3 +- **Body:** inline text +- **Attributes:** `lang` (optional), `id` (optional) + +Heading structure and numbering: + +- `h1`, `h2`, and `h3` **MUST** appear only as top-level block elements. +- `h1` **MAY** appear anywhere in the document order. +- `h2` **MUST** be preceded by an `h1`, and that `h1` is the parent section for the `h2`. +- `h3` **MUST** be preceded by an `h2`, and there **MUST NOT** be any intervening `h1` between that `h2` and the `h3`; the most recent `h2` is the parent section for the `h3`. 
+- Heading indices are assigned as follows: + - Each `h1` receives a one-part index `[i1]` that starts at `1` and increments by `1` after assignment. + - Each `h2` receives a two-part index `[i1, i2]`; `i2` resets to `1` when a new `h1` is assigned and increments by `1` after assignment. + - Each `h3` receives a three-part index `[i1, i2, i3]`; `i3` resets to `1` when a new `h1` or `h2` is assigned and increments by `1` after assignment. + +#### 9.2.4 Table of contents: `toc` - **Role:** Generates a table of contents. - **Body:** `;` (empty) @@ -541,7 +560,7 @@ Semantic constraints: Semantic constraints: - `toc` **MUST** be a top-level block element (a direct child of the document). -#### 9.2.4 Footnote dump: `footnotes` +#### 9.2.5 Footnote dump: `footnotes` - **Role:** collect and render accumulated footnotes - **Body:** `;` (empty) @@ -568,30 +587,24 @@ In this chapter, an "inline text" body is one of: Only an empty body (`;`) is not "inline text". -#### 9.3.1 Headings: `h1`, `h2`, `h3` - -- **Role:** block heading levels 1-3 -- **Body:** inline text -- **Attributes:** `lang` (optional), `id` (optional; top-level only) - -#### 9.3.2 Paragraph: `p` +#### 9.3.1 Paragraph: `p` - **Role:** A standard paragraph of text. - **Body:** inline text - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 9.3.3 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` +#### 9.3.2 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler` - **Role:** A block that renders with a distinct style to draw the reader's attention. - **Body:** A block-list containing zero or more General Text Block Elements. Per the Shorthand Body Promotion rule (§9.1.3), a string or verbatim body may be provided, which will be treated as a single contained paragraph. 
- **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 9.3.4 Unordered List: `ul` +#### 9.3.3 Unordered List: `ul` - **Body:** block-list containing `li` (at least one) - **Attributes:** `lang` (optional), `id` (optional; top-level only) -#### 9.3.5 Ordered List: `ol` +#### 9.3.4 Ordered List: `ol` - **Body:** block-list containing `li` (at least one) - **Attributes:** @@ -599,7 +612,7 @@ Only an empty body (`;`) is not "inline text". - `id` (optional; top-level only) - `first` (optional Integer ≥ 0; default 1): number of the first list item -#### 9.3.6 Figure: `img` +#### 9.3.5 Figure: `img` - **Body:** inline text caption/description (may be empty) - **Attributes:** @@ -608,12 +621,12 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) -#### 9.3.7 Preformatted: `pre` +#### 9.3.6 Preformatted: `pre` - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional), `id` (optional; top-level only) -#### 9.3.8 Tables: `table` +#### 9.3.7 Tables: `table` - **Body:** block-list containing: - optional `columns`, then @@ -735,24 +748,16 @@ Semantics: - `\ref(ref="X")` **MUST** resolve to a top-level element with `id="X"`, otherwise it is semantically invalid. - If `\ref` has a non-empty body, the body **MUST** be used as the rendered link text. -- If `\ref` has an empty body (`;`), the renderer **MUST** synthesize link text from the referenced target and `fmt`: - - - `fmt="full"`: renders `" "` (default) - - `fmt="name"`: renders `""` - - `fmt="index"`: renders `""` - -Target-derived values: - -- For heading targets (`h1`, `h2`, `h3`), `` is the heading’s constructed plaintext inline text. -- For heading targets, `` is the heading’s hierarchical number within the document (e.g. `3.` / `3.2.` / `3.2.1.`). - -If the referenced target is not a heading: - -> TODO: Also add semantics for `ref(ref);` with `img` (Figure X.) and `table` (Table X.). 
-> This requires the introduction of counters for these tags, and allow auto-numbering.
-
-- `\ref(ref="X");` (implicit body) is semantically invalid and **MUST** be rejected.
-- `\ref(ref="X"){...}` remains valid.
+- If `\ref` has an empty body (`;`), the following rules apply:
+  - If the referenced target is a heading (`h1`, `h2`, `h3`), the renderer **MUST** synthesize link text from the target and `fmt`:
+    - `fmt="full"`: renders `"<index> <name>"` (default)
+    - `fmt="name"`: renders `"<name>"`
+    - `fmt="index"`: renders `"<index>"`
+  - `<name>` is the heading’s constructed plaintext inline text.
+  - `<index>` is the heading’s hierarchical number within the document (e.g. `3.` / `3.2.` / `3.2.1.`).
+  - > TODO: Also add semantics for `ref(ref);` with `img` (Figure X.) and `table` (Table X.).
+    > This requires the introduction of counters for these tags, and allow auto-numbering.
+  - In all other cases, `\ref(ref="X");` (implicit body) **MUST** be rejected with a diagnostic explaining that empty-body references are only supported for headings until this TODO is resolved.
From ff19d99e14a923c00abbd8be8837f29478953bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 21:55:03 +0100 Subject: [PATCH 083/116] Update chapter status for reorganized elements --- docs/specification.md | 50 +++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/docs/specification.md b/docs/specification.md index 9af418e..b977ea9 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -41,30 +41,34 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "9.1 Built-in elements and list mode" - "9.1.1 Inline vs block": DONE - "9.1.2 List-body mode per built-in element": DRAFT - - "9.2 Element catalog (normative)": DRAFT + - "9.2 Top-Level Block Elements": DRAFT - "9.2.1 `hdoc` (header)": DONE - - "9.2.2 Headings: `h1`, `h2`, `h3`": DRAFT - - "9.2.3 Paragraph blocks: `p`, `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT - - "9.2.4 Lists: `ul`, `ol`": DRAFT - - "9.2.5 List item: `li`": DRAFT - - "9.2.6 Figure: `img`": DRAFT - - "9.2.7 Preformatted: `pre`": DRAFT - - "9.2.8 Table of contents: `toc`": DRAFT - - "9.2.9 Tables: `table`": DRAFT - - "9.2.10 `columns` (table header row)": DRAFT - - "9.2.11 `row` (table data row)": DRAFT - - "9.2.12 `group` (table row group)": DRAFT - - "9.2.13 `td` (table cell)": DRAFT - - "9.2.14 `title` (document title)": DRAFT - - "9.2.15 Footnote dump: `footnotes`": DRAFT - - "9.3 Inline elements" - - "9.3.1 `\\em`": DRAFT - - "9.3.2 `\\mono`": DRAFT - - "9.3.3 `\\strike`, `\\sub`, `\\sup`": DRAFT - - "9.3.4 `\link`": DRAFT - - "9.3.5 `\\date`, `\\time`, `\\datetime`": DRAFT - - "9.3.6 `\ref`": DRAFT - - "9.3.7 `\footnote`": DRAFT + - "9.2.2 `title` (document title)": DRAFT + - "9.2.3 Headings: `h1`, `h2`, `h3`": DRAFT + - "9.2.4 Table of contents: `toc`": DRAFT + - "9.2.5 Footnote dump: `footnotes`": DRAFT + - "9.3 General Text Block Elements": DRAFT + - "9.3.1 Paragraph: `p`": DRAFT + - 
"9.3.2 Admonition Blocks: `note`, `warning`, `danger`, `tip`, `quote`, `spoiler`": DRAFT + - "9.3.3 Unordered List: `ul`": DRAFT + - "9.3.4 Ordered List: `ol`": DRAFT + - "9.3.5 Figure: `img`": DRAFT + - "9.3.6 Preformatted: `pre`": DRAFT + - "9.3.7 Tables: `table`": DRAFT + - "9.4 Structural Elements": DRAFT + - "9.4.1 List item: `li`": DRAFT + - "9.4.2 `columns` (table header row)": DRAFT + - "9.4.3 `row` (table data row)": DRAFT + - "9.4.4 `group` (table row group)": DRAFT + - "9.4.5 `td` (table cell)": DRAFT + - "9.5 Inline elements" + - "9.5.1 `\\em`": DRAFT + - "9.5.2 `\\mono`": DRAFT + - "9.5.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "9.5.4 `\link`": DRAFT + - "9.5.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "9.5.6 `\ref`": DRAFT + - "9.5.7 `\footnote`": DRAFT - "10. Attribute types and date/time formats": DRAFT - "10.1 Common attribute types": DRAFT - "10.2 Date / time lexical formats (normative)": DRAFT From abc4070833f1e102d5e8e06516058d273d8eb590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 22:26:12 +0100 Subject: [PATCH 084/116] Adds better introduction adding motivation, usage scenarios and design philosophy --- docs/TODO.md | 39 ++++++++++++++++++++++++++++++--------- docs/specification.md | 35 +++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 4c8a047..109d830 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -1,25 +1,46 @@ # Specification TODOs +## Tasks + - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible -- \abbrev and \term might be good ideas. -- Add more text to the introduction and underlying ideas of the format: - - Orthogonality: Semantic structure is not dependend on syntax. Verbatim lines are not preformatted, but `pre` blocks are. 
- - Strictness for ecosystem health: Prevent HTML uncontrolled growth desaster - - Allow tooling to work with semanticall yinvalid documents - - Static layout: No surprises. Layout once, yield consistent rendering - - Accessiblity: Everything is semantic, nothing is presentation-only. -- h3 after h1 is not legal +
+## Potential Future Features
+
+### `appendix` element
+
- Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3."
-> Recommendation 3: Add Formal Whitespace Processing Algorithm
+### Abbreviations
+
+- \abbrev(title="Antiblockiersystem"){ABS} defines a new abbreviation
+- \abbrev{ABS} references an existing abbreviation
+- \abbrev(title) can only be set once.
+- glossary; emits a glossary/definition list of all abbreviations
+
+### Definition Lists
+
+- deflist {structural} is a definition list
+- term {inline} defines a new term, must be followed by a `def`
+- def { blocks } definition for the term
+
+### Index
+
+- \indexed{Word} adds a new entry to the index.
+- index; emits an index with refs to all `\index`ed words.
+
+### Formal Whitespace Processing Algorithm
+
+Write a formal definition of the whitespace processing algorithm so it can be easily replicated.
+### Formal Language Schema
 Recommendation 5: Add Appendix with Formal Schema Rationale: Current spec requires reading entire document to understand element relationships. Machine-readable schema would enable automatic validation and tooling. Provide RelaxNG Compact syntax schema defining:
+### Highlighted Lines and Line Numbering
 Rationale: Technical documentation needs to emphasize specific code lines (tutorials, diffs, explanations). 
pre(syntax="python", highlight="2,4-6"): diff --git a/docs/specification.md b/docs/specification.md index b977ea9..a030fa0 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -87,12 +87,36 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap HyperDoc 2.0 ("HyperDoc") is a plain-text markup language for hypertext documents. -Design goals: +It was created out of frustration with the practical reality of Markdown: while its permissiveness and brevity are useful, its underspecified semantics have led to many dialects where correct authoring and correct rendering often require trial-and-error. In most ecosystems, Markdown also ends up being a convenient frontend for HTML, and HTML is the definition of unbounded growth: implementing a full HTML renderer is a large, ongoing effort, and the surface area keeps expanding. + +HyperDoc aims to be a middle ground between "just text" formats such as Gemini’s Gemtext and the flexibility of HTML: it has a strict, proper semantic definition and supports rich documents, but it is intentionally far more restrictive (and therefore more implementable, testable, and interoperable) than HTML. + +### 1.1 Design goals (non-normative) - Deterministic, unambiguous parsing. - Convenient authoring in plain text. - Round-trippable formatting (tooling can rewrite without losing information). +### 1.2 Underlying ideas (non-normative) + +- **Orthogonality:** Syntax is an encoding of a document tree, not its meaning. In particular, verbatim (`:`) bodies are not inherently "preformatted"; only elements such as `pre` assign preformatted rendering semantics to their content. +- **Strictness for ecosystem health:** A small, precisely specified core prevents uncontrolled growth into renderer-specific quirks and accidental "standard library" behavior, which is how many HTML-adjacent formats fragment over time. 
+- **Tooling-friendly invalidity:** Tooling is allowed to operate on documents that are syntactically valid but semantically invalid, so that formatters, editors, refactoring tools, and diagnostics can work with incomplete or broken drafts. +- **Static layout:** Documents have no runtime behavior; rendering can be decided from the semantic document tree without hidden state or incremental "best effort" heuristics. +- **Accessibility:** Everything is semantic; the format aims to avoid presentation-only constructs so renderers can provide accessible output (screen readers, reflow, alternative presentations) without guessing author intent. + +### 1.3 Designated authoring area (non-normative) + +HyperDoc is designed for writing and publishing informational documents where structure, linking, and predictable rendering matter: + +- Informational content (e.g. websites) +- Technical documentation +- Blogs +- News posts +- Code documentation +- Personal notes +- Public wiki content + ## 2. Conformance and terminology The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are to be interpreted as described in RFC 2119. @@ -577,7 +601,7 @@ Semantics: - `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). - `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. - `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. -- Each invocation of `footnotes(...)` **MUST** advance the “collection cursor” for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). 
+- Each invocation of `footnotes(...)` **MUST** advance the "collection cursor" for subsequent `footnotes(...)` nodes (i.e., each dump emits only the accumulated entries since the last dump, not the whole-document set). - `footnotes` **MUST NOT** emit a heading; headings are authored via `h1`/`h2`/`h3`. - Tooling **SHOULD** emit a warning if any `\footnote(...)` is present in the document but no `footnotes(...)` node appears. @@ -788,7 +812,6 @@ Semantics: - Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. - A renderer **MAY** hyperlink markers and dumped entries back-and-forth. - ## 10. Attribute types and date/time formats > TODO: Attributes should be documented well and not only be mentioned in the element catalog. @@ -895,13 +918,13 @@ Supported values: | `iso` | Render the date in the lexical format of §10.2.1. | `2026-09-13` | `2026-09-13` | | `short` (default) | Render the date in a numeric, locale-appropriate short form. | `9/13/2026` | `13.09.2026` | | `long` | Render the date in a locale-appropriate long form (month name, full year). | `September 13, 2026` | `13. September 2026` | -| `relative` | Render a relative description of the date compared to “today”. | `in 3 days` | `in 3 Tagen` | +| `relative` | Render a relative description of the date compared to "today". | `in 3 days` | `in 3 Tagen` | | `year` | Render only the year component. | `2026` | `2026` | | `month` | Render only the month component in a locale-appropriate form (typically a month name). | `September` | `September` | | `day` | Render only the day-of-month component in a locale-appropriate form (may be an ordinal). | `13th` | `13.` | | `weekday` | Render the weekday name for that date. | `Saturday` | `Samstag` | -The `relative` examples are non-normative and assume “today” is `2026-09-10` in the renderer’s date context. +The `relative` examples are non-normative and assume "today" is `2026-09-10` in the renderer’s date context. 
#### 10.3.4 `fmt` values for `\time` @@ -929,7 +952,7 @@ Supported values: | `long` | Render date and time with second precision; include the fractional part if present. | `September 13, 2026, 1:36:00 PM` | `13. September 2026, 13:36:00` | | `relative` | Render a relative description compared to the current datetime. | `20 minutes ago` | `vor 20 Minuten` | -The `relative` examples are non-normative and assume the effective zone is `+02:00`, the value is `2026-09-13T13:36:00+02:00`, and “now” is `2026-09-13T13:56:00+02:00`. +The `relative` examples are non-normative and assume the effective zone is `+02:00`, the value is `2026-09-13T13:36:00+02:00`, and "now" is `2026-09-13T13:56:00+02:00`. #### 10.3.6 `fmt` values for `\ref` From 27748e80dab211ee8e4f3c479756fb2f76e70818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 23:52:57 +0100 Subject: [PATCH 085/116] Add HTML5 document header test --- src/hyperdoc.zig | 255 +++++++++++++++++++++++++--- src/render/dump.zig | 44 ++++- src/render/html5.zig | 55 +++++- src/testsuite.zig | 44 ++++- test/html5/document_header.hdoc | 5 + test/html5/document_header.html | 5 + test/html5/media_and_toc.html | 11 +- test/html5/nesting_and_inlines.html | 9 +- test/html5/paragraph_styles.html | 5 +- test/html5/tables.html | 5 +- 10 files changed, 392 insertions(+), 46 deletions(-) create mode 100644 test/html5/document_header.hdoc create mode 100644 test/html5/document_header.html diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 423d258..f9f88a6 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -20,11 +20,16 @@ pub const Document = struct { // header information lang: LanguageTag = .inherit, // inherit here means "unset" - title: ?[]const u8, + title: ?Title = null, author: ?[]const u8, date: ?DateTime, timezone: ?TimeZoneOffset, + pub const Title = struct { + full: Block.Title, + simple: []const u8, + }; + pub const TableOfContents = struct { level: Block.Heading.Level, // TODO: Refactor 
to use `index` here as well. headings: []usize, @@ -146,6 +151,11 @@ pub const Block = union(enum) { colspan: u32, content: []Block, }; + + pub const Title = struct { + lang: LanguageTag, + content: []Span, + }; }; pub fn FormattedDateTime(comptime DT: type) type { @@ -578,6 +588,7 @@ pub fn parse( try sema.validate_references(&id_map); const doc_lang = header.lang orelse LanguageTag.inherit; + const title = try sema.finalize_title(header, doc_lang); const contents = try sema.blocks.toOwnedSlice(arena.allocator()); const block_locations = try sema.block_locations.toOwnedSlice(arena.allocator()); const toc = try sema.build_toc(contents, block_locations); @@ -590,7 +601,7 @@ pub fn parse( .toc = toc, .lang = doc_lang, - .title = header.title, + .title = title, .version = header.version, .author = header.author, .date = header.date, @@ -722,6 +733,9 @@ pub const SemanticAnalyzer = struct { code: []const u8, header: ?Header = null, + title_block: ?Block.Title = null, + title_location: ?Parser.Location = null, + top_level_index: usize = 0, blocks: std.ArrayList(Block) = .empty, block_locations: std.ArrayList(Parser.Location) = .empty, ids: std.ArrayList(?Reference) = .empty, @@ -732,8 +746,18 @@ pub const SemanticAnalyzer = struct { heading_counters: [Block.Heading.Level.count]u16 = @splat(0), fn append_node(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, UnsupportedVersion }!void { + const node_index = sema.top_level_index; + sema.top_level_index += 1; + switch (node.type) { .hdoc => { + if (node_index != 0) { + try sema.emit_diagnostic(.misplaced_hdoc_header, node.location); + } + if (node.body != .empty) { + try sema.emit_diagnostic(.non_empty_hdoc_body, node.location); + } + const header = sema.translate_header_node(node) catch |err| switch (err) { error.OutOfMemory, error.UnsupportedVersion => |e| return e, error.BadAttributes => null, @@ -753,15 +777,32 @@ pub const SemanticAnalyzer = struct { std.debug.assert(sema.header != null); }, + .title => { 
+ if (sema.header == null and node_index == 0) { + try sema.emit_diagnostic(.missing_hdoc_header, node.location); + } + if (node_index != 1) { + try sema.emit_diagnostic(.misplaced_title_block, node.location); + } + if (sema.title_block != null) { + try sema.emit_diagnostic(.duplicate_title_block, node.location); + return; + } + + const title_block = sema.translate_title_node(node) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.BadAttributes => { + return; + }, + }; + + sema.title_block = title_block; + sema.title_location = node.location; + }, + else => { - if (sema.header == null) { - if (sema.blocks.items.len == 0) { - // Emit error for the first encountered block. - // This can only happen exactly once, as we either: - // - have already set a header block when the first non-header nodes arrives. - // - we have processed another block already, so the previous block would've emitted the warning already. - try sema.emit_diagnostic(.missing_hdoc_header, node.location); - } + if (sema.header == null and node_index == 0) { + try sema.emit_diagnostic(.missing_hdoc_header, node.location); } const block, const id = sema.translate_block_node(node) catch |err| switch (err) { @@ -851,6 +892,10 @@ pub const SemanticAnalyzer = struct { const image, const id = try sema.translate_image_node(node); return .{ .{ .image = image }, id }; }, + .title => { + try sema.emit_diagnostic(.{ .invalid_block_type = .{ .name = sema.code[node.location.offset .. 
node.location.offset + node.location.length] } }, node.location); + return error.InvalidNodeType; + }, .pre => { const preformatted, const id = try sema.translate_preformatted_node(node); return .{ .{ .preformatted = preformatted }, id }; @@ -913,6 +958,17 @@ pub const SemanticAnalyzer = struct { return .{ heading, attrs.id }; } + fn translate_title_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.Title { + const attrs = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + }); + + return .{ + .lang = attrs.lang, + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + }; + } + fn translate_paragraph_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Paragraph, ?Reference } { const attrs = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, @@ -1607,10 +1663,10 @@ pub const SemanticAnalyzer = struct { const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); // Convert the content_spans into a "rendered string". 
- const content_text = sema.render_spans_to_plaintext(content_spans) catch |err| switch (err) { + const content_text = (sema.render_spans_to_plaintext(content_spans, .reject_date_time) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, else => |e| return e, - }; + }).text; const content: Span.Content = switch (node.type) { .@"\\date" => try sema.parse_date_body(node, .date, Date, content_text, props.fmt), @@ -1632,6 +1688,7 @@ pub const SemanticAnalyzer = struct { .h1, .h2, .h3, + .title, .p, .note, .warning, @@ -1706,28 +1763,156 @@ pub const SemanticAnalyzer = struct { }); } - fn render_spans_to_plaintext(sema: *SemanticAnalyzer, source_spans: []const Span) error{ OutOfMemory, DateTimeRenderingUnsupported }![]const u8 { - var len: usize = 0; - for (source_spans) |span| { - len += switch (span.content) { - .text => |str| str.len, - .date, .time, .datetime => return error.DateTimeRenderingUnsupported, - }; - } + const TitlePlainText = struct { + text: []const u8, + contains_date_time: bool, + }; - var output_str: std.ArrayList(u8) = .empty; - defer output_str.deinit(sema.arena); + const PlaintextMode = enum { + reject_date_time, + iso_date_time, + }; - try output_str.ensureTotalCapacityPrecise(sema.arena, len); + fn render_spans_to_plaintext( + sema: *SemanticAnalyzer, + source_spans: []const Span, + mode: PlaintextMode, + ) error{ OutOfMemory, DateTimeRenderingUnsupported }!TitlePlainText { + var output: std.ArrayList(u8) = .empty; + defer output.deinit(sema.arena); + + var contains_date_time = false; for (source_spans) |span| { switch (span.content) { - .text => |str| output_str.appendSliceAssumeCapacity(str), - .date, .time, .datetime => unreachable, + .text => |str| try output.appendSlice(sema.arena, str), + .date => |value| switch (mode) { + .reject_date_time => return error.DateTimeRenderingUnsupported, + .iso_date_time => { + contains_date_time = true; + var buffer: [64]u8 = undefined; + const text = format_iso_date(value.value, 
&buffer); + try output.appendSlice(sema.arena, text); + }, + }, + .time => |value| switch (mode) { + .reject_date_time => return error.DateTimeRenderingUnsupported, + .iso_date_time => { + contains_date_time = true; + var buffer: [64]u8 = undefined; + const text = format_iso_time(value.value, &buffer); + try output.appendSlice(sema.arena, text); + }, + }, + .datetime => |value| switch (mode) { + .reject_date_time => return error.DateTimeRenderingUnsupported, + .iso_date_time => { + contains_date_time = true; + var buffer: [96]u8 = undefined; + const text = format_iso_datetime(value.value, &buffer); + try output.appendSlice(sema.arena, text); + }, + }, } } - return try output_str.toOwnedSlice(sema.arena); + return .{ + .text = try output.toOwnedSlice(sema.arena), + .contains_date_time = contains_date_time, + }; + } + + fn format_iso_date(value: Date, buffer: []u8) []const u8 { + const formatted = std.fmt.bufPrint(buffer, "{d:0>4}-{d:0>2}-{d:0>2}", .{ + @as(u32, @intCast(value.year)), + value.month, + value.day, + }) catch unreachable; + + return if (formatted.len > 0 and formatted[0] == '+') + formatted[1..] 
+ else + formatted; + } + + fn format_iso_time(value: Time, buffer: []u8) []const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.hour, value.minute, value.second }) catch unreachable; + if (value.microsecond > 0) { + writer.print(".{d:0>6}", .{value.microsecond}) catch unreachable; + } + const minutes = @intFromEnum(value.timezone); + if (minutes == 0) { + writer.writeByte('Z') catch unreachable; + } else { + const sign: u8 = if (minutes < 0) '-' else '+'; + const abs_minutes: u32 = @intCast(@abs(minutes)); + const hour: u32 = abs_minutes / 60; + const minute: u32 = abs_minutes % 60; + writer.print("{c}{d:0>2}:{d:0>2}", .{ sign, hour, minute }) catch unreachable; + } + + return stream.getWritten(); + } + + fn format_iso_datetime(value: DateTime, buffer: []u8) []const u8 { + const date_text = format_iso_date(value.date, buffer); + const sep_index = date_text.len; + buffer[sep_index] = 'T'; + + const time_text = format_iso_time(value.time, buffer[sep_index + 1 ..]); + + return buffer[0 .. 
sep_index + 1 + time_text.len]; + } + + fn synthesize_title_from_plaintext(sema: *SemanticAnalyzer, text: []const u8, doc_lang: LanguageTag) !Block.Title { + const spans = try sema.arena.alloc(Span, 1); + spans[0] = .{ + .content = .{ .text = text }, + .attribs = .{ .lang = .inherit }, + .location = .{ .offset = 0, .length = text.len }, + }; + + return .{ + .lang = doc_lang, + .content = spans, + }; + } + + fn finalize_title(sema: *SemanticAnalyzer, header: Header, doc_lang: LanguageTag) !?Document.Title { + const header_title = header.title; + const block_title = sema.title_block; + + if (header_title == null and block_title == null) + return null; + + if (block_title) |title_block| { + const rendered = sema.render_spans_to_plaintext(title_block.content, .iso_date_time) catch |err| switch (err) { + error.DateTimeRenderingUnsupported => unreachable, + else => |e| return e, + }; + + if (header_title == null and rendered.contains_date_time) { + if (sema.title_location) |location| { + try sema.emit_diagnostic(.title_inline_date_time_without_header, location); + } + } + + return .{ + .full = title_block, + .simple = rendered.text, + }; + } + + const simple_text = header_title.?; + const synthesized_full = try sema.synthesize_title_from_plaintext(simple_text, doc_lang); + + return .{ + .full = synthesized_full, + .simple = simple_text, + }; } const EmptyHandling = enum { @@ -2809,6 +2994,7 @@ pub const Parser = struct { h1, h2, h3, + title, p, note, warning, @@ -2861,6 +3047,7 @@ pub const Parser = struct { .h1, .h2, .h3, + .title, .p, .note, .warning, @@ -2890,6 +3077,7 @@ pub const Parser = struct { .h2, .h3, + .title, .p, .note, .warning, @@ -2995,6 +3183,8 @@ pub const Diagnostic = struct { unterminated_block_list, missing_hdoc_header: MissingHdocHeader, duplicate_hdoc_header: DuplicateHdocHeader, + misplaced_hdoc_header, + non_empty_hdoc_body, missing_attribute: NodeAttributeError, invalid_attribute: NodeAttributeError, empty_attribute: NodeAttributeError, @@ 
-3015,6 +3205,8 @@ pub const Diagnostic = struct { illegal_child_item, list_body_required, illegal_id_attribute, + misplaced_title_block, + duplicate_title_block, column_count_mismatch: TableShapeError, duplicate_id: ReferenceError, unknown_id: ReferenceError, @@ -3033,6 +3225,7 @@ pub const Diagnostic = struct { attribute_leading_trailing_whitespace, tab_character, automatic_heading_insertion: AutomaticHeading, + title_inline_date_time_without_header, pub fn severity(code: Code) Severity { return switch (code) { @@ -3044,6 +3237,8 @@ pub const Diagnostic = struct { .unterminated_block_list, .missing_hdoc_header, .duplicate_hdoc_header, + .misplaced_hdoc_header, + .non_empty_hdoc_body, .invalid_attribute, .missing_attribute, .empty_attribute, @@ -3064,6 +3259,8 @@ pub const Diagnostic = struct { .list_body_required, .illegal_id_attribute, .invalid_date_time_body, + .misplaced_title_block, + .duplicate_title_block, .column_count_mismatch, .duplicate_id, .unknown_id, @@ -3082,6 +3279,7 @@ pub const Diagnostic = struct { .tab_character, .document_starts_with_bom, .automatic_heading_insertion, + .title_inline_date_time_without_header, => .warning, }; } @@ -3104,6 +3302,8 @@ pub const Diagnostic = struct { .unterminated_block_list => try w.writeAll("Block list body is unterminated (missing '}' before end of file)."), .missing_hdoc_header => try w.writeAll("Document must start with an 'hdoc' header."), .duplicate_hdoc_header => try w.writeAll("Only one 'hdoc' header is allowed; additional header found."), + .misplaced_hdoc_header => try w.writeAll("The 'hdoc' header must be the first node in the document."), + .non_empty_hdoc_body => try w.writeAll("The 'hdoc' header must have an empty body (';')."), .duplicate_attribute => |ctx| try w.print("Duplicate attribute '{s}' will overwrite the earlier value.", .{ctx.name}), .empty_verbatim_block => try w.writeAll("Verbatim block has no lines."), .verbatim_missing_trailing_newline => try w.writeAll("Verbatim line should end with 
a newline."), @@ -3147,6 +3347,8 @@ pub const Diagnostic = struct { .illegal_child_item => try w.writeAll("Node not allowed here."), .illegal_id_attribute => try w.writeAll("Attribute 'id' not allowed here."), + .misplaced_title_block => try w.writeAll("Document title must be the second node (directly after 'hdoc')."), + .duplicate_title_block => try w.writeAll("Only one 'title' block is allowed."), .invalid_date_time_body => try w.writeAll("\\date, \\time and \\datetime do not allow any inlines inside their body."), @@ -3159,6 +3361,7 @@ pub const Diagnostic = struct { .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), .automatic_heading_insertion => |ctx| try w.print("Inserted automatic {t} to fill heading level gap.", .{ctx.level}), + .title_inline_date_time_without_header => try w.writeAll("Title block contains \\date/\\time/\\datetime but hdoc(title=\"...\") is missing; metadata title cannot be derived reliably."), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index b617f0d..3188c56 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -455,11 +455,25 @@ fn dumpOptionalDateTimeField(writer: *Writer, indent: usize, key: []const u8, va } } +fn dumpOptionalTitleField(writer: *Writer, indent: usize, key: []const u8, value: ?hdoc.Document.Title) Writer.Error!void { + try writeIndent(writer, indent); + if (value) |title| { + try writer.print("{s}:\n", .{key}); + try dumpOptionalStringField(writer, indent + indent_step, "simple", title.simple); + try writeIndent(writer, indent + indent_step); + try writer.writeAll("full:\n"); + try dumpOptionalStringField(writer, indent + 2 * indent_step, "lang", title.full.lang.text); + try dumpSpanListField(writer, indent + 2 * indent_step, "content", title.full.content); + } else { + try writer.print("{s}: null\n", .{key}); + } +} + fn dumpDocument(writer: *Writer, doc: *const hdoc.Document) Writer.Error!void { try writer.writeAll("document:\n"); try dumpVersion(writer, 
indent_step, doc.version); try dumpOptionalStringField(writer, indent_step, "lang", doc.lang.text); - try dumpOptionalStringField(writer, indent_step, "title", doc.title); + try dumpOptionalTitleField(writer, indent_step, "title", doc.title); try dumpOptionalStringField(writer, indent_step, "author", doc.author); try dumpOptionalDateTimeField(writer, indent_step, "date", doc.date); try dumpTableOfContents(writer, indent_step, doc.toc); @@ -475,8 +489,8 @@ pub fn render(doc: hdoc.Document, writer: *Writer) Writer.Error!void { test "render escapes string values" { const title = "Doc \"Title\"\n"; const span_text = "Hello \"world\"\n"; - const link_ref: hdoc.Reference = .init("section \"A\""); - const id_value: hdoc.Reference = .init("id:1\n"); + const link_ref: hdoc.Reference = .{ .text = "section \"A\"" }; + const id_value: hdoc.Reference = .{ .text = "id:1\n" }; var doc: hdoc.Document = .{ .arena = std.heap.ArenaAllocator.init(std.testing.allocator), @@ -486,7 +500,7 @@ test "render escapes string values" { .id_map = .{}, .toc = undefined, .lang = .inherit, - .title = title, + .title = null, .author = null, .date = null, .timezone = null, @@ -494,6 +508,21 @@ test "render escapes string values" { defer doc.deinit(); const arena_alloc = doc.arena.allocator(); + + const title_spans = try arena_alloc.alloc(hdoc.Span, 1); + title_spans[0] = .{ + .content = .{ .text = title }, + .attribs = .{}, + .location = .{ .offset = 0, .length = title.len }, + }; + doc.title = .{ + .full = .{ + .lang = .inherit, + .content = title_spans, + }, + .simple = title, + }; + doc.contents = try arena_alloc.alloc(hdoc.Block, 0); doc.content_ids = try arena_alloc.alloc(?hdoc.Reference, 0); doc.toc = .{ @@ -506,6 +535,7 @@ test "render escapes string values" { spans[0] = .{ .content = .{ .text = span_text }, .attribs = .{ .link = .{ .ref = link_ref } }, + .location = .{ .offset = 0, .length = span_text.len }, }; const blocks = try arena_alloc.alloc(hdoc.Block, 1); @@ -541,9 +571,9 @@ test 
"render escapes string values" { try buffer.writer.flush(); const output = buffer.writer.buffered(); - const expected_title = try std.fmt.allocPrint(std.testing.allocator, "title: \"{f}\"\n", .{std.zig.fmtString(title)}); - defer std.testing.allocator.free(expected_title); - try std.testing.expect(std.mem.indexOf(u8, output, expected_title) != null); + const expected_title_simple = try std.fmt.allocPrint(std.testing.allocator, " simple: \"{f}\"\n", .{std.zig.fmtString(title)}); + defer std.testing.allocator.free(expected_title_simple); + try std.testing.expect(std.mem.indexOf(u8, output, expected_title_simple) != null); const expected_span = try std.fmt.allocPrint( std.testing.allocator, diff --git a/src/render/html5.zig b/src/render/html5.zig index 3bbc4e9..364255e 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -11,6 +11,8 @@ const indent_step: usize = 2; pub fn render(doc: hdoc.Document, writer: *Writer) RenderError!void { var ctx: RenderContext = .{ .doc = &doc, .writer = writer }; + try ctx.renderDocumentHeader(); + for (doc.contents, 0..) 
|block, index| { try ctx.renderBlock(block, index, 0); } @@ -32,6 +34,53 @@ const RenderContext = struct { } } + fn renderDocumentHeader(ctx: *RenderContext) RenderError!void { + const has_title = ctx.doc.title != null; + const has_author = ctx.doc.author != null; + const has_date = ctx.doc.date != null; + + if (!has_title and !has_author and !has_date) return; + + try writeStartTag(ctx.writer, "header", .regular, .{ .lang = langAttribute(ctx.doc.lang) }); + try ctx.writer.writeByte('\n'); + + if (has_title) { + const title = ctx.doc.title.?; + try writeIndent(ctx.writer, indent_step); + try writeStartTag(ctx.writer, "h1", .regular, .{ .lang = langAttribute(title.full.lang) }); + try ctx.renderSpans(title.full.content); + try writeEndTag(ctx.writer, "h1"); + try ctx.writer.writeByte('\n'); + } + + if (has_author or has_date) { + try writeIndent(ctx.writer, indent_step); + try writeStartTag(ctx.writer, "p", .regular, .{ .class = "hdoc-doc-meta" }); + + var wrote_any = false; + if (has_author) { + try ctx.writer.writeAll("By "); + try writeEscapedHtml(ctx.writer, ctx.doc.author.?); + wrote_any = true; + } + if (has_date) { + if (wrote_any) { + try ctx.writer.writeAll(" - "); + } + + var date_buffer: [128]u8 = undefined; + const date_text = try formatIsoDateTime(ctx.doc.date.?, &date_buffer); + try writeEscapedHtml(ctx.writer, date_text); + } + + try writeEndTag(ctx.writer, "p"); + try ctx.writer.writeByte('\n'); + } + + try writeEndTag(ctx.writer, "header"); + try ctx.writer.writeByte('\n'); + } + fn renderBlocks(ctx: *RenderContext, blocks: []const hdoc.Block, indent: usize) RenderError!void { for (blocks) |block| { try ctx.renderBlock(block, null, indent); @@ -644,9 +693,9 @@ fn takeLang(lang: *?[]const u8) ?[]const u8 { fn headingTag(level: hdoc.Block.Heading.Level) []const u8 { return switch (level) { - .h1 => "h1", - .h2 => "h2", - .h3 => "h3", + .h1 => "h2", + .h2 => "h3", + .h3 => "h4", }; } diff --git a/src/testsuite.zig b/src/testsuite.zig index 
682aaae..9a0a9dc 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -601,11 +601,53 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n|nospace\n", &.{.verbatim_missing_space}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); pre:\n| trailing \n", &.{.trailing_whitespace}); try validateDiagnostics(.{}, "h1 \"Title\"", &.{.missing_hdoc_header}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{.duplicate_hdoc_header}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{ .misplaced_hdoc_header, .duplicate_hdoc_header }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } +test "title block populates metadata and warns on inline date" { + const code = "hdoc(version=\"2.0\",lang=\"en\");\ntitle { Hello \\date{2020-01-02} }\nh1 \"Body\""; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), diagnostics.items.items.len); + try std.testing.expect(diagnostics.items.items[0].code == .title_inline_date_time_without_header); + + const title = doc.title orelse return error.TestExpectedEqual; + const full = title.full; + try std.testing.expectEqualStrings("Hello 2020-01-02", title.simple); + try std.testing.expectEqual(@as(usize, 3), full.content.len); +} + +test "header title synthesizes full title representation" { + const code = "hdoc(version=\"2.0\",title=\"Metadata\",lang=\"en\");\nh1 \"Body\""; + 
+ var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 0), diagnostics.items.items.len); + + const title = doc.title orelse return error.TestExpectedEqual; + try std.testing.expectEqualStrings("Metadata", title.simple); + + const full = title.full; + try std.testing.expectEqual(@as(usize, 1), full.content.len); + switch (full.content[0].content) { + .text => |text| try std.testing.expectEqualStrings("Metadata", text), + else => return error.TestExpectedEqual, + } +} + test "parser reports unterminated inline lists" { var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); diff --git a/test/html5/document_header.hdoc b/test/html5/document_header.hdoc new file mode 100644 index 0000000..3366121 --- /dev/null +++ b/test/html5/document_header.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", title="Metadata Title", date="2024-08-16T09:30:00", lang="en", tz="+02:00"); + +title { Metadata Title From Block } + +p { This document uses header metadata and a title block without any explicit headings. } diff --git a/test/html5/document_header.html b/test/html5/document_header.html new file mode 100644 index 0000000..f5fdb71 --- /dev/null +++ b/test/html5/document_header.html @@ -0,0 +1,5 @@ +
+

Metadata Title From Block

+

+2024-08-16T09:30:00+02:00

+
+

This document uses header metadata and a title block without any explicit headings.

diff --git a/test/html5/media_and_toc.html b/test/html5/media_and_toc.html index 563874e..5cf5739 100644 --- a/test/html5/media_and_toc.html +++ b/test/html5/media_and_toc.html @@ -1,4 +1,7 @@ -

Media and TOC

+
+

Media and TOC

+
+

§1 Media and TOC

-

Preformatted

+

§1.1 Preformatted

 print("hello world") 
-

Figure

+

§1.2 Figure

Example figure
Figure caption text.
-

Dates and Times

+

§1.3 Dates and Times

Today is .

The meeting is at .

Release happens on .

diff --git a/test/html5/nesting_and_inlines.html b/test/html5/nesting_and_inlines.html index 5db4d36..1b5be1a 100644 --- a/test/html5/nesting_and_inlines.html +++ b/test/html5/nesting_and_inlines.html @@ -1,7 +1,10 @@ -

Nesting and Inline Styling

+
+

Nesting and Inlines

+
+

§1 Nesting and Inline Styling

This document exercises inline formatting and nested lists.

-

We can mix emphasis, strike, monospacetext. Superscript x2and subscript x2also appear.

-

Links point to local anchorsor external sites.

+

We can mix emphasis, strike, monospace text. Superscript x2 and subscript x2 also appear.

+

Links point to local anchors or external sites.

  • Top-level item one

    diff --git a/test/html5/paragraph_styles.html b/test/html5/paragraph_styles.html index 82e8555..575b583 100644 --- a/test/html5/paragraph_styles.html +++ b/test/html5/paragraph_styles.html @@ -1,4 +1,7 @@ -

    Paragraph Styles

    +
    +

    Paragraph Styles

    +
    +

    §1 Paragraph Styles

    A standard paragraph introducing the styles below.

    Notes provide informational context without urgency.

    Warnings highlight potential issues to watch for.

    diff --git a/test/html5/tables.html b/test/html5/tables.html index bfce614..9384ec5 100644 --- a/test/html5/tables.html +++ b/test/html5/tables.html @@ -1,4 +1,7 @@ -

    Table Coverage

    +
    +

    Tables

    +
    +

    §1 Table Coverage

    This file covers header rows, data rows with titles, groups, and colspans.

    From 16a6af796bef4850c24e169fb5d259d65a35a481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 2 Jan 2026 23:03:38 +0100 Subject: [PATCH 086/116] Cleans up specification text. Disallows arbitrary number of year digits, must be 4 now. We won't live till 9999 anyways. --- docs/TODO.md | 10 ++++++++++ docs/specification.md | 38 +++++++++++++++++++------------------- src/hyperdoc.zig | 4 ++-- src/testsuite.zig | 6 +----- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 109d830..89df194 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -6,6 +6,11 @@ - Specify "syntax" proper - Add links to RFCs where possible +- Verbatim-body to text conversion is under-specified. You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). Different implementations may diverge. +- Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. +- Span attribute semantics are referenced but not fully defined. §8.2 introduces spans with an “attribute set (e.g. emphasis/monospace/link…)” but the spec never fully defines the canonical attribute keys, nesting behavior (e.g., \em inside \mono), or how lang overrides interact at span level. That’s a major interoperability risk because renderers may differ even if parsers agree. 
+- + ## Potential Future Features ### `appendix` element @@ -50,3 +55,8 @@ pre(syntax="python", highlight="2,4-6"): | else: | return n * factorial(n-1) # Recursive case also: enable line numbers + +## Rejected Features + +- `\kbd{…}` is just `\mono(syntax="kbd"){…}` +- `include(path="...")` is rejected for unbounded document content growth diff --git a/docs/specification.md b/docs/specification.md index a030fa0..a3ffe77 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -28,7 +28,7 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "7. String Literal Escape Processing (semantic)": DRAFT - "7.1 Control character policy (semantic)": DRAFT - "7.2 Supported escapes in string literals": DRAFT - - "7.2.1 Unicode escape `\\u{H...}`": DRAFT + - "7.2.1 Unicode escape `\u{H...}`": DRAFT - "7.3 Invalid escapes": DRAFT - "8. Semantic document model": DRAFT - "8.1 Document structure": DONE @@ -62,11 +62,11 @@ If a chapter is marked DONE or FROZEN, the status applies to all of its sub-chap - "9.4.4 `group` (table row group)": DRAFT - "9.4.5 `td` (table cell)": DRAFT - "9.5 Inline elements" - - "9.5.1 `\\em`": DRAFT - - "9.5.2 `\\mono`": DRAFT - - "9.5.3 `\\strike`, `\\sub`, `\\sup`": DRAFT + - "9.5.1 `\em`": DRAFT + - "9.5.2 `\mono`": DRAFT + - "9.5.3 `\strike`, `\sub`, `\sup`": DRAFT - "9.5.4 `\link`": DRAFT - - "9.5.5 `\\date`, `\\time`, `\\datetime`": DRAFT + - "9.5.5 `\date`, `\time`, `\datetime`": DRAFT - "9.5.6 `\ref`": DRAFT - "9.5.7 `\footnote`": DRAFT - "10. Attribute types and date/time formats": DRAFT @@ -384,7 +384,7 @@ A semantic validator/decoder **MUST** accept exactly: A semantic validator/decoder **MUST** reject a string literal that contains: -- any other escape (`\t`, `\\xHH`, `\0`, etc.) +- any other escape (`\t`, `\xHH`, `\0`, etc.) 
- an unterminated escape (string ends after `\`) - malformed `\u{...}` (missing braces, empty, non-hex, >6 digits) - out-of-range or surrogate code points @@ -723,21 +723,21 @@ Table layout rules: ### 9.5 Inline elements -Inline elements appear only in inline-list bodies (or inside string/verbatim, depending on renderer). +Inline elements appear only in inline-list bodies. -#### 9.5.1 `\\em` +#### 9.5.1 `\em` - **Role:** emphasis - **Body:** inline text - **Attributes:** `lang` (optional) -#### 9.5.2 `\\mono` +#### 9.5.2 `\mono` - **Role:** monospaced span - **Body:** inline text - **Attributes:** `syntax` (optional), `lang` (optional) -#### 9.5.3 `\\strike`, `\\sub`, `\\sup` +#### 9.5.3 `\strike`, `\sub`, `\sup` - **Role:** strike-through / subscript / superscript - **Body:** inline text @@ -757,7 +757,7 @@ Notes: - Interior references use `\ref(ref="...")`. -#### 9.5.5 `\\date`, `\\time`, `\\datetime` +#### 9.5.5 `\date`, `\time`, `\datetime` - **Role:** localized date/time rendering - **Body:** must be plain text, a single string, or verbatim (no nested inline elements) @@ -795,7 +795,7 @@ When computing `` for headings, inline footnote/citation markers **SHOULD - **Body:** inline text (required for defining form; empty for reference form) - **Attributes:** - `key` (optional; defines a named footnote) - - `ref` (optional; references a previously defined named footnote) + - `ref` (optional; references a defined named footnote) - `kind` (optional; one of `footnote`, `citation`; default `footnote`) - `lang` (optional) @@ -808,7 +808,7 @@ Semantics: - `\footnote{...}` defines an anonymous footnote entry at the marker position. - `\footnote(key="X"){...}` defines a named footnote entry in the footnote namespace and emits its marker at the marker position. -- `\footnote(ref="X");` emits a marker for the previously defined named footnote `X`. +- `\footnote(ref="X");` emits a marker for the defined named footnote `X`. 
- Each `kind` has an independent numeric namespace: footnotes and citations are numbered separately. - A renderer **MAY** hyperlink markers and dumped entries back-and-forth. @@ -838,17 +838,17 @@ These formats are a conservative intersection of RFC 3339 and ISO 8601. `YYYY-MM-DD` -- `YYYY`: one or more digits -- `MM`: `01`-`12` -- `DD`: `01`-`31` +- `YYYY`: exactly four digits +- `MM`: `01`-`12`: exactly two digits +- `DD`: `01`-`31`: exactly two digits #### 10.2.2 Time `hh:mm:ss` with an optional fraction and an optional zone. -- `hh`: `00`-`23` -- `mm`: `00`-`59` -- `ss`: `00`-`59` +- `hh`: `00`-`23`: exactly two digits +- `mm`: `00`-`59`: exactly two digits +- `ss`: `00`-`59`: exactly two digits - optional fraction: `.` followed by 1,2,3,6, or 9 digits - zone: - `Z`, or diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f9f88a6..c22cc27 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -294,7 +294,7 @@ pub const Date = struct { day: u5, // 1-31 pub fn parse(text: []const u8) !Date { - if (text.len < 7) // "Y-MM-DD" + if (text.len != "YYYY-MM-DD".len) return error.InvalidValue; const first_dash = std.mem.indexOfScalar(u8, text, '-') orelse return error.InvalidValue; const tail = text[first_dash + 1 ..]; @@ -305,7 +305,7 @@ pub const Date = struct { const month_text = text[first_dash + 1 .. 
second_dash]; const day_text = text[second_dash + 1 ..]; - if (year_text.len == 0 or month_text.len != 2 or day_text.len != 2) return error.InvalidValue; + if (year_text.len != 4 or month_text.len != 2 or day_text.len != 2) return error.InvalidValue; const year_value = std.fmt.parseInt(u32, year_text, 10) catch return error.InvalidValue; if (year_value > std.math.maxInt(i32)) return error.InvalidValue; diff --git a/src/testsuite.zig b/src/testsuite.zig index 9a0a9dc..b6e7e1b 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -694,14 +694,10 @@ test "Date.parse accepts ISO dates" { try std.testing.expectEqual(@as(u4, 12), date.month); try std.testing.expectEqual(@as(u5, 25), date.day); - const short_year = try hdoc.Date.parse("1-01-01"); - try std.testing.expectEqual(@as(i32, 1), short_year.year); - try std.testing.expectEqual(@as(u4, 1), short_year.month); - try std.testing.expectEqual(@as(u5, 1), short_year.day); - try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-1-01")); try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-13-01")); try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("2025-12-32")); + try std.testing.expectError(error.InvalidValue, hdoc.Date.parse("1-01-01")); } test "Time.parse accepts ISO times with zones" { From effddce7be29e226fb717037959e4cdc6cf26808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 00:09:53 +0100 Subject: [PATCH 087/116] Adds a huge load of stuff to docs/TODO.md, improves docs/specification.md by adding two sub-chapters for unknown node semantics and verbatim body joining --- SPEC_TODO.md | 1 - docs/TODO.md | 65 +++++++++++++++++++++++++++++++++++++++++-- docs/specification.md | 41 +++++++++++++++++++++++++-- 3 files changed, 102 insertions(+), 5 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 0695989..1cfadaf 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,6 +1,5 @@ # Spec compliance TODOs -- Add support 
for the `title` block as a top-level element, enforce its single/second-position semantics, and integrate it with `hdoc(title=...)` metadata handling. This block is not parsed or emitted from `translate_block_node`, so the display title path is currently missing. 【F:docs/specification-proper-draft.md†L360-L405】【F:docs/specification-proper-draft.md†L622-L633】【F:src/hyperdoc.zig†L820-L838】 - Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 - Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 - Enforce `hdoc` placement and body rules by rejecting headers that are not the first node or that carry any non-empty body, instead of merely warning on the first non-header block and accepting later headers. 【F:docs/specification.md†L369-L373】【F:src/hyperdoc.zig†L734-L788】 diff --git a/docs/TODO.md b/docs/TODO.md index 89df194..556c7dc 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -5,14 +5,63 @@ - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible - - Verbatim-body to text conversion is under-specified. 
You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). Different implementations may diverge. - Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. - Span attribute semantics are referenced but not fully defined. §8.2 introduces spans with an “attribute set (e.g. emphasis/monospace/link…)” but the spec never fully defines the canonical attribute keys, nesting behavior (e.g., \em inside \mono), or how lang overrides interact at span level. That’s a major interoperability risk because renderers may differ even if parsers agree. -- +- Refine that `hdoc(title)` is metadata while `title{}` is rendered rich text +- Refine `img(path)` only using forward slash. + - Proposal: Add to §9.3.5: + - "path MUST use forward slashes (/) as path separators, regardless of host OS." + - "path MUST be relative; absolute paths and URI schemes (e.g., http://) MUST be rejected." + - "Path resolution is relative to the directory containing the HyperDoc source file." + - "Path traversal outside the source directory (e.g., ../../etc/passwd) SHOULD be rejected or restricted by implementations." +- Proposal: Add to §9.2.4: + - "Multiple toc elements MAY appear in a document; each MUST render the same heading structure but MAY appear at different locations." + - "If depth differs between instances, each TOC renders independently according to its own depth attribute." 
+- Add to §9.2.5: + - "Multiple footnotes elements partition footnote rendering; each instance collects only footnotes/citations accumulated since the previous dump (or document start)." +- Proposal: Add to §4: + - "Implementations MUST support nesting depths of at least 32 levels." + - "Implementations MAY reject documents exceeding this depth with a diagnostic." + - "Nesting depth is measured as the maximum distance from the document root to any leaf node." +- Ambiguity of Inline Unicode: + - Finding: String literals ("...") support \u{...} escapes (§7.2.1). Inline text streams (bodies of p, h1) do not (§6.1 only lists \\, \{, \}). + - Issue: Authors cannot enter invisible characters (like Non-Breaking Space U+00A0 or Zero Width Space U+200B) into a paragraph without pasting the raw invisible character, which is brittle and invisible in editors. +- Recommendation: Add explicit sequencing in §7 stating: "Escape decoding MUST occur during semantic validation, before inline text construction (§8.2) for inline-list bodies, and before attribute validation for attribute values." +- Recommendation: Add to §9.2.1: "If the document contains any \date, \time, or \datetime elements with fmt values other than iso, and hdoc(lang) is not specified, implementations SHOULD emit a diagnostic." +- Issue: "Lexical" implies only regex-level matching. It does not strictly forbid 2023-02-31. For a strict format, "Semantic" validity (Gregorian correctness) should be enforced to prevent invalid metadata. ## Potential Future Features +### `hr;` or `break;` + +Purpose: Explicit scene/topic breaks within prose (equivalent to HTML
    ). + +Attributes: + id (optional; top-level only) +Body: + ; (empty) +Constraints: + - MUST be top-level or inside block containers that allow general text blocks + - MUST NOT appear inside inline contexts + +Rationale: + Common typographic convention for section breaks that are less formal than headings. Currently missing; authors might abuse pre: or empty paragraphs as workarounds. + +### `\plain` + +Finding: Attributes like lang are supported on \em, \mono, etc. However, if an author needs to mark a plain-text span as a different language (e.g., "The word Angst (German) means...") without applying italics or monospace, there is no element to hold the lang attribute. + +### `table{title{}}` or `table(caption="")` + `img(caption="")` + +x(caption) composes well with `\ref(ref)`. + +table title is good for accessibility. + +### `\br;` inline + +Introduce \br for Hard Line Breaks: Since whitespace normalization collapses \n to space, there is currently no way to force a line break within a paragraph (e.g., for postal addresses or poetry) without using pre. Adding a \br inline element would resolve this semantic gap. + ### `appendix` element - Support "appendix{}" in addition to h1,h2,h3 which is a h1-level chapter that renders as "A. B. C." instead of "1. 2. 3." @@ -56,7 +105,19 @@ pre(syntax="python", highlight="2,4-6"): | return n * factorial(n-1) # Recursive case also: enable line numbers +### Attribution + +```hdoc +quote { + p "Premature optimization is the root of all evil." + attribution "Donald Knuth" +} +``` + ## Rejected Features - `\kbd{…}` is just `\mono(syntax="kbd"){…}` - `include(path="...")` is rejected for unbounded document content growth +- `code` is just `\mono(syntax="…")` +- `details/summary` is just HTML with a dynamically changing page layout, ever tried printing this? +- `\math`, `equation{…}` have too high implementation complexity and have high requirements on fonts, font renderers and layout engines. 
\ No newline at end of file diff --git a/docs/specification.md b/docs/specification.md index a3ffe77..50f184c 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -445,6 +445,30 @@ The renderer **MUST** see the post-normalization result. **String and verbatim bodies:** When a string body or verbatim body is converted into spans, it is treated as a single text source (no nested inline nodes) and then processed using the same rules above, including whitespace normalization for non-`pre` elements. +#### 8.2.1 Verbatim body decoding (normative) + +A verbatim body is converted to a Unicode string as follows. + +Let `LINESEP` be U+000A (LF). + +For each `piped_line` in source order: + +1. Let `raw` be the sequence of characters after the leading `|` up to (but not including) the line terminator. +2. If `raw` begins with a single U+0020 SPACE, remove **exactly one** such leading SPACE from `raw`. + (This optional SPACE is a visual separator between `|` and the content and is not part of the verbatim value.) +3. Append the resulting string to a list `lines`. + +The verbatim value is: + +- the empty string if `lines` is empty, otherwise +- `join(LINESEP, lines)` (i.e., insert `LINESEP` between adjacent entries, but not after the last entry). + +Notes: + +- The concrete source line ending used for `piped_line` termination (LF vs CRLF vs EOF) does not affect the verbatim value. +- The resulting verbatim value is then processed as a single text source under §8.2 (including span merging and whitespace normalization for non-`pre` elements). + + ### 8.3 Attribute uniqueness - Within a node, attribute keys **MUST** be unique (case-sensitive). @@ -488,6 +512,19 @@ The Footnote Namespace is used for defining and referencing reusable footnotes. - Built-in element names are defined in §9. - Unknown elements are syntactically valid (parseable), but semantically invalid. 
+### 8.6.1 Closed-world semantics and compatibility policy (normative) + +HyperDoc 2.0 defines a **closed** set of built-in element names and attributes. + +- A semantic validator **MUST** treat any node whose name is not a built-in element name (§9) as **semantically invalid**. +- A semantic validator **MUST** treat any attribute key that is not defined for the given element (§9) as **semantically invalid** (see also §8.4). +- Renderers **MUST NOT** assign renderer-specific meaning to unknown element names or unknown attributes. + +#### Tooling guidance (non-normative) + +- Tools that operate on syntactically valid documents (formatters, editors, refactoring tools) **SHOULD** preserve unknown nodes and unknown attributes when round-tripping, while emitting diagnostics, to support drafts and forward-compatibility experiments. +- Conformance tests for “HyperDoc 2.0 renderers” should assume closed-world semantics: unknown names are errors, not extension points. + ## 9. Elements and attributes ### 9.1 Built-in elements and list mode @@ -687,7 +724,7 @@ Table layout rules: #### 9.4.1 List item: `li` - **Body:** either - - a block-list of block elements, or + - a block-list of general text block elements, or - a single string body, or - a verbatim body - **Attributes:** `lang` (optional) @@ -716,7 +753,7 @@ Table layout rules: - **Role:** A single cell within a table row. 
- **Body:** either - - a block-list of block elements, or + - a block-list of general text block elements, or - a single string body, or - a verbatim body - **Attributes:** `colspan` (optional Integer ≥ 1; default 1), `lang` (optional) From 893074e6eb7fc86b666e822b825c00781582d396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 00:33:26 +0100 Subject: [PATCH 088/116] Enforce top-level toc --- src/hyperdoc.zig | 5 +++++ src/testsuite.zig | 1 + 2 files changed, 6 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..2203f88 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -1316,6 +1316,11 @@ pub const SemanticAnalyzer = struct { try blocks.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { + if (child_node.type == .toc) { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + continue; + } + const block, const id = try sema.translate_block_node(child_node); if (id != null) { try sema.emit_diagnostic(.illegal_id_attribute, get_attribute_location(child_node, "id", .name).?); diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..f2e51b4 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -604,6 +604,7 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{ .misplaced_hdoc_header, .duplicate_hdoc_header }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); ul{ li{ toc; } }", &.{.illegal_child_item}); } test "title block populates metadata and warns on inline date" { From fb71accca58ff61d5ce29fc44f117f35ed5dab9c Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 08:57:59 +0100 Subject: [PATCH 089/116] Track row title column usage --- src/hyperdoc.zig | 106 ++++++++++++++++++++++++++++++------------- src/render/dump.zig | 2 + src/render/html5.zig | 42 ++--------------- src/testsuite.zig | 83 +++++++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 70 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..d5dfbf9 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -118,8 +118,8 @@ pub const Block = union(enum) { }; pub const Table = struct { - // TODO: column_count: usize, - // TODO: has_row_titles: bool, // not counted inside `Table.column_count`! + column_count: usize, + has_row_titles: bool, // not counted inside `Table.column_count`! lang: LanguageTag, rows: []TableRow, }; @@ -1138,7 +1138,10 @@ pub const SemanticAnalyzer = struct { var rows: std.ArrayList(Block.TableRow) = .empty; defer rows.deinit(sema.arena); - var column_count: ?usize = null; + var column_count: usize = 0; + var saw_header_row = false; + var saw_non_header_row = false; + var has_row_titles = false; switch (node.body) { .list => |child_nodes| { @@ -1146,39 +1149,43 @@ pub const SemanticAnalyzer = struct { for (child_nodes) |child_node| { switch (child_node.type) { .columns => { + if (saw_header_row) { + try sema.emit_diagnostic(.duplicate_columns_row, child_node.location); + } + + if (saw_non_header_row) { + try sema.emit_diagnostic(.misplaced_columns_row, child_node.location); + } + + saw_header_row = true; + const row_attrs = try sema.get_attributes(child_node, struct { lang: LanguageTag = .inherit, }); const cells = try sema.translate_table_cells(child_node); + const width = calculate_table_width(cells); + try sema.update_table_column_count(&column_count, width, child_node.location); + rows.appendAssumeCapacity(.{ .columns = .{ .lang = row_attrs.lang, .cells = cells, }, }); - - var width: usize = 0; - for (cells) |cell| { - 
std.debug.assert(cell.colspan > 0); - width += cell.colspan; - } - - column_count = column_count orelse width; - if (width != column_count) { - try sema.emit_diagnostic(.{ .column_count_mismatch = .{ - .expected = column_count.?, - .actual = width, - } }, child_node.location); - } }, .row => { + saw_non_header_row = true; + const row_attrs = try sema.get_attributes(child_node, struct { lang: LanguageTag = .inherit, title: ?[]const u8 = null, }); const cells = try sema.translate_table_cells(child_node); + const width = calculate_table_width(cells); + try sema.update_table_column_count(&column_count, width, child_node.location); + has_row_titles = has_row_titles or (row_attrs.title != null); rows.appendAssumeCapacity(.{ .row = .{ @@ -1187,22 +1194,10 @@ pub const SemanticAnalyzer = struct { .cells = cells, }, }); - - var width: usize = 0; - for (cells) |cell| { - std.debug.assert(cell.colspan > 0); - width += cell.colspan; - } - - column_count = column_count orelse width; - if (width != column_count) { - try sema.emit_diagnostic(.{ .column_count_mismatch = .{ - .expected = column_count.?, - .actual = width, - } }, child_node.location); - } }, .group => { + saw_non_header_row = true; + const row_attrs = try sema.get_attributes(child_node, struct { lang: LanguageTag = .inherit, }); @@ -1225,7 +1220,14 @@ pub const SemanticAnalyzer = struct { }, } + if (column_count == 0) { + try sema.emit_diagnostic(.missing_table_column_count, node.location); + column_count = 1; + } + const table: Block.Table = .{ + .column_count = column_count, + .has_row_titles = has_row_titles, .lang = attrs.lang, .rows = try rows.toOwnedSlice(sema.arena), }; @@ -1233,6 +1235,39 @@ pub const SemanticAnalyzer = struct { return .{ table, attrs.id }; } + fn calculate_table_width(cells: []const Block.TableCell) usize { + var width: usize = 0; + for (cells) |cell| { + std.debug.assert(cell.colspan > 0); + width += cell.colspan; + } + return width; + } + + fn update_table_column_count(sema: 
*SemanticAnalyzer, column_count: *usize, width: usize, location: Parser.Location) !void { + if (width == 0) { + if (column_count.* != 0) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.*, + .actual = 0, + } }, location); + } + return; + } + + if (column_count.* == 0) { + column_count.* = width; + return; + } + + if (width != column_count.*) { + try sema.emit_diagnostic(.{ .column_count_mismatch = .{ + .expected = column_count.*, + .actual = width, + } }, location); + } + } + fn translate_table_cells(sema: *SemanticAnalyzer, node: Parser.Node) error{ OutOfMemory, BadAttributes, InvalidNodeType, Unimplemented }![]Block.TableCell { var cells: std.ArrayList(Block.TableCell) = .empty; defer cells.deinit(sema.arena); @@ -3208,6 +3243,9 @@ pub const Diagnostic = struct { misplaced_title_block, duplicate_title_block, column_count_mismatch: TableShapeError, + missing_table_column_count, + misplaced_columns_row, + duplicate_columns_row, duplicate_id: ReferenceError, unknown_id: ReferenceError, @@ -3262,6 +3300,9 @@ pub const Diagnostic = struct { .misplaced_title_block, .duplicate_title_block, .column_count_mismatch, + .missing_table_column_count, + .misplaced_columns_row, + .duplicate_columns_row, .duplicate_id, .unknown_id, => .@"error", @@ -3353,6 +3394,9 @@ pub const Diagnostic = struct { .invalid_date_time_body => try w.writeAll("\\date, \\time and \\datetime do not allow any inlines inside their body."), .column_count_mismatch => |ctx| try w.print("Expected {} columns, but found {}", .{ ctx.expected, ctx.actual }), + .missing_table_column_count => try w.writeAll("Table must declare at least one column via a columns or row entry."), + .misplaced_columns_row => try w.writeAll("The 'columns' header row must be the first item in a table."), + .duplicate_columns_row => try w.writeAll("Only one 'columns' header row is allowed per table."), .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", 
.{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), diff --git a/src/render/dump.zig b/src/render/dump.zig index 3188c56..9e284d0 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -440,6 +440,8 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err .table => |table| { try writeTypeTag(writer, "table"); try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang.text); + try dumpOptionalNumberField(writer, indent + indent_step, "column_count", @as(?usize, table.column_count)); + try dumpBoolField(writer, indent + indent_step, "has_row_titles", table.has_row_titles); try dumpTableRowsField(writer, indent + indent_step, "rows", table.rows); }, } diff --git a/src/render/html5.zig b/src/render/html5.zig index 364255e..2357a42 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -301,8 +301,8 @@ const RenderContext = struct { const lang_attr = langAttribute(table.lang); const id_attr = ctx.resolveBlockId(block_index); - const column_count = inferColumnCount(table.rows) orelse 0; - const has_title_column = tableHasTitleColumn(table.rows); + const column_count = table.column_count; + const has_title_column = table.has_row_titles; try writeIndent(ctx.writer, indent); try writeStartTag(ctx.writer, "table", .regular, .{ .id = id_attr, .lang = lang_attr }); @@ -707,44 +707,8 @@ fn tocHasEntries(node: hdoc.Document.TableOfContents) bool { return false; } -fn inferColumnCount(rows: []const hdoc.Block.TableRow) ?usize { - for (rows) |row| { - switch (row) { - .columns => |columns| { - var width: usize = 0; - for (columns.cells) |cell| { - width += cell.colspan; - } - return width; - }, - .row => |data_row| { - var width: usize = 0; - for (data_row.cells) |cell| { - width += cell.colspan; - } - return width; - }, - .group => {}, - } - } - return null; -} - -fn tableHasTitleColumn(rows: []const hdoc.Block.TableRow) bool { - for (rows) |row| { - switch (row) { - 
.row => |data_row| if (data_row.title != null) return true, - .group => return true, - .columns => {}, - } - } - return false; -} - fn findHeaderIndex(rows: []const hdoc.Block.TableRow) ?usize { - for (rows, 0..) |row, index| { - if (row == .columns) return index; - } + if (rows.len > 0 and rows[0] == .columns) return 0; return null; } diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..e2c55b8 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -606,6 +606,89 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); } +test "table derives column count from first data row" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ row(title="headered") { + \\ td { p "A" } + \\ td(colspan="2") { p "B" } + \\ } + \\} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + switch (doc.contents[0]) { + .table => |table| { + try std.testing.expectEqual(@as(usize, 3), table.column_count); + try std.testing.expect(table.has_row_titles); + }, + else => return error.TestExpectedEqual, + } +} + +test "table without header or data rows is rejected" { + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); table { group \"Topic\" }", &.{.missing_table_column_count}); +} + +test "columns row must come first" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ row { td "A" } + \\ columns { td "B" } + \\} + ; + + try validateDiagnostics(.{}, code, &.{.misplaced_columns_row}); +} + +test "table allows only one columns row" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ columns { td "A" } + \\ columns { 
td "B" } + \\} + ; + + try validateDiagnostics(.{}, code, &.{.duplicate_columns_row}); +} + +test "table tracks presence of row titles" { + const code = + \\hdoc(version="2.0",lang="en"); + \\table { + \\ row { td "A" } + \\ group { "Topic" } + \\} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, code, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + switch (doc.contents[0]) { + .table => |table| { + try std.testing.expect(!table.has_row_titles); + }, + else => return error.TestExpectedEqual, + } +} + test "title block populates metadata and warns on inline date" { const code = "hdoc(version=\"2.0\",lang=\"en\");\ntitle { Hello \\date{2020-01-02} }\nh1 \"Body\""; From 74d1dc0d81560e6329b1f4d5e50c8246e541818e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 10:01:16 +0100 Subject: [PATCH 090/116] Remove unused paragraph kind --- examples/demo.hdoc | 12 ++--- examples/guide.hdoc | 12 ++--- src/hyperdoc.zig | 48 +++++++++++++------ src/render/dump.zig | 7 ++- src/render/html5.zig | 30 +++++++++--- src/testsuite.zig | 78 +++++++++++++++++++++++++++++++ test/html5/admonition_blocks.hdoc | 17 +++++++ test/html5/admonition_blocks.html | 22 +++++++++ test/html5/paragraph_styles.html | 24 +++++++--- 9 files changed, 211 insertions(+), 39 deletions(-) create mode 100644 test/html5/admonition_blocks.hdoc create mode 100644 test/html5/admonition_blocks.html diff --git a/examples/demo.hdoc b/examples/demo.hdoc index a092e91..7b18c00 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -21,12 +21,12 @@ p { h2{Special Paragraphs} -note { HyperDoc 2.0 also supports different types of paragraphs. } -warning { These should affect rendering, and have well-defined semantics attached to them. 
} -danger { You shall not assume any specific formatting of these elements though. } -tip { They typically have a standardized style though. } -quote { You shall not pass! } -spoiler { Nobody expects the Spanish Inquisition! } +note "HyperDoc 2.0 also supports different types of paragraphs." +warning "These should affect rendering, and have well-defined semantics attached to them." +danger "You shall not assume any specific formatting of these elements though." +tip "They typically have a standardized style though." +quote "You shall not pass!" +spoiler "Nobody expects the Spanish Inquisition!" h2{Verbatim and Preformatted Text} diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 3f939f4..d7d4ecd 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -24,12 +24,12 @@ p { Links can target \link(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } -note { Notes highlight supportive information. } -warning { Warnings call out risky behavior. } -danger { Danger paragraphs emphasize critical hazards. } -tip { Tips provide actionable hints. } -quote { Quotes include sourced or emphasized wording. } -spoiler { Spoilers hide key story information until revealed. } +note "Notes highlight supportive information." +warning "Warnings call out risky behavior." +danger "Danger paragraphs emphasize critical hazards." +tip "Tips provide actionable hints." +quote "Quotes include sourced or emphasized wording." +spoiler "Spoilers hide key story information until revealed." 
h2(id="literals") { Literal and Preformatted Blocks } diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..be548ba 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -50,6 +50,7 @@ pub const Document = struct { pub const Block = union(enum) { heading: Heading, paragraph: Paragraph, + admonition: Admonition, list: List, image: Image, preformatted: Preformatted, @@ -81,12 +82,17 @@ pub const Block = union(enum) { }; pub const Paragraph = struct { - kind: ParagraphKind, lang: LanguageTag, content: []Span, }; - pub const ParagraphKind = enum { p, note, warning, danger, tip, quote, spoiler }; + pub const Admonition = struct { + kind: AdmonitionKind, + lang: LanguageTag, + content: []Block, + }; + + pub const AdmonitionKind = enum { note, warning, danger, tip, quote, spoiler }; pub const List = struct { lang: LanguageTag, @@ -880,10 +886,14 @@ pub const SemanticAnalyzer = struct { const heading, const id = try sema.translate_heading_node(node); return .{ .{ .heading = heading }, id }; }, - .p, .note, .warning, .danger, .tip, .quote, .spoiler => { + .p => { const paragraph, const id = try sema.translate_paragraph_node(node); return .{ .{ .paragraph = paragraph }, id }; }, + .note, .warning, .danger, .tip, .quote, .spoiler => { + const admonition, const id = try sema.translate_admonition_node(node); + return .{ .{ .admonition = admonition }, id }; + }, .ul, .ol => { const list, const id = try sema.translate_list_node(node); return .{ .{ .list = list }, id }; @@ -976,8 +986,21 @@ pub const SemanticAnalyzer = struct { }); const heading: Block.Paragraph = .{ + .lang = attrs.lang, + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + }; + + return .{ heading, attrs.id }; + } + + fn translate_admonition_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Admonition, ?Reference } { + const attrs = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + id: ?Reference = null, + }); + + const admonition: 
Block.Admonition = .{ .kind = switch (node.type) { - .p => .p, .note => .note, .warning => .warning, .danger => .danger, @@ -987,10 +1010,10 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_block_list(node, .text_to_p), }; - return .{ heading, attrs.id }; + return .{ admonition, attrs.id }; } fn translate_list_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.List, ?Reference } { @@ -1337,7 +1360,6 @@ pub const SemanticAnalyzer = struct { const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ .paragraph = .{ - .kind = .p, .lang = .inherit, .content = spans, }, @@ -3079,12 +3101,6 @@ pub const Parser = struct { .title, .p, - .note, - .warning, - .danger, - .tip, - .quote, - .spoiler, .img, .pre, @@ -3106,6 +3122,12 @@ pub const Parser = struct { => true, .hdoc, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, .ul, .ol, .table, diff --git a/src/render/dump.zig b/src/render/dump.zig index 3188c56..39f2904 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -409,10 +409,15 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err }, .paragraph => |paragraph| { try writeTypeTag(writer, "paragraph"); - try dumpEnumField(writer, indent + indent_step, "kind", paragraph.kind); try dumpOptionalStringField(writer, indent + indent_step, "lang", paragraph.lang.text); try dumpSpanListField(writer, indent + indent_step, "content", paragraph.content); }, + .admonition => |admonition| { + try writeTypeTag(writer, "admonition"); + try dumpEnumField(writer, indent + indent_step, "kind", admonition.kind); + try dumpOptionalStringField(writer, indent + indent_step, "lang", admonition.lang.text); + try dumpBlockListField(writer, indent + indent_step, "content", admonition.content); + }, .list => |list| { try writeTypeTag(writer, "list"); try dumpOptionalStringField(writer, 
indent + indent_step, "lang", list.lang.text); diff --git a/src/render/html5.zig b/src/render/html5.zig index 364255e..a3661d3 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -26,6 +26,7 @@ const RenderContext = struct { switch (block) { .heading => |heading| try ctx.renderHeading(heading, block_index, indent), .paragraph => |paragraph| try ctx.renderParagraph(paragraph, block_index, indent), + .admonition => |admonition| try ctx.renderAdmonition(admonition, block_index, indent), .list => |list| try ctx.renderList(list, block_index, indent), .image => |image| try ctx.renderImage(image, block_index, indent), .preformatted => |preformatted| try ctx.renderPreformatted(preformatted, block_index, indent), @@ -127,23 +128,38 @@ const RenderContext = struct { const lang_attr = langAttribute(paragraph.lang); const id_attr = ctx.resolveBlockId(block_index); - var class_buffer: [32]u8 = undefined; - const class_attr: ?[]const u8 = switch (paragraph.kind) { - .p => null, - else => std.fmt.bufPrint(&class_buffer, "hdoc-{s}", .{@tagName(paragraph.kind)}) catch unreachable, - }; - try writeIndent(ctx.writer, indent); try writeStartTag(ctx.writer, "p", .regular, .{ .id = id_attr, .lang = lang_attr, - .class = class_attr, }); try ctx.renderSpans(paragraph.content); try writeEndTag(ctx.writer, "p"); try ctx.writer.writeByte('\n'); } + fn renderAdmonition(ctx: *RenderContext, admonition: hdoc.Block.Admonition, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(admonition.lang); + const id_attr = ctx.resolveBlockId(block_index); + + var class_buffer: [32]u8 = undefined; + const class_attr = std.fmt.bufPrint(&class_buffer, "hdoc-{s}", .{@tagName(admonition.kind)}) catch unreachable; + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "div", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .class = class_attr, + }); + if (admonition.content.len > 0) { + try ctx.writer.writeByte('\n'); + try 
ctx.renderBlocks(admonition.content, indent + indent_step); + try writeIndent(ctx.writer, indent); + } + try writeEndTag(ctx.writer, "div"); + try ctx.writer.writeByte('\n'); + } + fn renderList(ctx: *RenderContext, list: hdoc.Block.List, block_index: ?usize, indent: usize) RenderError!void { const lang_attr = langAttribute(list.lang); const id_attr = ctx.resolveBlockId(block_index); diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..0f358de 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -224,6 +224,84 @@ test "span merger preserves whitespace after inline mono" { } } +test "admonition supports block-list bodies" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\note{ + \\ p "Outer block text." + \\ ul{li "Nested item"} + \\} + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 1), doc.contents.len); + + const admonition = doc.contents[0].admonition; + try std.testing.expectEqual(hdoc.Block.AdmonitionKind.note, admonition.kind); + try std.testing.expectEqual(@as(usize, 2), admonition.content.len); + + switch (admonition.content[0]) { + .paragraph => |para| { + try std.testing.expectEqual(@as(usize, 1), para.content.len); + try std.testing.expectEqualStrings("Outer block text.", para.content[0].content.text); + }, + else => return error.TestExpectedEqual, + } + + switch (admonition.content[1]) { + .list => |list| { + try std.testing.expectEqual(@as(usize, 1), list.items.len); + try std.testing.expectEqual(@as(?u32, null), list.first); + try std.testing.expectEqual(@as(usize, 1), list.items[0].content.len); + }, + else => return error.TestExpectedEqual, + } +} + +test "admonition shorthand promotes inline bodies to paragraphs" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); 
+ defer diagnostics.deinit(); + + const source = + "hdoc(version=\"2.0\",lang=\"en\");\n" ++ + "warning \"Be careful.\" \n" ++ + "tip:\n" ++ + "| first line\n" ++ + "| second line\n"; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 2), doc.contents.len); + + const warning_block = doc.contents[0].admonition; + try std.testing.expectEqual(hdoc.Block.AdmonitionKind.warning, warning_block.kind); + try std.testing.expectEqual(@as(usize, 1), warning_block.content.len); + switch (warning_block.content[0]) { + .paragraph => |para| { + try std.testing.expectEqualStrings("Be careful.", para.content[0].content.text); + }, + else => return error.TestExpectedEqual, + } + + const tip_block = doc.contents[1].admonition; + try std.testing.expectEqual(hdoc.Block.AdmonitionKind.tip, tip_block.kind); + try std.testing.expectEqual(@as(usize, 1), tip_block.content.len); + switch (tip_block.content[0]) { + .paragraph => |para| { + try std.testing.expectEqualStrings("first line\nsecond line", para.content[0].content.text); + }, + else => return error.TestExpectedEqual, + } +} + test "pre verbatim preserves trailing whitespace" { var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); defer diagnostics.deinit(); diff --git a/test/html5/admonition_blocks.hdoc b/test/html5/admonition_blocks.hdoc new file mode 100644 index 0000000..3636623 --- /dev/null +++ b/test/html5/admonition_blocks.hdoc @@ -0,0 +1,17 @@ +hdoc(version="2.0", title="Admonition Blocks", lang="en"); + +h1 "Admonitions as Containers" + +note{ + p "A note can span multiple blocks." + ul{ + li "Lists are allowed." + li "They render inside the note container." + } +} + +danger "String bodies become paragraphs inside the container." + +spoiler: +| Hidden detail +| spans multiple lines. 
diff --git a/test/html5/admonition_blocks.html b/test/html5/admonition_blocks.html new file mode 100644 index 0000000..a298e1e --- /dev/null +++ b/test/html5/admonition_blocks.html @@ -0,0 +1,22 @@ +
    +

    Admonition Blocks

    +
    +

    §1 Admonitions as Containers

    +
    +

    A note can span multiple blocks.

    +
      +
    • +

      Lists are allowed.

      +
    • +
    • +

      They render inside the note container.

      +
    • +
    +
    +
    +

    String bodies become paragraphs inside the container.

    +
    +
    +

    Hidden detail +spans multiple lines.

    +
    diff --git a/test/html5/paragraph_styles.html b/test/html5/paragraph_styles.html index 575b583..1bdd369 100644 --- a/test/html5/paragraph_styles.html +++ b/test/html5/paragraph_styles.html @@ -3,9 +3,21 @@

    Paragraph Styles

    §1 Paragraph Styles

    A standard paragraph introducing the styles below.

    -

    Notes provide informational context without urgency.

    -

    Warnings highlight potential issues to watch for.

    -

    Danger blocks signal critical problems.

    -

    Tips offer helpful hints for readers.

    -

    Quoted material sits in its own paragraph style.

    -

    This is a spoiler; renderers may hide or blur this content.

    +
    +

    Notes provide informational context without urgency.

    +
    +
    +

    Warnings highlight potential issues to watch for.

    +
    +
    +

    Danger blocks signal critical problems.

    +
    +
    +

    Tips offer helpful hints for readers.

    +
    +
    +

    Quoted material sits in its own paragraph style.

    +
    +
    +

    This is a spoiler; renderers may hide or blur this content.

    +
    From 21f6fc68d3f90208dcd93a0c2c93398c5372abb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 12:25:28 +0100 Subject: [PATCH 091/116] Guard plaintext rendering from unexpected refs --- AGENTS.md | 1 + docs/specification.md | 1 + examples/demo.hdoc | 2 +- examples/guide.hdoc | 2 +- src/hyperdoc.zig | 204 ++++++++++++++++++++++++---- src/render/dump.zig | 17 ++- src/render/html5.zig | 89 +++++++++++- src/testsuite.zig | 58 +++++++- test/html5/nesting_and_inlines.hdoc | 6 +- test/html5/nesting_and_inlines.html | 2 + 10 files changed, 347 insertions(+), 35 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 2ab16dd..dc23294 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,6 +10,7 @@ - Run `zig build` to validate the main application still compiles - Test `./zig-out/bin/hyperdoc` with the `.hdoc` files in `examples/` and `test/`. - Avoid editing documentation unless the request explicitly asks for it. +- `src/hyperdoc.zig` must not contain locale- or rendering-specific parts. - Treat `docs/specification.md` as the authoritative source of behavior; examples may be outdated or incorrect. - If the spec is unclear or conflicts with code/tests, ask before changing behavior. - Do not implement "just make it work" fallbacks that alter semantics to satisfy examples. diff --git a/docs/specification.md b/docs/specification.md index 50f184c..fcee7cd 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -812,6 +812,7 @@ Notes: Semantics: - `\ref(ref="X")` **MUST** resolve to a top-level element with `id="X"`, otherwise it is semantically invalid. +- A `\ref` inline element **MUST NOT** appear inside `h1`, `h2`, `h3`, or `title` elements. - If `\ref` has a non-empty body, the body **MUST** be used as the rendered link text. 
- If `\ref` has an empty body (`;`), the following rules apply: - If the referenced target is a heading (`h1`, `h2`, `h3`), the renderer **MUST** synthesize link text from the target and `fmt`: diff --git a/examples/demo.hdoc b/examples/demo.hdoc index a092e91..5284b9a 100644 --- a/examples/demo.hdoc +++ b/examples/demo.hdoc @@ -15,7 +15,7 @@ p(id="foo") { p { This paragraph contains \em{inline} formatting. We don't support \strike{bold} or \strike{italic} as it's a stylistic choice. Other formatting we have is \mono{monospaced}, superscript (x\sup{2}) and subscript(x\sub{2}). - We can also \link(ref="foo"){link to other parts of a document} or \link(uri="https://ashet.computer"){to websites}. + We can also \ref(ref="foo"){link to other parts of a document} or \link(uri="https://ashet.computer"){to websites}. With \mono(syntax="c"){int *value = 10;} we can also have language information and potential syntax highlighting attached to monospaced font. } diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 3f939f4..4a0bbf0 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -21,7 +21,7 @@ p { } p { - Links can target \link(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. + Links can target \ref(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } note { Notes highlight supportive information. 
} diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index c22cc27..2c35cfa 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -166,11 +166,20 @@ pub fn FormattedDateTime(comptime DT: type) type { } pub const Span = struct { + pub const ReferenceFormat = enum { full, name, index }; + pub const Content = union(enum) { text: []const u8, date: FormattedDateTime(Date), time: FormattedDateTime(Time), datetime: FormattedDateTime(DateTime), + reference: InlineReference, + }; + + pub const InlineReference = struct { + ref: Reference, + fmt: ReferenceFormat, + target_block: ?usize = null, }; pub const Attributes = struct { @@ -220,18 +229,27 @@ pub const ScriptPosition = enum { pub const Link = union(enum) { none, - ref: Reference, + ref: RefTarget, uri: Uri, pub fn eql(lhs: Link, rhs: Link) bool { return switch (lhs) { .none => (rhs == .none), - .ref => (rhs == .ref) and std.mem.eql(u8, lhs.ref.text, rhs.ref.text), + .ref => (rhs == .ref) and lhs.ref.eql(rhs.ref), .uri => (rhs == .uri) and std.mem.eql(u8, lhs.uri.text, rhs.uri.text), }; } }; +pub const RefTarget = struct { + ref: Reference, + block_index: ?usize = null, + + pub fn eql(lhs: RefTarget, rhs: RefTarget) bool { + return lhs.ref.eql(rhs.ref) and lhs.block_index == rhs.block_index; + } +}; + /// HyperDoc Version Number pub const Version = struct { major: u16, @@ -586,6 +604,7 @@ pub fn parse( } try sema.validate_references(&id_map); + try sema.resolve_references(&id_map); const doc_lang = header.lang orelse LanguageTag.inherit; const title = try sema.finalize_title(header, doc_lang); @@ -920,6 +939,7 @@ pub const SemanticAnalyzer = struct { .@"\\sub", .@"\\sup", .@"\\link", + .@"\\ref", .@"\\time", .@"\\date", .@"\\datetime", @@ -1415,6 +1435,12 @@ pub const SemanticAnalyzer = struct { try merger.output.append(merger.arena, span); }, + .reference => { + try merger.flush_internal(.keep); + std.debug.assert(merger.current_span.items.len == 0); + + try merger.output.append(merger.arena, span); + }, .text => 
|text_content| { std.debug.assert(span.attribs.eql(merger.attribs)); @@ -1594,34 +1620,44 @@ pub const SemanticAnalyzer = struct { .@"\\link" => { const props = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, - uri: ?Uri = null, - ref: ?Reference = null, + uri: Uri, }); - if (props.uri != null and props.ref != null) { - try sema.emit_diagnostic(.invalid_link, node.location); - } + try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + .lang = props.lang, + .link = .{ .uri = props.uri }, + }), .emit_diagnostic); + }, - const link: Link = if (props.uri) |uri| blk: { - break :blk .{ .uri = uri }; - } else if (props.ref) |ref| blk: { - break :blk .{ .ref = ref }; - } else blk: { - try sema.emit_diagnostic(.invalid_link, node.location); - break :blk .none; - }; + .@"\\ref" => { + const props = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + ref: Reference, + fmt: Span.ReferenceFormat = .full, + }); - if (props.ref) |ref| { - if (props.uri == null) { - const ref_location = get_attribute_location(node, "ref", .value) orelse node.location; - try sema.pending_refs.append(sema.arena, .{ .ref = ref, .location = ref_location }); - } - } + const ref_location = get_attribute_location(node, "ref", .value) orelse node.location; + try sema.pending_refs.append(sema.arena, .{ .ref = props.ref, .location = ref_location }); - try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ + const link_attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, - .link = link, - }), .emit_diagnostic); + .link = .{ .ref = .{ .ref = props.ref } }, + }); + + switch (node.body) { + .empty => { + try spans.append(sema.arena, .{ + .content = .{ .reference = .{ + .ref = props.ref, + .fmt = props.fmt, + .target_block = null, + } }, + .attribs = link_attribs, + .location = node.location, + }); + }, + else => try 
sema.translate_inline_body(spans, node.body, link_attribs, .emit_diagnostic), + } }, .@"\\mono" => { @@ -1665,6 +1701,7 @@ pub const SemanticAnalyzer = struct { // Convert the content_spans into a "rendered string". const content_text = (sema.render_spans_to_plaintext(content_spans, .reject_date_time) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, + error.UnexpectedReference => unreachable, else => |e| return e, }).text; @@ -1777,7 +1814,7 @@ pub const SemanticAnalyzer = struct { sema: *SemanticAnalyzer, source_spans: []const Span, mode: PlaintextMode, - ) error{ OutOfMemory, DateTimeRenderingUnsupported }!TitlePlainText { + ) error{ OutOfMemory, DateTimeRenderingUnsupported, UnexpectedReference }!TitlePlainText { var output: std.ArrayList(u8) = .empty; defer output.deinit(sema.arena); @@ -1786,6 +1823,7 @@ pub const SemanticAnalyzer = struct { for (source_spans) |span| { switch (span.content) { .text => |str| try output.appendSlice(sema.arena, str), + .reference => return error.UnexpectedReference, .date => |value| switch (mode) { .reject_date_time => return error.DateTimeRenderingUnsupported, .iso_date_time => { @@ -1891,6 +1929,7 @@ pub const SemanticAnalyzer = struct { if (block_title) |title_block| { const rendered = sema.render_spans_to_plaintext(title_block.content, .iso_date_time) catch |err| switch (err) { error.DateTimeRenderingUnsupported => unreachable, + error.UnexpectedReference => unreachable, else => |e| return e, }; @@ -2099,7 +2138,10 @@ pub const SemanticAnalyzer = struct { LanguageTag => LanguageTag.parse(value) catch return error.InvalidValue, TimeZoneOffset => TimeZoneOffset.parse(value) catch return error.InvalidValue, - else => @compileError("Unsupported attribute type: " ++ @typeName(T)), + inline else => |EnumT| switch (@typeInfo(EnumT)) { + .@"enum" => std.meta.stringToEnum(EnumT, value) orelse return error.InvalidValue, + else => @compileError("Unsupported attribute type: " ++ @typeName(EnumT)), + }, }; } 
@@ -2111,6 +2153,107 @@ pub const SemanticAnalyzer = struct { } } + fn resolve_references( + sema: *SemanticAnalyzer, + id_map: *const std.StringArrayHashMapUnmanaged(usize), + ) error{OutOfMemory}!void { + for (sema.blocks.items) |*block| { + try sema.resolve_block_references(block, id_map); + } + + if (sema.title_block) |*title_block| { + try sema.resolve_span_slice(&title_block.content, id_map); + } + } + + fn resolve_block_references( + sema: *SemanticAnalyzer, + block: *Block, + id_map: *const std.StringArrayHashMapUnmanaged(usize), + ) error{OutOfMemory}!void { + switch (block.*) { + .heading => |*heading| { + try sema.resolve_span_slice(&heading.content, id_map); + }, + .paragraph => |*paragraph| { + try sema.resolve_span_slice(¶graph.content, id_map); + }, + .list => |*list| { + for (list.items) |*item| { + for (item.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + } + }, + .image => |*image| { + try sema.resolve_span_slice(&image.content, id_map); + }, + .preformatted => |*preformatted| { + try sema.resolve_span_slice(&preformatted.content, id_map); + }, + .toc => {}, + .table => |*table| { + for (table.rows) |*row| switch (row.*) { + .columns => |*columns| { + for (columns.cells) |*cell| { + for (cell.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + } + }, + .group => |*group| { + try sema.resolve_span_slice(&group.content, id_map); + }, + .row => |*table_row| { + for (table_row.cells) |*cell| { + for (cell.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + } + }, + }; + }, + } + } + + fn resolve_span_slice( + sema: *SemanticAnalyzer, + spans: *[]Span, + id_map: *const std.StringArrayHashMapUnmanaged(usize), + ) error{OutOfMemory}!void { + for (spans.*) |*span| { + var target_index: ?usize = null; + switch (span.attribs.link) { + .ref => |ref_target| { + target_index = ref_target.block_index orelse id_map.get(ref_target.ref.text); + span.attribs.link = .{ .ref = .{ + .ref 
= ref_target.ref, + .block_index = target_index, + } }; + }, + else => {}, + } + + switch (span.content) { + .reference => |ref_content| { + const resolved_index = target_index orelse id_map.get(ref_content.ref.text) orelse continue; + const target_block = sema.blocks.items[resolved_index]; + switch (target_block) { + .heading => {}, + else => try sema.emit_diagnostic(.empty_ref_body_target, span.location), + } + span.content = .{ .reference = .{ + .ref = ref_content.ref, + .fmt = ref_content.fmt, + .target_block = resolved_index, + } }; + span.attribs.link = .{ .ref = .{ .ref = ref_content.ref, .block_index = resolved_index } }; + }, + else => {}, + } + } + } + fn build_toc(sema: *SemanticAnalyzer, contents: []const Block, block_locations: []const Parser.Location) !Document.TableOfContents { std.debug.assert(contents.len == block_locations.len); @@ -3021,6 +3164,7 @@ pub const Parser = struct { @"\\sub", @"\\sup", @"\\link", + @"\\ref", @"\\date", @"\\time", @"\\datetime", @@ -3036,6 +3180,7 @@ pub const Parser = struct { .@"\\sub", .@"\\sup", .@"\\link", + .@"\\ref", .@"\\date", .@"\\time", .@"\\datetime", @@ -3097,6 +3242,7 @@ pub const Parser = struct { .@"\\sub", .@"\\sup", .@"\\link", + .@"\\ref", .@"\\date", .@"\\time", .@"\\datetime", @@ -3193,7 +3339,6 @@ pub const Diagnostic = struct { block_list_required: NodeBodyError, invalid_inline_combination: InlineCombinationError, link_not_nestable, - invalid_link, invalid_date_time, invalid_date_time_body, invalid_date_time_fmt: DateTimeFormatError, @@ -3210,6 +3355,7 @@ pub const Diagnostic = struct { column_count_mismatch: TableShapeError, duplicate_id: ReferenceError, unknown_id: ReferenceError, + empty_ref_body_target, // warnings: document_starts_with_bom, @@ -3247,7 +3393,6 @@ pub const Diagnostic = struct { .block_list_required, .invalid_inline_combination, .link_not_nestable, - .invalid_link, .invalid_date_time, .invalid_date_time_fmt, .missing_timezone, @@ -3264,6 +3409,7 @@ pub const Diagnostic = struct 
{ .column_count_mismatch, .duplicate_id, .unknown_id, + .empty_ref_body_target, => .@"error", .missing_document_language, @@ -3323,7 +3469,6 @@ pub const Diagnostic = struct { .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), .link_not_nestable => try w.writeAll("Links are not nestable"), - .invalid_link => try w.writeAll("\\link requires either ref=\"…\" or uri=\"…\" attribute."), .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), @@ -3356,6 +3501,7 @@ pub const Diagnostic = struct { .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), + .empty_ref_body_target => try w.writeAll("Empty-body \\ref is only supported for headings."), .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), diff --git a/src/render/dump.zig b/src/render/dump.zig index 3188c56..25ffe02 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -135,7 +135,11 @@ fn writeSpanAttributes(writer: *Writer, span: hdoc.Span) Writer.Error!void { .none => {}, .ref => |value| { try writeAttrSeparator(writer, &first); - try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.text)}); + if (value.block_index) |idx| { + try writer.print("link=\"ref:{f}#{d}\"", .{ std.zig.fmtString(value.ref.text), idx }); + } else { + try writer.print("link=\"ref:{f}\"", .{std.zig.fmtString(value.ref.text)}); + } }, .uri => |value| { try writeAttrSeparator(writer, &first); @@ -217,6 +221,17 @@ fn writeSpanContentInline(writer: *Writer, content: hdoc.Span.Content) Writer.Er try writeFormattedDateTimeInline(writer, 
datetime); try writer.writeByte('"'); }, + .reference => |reference| { + try writer.writeByte('"'); + try writer.writeAll("ref:"); + try writer.writeAll(reference.ref.text); + try writer.writeByte('@'); + try writer.writeAll(@tagName(reference.fmt)); + if (reference.target_block) |idx| { + try writer.print("#{d}", .{idx}); + } + try writer.writeByte('"'); + }, } } diff --git a/src/render/html5.zig b/src/render/html5.zig index 364255e..936bdba 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -472,8 +472,13 @@ const RenderContext = struct { const href_value = switch (span.attribs.link) { .none => unreachable, .ref => |reference| blk: { + if (ctx.resolveBlockId(reference.block_index)) |resolved| { + var href_buffer: [128]u8 = undefined; + break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{resolved}) catch unreachable; + } + var href_buffer: [128]u8 = undefined; - break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{reference.text}) catch unreachable; + break :blk std.fmt.bufPrint(&href_buffer, "#{s}", .{reference.ref.text}) catch unreachable; }, .uri => |uri| uri.text, }; @@ -530,6 +535,9 @@ const RenderContext = struct { .date => |date| try ctx.renderDateTimeValue(.date, date, content_lang), .time => |time| try ctx.renderDateTimeValue(.time, time, content_lang), .datetime => |datetime| try ctx.renderDateTimeValue(.datetime, datetime, content_lang), + .reference => |reference| { + try ctx.renderReference(reference, content_lang); + }, } while (opened_len > 0) { @@ -538,6 +546,66 @@ const RenderContext = struct { } } + fn renderReference(ctx: *RenderContext, reference: hdoc.Span.InlineReference, content_lang: ?[]const u8) RenderError!void { + if (reference.target_block) |target_idx| { + if (target_idx < ctx.doc.contents.len) { + switch (ctx.doc.contents[target_idx]) { + .heading => |heading| return ctx.renderHeadingReference(reference, heading, content_lang), + else => {}, + } + } + } + + try ctx.renderReferenceText(reference.ref.text, content_lang); + } 
+ + fn renderHeadingReference(ctx: *RenderContext, reference: hdoc.Span.InlineReference, heading: hdoc.Block.Heading, content_lang: ?[]const u8) RenderError!void { + var has_bdi = false; + if (content_lang) |lang| { + try writeStartTag(ctx.writer, "bdi", .regular, .{ .lang = lang }); + has_bdi = true; + } + + const print_index = reference.fmt != .name; + if (print_index) { + var index_buffer: [32]u8 = undefined; + const index_label = try formatHeadingIndexLabel(heading.index, &index_buffer); + try writeEscapedHtml(ctx.writer, index_label); + } + + if (reference.fmt == .full and heading.content.len > 0) { + try ctx.writer.writeByte(' '); + } + + switch (reference.fmt) { + .full, .name => try ctx.renderReferenceTargetSpans(heading.content), + .index => {}, + } + + if (has_bdi) { + try writeEndTag(ctx.writer, "bdi"); + } + } + + fn renderReferenceText(ctx: *RenderContext, text: []const u8, content_lang: ?[]const u8) RenderError!void { + if (content_lang) |lang| { + try writeStartTag(ctx.writer, "bdi", .regular, .{ .lang = lang }); + try writeEscapedHtml(ctx.writer, text); + try writeEndTag(ctx.writer, "bdi"); + return; + } + + try writeEscapedHtml(ctx.writer, text); + } + + fn renderReferenceTargetSpans(ctx: *RenderContext, spans: []const hdoc.Span) RenderError!void { + for (spans) |span| { + var adjusted = span; + adjusted.attribs.link = .none; + try ctx.renderSpan(adjusted); + } + } + fn renderDateTimeValue(ctx: *RenderContext, comptime kind: enum { date, time, datetime }, value: anytype, lang_attr: ?[]const u8) RenderError!void { var datetime_buffer: [128]u8 = undefined; const datetime_value = switch (kind) { @@ -790,6 +858,25 @@ fn formatIsoDateTime(value: hdoc.DateTime, buffer: []u8) RenderError![]const u8 return std.fmt.bufPrint(buffer, "{s}T{s}", .{ date_text, time_text }) catch unreachable; } +fn formatHeadingIndexLabel(index: hdoc.Block.Heading.Index, buffer: []u8) RenderError![]const u8 { + var stream = std.io.fixedBufferStream(buffer); + const writer = 
stream.writer(); + + const parts = switch (index) { + .h1 => index.h1[0..1], + .h2 => index.h2[0..2], + .h3 => index.h3[0..3], + }; + + for (parts, 0..) |value, idx| { + if (idx != 0) try writer.writeByte('.'); + try writer.print("{d}", .{value}); + } + try writer.writeByte('.'); + + return stream.getWritten(); +} + fn formatDateValue(value: hdoc.FormattedDateTime(hdoc.Date), buffer: []u8) RenderError![]const u8 { return switch (value.format) { .year => std.fmt.bufPrint(buffer, "{d}", .{value.value.year}) catch unreachable, diff --git a/src/testsuite.zig b/src/testsuite.zig index b6e7e1b..464518c 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -415,6 +415,62 @@ test "parser handles unknown node types" { } } +test "\\ref synthesizes heading text for empty bodies" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\h1(id="intro"){Introduction} + \\p{See \ref(ref="intro"); and \ref(ref="intro",fmt="name"); and \ref(ref="intro",fmt="index");} + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 2), doc.contents.len); + + const paragraph = doc.contents[1].paragraph; + const expected_formats = [_]hdoc.Span.ReferenceFormat{ .full, .name, .index }; + + var seen: usize = 0; + for (paragraph.content) |span| { + if (span.content != .reference) continue; + + const reference = span.content.reference; + try std.testing.expect(seen < expected_formats.len); + try std.testing.expectEqual(expected_formats[seen], reference.fmt); + try std.testing.expectEqual(@as(?usize, 0), reference.target_block); + + switch (span.attribs.link) { + .ref => |link| try std.testing.expectEqual(@as(?usize, 0), link.block_index), + else => return error.TestExpectedEqual, + } + + seen += 1; + } + + try std.testing.expectEqual(expected_formats.len, 
seen); +} + +test "\\ref empty body rejects non-heading targets" { + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + const source = + \\hdoc(version="2.0",lang="en"); + \\p(id="p1"){Body} + \\p{\ref(ref="p1");} + ; + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(diagnostics.has_error()); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, .empty_ref_body_target)); +} + test "table of contents inserts automatic headings when skipping levels" { const source = \\hdoc(version="2.0"); @@ -791,7 +847,7 @@ test "diagnostics for missing timezone and unknown id" { const source = \\hdoc(version="2.0"); - \\p{ \time"12:00:00" \link(ref="missing"){missing} } + \\p{ \time"12:00:00" \ref(ref="missing"){missing} } ; var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); diff --git a/test/html5/nesting_and_inlines.hdoc b/test/html5/nesting_and_inlines.hdoc index f1bd8a2..fcfd8fc 100644 --- a/test/html5/nesting_and_inlines.hdoc +++ b/test/html5/nesting_and_inlines.hdoc @@ -6,7 +6,11 @@ p "This document exercises inline formatting and nested lists." p { We can mix \em{emphasis}, \strike{strike}, \mono{monospace} text. Superscript x\sup{2} and subscript x\sub{2} also appear. } -p { Links point to \link(ref="top"){local anchors} or \link(uri="https://example.com"){external sites}. } +p { Links point to \ref(ref="top"){local anchors} or \link(uri="https://example.com"){external sites}. 
} + +h2(id="formatted") {Formatted \em{Heading}} + +p { Empty-body references become \ref(ref="formatted",fmt="full"); \ref(ref="formatted",fmt="name"); and \ref(ref="formatted",fmt="index"); } ul { li { p "Top-level item one" } diff --git a/test/html5/nesting_and_inlines.html b/test/html5/nesting_and_inlines.html index 1b5be1a..1ee2e94 100644 --- a/test/html5/nesting_and_inlines.html +++ b/test/html5/nesting_and_inlines.html @@ -5,6 +5,8 @@

    §1 Nesting and Inline Styling

    This document exercises inline formatting and nested lists.

    We can mix emphasis, strike, monospace text. Superscript x2 and subscript x2 also appear.

    Links point to local anchors or external sites.

    +

    §1.1 Formatted Heading

    +

    Empty-body references become 1.1. Formatted Heading Formatted Heading and 1.1.

    • Top-level item one

      From 48997cdaf3f55b7737cd7f92b12936f8397b4066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 15:15:14 +0100 Subject: [PATCH 092/116] Modify validate.yml for hdoc-2.0 and testing Updated workflow to include hdoc-2.0 branch and added test step. --- .github/workflows/validate.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 0b8538c..9f2c982 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -2,9 +2,9 @@ name: Build on: pull_request: - branches: [master] + branches: [master, hdoc-2.0] push: - branches: [master] + branches: [master, hdoc-2.0] jobs: build: @@ -20,4 +20,8 @@ jobs: - name: Build run: | - zig build + zig build install + + - name: Test + run: | + zig build test From 3627f99a47c50c3d8feee239154042cbd3cb37c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:05:16 +0100 Subject: [PATCH 093/116] Fixes simple merging bug --- src/hyperdoc.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index d8c19b6..4012b0d 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -2253,6 +2253,11 @@ pub const SemanticAnalyzer = struct { .preformatted => |*preformatted| { try sema.resolve_span_slice(&preformatted.content, id_map); }, + .admonition => |*admonition| { + for (admonition.content) |*child| { + try sema.resolve_block_references(child, id_map); + } + }, .toc => {}, .table => |*table| { for (table.rows) |*row| switch (row.*) { From be0c069f193ec4e94f7d9d13a7faff7c9afb8c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:15:32 +0100 Subject: [PATCH 094/116] Adds yaml dumps to tests. 
Adds basic golden file verification --- build.zig | 27 ++++++ src/main.zig | 4 +- test/html5/admonition_blocks.yaml | 73 +++++++++++++++ test/html5/document_header.yaml | 33 +++++++ test/html5/media_and_toc.yaml | 101 +++++++++++++++++++++ test/html5/nesting_and_inlines.yaml | 121 +++++++++++++++++++++++++ test/html5/paragraph_styles.yaml | 89 ++++++++++++++++++ test/html5/tables.yaml | 134 ++++++++++++++++++++++++++++ 8 files changed, 580 insertions(+), 2 deletions(-) create mode 100644 test/html5/admonition_blocks.yaml create mode 100644 test/html5/document_header.yaml create mode 100644 test/html5/media_and_toc.yaml create mode 100644 test/html5/nesting_and_inlines.yaml create mode 100644 test/html5/paragraph_styles.yaml create mode 100644 test/html5/tables.yaml diff --git a/build.zig b/build.zig index 1d265f5..262e775 100644 --- a/build.zig +++ b/build.zig @@ -1,5 +1,14 @@ const std = @import("std"); +const test_files: []const []const u8 = &.{ + "test/html5/admonition_blocks.hdoc", + "test/html5/document_header.hdoc", + "test/html5/media_and_toc.hdoc", + "test/html5/nesting_and_inlines.hdoc", + "test/html5/paragraph_styles.hdoc", + "test/html5/tables.hdoc", +}; + pub fn build(b: *std.Build) void { // Options: const target = b.standardTargetOptions(.{}); @@ -35,6 +44,24 @@ pub fn build(b: *std.Build) void { run_step.dependOn(&run_cmd.step); + // Snapshot tests: + for (test_files) |path| { + std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); + const html_file = b.fmt("{s}.html", .{path[0 .. path.len - 5]}); + const yaml_file = b.fmt("{s}.yaml", .{path[0 .. path.len - 5]}); + + for (&[2][]const u8{ html_file, yaml_file }) |file| { + const test_run = b.addRunArtifact(exe); + test_run.addArgs(&.{ "--format", file[file.len - 4 ..] 
}); + test_run.addFileArg(b.path(path)); + test_run.expectStdOutEqual( + b.build_root.handle.readFileAlloc(b.allocator, file, 10 * 1024 * 1024) catch @panic("oom"), + ); + test_step.dependOn(&test_run.step); + } + } + + // Unit tests: const exe_tests = b.addTest(.{ .root_module = b.createModule(.{ .root_source_file = b.path("src/testsuite.zig"), diff --git a/src/main.zig b/src/main.zig index 693a2f1..776d241 100644 --- a/src/main.zig +++ b/src/main.zig @@ -65,7 +65,7 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic } switch (options.format) { - .dump => try hdoc.render.yaml(parsed, output_stream), + .yaml => try hdoc.render.yaml(parsed, output_stream), .html => try hdoc.render.html5(parsed, output_stream), } } @@ -76,7 +76,7 @@ const CliOptions = struct { }; const RenderFormat = enum { - dump, + yaml, html, }; diff --git a/test/html5/admonition_blocks.yaml b/test/html5/admonition_blocks.yaml new file mode 100644 index 0000000..d40b0fc --- /dev/null +++ b/test/html5/admonition_blocks.yaml @@ -0,0 +1,73 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Admonition Blocks" + full: + lang: "en" + content: + - [] "Admonition Blocks" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Admonitions as Containers" + - admonition: + kind: note + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "A note can span multiple blocks." + - list: + lang: "" + first: null + items: + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Lists are allowed." + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "They render inside the note container." + - admonition: + kind: danger + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "String bodies become paragraphs inside the container." 
+ - admonition: + kind: spoiler + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Hidden detail\nspans multiple lines." + ids: + - null + - null + - null + - null diff --git a/test/html5/document_header.yaml b/test/html5/document_header.yaml new file mode 100644 index 0000000..eae9439 --- /dev/null +++ b/test/html5/document_header.yaml @@ -0,0 +1,33 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Metadata Title From Block" + full: + lang: "" + content: + - [] "Metadata Title From Block" + author: null + date: + date: + year: 2024 + month: 8 + day: 16 + time: + hour: 9 + minute: 30 + second: 0 + microsecond: 0 + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "This document uses header metadata and a title block without any explicit headings." + ids: + - null diff --git a/test/html5/media_and_toc.yaml b/test/html5/media_and_toc.yaml new file mode 100644 index 0000000..cd0bd24 --- /dev/null +++ b/test/html5/media_and_toc.yaml @@ -0,0 +1,101 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Media and TOC" + full: + lang: "en" + content: + - [] "Media and TOC" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: + - 2 + - 4 + - 6 + children: + - + level: h3 + headings: [] + children: [] + - + level: h3 + headings: [] + children: [] + - + level: h3 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Media and TOC" + - toc: + lang: "" + depth: 3 + - heading: + level: h2 + lang: "" + content: + - [] "Preformatted" + - preformatted: + lang: "" + syntax: "python" + content: + - [] " print(\"hello world\") " + - heading: + level: h2 + lang: "" + content: + - [] "Figure" + - image: + lang: "" + alt: "Example figure" + path: "./example.png" + content: + - [] "Figure caption text." 
+ - heading: + level: h2 + lang: "" + content: + - [] "Dates and Times" + - paragraph: + lang: "" + content: + - [] "Today is " + - [] "date:+2024-03-01@iso" + - [] "." + - paragraph: + lang: "" + content: + - [] "The meeting is at " + - [] "time:14:30:45@long" + - [] "." + - paragraph: + lang: "" + content: + - [] "Release happens on " + - [] "datetime:+2024-04-15T08:00:00" + - [] "." + ids: + - "intro" + - null + - "code" + - null + - "figure" + - "fig-code" + - "dates" + - null + - null + - null diff --git a/test/html5/nesting_and_inlines.yaml b/test/html5/nesting_and_inlines.yaml new file mode 100644 index 0000000..498844e --- /dev/null +++ b/test/html5/nesting_and_inlines.yaml @@ -0,0 +1,121 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Nesting and Inlines" + full: + lang: "en" + content: + - [] "Nesting and Inlines" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: + - 4 + children: + - + level: h3 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Nesting and Inline Styling" + - paragraph: + lang: "" + content: + - [] "This document exercises inline formatting and nested lists." + - paragraph: + lang: "" + content: + - [] "We can mix " + - [em] "emphasis" + - [] ", " + - [strike] "strike" + - [] ", " + - [mono] "monospace" + - [] " text. Superscript x" + - [position="superscript"] "2" + - [] " and subscript x" + - [position="subscript"] "2" + - [] " also appear." + - paragraph: + lang: "" + content: + - [] "Links point to " + - [link="ref:top#0"] "local anchors" + - [] " or " + - [link="uri:https://example.com"] "external sites" + - [] "." 
+ - heading: + level: h2 + lang: "" + content: + - [] "Formatted " + - [em] "Heading" + - paragraph: + lang: "" + content: + - [] "Empty-body references become " + - [link="ref:formatted#4"] "ref:formatted@full#4" + - [] " " + - [link="ref:formatted#4"] "ref:formatted@name#4" + - [] " and " + - [link="ref:formatted#4"] "ref:formatted@index#4" + - [] "" + - list: + lang: "" + first: null + items: + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Top-level item one" + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Top-level item two with nested list" + - list: + lang: "" + first: 1 + items: + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Nested ordered item A" + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Nested ordered item B" + - lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Top-level item three" + ids: + - "top" + - null + - null + - null + - "formatted" + - null + - null diff --git a/test/html5/paragraph_styles.yaml b/test/html5/paragraph_styles.yaml new file mode 100644 index 0000000..e8d82e9 --- /dev/null +++ b/test/html5/paragraph_styles.yaml @@ -0,0 +1,89 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Paragraph Styles" + full: + lang: "en" + content: + - [] "Paragraph Styles" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Paragraph Styles" + - paragraph: + lang: "" + content: + - [] "A standard paragraph introducing the styles below." + - admonition: + kind: note + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Notes provide informational context without urgency." + - admonition: + kind: warning + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Warnings highlight potential issues to watch for." 
+ - admonition: + kind: danger + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Danger blocks signal critical problems." + - admonition: + kind: tip + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Tips offer helpful hints for readers." + - admonition: + kind: quote + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "Quoted material sits in its own paragraph style." + - admonition: + kind: spoiler + lang: "" + content: + - paragraph: + lang: "" + content: + - [] "This is a spoiler; renderers may hide or blur this content." + ids: + - null + - null + - null + - null + - null + - null + - null + - null diff --git a/test/html5/tables.yaml b/test/html5/tables.yaml new file mode 100644 index 0000000..a3e7b4f --- /dev/null +++ b/test/html5/tables.yaml @@ -0,0 +1,134 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Tables" + full: + lang: "en" + content: + - [] "Tables" + author: null + date: null + toc: + level: h1 + headings: + - 0 + children: + - + level: h2 + headings: [] + children: [] + contents: + - heading: + level: h1 + lang: "" + content: + - [] "Table Coverage" + - paragraph: + lang: "" + content: + - [] "This file covers header rows, data rows with titles, groups, and colspans." 
+ - table: + lang: "" + column_count: 3 + has_row_titles: true + rows: + - columns: + lang: "" + cells: + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "Column A" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "Column B" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "Column C" + - group: + lang: "" + content: + - [] "\"Section One\"" + - row: + lang: "" + title: "Row 1" + cells: + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "A1" + - lang: "" + colspan: 2 + content: + - paragraph: + lang: "" + content: + - [] "B1-C1" + - row: + lang: "" + title: "Row 2" + cells: + - lang: "" + colspan: 2 + content: + - paragraph: + lang: "" + content: + - [] "A2-B2" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "C2" + - group: + lang: "" + content: + - [] "\"Section Two\"" + - row: + lang: "" + title: "Row 3" + cells: + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "A3" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "B3" + - lang: "" + colspan: 1 + content: + - paragraph: + lang: "" + content: + - [] "C3" + ids: + - null + - null + - null From dab39970b3ddad9234378a8f94535ba677a72434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:30:05 +0100 Subject: [PATCH 095/116] Implements proper snapshot test validation with a basic differ output on failure. 
--- AGENTS.md | 7 +++ build.zig | 40 ++++++++++----- test/compare.zig | 51 +++++++++++++++++++ test/{html5 => snapshot}/AGENTS.md | 0 .../admonition_blocks.hdoc | 0 .../admonition_blocks.html | 0 .../admonition_blocks.yaml | 0 test/{html5 => snapshot}/document_header.hdoc | 0 test/{html5 => snapshot}/document_header.html | 0 test/{html5 => snapshot}/document_header.yaml | 0 test/{html5 => snapshot}/media_and_toc.hdoc | 0 test/{html5 => snapshot}/media_and_toc.html | 0 test/{html5 => snapshot}/media_and_toc.yaml | 0 .../nesting_and_inlines.hdoc | 0 .../nesting_and_inlines.html | 0 .../nesting_and_inlines.yaml | 0 .../{html5 => snapshot}/paragraph_styles.hdoc | 0 .../{html5 => snapshot}/paragraph_styles.html | 0 .../{html5 => snapshot}/paragraph_styles.yaml | 0 test/{html5 => snapshot}/tables.hdoc | 0 test/{html5 => snapshot}/tables.html | 0 test/{html5 => snapshot}/tables.yaml | 0 22 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 test/compare.zig rename test/{html5 => snapshot}/AGENTS.md (100%) rename test/{html5 => snapshot}/admonition_blocks.hdoc (100%) rename test/{html5 => snapshot}/admonition_blocks.html (100%) rename test/{html5 => snapshot}/admonition_blocks.yaml (100%) rename test/{html5 => snapshot}/document_header.hdoc (100%) rename test/{html5 => snapshot}/document_header.html (100%) rename test/{html5 => snapshot}/document_header.yaml (100%) rename test/{html5 => snapshot}/media_and_toc.hdoc (100%) rename test/{html5 => snapshot}/media_and_toc.html (100%) rename test/{html5 => snapshot}/media_and_toc.yaml (100%) rename test/{html5 => snapshot}/nesting_and_inlines.hdoc (100%) rename test/{html5 => snapshot}/nesting_and_inlines.html (100%) rename test/{html5 => snapshot}/nesting_and_inlines.yaml (100%) rename test/{html5 => snapshot}/paragraph_styles.hdoc (100%) rename test/{html5 => snapshot}/paragraph_styles.html (100%) rename test/{html5 => snapshot}/paragraph_styles.yaml (100%) rename test/{html5 => snapshot}/tables.hdoc (100%) 
rename test/{html5 => snapshot}/tables.html (100%) rename test/{html5 => snapshot}/tables.yaml (100%) diff --git a/AGENTS.md b/AGENTS.md index dc23294..5575ad0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,3 +19,10 @@ - Do not use "inline functions" like `const func = struct { fn func(…) {} }.func;` - Zig has no methods. Functions used by "method like" functions can still be placed next to them, no need to put them into global scope nor into local scope. + +## Snapshot Files + +- If you add a `hdoc` file to `test/snapshot`, also: + - Generate the corresponding html and yaml file + - Add the file inside build.zig to the snapshot_files global +- If you change behaviour, the snapshot tests will fail. Validate the failure against your expectations and see if you broke something unexpected. \ No newline at end of file diff --git a/build.zig b/build.zig index 262e775..f885e67 100644 --- a/build.zig +++ b/build.zig @@ -1,12 +1,12 @@ const std = @import("std"); -const test_files: []const []const u8 = &.{ - "test/html5/admonition_blocks.hdoc", - "test/html5/document_header.hdoc", - "test/html5/media_and_toc.hdoc", - "test/html5/nesting_and_inlines.hdoc", - "test/html5/paragraph_styles.hdoc", - "test/html5/tables.hdoc", +const snapshot_files: []const []const u8 = &.{ + "test/snapshot/admonition_blocks.hdoc", + "test/snapshot/document_header.hdoc", + "test/snapshot/media_and_toc.hdoc", + "test/snapshot/nesting_and_inlines.hdoc", + "test/snapshot/paragraph_styles.hdoc", + "test/snapshot/tables.hdoc", }; pub fn build(b: *std.Build) void { @@ -44,20 +44,32 @@ pub fn build(b: *std.Build) void { run_step.dependOn(&run_cmd.step); + const snapshot_diff = b.addExecutable(.{ + .name = "diff", + .root_module = b.createModule(.{ + .root_source_file = b.path("test/compare.zig"), + .target = b.graph.host, + .optimize = .Debug, + }), + }); + // Snapshot tests: - for (test_files) |path| { + for (snapshot_files) |path| { std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); const html_file 
= b.fmt("{s}.html", .{path[0 .. path.len - 5]}); const yaml_file = b.fmt("{s}.yaml", .{path[0 .. path.len - 5]}); - for (&[2][]const u8{ html_file, yaml_file }) |file| { + for (&[2][]const u8{ html_file, yaml_file }) |snapshot_file| { const test_run = b.addRunArtifact(exe); - test_run.addArgs(&.{ "--format", file[file.len - 4 ..] }); + test_run.addArgs(&.{ "--format", snapshot_file[snapshot_file.len - 4 ..] }); test_run.addFileArg(b.path(path)); - test_run.expectStdOutEqual( - b.build_root.handle.readFileAlloc(b.allocator, file, 10 * 1024 * 1024) catch @panic("oom"), - ); - test_step.dependOn(&test_run.step); + const generated_file = test_run.captureStdOut(); + + const compare_run = b.addRunArtifact(snapshot_diff); + compare_run.addFileArg(b.path(snapshot_file)); + compare_run.addFileArg(generated_file); + + test_step.dependOn(&compare_run.step); } } diff --git a/test/compare.zig b/test/compare.zig new file mode 100644 index 0000000..57d549b --- /dev/null +++ b/test/compare.zig @@ -0,0 +1,51 @@ +//! +//! compare +//! +const std = @import("std"); + +var arena: std.heap.ArenaAllocator = .init(std.heap.page_allocator); + +const allocator = arena.allocator(); + +pub fn main() !u8 { + defer arena.deinit(); + + const argv = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, argv); + + if (argv.len != 3) { + std.debug.print("usage: {s} \n", .{argv[0]}); + return 2; + } + + const ground_truth_path = argv[1]; + const new_input_path = argv[2]; + + const ground_truth = try readFileAlloc(allocator, ground_truth_path, 10 * 1024 * 1024); + defer allocator.free(ground_truth); + + const new_input = try readFileAlloc(allocator, new_input_path, 10 * 1024 * 1024); + defer allocator.free(new_input); + + // Compare full file contents for now. This keeps the snapshot tests simple and + // uses std.testing's string mismatch reporting. 
+ std.testing.expectEqualStrings(ground_truth, new_input) catch |err| switch (err) { + error.TestExpectedEqual => return 1, + else => return err, + }; + + return 0; +} + +fn readFileAlloc(alloc: std.mem.Allocator, path: []const u8, max_bytes: usize) ![]u8 { + const file = try openFile(path); + defer file.close(); + return file.readToEndAlloc(alloc, max_bytes); +} + +fn openFile(path: []const u8) !std.fs.File { + if (std.fs.path.isAbsolute(path)) { + return std.fs.openFileAbsolute(path, .{}); + } + return std.fs.cwd().openFile(path, .{}); +} diff --git a/test/html5/AGENTS.md b/test/snapshot/AGENTS.md similarity index 100% rename from test/html5/AGENTS.md rename to test/snapshot/AGENTS.md diff --git a/test/html5/admonition_blocks.hdoc b/test/snapshot/admonition_blocks.hdoc similarity index 100% rename from test/html5/admonition_blocks.hdoc rename to test/snapshot/admonition_blocks.hdoc diff --git a/test/html5/admonition_blocks.html b/test/snapshot/admonition_blocks.html similarity index 100% rename from test/html5/admonition_blocks.html rename to test/snapshot/admonition_blocks.html diff --git a/test/html5/admonition_blocks.yaml b/test/snapshot/admonition_blocks.yaml similarity index 100% rename from test/html5/admonition_blocks.yaml rename to test/snapshot/admonition_blocks.yaml diff --git a/test/html5/document_header.hdoc b/test/snapshot/document_header.hdoc similarity index 100% rename from test/html5/document_header.hdoc rename to test/snapshot/document_header.hdoc diff --git a/test/html5/document_header.html b/test/snapshot/document_header.html similarity index 100% rename from test/html5/document_header.html rename to test/snapshot/document_header.html diff --git a/test/html5/document_header.yaml b/test/snapshot/document_header.yaml similarity index 100% rename from test/html5/document_header.yaml rename to test/snapshot/document_header.yaml diff --git a/test/html5/media_and_toc.hdoc b/test/snapshot/media_and_toc.hdoc similarity index 100% rename from 
test/html5/media_and_toc.hdoc rename to test/snapshot/media_and_toc.hdoc diff --git a/test/html5/media_and_toc.html b/test/snapshot/media_and_toc.html similarity index 100% rename from test/html5/media_and_toc.html rename to test/snapshot/media_and_toc.html diff --git a/test/html5/media_and_toc.yaml b/test/snapshot/media_and_toc.yaml similarity index 100% rename from test/html5/media_and_toc.yaml rename to test/snapshot/media_and_toc.yaml diff --git a/test/html5/nesting_and_inlines.hdoc b/test/snapshot/nesting_and_inlines.hdoc similarity index 100% rename from test/html5/nesting_and_inlines.hdoc rename to test/snapshot/nesting_and_inlines.hdoc diff --git a/test/html5/nesting_and_inlines.html b/test/snapshot/nesting_and_inlines.html similarity index 100% rename from test/html5/nesting_and_inlines.html rename to test/snapshot/nesting_and_inlines.html diff --git a/test/html5/nesting_and_inlines.yaml b/test/snapshot/nesting_and_inlines.yaml similarity index 100% rename from test/html5/nesting_and_inlines.yaml rename to test/snapshot/nesting_and_inlines.yaml diff --git a/test/html5/paragraph_styles.hdoc b/test/snapshot/paragraph_styles.hdoc similarity index 100% rename from test/html5/paragraph_styles.hdoc rename to test/snapshot/paragraph_styles.hdoc diff --git a/test/html5/paragraph_styles.html b/test/snapshot/paragraph_styles.html similarity index 100% rename from test/html5/paragraph_styles.html rename to test/snapshot/paragraph_styles.html diff --git a/test/html5/paragraph_styles.yaml b/test/snapshot/paragraph_styles.yaml similarity index 100% rename from test/html5/paragraph_styles.yaml rename to test/snapshot/paragraph_styles.yaml diff --git a/test/html5/tables.hdoc b/test/snapshot/tables.hdoc similarity index 100% rename from test/html5/tables.hdoc rename to test/snapshot/tables.hdoc diff --git a/test/html5/tables.html b/test/snapshot/tables.html similarity index 100% rename from test/html5/tables.html rename to test/snapshot/tables.html diff --git 
a/test/html5/tables.yaml b/test/snapshot/tables.yaml similarity index 100% rename from test/html5/tables.yaml rename to test/snapshot/tables.yaml From ba41ef81183756dd3668dc566005e687e942d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 15:43:07 +0100 Subject: [PATCH 096/116] Add regression tests for pending footnotes --- src/hyperdoc.zig | 270 ++++++++++++++++++++++++++++++++++- src/render/dump.zig | 30 ++++ src/render/html5.zig | 100 +++++++++++++ src/testsuite.zig | 114 +++++++++++++++ test/snapshot/footnotes.hdoc | 7 + test/snapshot/footnotes.html | 31 ++++ 6 files changed, 551 insertions(+), 1 deletion(-) create mode 100644 test/snapshot/footnotes.hdoc create mode 100644 test/snapshot/footnotes.html diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 4012b0d..f163269 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -42,6 +42,11 @@ pub const Document = struct { } }; +pub const FootnoteKind = enum { + footnote, + citation, +}; + /// A top level layouting element of a document. /// Each block is a rectangular element on the screen with /// variable height, but a fixed width. 
@@ -56,6 +61,7 @@ pub const Block = union(enum) { preformatted: Preformatted, toc: TableOfContents, table: Table, + footnotes: Footnotes, pub const Heading = struct { index: Index, @@ -162,6 +168,18 @@ pub const Block = union(enum) { lang: LanguageTag, content: []Span, }; + + pub const Footnotes = struct { + lang: LanguageTag, + entries: []FootnoteEntry, + }; + + pub const FootnoteEntry = struct { + index: usize, + kind: FootnoteKind, + lang: LanguageTag, + content: []Span, + }; }; pub fn FormattedDateTime(comptime DT: type) type { @@ -180,6 +198,7 @@ pub const Span = struct { time: FormattedDateTime(Time), datetime: FormattedDateTime(DateTime), reference: InlineReference, + footnote: Footnote, }; pub const InlineReference = struct { @@ -227,6 +246,11 @@ pub const Span = struct { location: Parser.Location, }; +pub const Footnote = struct { + kind: FootnoteKind, + index: usize, +}; + pub const ScriptPosition = enum { baseline, superscript, @@ -618,6 +642,12 @@ pub fn parse( const block_locations = try sema.block_locations.toOwnedSlice(arena.allocator()); const toc = try sema.build_toc(contents, block_locations); + if (sema.has_pending_footnotes()) { + if (sema.first_footnote_location) |location| { + try sema.emit_diagnostic(.footnote_missing_dump, location); + } + } + return .{ .arena = arena, .contents = contents, @@ -753,6 +783,13 @@ pub const SemanticAnalyzer = struct { } }; + const FootnoteDefinition = struct { + kind: FootnoteKind, + index: usize, + lang: LanguageTag, + content: []Span, + }; + arena: std.mem.Allocator, diagnostics: ?*Diagnostics, code: []const u8, @@ -766,6 +803,10 @@ pub const SemanticAnalyzer = struct { ids: std.ArrayList(?Reference) = .empty, id_locations: std.ArrayList(?Parser.Location) = .empty, pending_refs: std.ArrayList(RefUse) = .empty, + footnote_counters: std.EnumArray(FootnoteKind, usize) = std.EnumArray(FootnoteKind, usize).initFill(0), + footnote_pending: std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)) = 
std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)).initFill(.empty), + footnote_keys: std.StringArrayHashMapUnmanaged(FootnoteDefinition) = .empty, + first_footnote_location: ?Parser.Location = null, current_heading_level: usize = 0, heading_counters: [Block.Heading.Level.count]u16 = @splat(0), @@ -933,6 +974,10 @@ pub const SemanticAnalyzer = struct { const toc, const id = try sema.translate_toc_node(node); return .{ .{ .toc = toc }, id }; }, + .footnotes => { + const footnotes = try sema.translate_footnotes_node(node); + return .{ .{ .footnotes = footnotes }, null }; + }, .table => { const table, const id = try sema.translate_table_node(node); return .{ .{ .table = table }, id }; @@ -953,6 +998,7 @@ pub const SemanticAnalyzer = struct { .@"\\time", .@"\\date", .@"\\datetime", + .@"\\footnote", .text, .columns, .group, @@ -1172,6 +1218,51 @@ pub const SemanticAnalyzer = struct { return .{ toc, attrs.id }; } + fn translate_footnotes_node(sema: *SemanticAnalyzer, node: Parser.Node) !Block.Footnotes { + const attrs = try sema.get_attributes(node, struct { + lang: LanguageTag = .inherit, + kind: ?FootnoteKind = null, + }); + + switch (node.body) { + .empty => {}, + .list => |child_nodes| { + for (child_nodes) |child_node| { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + } + }, + .string, .verbatim, .text_span => { + try sema.emit_diagnostic(.illegal_child_item, node.location); + }, + } + + var entries: std.ArrayList(Block.FootnoteEntry) = .empty; + defer entries.deinit(sema.arena); + + const kinds: []const FootnoteKind = if (attrs.kind) |kind| + &[_]FootnoteKind{kind} + else + &[_]FootnoteKind{ .footnote, .citation }; + + for (kinds) |kind| { + const pending = sema.footnote_pending.getPtr(kind); + if (pending.items.len == 0) + continue; + + try entries.appendSlice(sema.arena, pending.items); + pending.clearRetainingCapacity(); + } + + if (!sema.has_pending_footnotes()) { + sema.first_footnote_location = null; + } + + return .{ + 
.lang = attrs.lang, + .entries = try entries.toOwnedSlice(sema.arena), + }; + } + fn translate_table_node(sema: *SemanticAnalyzer, node: Parser.Node) !struct { Block.Table, ?Reference } { const attrs = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, @@ -1497,7 +1588,7 @@ pub const SemanticAnalyzer = struct { try merger.output.append(merger.arena, span); }, - .reference => { + .reference, .footnote => { try merger.flush_internal(.keep); std.debug.assert(merger.current_span.items.len == 0); @@ -1782,6 +1873,85 @@ pub const SemanticAnalyzer = struct { .location = node.location, }); }, + .@"\\footnote" => { + const props = try sema.get_attributes(node, struct { + key: ?Reference = null, + ref: ?Reference = null, + kind: ?FootnoteKind = null, + lang: LanguageTag = .inherit, + }); + + const has_body = node.body != .empty; + if (props.key != null and props.ref != null) { + try sema.emit_diagnostic(.footnote_conflicting_key_ref, node.location); + return; + } + + if (has_body) { + if (props.ref != null) { + try sema.emit_diagnostic(.footnote_conflicting_key_ref, node.location); + return; + } + } else { + if (props.ref == null) { + try sema.emit_diagnostic(.footnote_missing_ref, node.location); + return; + } + if (props.kind != null) { + try sema.emit_diagnostic(.footnote_kind_on_reference, get_attribute_location(node, "kind", .name) orelse node.location); + } + + const definition = sema.footnote_keys.get(props.ref.?.text) orelse { + try sema.emit_diagnostic(.{ .unknown_footnote_key = .{ .ref = props.ref.?.text } }, get_attribute_location(node, "ref", .value) orelse node.location); + return; + }; + + try sema.enqueue_footnote(definition); + sema.note_footnote_marker(node.location); + try spans.append(sema.arena, .{ + .content = .{ .footnote = .{ + .kind = definition.kind, + .index = definition.index, + } }, + .attribs = attribs, + .location = node.location, + }); + return; + } + + if (!has_body) { + try sema.emit_diagnostic(.footnote_missing_body, 
node.location); + return; + } + + const kind = props.kind orelse FootnoteKind.footnote; + + var content_spans: std.ArrayList(Span) = .empty; + defer content_spans.deinit(sema.arena); + + const content_attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang }); + try sema.translate_inline_body(&content_spans, node.body, content_attribs, .emit_diagnostic); + + const compacted = try sema.compact_spans(content_spans.items, .one_space); + if (compacted.len == 0) { + try sema.emit_diagnostic(.footnote_missing_body, node.location); + return; + } + + const key_location = get_attribute_location(node, "key", .value); + const definition = try sema.append_footnote_definition(kind, props.lang, compacted, props.key, node.location, key_location); + try sema.enqueue_footnote(definition); + sema.note_footnote_marker(node.location); + + try spans.append(sema.arena, .{ + .content = .{ .footnote = .{ + .kind = definition.kind, + .index = definition.index, + } }, + .attribs = attribs, + .location = node.location, + }); + }, .hdoc, .h1, @@ -1800,6 +1970,7 @@ pub const SemanticAnalyzer = struct { .img, .pre, .toc, + .footnotes, .table, .columns, .group, @@ -1913,6 +2084,7 @@ pub const SemanticAnalyzer = struct { try output.appendSlice(sema.arena, text); }, }, + .footnote => {}, } } @@ -2199,6 +2371,7 @@ pub const SemanticAnalyzer = struct { DateTime => DateTime.parse(value, timezone_hint) catch return error.InvalidValue, LanguageTag => LanguageTag.parse(value) catch return error.InvalidValue, TimeZoneOffset => TimeZoneOffset.parse(value) catch return error.InvalidValue, + FootnoteKind => std.meta.stringToEnum(FootnoteKind, value) orelse return error.InvalidValue, inline else => |EnumT| switch (@typeInfo(EnumT)) { .@"enum" => std.meta.stringToEnum(EnumT, value) orelse return error.InvalidValue, @@ -2258,6 +2431,12 @@ pub const SemanticAnalyzer = struct { try sema.resolve_block_references(child, id_map); } }, + .footnotes => |*footnotes| { + for 
(footnotes.entries) |*entry| { + try sema.resolve_span_slice(&entry.content, id_map); + } + }, + .toc => {}, .table => |*table| { for (table.rows) |*row| switch (row.*) { @@ -2417,6 +2596,68 @@ pub const SemanticAnalyzer = struct { }; } + fn enqueue_footnote(sema: *SemanticAnalyzer, definition: FootnoteDefinition) !void { + const pending = sema.footnote_pending.getPtr(definition.kind); + for (pending.items) |entry| { + if (entry.index == definition.index) { + return; + } + } + + try pending.append(sema.arena, .{ + .index = definition.index, + .kind = definition.kind, + .lang = definition.lang, + .content = definition.content, + }); + } + + fn append_footnote_definition( + sema: *SemanticAnalyzer, + kind: FootnoteKind, + lang: LanguageTag, + content: []Span, + key: ?Reference, + node_location: Parser.Location, + key_location: ?Parser.Location, + ) !FootnoteDefinition { + const counter = sema.footnote_counters.getPtr(kind); + counter.* += 1; + const definition: FootnoteDefinition = .{ + .kind = kind, + .index = counter.*, + .lang = lang, + .content = content, + }; + + if (key) |reference| { + const gop = try sema.footnote_keys.getOrPut(sema.arena, reference.text); + if (gop.found_existing) { + try sema.emit_diagnostic(.{ .duplicate_footnote_key = .{ .ref = reference.text } }, key_location orelse node_location); + } else { + gop.value_ptr.* = definition; + } + } + + return definition; + } + + fn note_footnote_marker(sema: *SemanticAnalyzer, location: Parser.Location) void { + if (sema.first_footnote_location == null) { + sema.first_footnote_location = location; + } + } + + fn has_pending_footnotes(sema: *SemanticAnalyzer) bool { + for (std.meta.tags(FootnoteKind)) |kind| { + if (sema.footnote_pending.get(kind).items.len > 0) { + return true; + } + } + + return false; + } + /// Computes the next index number for a heading of the given level: fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { const 
index = @intFromEnum(level); @@ -3217,6 +3458,7 @@ pub const Parser = struct { img, pre, toc, + footnotes, table, columns, group, @@ -3235,6 +3477,7 @@ pub const Parser = struct { @"\\date", @"\\time", @"\\datetime", + @"\\footnote", unknown_block, unknown_inline, @@ -3251,6 +3494,7 @@ pub const Parser = struct { .@"\\date", .@"\\time", .@"\\datetime", + .@"\\footnote", .unknown_inline, .text, => true, @@ -3272,6 +3516,7 @@ pub const Parser = struct { .img, .pre, .toc, + .footnotes, .table, .columns, .group, @@ -3295,6 +3540,7 @@ pub const Parser = struct { .img, .pre, .toc, + .footnotes, .group, .@"\\em", @@ -3307,6 +3553,7 @@ pub const Parser = struct { .@"\\date", .@"\\time", .@"\\datetime", + .@"\\footnote", .unknown_inline, .unknown_block, // Unknown blocks must also have inline bodies to optimally retain body contents @@ -3426,6 +3673,12 @@ pub const Diagnostic = struct { duplicate_id: ReferenceError, unknown_id: ReferenceError, empty_ref_body_target, + duplicate_footnote_key: ReferenceError, + unknown_footnote_key: ReferenceError, + footnote_conflicting_key_ref, + footnote_missing_ref, + footnote_missing_body, + footnote_kind_on_reference, // warnings: document_starts_with_bom, @@ -3442,6 +3695,7 @@ pub const Diagnostic = struct { tab_character, automatic_heading_insertion: AutomaticHeading, title_inline_date_time_without_header, + footnote_missing_dump, pub fn severity(code: Code) Severity { return switch (code) { @@ -3483,6 +3737,12 @@ pub const Diagnostic = struct { .duplicate_id, .unknown_id, .empty_ref_body_target, + .duplicate_footnote_key, + .unknown_footnote_key, + .footnote_conflicting_key_ref, + .footnote_missing_ref, + .footnote_missing_body, + .footnote_kind_on_reference, => .@"error", .missing_document_language, @@ -3499,6 +3759,7 @@ pub const Diagnostic = struct { .document_starts_with_bom, .automatic_heading_insertion, .title_inline_date_time_without_header, + .footnote_missing_dump, => .warning, }; } @@ -3578,12 +3839,19 @@ pub const 
Diagnostic = struct { .duplicate_id => |ctx| try w.print("The id \"{s}\" is already taken by another node.", .{ctx.ref}), .unknown_id => |ctx| try w.print("The referenced id \"{s}\" does not exist.", .{ctx.ref}), .empty_ref_body_target => try w.writeAll("Empty-body \\ref is only supported for headings."), + .duplicate_footnote_key => |ctx| try w.print("The footnote key \"{s}\" is already defined.", .{ctx.ref}), + .unknown_footnote_key => |ctx| try w.print("The referenced footnote key \"{s}\" does not exist.", .{ctx.ref}), + .footnote_conflicting_key_ref => try w.writeAll("\\footnote attributes 'key' and 'ref' cannot be used together."), + .footnote_missing_ref => try w.writeAll("\\footnote without a body requires a ref=\"...\" attribute."), + .footnote_missing_body => try w.writeAll("\\footnote definitions require a non-empty body."), + .footnote_kind_on_reference => try w.writeAll("Attribute 'kind' is only valid on defining \\footnote entries."), .missing_document_language => try w.writeAll("Document language is missing; set lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), .automatic_heading_insertion => |ctx| try w.print("Inserted automatic {t} to fill heading level gap.", .{ctx.level}), .title_inline_date_time_without_header => try w.writeAll("Title block contains \\date/\\time/\\datetime but hdoc(title=\"...\") is missing; metadata title cannot be derived reliably."), + .footnote_missing_dump => try w.writeAll("Document contains footnotes but no footnotes(...) 
block to render them."), } } }; diff --git a/src/render/dump.zig b/src/render/dump.zig index cc57876..76d92c2 100644 --- a/src/render/dump.zig +++ b/src/render/dump.zig @@ -221,6 +221,9 @@ fn writeSpanContentInline(writer: *Writer, content: hdoc.Span.Content) Writer.Er try writeFormattedDateTimeInline(writer, datetime); try writer.writeByte('"'); }, + .footnote => |footnote| { + try writer.print("\"footnote:{s}:{d}\"", .{ @tagName(footnote.kind), footnote.index }); + }, .reference => |reference| { try writer.writeByte('"'); try writer.writeAll("ref:"); @@ -321,6 +324,28 @@ fn dumpListItemsField(writer: *Writer, indent: usize, key: []const u8, items: [] } } +fn dumpFootnoteEntry(writer: *Writer, indent: usize, entry: hdoc.Block.FootnoteEntry) Writer.Error!void { + try writeIndent(writer, indent); + try writer.print("index: {}\n", .{entry.index}); + try dumpEnumField(writer, indent, "kind", entry.kind); + try dumpOptionalStringField(writer, indent, "lang", entry.lang.text); + try dumpSpanListField(writer, indent, "content", entry.content); +} + +fn dumpFootnoteEntries(writer: *Writer, indent: usize, entries: []const hdoc.Block.FootnoteEntry) Writer.Error!void { + try writeIndent(writer, indent); + if (entries.len == 0) { + try writer.writeAll("entries: []\n"); + return; + } + try writer.writeAll("entries:\n"); + for (entries) |entry| { + try writeIndent(writer, indent + indent_step); + try writer.writeAll("- "); + try dumpFootnoteEntry(writer, indent + indent_step, entry); + } +} + fn dumpTableCell(writer: *Writer, indent: usize, cell: hdoc.Block.TableCell) Writer.Error!void { try dumpOptionalStringFieldInline(writer, "lang", cell.lang.text); try dumpOptionalNumberField(writer, indent + indent_step, "colspan", @as(?u32, cell.colspan)); @@ -457,6 +482,11 @@ fn dumpBlockInline(writer: *Writer, indent: usize, block: hdoc.Block) Writer.Err try dumpOptionalStringField(writer, indent + indent_step, "lang", toc.lang.text); try dumpOptionalNumberField(writer, indent + 
indent_step, "depth", @as(?u8, toc.depth)); }, + .footnotes => |footnotes| { + try writeTypeTag(writer, "footnotes"); + try dumpOptionalStringField(writer, indent + indent_step, "lang", footnotes.lang.text); + try dumpFootnoteEntries(writer, indent + indent_step, footnotes.entries); + }, .table => |table| { try writeTypeTag(writer, "table"); try dumpOptionalStringField(writer, indent + indent_step, "lang", table.lang.text); diff --git a/src/render/html5.zig b/src/render/html5.zig index 2bc361f..5aa9b97 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -32,6 +32,7 @@ const RenderContext = struct { .preformatted => |preformatted| try ctx.renderPreformatted(preformatted, block_index, indent), .toc => |toc| try ctx.renderTableOfContents(toc, block_index, indent), .table => |table| try ctx.renderTable(table, block_index, indent), + .footnotes => |footnotes| try ctx.renderFootnotes(footnotes, block_index, indent), } } @@ -359,6 +360,78 @@ const RenderContext = struct { try ctx.writer.writeByte('\n'); } + fn renderFootnotes(ctx: *RenderContext, footnotes: hdoc.Block.Footnotes, block_index: ?usize, indent: usize) RenderError!void { + const lang_attr = langAttribute(footnotes.lang); + const id_attr = ctx.resolveBlockId(block_index); + + try writeIndent(ctx.writer, indent); + try writeStartTag(ctx.writer, "div", .regular, .{ + .id = id_attr, + .lang = lang_attr, + .class = "hdoc-footnotes", + }); + try ctx.writer.writeByte('\n'); + + const kinds = [_]hdoc.FootnoteKind{ .footnote, .citation }; + for (kinds) |kind| { + var first_index: ?usize = null; + var count: usize = 0; + + for (footnotes.entries) |entry| { + if (entry.kind != kind) + continue; + if (first_index == null) + first_index = entry.index; + count += 1; + } + + if (count == 0) + continue; + + try writeIndent(ctx.writer, indent + indent_step); + var class_buffer: [64]u8 = undefined; + const list_class = std.fmt.bufPrint(&class_buffer, "hdoc-footnote-list hdoc-{s}", .{footnoteSlug(kind)}) catch 
unreachable; + try writeStartTag(ctx.writer, "ol", .regular, .{ + .class = list_class, + .start = first_index, + }); + try ctx.writer.writeByte('\n'); + + for (footnotes.entries) |entry| { + if (entry.kind != kind) + continue; + + var id_buffer: [64]u8 = undefined; + const entry_id = ctx.footnoteId(entry.kind, entry.index, &id_buffer); + + try writeIndent(ctx.writer, indent + 2 * indent_step); + try writeStartTag(ctx.writer, "li", .regular, .{ + .id = entry_id, + .lang = langAttribute(entry.lang), + }); + if (entry.content.len > 0) { + try ctx.writer.writeByte('\n'); + try writeIndent(ctx.writer, indent + 3 * indent_step); + try writeStartTag(ctx.writer, "p", .regular, .{ .lang = langAttribute(entry.lang) }); + try ctx.renderSpans(entry.content); + try writeEndTag(ctx.writer, "p"); + try ctx.writer.writeByte('\n'); + try writeIndent(ctx.writer, indent + 2 * indent_step); + } + try writeEndTag(ctx.writer, "li"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent + indent_step); + try writeEndTag(ctx.writer, "ol"); + try ctx.writer.writeByte('\n'); + } + + try writeIndent(ctx.writer, indent); + try writeEndTag(ctx.writer, "div"); + try ctx.writer.writeByte('\n'); + } + fn renderHeaderRow(ctx: *RenderContext, columns: hdoc.Block.TableColumns, indent: usize, has_title_column: bool) RenderError!void { try writeIndent(ctx.writer, indent); try writeStartTag(ctx.writer, "tr", .regular, .{ .lang = langAttribute(columns.lang) }); @@ -471,6 +544,18 @@ const RenderContext = struct { return null; } + fn footnoteSlug(kind: hdoc.FootnoteKind) []const u8 { + return switch (kind) { + .footnote => "footnote", + .citation => "citation", + }; + } + + fn footnoteId(ctx: *RenderContext, kind: hdoc.FootnoteKind, index: usize, buffer: []u8) []const u8 { + _ = ctx; + return std.fmt.bufPrint(buffer, "hdoc-{s}-{d}", .{ footnoteSlug(kind), index }) catch unreachable; + } + fn renderSpans(ctx: *RenderContext, spans: []const hdoc.Span) RenderError!void { for (spans) 
|span| { try ctx.renderSpan(span); @@ -554,6 +639,21 @@ const RenderContext = struct { .reference => |reference| { try ctx.renderReference(reference, content_lang); }, + .footnote => |footnote| { + var id_buffer: [64]u8 = undefined; + const target_id = ctx.footnoteId(footnote.kind, footnote.index, &id_buffer); + var href_buffer: [64]u8 = undefined; + const href = std.fmt.bufPrint(&href_buffer, "#{s}", .{target_id}) catch unreachable; + + var class_buffer: [64]u8 = undefined; + const class_attr = std.fmt.bufPrint(&class_buffer, "hdoc-footnote-ref hdoc-{s}", .{footnoteSlug(footnote.kind)}) catch unreachable; + + try writeStartTag(ctx.writer, "sup", .regular, .{ .class = class_attr, .lang = content_lang }); + try writeStartTag(ctx.writer, "a", .regular, .{ .href = href }); + try ctx.writer.print("{d}", .{footnote.index}); + try writeEndTag(ctx.writer, "a"); + try writeEndTag(ctx.writer, "sup"); + }, } while (opened_len > 0) { diff --git a/src/testsuite.zig b/src/testsuite.zig index 41a60f9..5d79e8a 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -593,6 +593,120 @@ test "table of contents inserts automatic headings when skipping levels" { try std.testing.expectEqual(@as(usize, 0), trailing_h1_child.children.len); } +test "footnotes collect entries per dump" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{Intro \footnote{first} \footnote(kind="citation",key="cite1"){c1}} + \\footnotes; + \\p{Again \footnote(ref="cite1"); \footnote{second}} + \\footnotes(kind="citation"); + \\footnotes; + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + try std.testing.expect(!diagnostics.has_error()); + try std.testing.expectEqual(@as(usize, 5), doc.contents.len); + + const first_dump = switch (doc.contents[1]) { + .footnotes => |value| value, + else => return error.TestExpectedEqual, + }; + try 
std.testing.expectEqual(@as(usize, 2), first_dump.entries.len); + try std.testing.expectEqual(hdoc.FootnoteKind.footnote, first_dump.entries[0].kind); + try std.testing.expectEqual(@as(usize, 1), first_dump.entries[0].index); + try std.testing.expectEqual(hdoc.FootnoteKind.citation, first_dump.entries[1].kind); + try std.testing.expectEqual(@as(usize, 1), first_dump.entries[1].index); + + const second_dump = switch (doc.contents[3]) { + .footnotes => |value| value, + else => return error.TestExpectedEqual, + }; + try std.testing.expectEqual(@as(usize, 1), second_dump.entries.len); + try std.testing.expectEqual(hdoc.FootnoteKind.citation, second_dump.entries[0].kind); + try std.testing.expectEqual(@as(usize, 1), second_dump.entries[0].index); + + const final_dump = switch (doc.contents[4]) { + .footnotes => |value| value, + else => return error.TestExpectedEqual, + }; + try std.testing.expectEqual(@as(usize, 1), final_dump.entries.len); + try std.testing.expectEqual(hdoc.FootnoteKind.footnote, final_dump.entries[0].kind); + try std.testing.expectEqual(@as(usize, 2), final_dump.entries[0].index); +} + +test "warn when footnotes are missing dumps" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{Body \footnote{content}} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_warning = false; + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + saw_warning = true; + break; + } + } + try std.testing.expect(saw_warning); +} + +test "warn when footnotes remain after intermediate dump" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{First \footnote{one}} + \\footnotes{} + \\p{Second \footnote{two}} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try 
hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var saw_warning = false; + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + saw_warning = true; + break; + } + } + try std.testing.expect(saw_warning); +} + +test "no warning when footnotes are drained after later dump" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{First \footnote{one}} + \\footnotes{} + \\p{Second \footnote{two}} + \\footnotes{} + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + return error.TestExpectedEqual; + } + } +} + fn diagnosticCodesEqual(lhs: hdoc.Diagnostic.Code, rhs: hdoc.Diagnostic.Code) bool { if (std.meta.activeTag(lhs) != std.meta.activeTag(rhs)) return false; diff --git a/test/snapshot/footnotes.hdoc b/test/snapshot/footnotes.hdoc new file mode 100644 index 0000000..ae66d1b --- /dev/null +++ b/test/snapshot/footnotes.hdoc @@ -0,0 +1,7 @@ +hdoc(version="2.0",lang="en"); +title{Footnotes Demo} +p{Intro with footnote\footnote{Footnote text} and citation\footnote(kind="citation",key="ref1"){Citation text}.} +footnotes; +p{Next section references \footnote(ref="ref1"); and adds another\footnote{Second footnote}.} +footnotes(kind="citation"); +footnotes; diff --git a/test/snapshot/footnotes.html b/test/snapshot/footnotes.html new file mode 100644 index 0000000..8ecbc1d --- /dev/null +++ b/test/snapshot/footnotes.html @@ -0,0 +1,31 @@ +
      +

      Footnotes Demo

      +
      +

      Intro with footnote1 and citation1.

      +
      +
        +
      1. +

        Footnote text

        +
      2. +
      +
        +
      1. +

        Citation text

        +
      2. +
      +
      +

      Next section references 1 and adds another2.

      +
      +
        +
      1. +

        Citation text

        +
      2. +
      +
      +
      +
        +
      1. +

        Second footnote

        +
      2. +
      +
      From 3c6855013a23e6cbfcc4ad0a427c88a45714f195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 16:38:38 +0100 Subject: [PATCH 097/116] Fix pending footnote warning location --- src/hyperdoc.zig | 37 +++++++++++++++++++++++++++++-------- src/testsuite.zig | 27 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index f163269..42919c7 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -643,7 +643,7 @@ pub fn parse( const toc = try sema.build_toc(contents, block_locations); if (sema.has_pending_footnotes()) { - if (sema.first_footnote_location) |location| { + if (sema.first_pending_footnote_location()) |location| { try sema.emit_diagnostic(.footnote_missing_dump, location); } } @@ -806,7 +806,7 @@ pub const SemanticAnalyzer = struct { footnote_counters: std.EnumArray(FootnoteKind, usize) = std.EnumArray(FootnoteKind, usize).initFill(0), footnote_pending: std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)) = std.EnumArray(FootnoteKind, std.ArrayList(Block.FootnoteEntry)).initFill(.empty), footnote_keys: std.StringArrayHashMapUnmanaged(FootnoteDefinition) = .empty, - first_footnote_location: ?Parser.Location = null, + first_footnote_locations: std.EnumArray(FootnoteKind, ?Parser.Location) = std.EnumArray(FootnoteKind, ?Parser.Location).initFill(null), current_heading_level: usize = 0, heading_counters: [Block.Heading.Level.count]u16 = @splat(0), @@ -1251,10 +1251,13 @@ pub const SemanticAnalyzer = struct { try entries.appendSlice(sema.arena, pending.items); pending.clearRetainingCapacity(); + sema.first_footnote_locations.getPtr(kind).* = null; } if (!sema.has_pending_footnotes()) { - sema.first_footnote_location = null; + for (std.meta.tags(FootnoteKind)) |kind| { + sema.first_footnote_locations.getPtr(kind).* = null; + } } return .{ @@ -1907,7 +1910,7 @@ pub const SemanticAnalyzer = struct { }; try sema.enqueue_footnote(definition); - 
sema.note_footnote_marker(node.location); + sema.note_footnote_marker(definition.kind, node.location); try spans.append(sema.arena, .{ .content = .{ .footnote = .{ .kind = definition.kind, @@ -1941,7 +1944,7 @@ pub const SemanticAnalyzer = struct { const key_location = get_attribute_location(node, "key", .value); const definition = try sema.append_footnote_definition(kind, props.lang, compacted, props.key, node.location, key_location); try sema.enqueue_footnote(definition); - sema.note_footnote_marker(node.location); + sema.note_footnote_marker(definition.kind, node.location); try spans.append(sema.arena, .{ .content = .{ .footnote = .{ @@ -2642,9 +2645,10 @@ pub const SemanticAnalyzer = struct { return definition; } - fn note_footnote_marker(sema: *SemanticAnalyzer, location: Parser.Location) void { - if (sema.first_footnote_location == null) { - sema.first_footnote_location = location; + fn note_footnote_marker(sema: *SemanticAnalyzer, kind: FootnoteKind, location: Parser.Location) void { + const slot = sema.first_footnote_locations.getPtr(kind); + if (slot.* == null) { + slot.* = location; } } @@ -2658,6 +2662,23 @@ pub const SemanticAnalyzer = struct { return false; } + fn first_pending_footnote_location(sema: *SemanticAnalyzer) ?Parser.Location { + var earliest: ?Parser.Location = null; + + for (std.meta.tags(FootnoteKind)) |kind| { + if (sema.footnote_pending.get(kind).items.len == 0) + continue; + + if (sema.first_footnote_locations.get(kind)) |location| { + if (earliest == null or location.offset < earliest.?.offset) { + earliest = location; + } + } + } + + return earliest; + } + /// Computes the next index number for a heading of the given level: fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { const index = @intFromEnum(level); diff --git a/src/testsuite.zig b/src/testsuite.zig index 5d79e8a..e2003c8 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -685,6 +685,33 @@ test "warn 
when footnotes remain after intermediate dump" { try std.testing.expect(saw_warning); } +test "footnote missing dump warning points to earliest remaining kind" { + const source = + \\hdoc(version="2.0",lang="en"); + \\p{First \footnote{one}} + \\p{C \footnote(kind="citation",key="cite"){two}} + \\footnotes(kind="footnote"); + ; + + var diagnostics: hdoc.Diagnostics = .init(std.testing.allocator); + defer diagnostics.deinit(); + + var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); + defer doc.deinit(); + + var warning: ?hdoc.Diagnostic = null; + for (diagnostics.items.items) |item| { + if (diagnosticCodesEqual(item.code, .footnote_missing_dump)) { + warning = item; + break; + } + } + + try std.testing.expect(warning != null); + try std.testing.expectEqual(@as(u32, 3), warning.?.location.line); + try std.testing.expectEqual(@as(u32, 5), warning.?.location.column); +} + test "no warning when footnotes are drained after later dump" { const source = \\hdoc(version="2.0",lang="en"); From 9003d2ef306fc09112a8daea90c021af80f68f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:39:50 +0100 Subject: [PATCH 098/116] Improves handling of missing files when dealing with snapshots --- build.zig | 1 + test/compare.zig | 20 +++++++++-- test/snapshot/footnotes.yaml | 69 ++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 test/snapshot/footnotes.yaml diff --git a/build.zig b/build.zig index f885e67..eb0d9ba 100644 --- a/build.zig +++ b/build.zig @@ -7,6 +7,7 @@ const snapshot_files: []const []const u8 = &.{ "test/snapshot/nesting_and_inlines.hdoc", "test/snapshot/paragraph_styles.hdoc", "test/snapshot/tables.hdoc", + "test/snapshot/footnotes.hdoc", }; pub fn build(b: *std.Build) void { diff --git a/test/compare.zig b/test/compare.zig index 57d549b..69c9b82 100644 --- a/test/compare.zig +++ b/test/compare.zig @@ -21,10 +21,23 @@ pub fn main() !u8 { const 
ground_truth_path = argv[1]; const new_input_path = argv[2]; - const ground_truth = try readFileAlloc(allocator, ground_truth_path, 10 * 1024 * 1024); + var files_ok = true; + const ground_truth = readFileAlloc(allocator, ground_truth_path, 10 * 1024 * 1024) catch |err| switch (err) { + error.FileNotFound => blk: { + files_ok = false; + break :blk ""; + }, + else => |e| return e, + }; defer allocator.free(ground_truth); - const new_input = try readFileAlloc(allocator, new_input_path, 10 * 1024 * 1024); + const new_input = readFileAlloc(allocator, new_input_path, 10 * 1024 * 1024) catch |err| switch (err) { + error.FileNotFound => blk: { + files_ok = false; + break :blk ""; + }, + else => |e| return e, + }; defer allocator.free(new_input); // Compare full file contents for now. This keeps the snapshot tests simple and @@ -34,6 +47,9 @@ pub fn main() !u8 { else => return err, }; + if (!files_ok) + return 1; + return 0; } diff --git a/test/snapshot/footnotes.yaml b/test/snapshot/footnotes.yaml new file mode 100644 index 0000000..5340dde --- /dev/null +++ b/test/snapshot/footnotes.yaml @@ -0,0 +1,69 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Footnotes Demo" + full: + lang: "" + content: + - [] "Footnotes Demo" + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "Intro with footnote" + - [] "footnote:footnote:1" + - [] " and citation" + - [] "footnote:citation:1" + - [] "." + - footnotes: + lang: "" + entries: + - index: 1 + kind: footnote + lang: "" + content: + - [] "Footnote text" + - index: 1 + kind: citation + lang: "" + content: + - [] "Citation text" + - paragraph: + lang: "" + content: + - [] "Next section references " + - [] "footnote:citation:1" + - [] " and adds another" + - [] "footnote:footnote:2" + - [] "." 
+ - footnotes: + lang: "" + entries: + - index: 1 + kind: citation + lang: "" + content: + - [] "Citation text" + - footnotes: + lang: "" + entries: + - index: 2 + kind: footnote + lang: "" + content: + - [] "Second footnote" + ids: + - null + - null + - null + - null + - null From bf902d05e628e80bd855c408e7d54fb28af32b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 21:44:55 +0100 Subject: [PATCH 099/116] Cleans SPEC_TODO.md --- SPEC_TODO.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 1cfadaf..c12f0be 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,8 +1,2 @@ # Spec compliance TODOs -- Split interior references from external links: implement an inline `\ref` element with `ref`/`fmt` handling and synthesized text for empty bodies, while restricting `\link` to `uri` only. The current inline switch only recognizes `\link` with `ref` and lacks heading index/name rendering. 【F:docs/specification-proper-draft.md†L675-L724】【F:src/hyperdoc.zig†L1462-L1543】 -- Add footnote/citation handling: parse inline `\footnote` with key/ref/kind rules, collect numbered entries per kind, implement the `footnotes` dump node with cursor advancement, and warn when markers exist without a dump. No such nodes are recognized in the current inline/block translators. 【F:docs/specification-proper-draft.md†L635-L752】【F:docs/specification-proper-draft.md†L834-L835】【F:src/hyperdoc.zig†L1462-L1543】【F:src/hyperdoc.zig†L820-L838】 -- Enforce `hdoc` placement and body rules by rejecting headers that are not the first node or that carry any non-empty body, instead of merely warning on the first non-header block and accepting later headers. 
【F:docs/specification.md†L369-L373】【F:src/hyperdoc.zig†L734-L788】 -- Treat admonition blocks (`note`, `warning`, `danger`, `tip`, `quote`, `spoiler`) as block-list containers with shorthand promotion for string/verbatim bodies rather than forcing them into a single inline paragraph payload. 【F:docs/specification.md†L585-L588】【F:src/hyperdoc.zig†L916-L935】 -- Enforce table column structure: allow at most one optional leading `columns` row, derive a non-zero effective column count even when `columns` is absent, and reject tables where no row or column establishes width. The current implementation accepts multiple `columns` nodes anywhere and never validates missing/zero column counts. 【F:docs/specification.md†L618-L629】【F:src/hyperdoc.zig†L1076-L1147】 -- Restrict `toc` to top-level usage as required by the specification; the current translator permits `toc` blocks inside nested block lists. 【F:docs/specification.md†L535-L543】【F:src/hyperdoc.zig†L1041-L1073】【F:src/hyperdoc.zig†L1254-L1270】 From 8ab068029cf556dc49fd34dc37c39be0167c2549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:29:46 +0100 Subject: [PATCH 100/116] Adds specification parts about the syntax attribute. --- docs/TODO.md | 1 - docs/specification.md | 121 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) diff --git a/docs/TODO.md b/docs/TODO.md index 556c7dc..a74db6c 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -3,7 +3,6 @@ ## Tasks - Assign semantics to node types, paragraph kinds, ... -- Specify "syntax" proper - Add links to RFCs where possible - Verbatim-body to text conversion is under-specified. You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). 
Different implementations may diverge. - Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. diff --git a/docs/specification.md b/docs/specification.md index fcee7cd..b862709 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -867,6 +867,36 @@ Semantics: - **Language tag:** BCP 47 (RFC 5646). - **Timezone offset:** `Z` or `±HH:MM`. - **URI/IRI:** per RFC 3987. +- **Syntax identifier**: see §10.1.1. + +#### 10.1.1 Syntax identifier + +A **Syntax identifier** is a compact string used to label a syntax-highlighting / tokenization scheme and a language (or other syntax) within that scheme. + +Lexical rules (normative): + +- A Syntax identifier value **MUST** be non-empty. +- It **MUST NOT** contain any whitespace or Unicode control characters (General Category `Cc`). +- It **MAY** contain any other Unicode scalar values, subject to the `scheme` / `name` splitting rules below. + +Structure (normative): + +A Syntax identifier is either: + +- `name` (no scheme prefix), or +- `scheme ":" name` + +Where: + +- `scheme` is an attribute-key-like identifier: it **MUST** match the lexical form of `attr_key` (§5.1). +- `name` is an opaque identifier: it **MUST** be non-empty and **MUST NOT** contain `":"`. + +Parsing and normalization (normative): + +- If the value contains a `":"`, the first `":"` splits the identifier into `scheme` and `name`. +- Otherwise, the effective `scheme` is `"hdoc"` and the entire value is the `name`. +- Scheme matching is **ASCII case-insensitive**. The canonical scheme behavior defined by this specification uses the lowercase scheme name. 
+- The meaning and matching rules of `name` are scheme-defined, except for the `"hdoc"` scheme which is defined in Appendix D. ### 10.2 Date / time lexical formats (normative) @@ -1017,6 +1047,28 @@ Each element has an **effective language tag**, computed as follows: This inheritance allows documents to mix language contexts across nested elements (e.g. an English document that contains a German `quote` with an Italian paragraph inside), and keeps localized date/time values in their local context. +### 10.5 `syntax` attribute + +The `syntax` attribute is a rendering hint for syntax-aware presentation (e.g. syntax highlighting) on `pre` (§9.3.6) and `\mono` (§9.5.2). + +Normative rules: + +- If present, the `syntax` attribute value **MUST** be a Syntax identifier (§10.1.1). +- The `syntax` attribute **MUST NOT** affect parsing or semantic meaning of the element’s body. It is a rendering hint only. +- Renderers **SHOULD** implement the `"hdoc"` scheme defined by this specification (Appendix D). +- Renderers **MAY** implement additional schemes. +- If a renderer does not recognize the scheme or the name within that scheme, it **MUST** render the content as **plain monospaced text** (i.e. without syntax-specific styling). + +#### 10.5.1 `plain` (normative) + +Within the `"hdoc"` scheme (Appendix D), the canonical name `plain` indicates an explicit request for no syntax highlighting. + +If the effective Syntax identifier resolves to `hdoc:plain` (including any aliases mapped to `plain`): + +- A renderer **MUST NOT** apply syntax highlighting. +- A renderer **MUST NOT** attempt language autodetection. +- A renderer **MAY** still apply generic monospace/code styling (font, background, line wrapping policy, etc.), but **MUST NOT** apply token- or language-dependent styling. + ## 11. Non-normative guidance for tooling - Formatters should normalize line endings to LF. @@ -1044,3 +1096,72 @@ pre(syntax="c"): | return 0; | } ``` + +## Appendix D. 
`"hdoc"` syntax scheme (normative, non-exhaustive) + +This appendix defines the `"hdoc"` scheme used by the `syntax` attribute (§10.5). + +The `"hdoc"` scheme is intended to provide stable, interoperable canonical names for common syntaxes, while allowing unknown names without making documents semantically invalid. + +### D.1 Canonicalization and aliasing (normative) + +For the purpose of interpreting `syntax` values in the `"hdoc"` scheme: + +- Matching of `"hdoc"` `name` values is **ASCII case-insensitive**. +- If the `name` matches an alias in Table D.2, the effective canonical name **MUST** be the alias target. +- Otherwise, if the `name` matches a canonical name in Table D.1, the effective canonical name is that name. +- Otherwise, the `name` is **unrecognized** for the `"hdoc"` scheme. + +Tooling guidance (non-normative): + +- Formatters and rewriters should preserve the original `syntax` string verbatim unless they intentionally canonicalize it. +- If canonicalizing, tooling should: + - prefer omitting the `"hdoc:"` scheme prefix (since `"hdoc"` is the default scheme), and + - prefer the canonical names in Table D.1. + +### D.2 Canonical `"hdoc"` names (normative) + +Table D.1 lists canonical `"hdoc"` syntax names defined by this specification. + +| Canonical name | Intended meaning | +| -------------- | ------------------------------------------------------------- | +| `plain` | Explicitly no highlighting (§10.5.1). | +| `hdoc` | HyperDoc source text. | +| `c` | C. | +| `cpp` | C++. | +| `csharp` | C#. | +| `rust` | Rust. | +| `zig` | Zig. | +| `python` | Python. | +| `lua` | Lua. | +| `js` | JavaScript. | +| `java` | Java. | +| `xml` | XML. | +| `json` | JSON. | +| `yaml` | YAML. | +| `toml` | TOML. | +| `gfm` | GitHub Flavored Markdown. | +| `html` | HTML. | + +### D.3 Normative alias mapping + +Table D.2 defines aliases that **MUST** be mapped to the listed canonical `"hdoc"` name for interpretation. 
+ +| Alias | Canonical `"hdoc"` name | +| ------------ | ----------------------- | +| `text` | `plain` | +| `none` | `plain` | +| `hyperdoc` | `hdoc` | +| `c++` | `cpp` | +| `cxx` | `cpp` | +| `cc` | `cpp` | +| `c#` | `csharp` | +| `cs` | `csharp` | +| `c-sharp` | `csharp` | +| `py` | `python` | +| `javascript` | `js` | +| `ecmascript` | `js` | +| `yml` | `yaml` | +| `md` | `gfm` | +| `markdown` | `gfm` | +| `xhtml` | `html` | From 38e93d6d8481765eb8f62bedf50fd8e0b470aa27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:30:39 +0100 Subject: [PATCH 101/116] Use CR escape in string_cr_escape reject fixture --- SPEC_TODO.md | 44 +++++++++++++++++ build.zig | 49 +++++++++++++++++++ test/conformance/accept/inline_escape.hdoc | 3 ++ test/conformance/accept/inline_escape.yaml | 19 +++++++ .../accept/title_header_redundant.hdoc | 5 ++ .../accept/title_header_redundant.yaml | 24 +++++++++ .../reject/container_children.diag | 0 .../reject/container_children.hdoc | 11 +++++ test/conformance/reject/heading_sequence.diag | 3 ++ test/conformance/reject/heading_sequence.hdoc | 5 ++ .../reject/inline_identifier_dash.diag | 0 .../reject/inline_identifier_dash.hdoc | 3 ++ test/conformance/reject/nested_top_level.diag | 0 test/conformance/reject/nested_top_level.hdoc | 5 ++ test/conformance/reject/ref_in_heading.diag | 0 test/conformance/reject/ref_in_heading.hdoc | 5 ++ test/conformance/reject/string_cr_escape.diag | 0 test/conformance/reject/string_cr_escape.hdoc | 3 ++ .../conformance/reject/time_relative_fmt.diag | 0 .../conformance/reject/time_relative_fmt.hdoc | 3 ++ 20 files changed, 182 insertions(+) create mode 100644 test/conformance/accept/inline_escape.hdoc create mode 100644 test/conformance/accept/inline_escape.yaml create mode 100644 test/conformance/accept/title_header_redundant.hdoc create mode 100644 test/conformance/accept/title_header_redundant.yaml create mode 100644 test/conformance/reject/container_children.diag create 
mode 100644 test/conformance/reject/container_children.hdoc create mode 100644 test/conformance/reject/heading_sequence.diag create mode 100644 test/conformance/reject/heading_sequence.hdoc create mode 100644 test/conformance/reject/inline_identifier_dash.diag create mode 100644 test/conformance/reject/inline_identifier_dash.hdoc create mode 100644 test/conformance/reject/nested_top_level.diag create mode 100644 test/conformance/reject/nested_top_level.hdoc create mode 100644 test/conformance/reject/ref_in_heading.diag create mode 100644 test/conformance/reject/ref_in_heading.hdoc create mode 100644 test/conformance/reject/string_cr_escape.diag create mode 100644 test/conformance/reject/string_cr_escape.hdoc create mode 100644 test/conformance/reject/time_relative_fmt.diag create mode 100644 test/conformance/reject/time_relative_fmt.hdoc diff --git a/SPEC_TODO.md b/SPEC_TODO.md index c12f0be..77efb91 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,2 +1,46 @@ # Spec compliance TODOs +- Inline escape tokens remain undecoded in inline text construction. + - Expect: `\\`, `\{`, and `\}` tokens produced in inline bodies decode to literal `\`, `{`, and `}` during semantic processing (§6.1). + - Actual: Inline text spans keep the backslash sequences verbatim, so escapes render incorrectly. + - Proposed: Decode these three escape tokens before span merging while preserving locations. + +- String literal control character policy is incomplete. + - Expect: Resolved string values must reject control characters except LF and CR when immediately followed by LF (§7.1). + - Actual: `\r` escapes decode to lone CR codepoints without diagnostics, so invalid CR characters survive into resolved text. + - Proposed: Reject `\r` unless it participates in a CRLF sequence after escape decoding. + +- Identifier parsing permits extra characters. 
+ - Expect: Node names use identifier characters limited to letters, digits, and `_`, with inline names beginning with `\`; attribute keys are hyphen-separated segments of the same identifier characters (§5.1, §4.3). + - Actual: Identifiers allow `-` and `\` in any position, so node and attribute names outside the grammar are accepted. + - Proposed: Align identifier character checks with the grammar and treat hyphens only as separators for attribute keys. + +- Heading sequencing rules are missing. + - Expect: `h2` must follow an `h1`, and `h3` must follow an `h2` without intervening `h1` (§9.2.3). + - Actual: Heading indices increment without validating the required ordering. + - Proposed: Track the last seen heading levels and emit errors when a heading appears without its required parent level. + +- Title/header interplay lacks the required comparison. + - Expect: When both `hdoc(title=...)` and `title { ... }` are present, their plaintext forms are compared and a redundancy hint is emitted if they match (§8.1). + - Actual: The block title is used and the header title is ignored without any comparison or diagnostics. + - Proposed: Compare the plaintext values, warn when redundant, and keep emitting hints when neither title form is present. + +- Top-level-only elements are allowed to nest. + - Expect: `h1`/`h2`/`h3`, `toc`, and `footnotes` may only appear as top-level blocks (§9.2). + - Actual: Nested blocks (e.g., `note { h1 ... }`) accept these nodes, so top-level elements render within other containers. + - Proposed: Reject top-level elements when they appear in nested block lists. + +- Containers do not restrict children to general text blocks. + - Expect: `li`, `td`, and admonition blocks contain general text block elements (with shorthand promotion) and may be empty for admonitions (§9.1.3, §9.3.2, §9.4.5). + - Actual: Block lists in these containers accept any block type (including headings and footnotes) and treat empty lists as errors. 
+ - Proposed: Limit children to the allowed general text blocks and permit empty admonition bodies. + +- `\time` accepts an unsupported `fmt`. + - Expect: `\time(fmt=...)` supports only `iso`, `short`, `long`, and `rough` (§10.3.4). + - Actual: The `fmt` enum includes `relative`, so `fmt="relative"` is accepted. + - Proposed: Remove the unsupported variant and reject unknown `fmt` values. + +- `\ref` is permitted inside headings and titles. + - Expect: `\ref` must not appear inside `h1`/`h2`/`h3` or `title` bodies (§9.5.6). + - Actual: Inline translation allows references in these contexts without diagnostics. + - Proposed: Detect and reject `\ref` nodes while processing heading and title bodies. diff --git a/build.zig b/build.zig index eb0d9ba..8d8607f 100644 --- a/build.zig +++ b/build.zig @@ -10,6 +10,21 @@ const snapshot_files: []const []const u8 = &.{ "test/snapshot/footnotes.hdoc", }; +const conformance_accept_files: []const []const u8 = &.{ + "test/conformance/accept/inline_escape.hdoc", + "test/conformance/accept/title_header_redundant.hdoc", +}; + +const conformance_reject_files: []const []const u8 = &.{ + "test/conformance/reject/string_cr_escape.hdoc", + "test/conformance/reject/inline_identifier_dash.hdoc", + "test/conformance/reject/heading_sequence.hdoc", + "test/conformance/reject/nested_top_level.hdoc", + "test/conformance/reject/container_children.hdoc", + "test/conformance/reject/time_relative_fmt.hdoc", + "test/conformance/reject/ref_in_heading.hdoc", +}; + pub fn build(b: *std.Build) void { // Options: const target = b.standardTargetOptions(.{}); @@ -74,6 +89,40 @@ pub fn build(b: *std.Build) void { } } + // Conformance snapshots: accept cases (YAML only): + for (conformance_accept_files) |path| { + std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); + const yaml_file = b.fmt("{s}.yaml", .{path[0 .. 
path.len - 5]}); + + const test_run = b.addRunArtifact(exe); + test_run.addArgs(&.{ "--format", "yaml" }); + test_run.addFileArg(b.path(path)); + const generated_file = test_run.captureStdOut(); + + const compare_run = b.addRunArtifact(snapshot_diff); + compare_run.addFileArg(b.path(yaml_file)); + compare_run.addFileArg(generated_file); + + test_step.dependOn(&compare_run.step); + } + + // Conformance snapshots: reject cases (diagnostics on stderr, expect exit code 1): + for (conformance_reject_files) |path| { + std.debug.assert(std.mem.endsWith(u8, path, ".hdoc")); + const diag_file = b.fmt("{s}.diag", .{path[0 .. path.len - 5]}); + + const test_run = b.addRunArtifact(exe); + test_run.addFileArg(b.path(path)); + test_run.expectExitCode(1); + const generated_diag = test_run.captureStdErr(); + + const compare_run = b.addRunArtifact(snapshot_diff); + compare_run.addFileArg(b.path(diag_file)); + compare_run.addFileArg(generated_diag); + + test_step.dependOn(&compare_run.step); + } + // Unit tests: const exe_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/test/conformance/accept/inline_escape.hdoc b/test/conformance/accept/inline_escape.hdoc new file mode 100644 index 0000000..5988ae8 --- /dev/null +++ b/test/conformance/accept/inline_escape.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +p { backslash \\ brace-open \{ brace-close \} } diff --git a/test/conformance/accept/inline_escape.yaml b/test/conformance/accept/inline_escape.yaml new file mode 100644 index 0000000..c222dd3 --- /dev/null +++ b/test/conformance/accept/inline_escape.yaml @@ -0,0 +1,19 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: null + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "backslash \\\\ brace-open \\{ brace-close \\}" + ids: + - null diff --git a/test/conformance/accept/title_header_redundant.hdoc b/test/conformance/accept/title_header_redundant.hdoc new 
file mode 100644 index 0000000..acd0c0a --- /dev/null +++ b/test/conformance/accept/title_header_redundant.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en", title="Header Title"); + +title { Header Title } + +p "body" diff --git a/test/conformance/accept/title_header_redundant.yaml b/test/conformance/accept/title_header_redundant.yaml new file mode 100644 index 0000000..5e82b26 --- /dev/null +++ b/test/conformance/accept/title_header_redundant.yaml @@ -0,0 +1,24 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Header Title" + full: + lang: "" + content: + - [] "Header Title" + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "body" + ids: + - null diff --git a/test/conformance/reject/container_children.diag b/test/conformance/reject/container_children.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/container_children.hdoc b/test/conformance/reject/container_children.hdoc new file mode 100644 index 0000000..71ce4ad --- /dev/null +++ b/test/conformance/reject/container_children.hdoc @@ -0,0 +1,11 @@ +hdoc(version="2.0", lang="en"); + +ul { + li { + h1 "Heading child" + } +} + +note { + h1 "Inside note" +} diff --git a/test/conformance/reject/heading_sequence.diag b/test/conformance/reject/heading_sequence.diag new file mode 100644 index 0000000..31568cd --- /dev/null +++ b/test/conformance/reject/heading_sequence.diag @@ -0,0 +1,3 @@ +test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. +test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. +test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. 
diff --git a/test/conformance/reject/heading_sequence.hdoc b/test/conformance/reject/heading_sequence.hdoc new file mode 100644 index 0000000..c8c9b43 --- /dev/null +++ b/test/conformance/reject/heading_sequence.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +h3 "Third level first" +h1 "Top" +h3 "Third without second" diff --git a/test/conformance/reject/inline_identifier_dash.diag b/test/conformance/reject/inline_identifier_dash.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/inline_identifier_dash.hdoc b/test/conformance/reject/inline_identifier_dash.hdoc new file mode 100644 index 0000000..1948b61 --- /dev/null +++ b/test/conformance/reject/inline_identifier_dash.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +p { \bad-name "ok" } diff --git a/test/conformance/reject/nested_top_level.diag b/test/conformance/reject/nested_top_level.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/nested_top_level.hdoc b/test/conformance/reject/nested_top_level.hdoc new file mode 100644 index 0000000..b418705 --- /dev/null +++ b/test/conformance/reject/nested_top_level.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +note { + h1 "Nested heading" +} diff --git a/test/conformance/reject/ref_in_heading.diag b/test/conformance/reject/ref_in_heading.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/ref_in_heading.hdoc b/test/conformance/reject/ref_in_heading.hdoc new file mode 100644 index 0000000..fcd2ace --- /dev/null +++ b/test/conformance/reject/ref_in_heading.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +p(id="target") "Target" + +h1 { Heading \ref(ref="target") "see"; } diff --git a/test/conformance/reject/string_cr_escape.diag b/test/conformance/reject/string_cr_escape.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/string_cr_escape.hdoc b/test/conformance/reject/string_cr_escape.hdoc new file 
mode 100644 index 0000000..204b3de --- /dev/null +++ b/test/conformance/reject/string_cr_escape.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +p "line\rline" diff --git a/test/conformance/reject/time_relative_fmt.diag b/test/conformance/reject/time_relative_fmt.diag new file mode 100644 index 0000000..e69de29 diff --git a/test/conformance/reject/time_relative_fmt.hdoc b/test/conformance/reject/time_relative_fmt.hdoc new file mode 100644 index 0000000..767ed26 --- /dev/null +++ b/test/conformance/reject/time_relative_fmt.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en", tz="+00:00"); + +p { \time(fmt="relative") "12:00:00Z" } From bde5027ffae4c557a89703aad58aebec3e82f8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:37:51 +0100 Subject: [PATCH 102/116] Removes the hint about the now new specification.md file --- docs/AGENTS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 238ae4e..43026ff 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -3,7 +3,6 @@ ## General - `specification.md` is the current "status quo" specifiction. Do not edit unless explicitly asked. -- `docs/specification-proper-draft.md` is the new "shiny" specification. This is the one you should edit if only asked about the "specification". - This file contains a chapter `0. Chapter Status`. This chapter marks each other chapter of the file as FROZEN, DONE, DRAFT or MISSING - If a chapter is marked FROZEN, you are not permitted to change anything in it. - If a chapter is marked DONE, you are only permitted to perform language changes, but not semantic changes. 
From 67d4426f3fdaca524e5d09e7638ef169bbcc5e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sat, 3 Jan 2026 22:48:26 +0100 Subject: [PATCH 103/116] Clarify inline group brace text --- docs/TODO.md | 21 +-------------------- docs/specification.md | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 556c7dc..c9001b1 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -5,30 +5,11 @@ - Assign semantics to node types, paragraph kinds, ... - Specify "syntax" proper - Add links to RFCs where possible -- Verbatim-body to text conversion is under-specified. You define verbatim syntax (: with | lines) and later say verbatim bodies become inline text spans (§8.2), but you don’t precisely define how piped lines join (LF vs preserving original CRLF, whether there is a trailing newline, whether a final EOF line_terminator contributes a newline, etc.). Different implementations may diverge. -- Inline “groups” exist syntactically but are not given explicit semantics. The grammar includes inline_group ::= "{" , inline_content , "}" and §5.4 makes brace balancing a core rule, but §8.2 doesn’t explicitly state that groups are semantically transparent (flattened) versus affecting whitespace normalization boundaries or span merging. - Span attribute semantics are referenced but not fully defined. §8.2 introduces spans with an “attribute set (e.g. emphasis/monospace/link…)” but the spec never fully defines the canonical attribute keys, nesting behavior (e.g., \em inside \mono), or how lang overrides interact at span level. That’s a major interoperability risk because renderers may differ even if parsers agree. - Refine that `hdoc(title)` is metadata while `title{}` is rendered rich text -- Refine `img(path)` only using forward slash. - - Proposal: Add to §9.3.5: - - "path MUST use forward slashes (/) as path separators, regardless of host OS." 
- - "path MUST be relative; absolute paths and URI schemes (e.g., http://) MUST be rejected." - - "Path resolution is relative to the directory containing the HyperDoc source file." - - "Path traversal outside the source directory (e.g., ../../etc/passwd) SHOULD be rejected or restricted by implementations." -- Proposal: Add to §9.2.4: - - "Multiple toc elements MAY appear in a document; each MUST render the same heading structure but MAY appear at different locations." - - "If depth differs between instances, each TOC renders independently according to its own depth attribute." -- Add to §9.2.5: - - "Multiple footnotes elements partition footnote rendering; each instance collects only footnotes/citations accumulated since the previous dump (or document start)." -- Proposal: Add to §4: - - "Implementations MUST support nesting depths of at least 32 levels." - - "Implementations MAY reject documents exceeding this depth with a diagnostic." - - "Nesting depth is measured as the maximum distance from the document root to any leaf node." - Ambiguity of Inline Unicode: - Finding: String literals ("...") support \u{...} escapes (§7.2.1). Inline text streams (bodies of p, h1) do not (§6.1 only lists \\, \{, \}). - Issue: Authors cannot enter invisible characters (like Non-Breaking Space U+00A0 or Zero Width Space U+200B) into a paragraph without pasting the raw invisible character, which is brittle and invisible in editors. -- Recommendation: Add explicit sequencing in §7 stating: "Escape decoding MUST occur during semantic validation, before inline text construction (§8.2) for inline-list bodies, and before attribute validation for attribute values." -- Recommendation: Add to §9.2.1: "If the document contains any \date, \time, or \datetime elements with fmt values other than iso, and hdoc(lang) is not specified, implementations SHOULD emit a diagnostic." - Issue: "Lexical" implies only regex-level matching. It does not strictly forbid 2023-02-31. 
For a strict format, "Semantic" validity (Gregorian correctness) should be enforced to prevent invalid metadata. ## Potential Future Features @@ -120,4 +101,4 @@ quote { - `include(path="...")` is rejected for unbounded document content growth - `code` is just `\mono(syntax="…")` - `details/summary` is just HTML with dynamic changing page layout, ever tried printing this? -- `\math`, `equation{…}` have too high implementation complexity and have high requirements on fonts, font renderers and layout engines. \ No newline at end of file +- `\math`, `equation{…}` have too high implementation complexity and have high requirements on fonts, font renderers and layout engines. diff --git a/docs/specification.md b/docs/specification.md index fcee7cd..7ebfe42 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -206,6 +206,12 @@ The grammar is intentionally ambiguous; a deterministic external rule selects a - Attribute values are **string literals** (see §5.5). - Attribute keys are identifiers with hyphen-separated segments (see §5.1 and §10.1). +### 4.4 Nesting depth (syntax) + +- Implementations **MUST** support nesting depths of at least 32 levels. +- Implementations **MAY** reject documents that exceed this depth with a diagnostic. +- Nesting depth is measured as the maximum distance from the document root to any leaf node. + ## 5. Grammar and additional syntax rules ### 5.1 Grammar (EBNF) @@ -351,6 +357,8 @@ Tooling that aims to preserve author intent **SHOULD** preserve whether braces w Escape sequences are recognized only in string literals (node bodies of the `"..."` form and attribute values). No other syntax performs string-literal escape decoding. +Escape decoding **MUST** occur during semantic validation, before inline text construction (§8.2) for inline-list bodies, and before attribute validation for attribute values. + ### 7.1 Control character policy (semantic) - A semantic validator **MAY** reject TAB (U+0009) in source text. 
@@ -432,6 +440,8 @@ Semantic processing **MUST** construct inline text as a sequence of **spans**, w - a Unicode string, and - an attribute set (e.g. emphasis/monospace/link, language overrides, etc.). +Inline groups are structural only: when converting the inline tree into spans, implementations **MUST** flatten `inline_group` boundaries. An `inline_group` **MUST NOT** create a span boundary and **MUST NOT** affect whitespace normalization, but it **MUST** contribute the literal `{` and `}` characters to the inline text at its start and end. + Processing rules: 1. **Parse → tree:** Parsing preserves `ws` and yields an inline tree (text items, inline nodes, and inline groups). @@ -586,6 +596,9 @@ The elements in this chapter **MUST** appear only as top-level block elements (d - `date` (optional): datetime lexical format (§10.2.3) - `tz` (optional): default timezone for time/datetime values (§10.2) +Diagnostics: +- If the document contains any `\date`, `\time`, or `\datetime` elements with `fmt` values other than `iso`, and `hdoc(lang)` is not specified, implementations **SHOULD** emit a diagnostic. + #### 9.2.2 `title` (document title) - **Role:** document-level display title @@ -624,6 +637,8 @@ Heading structure and numbering: Semantic constraints: - `toc` **MUST** be a top-level block element (a direct child of the document). +- Multiple `toc` elements **MAY** appear in a document; each **MUST** render the same heading structure but **MAY** appear at different locations. +- If `depth` differs between instances, each `toc` **MUST** render independently according to its own `depth` attribute. #### 9.2.5 Footnote dump: `footnotes` @@ -635,6 +650,7 @@ Semantic constraints: Semantics: +- Multiple `footnotes` elements **MAY** appear in a document. - `footnotes;` collects and renders all footnotes of all kinds accumulated since the previous `footnotes(...)` node (or since start of document if none appeared yet). 
- `footnotes(kind="footnote");` collects and renders only `kind="footnote"` entries accumulated since the previous `footnotes(...)` node. - `footnotes(kind="citation");` collects and renders only `kind="citation"` entries accumulated since the previous `footnotes(...)` node. @@ -686,6 +702,13 @@ Only an empty body (`;`) is not "inline text". - `lang` (optional) - `id` (optional; top-level only) +Path semantics: + +- `path` **MUST** use forward slashes (`/`) as path separators, regardless of host operating system. +- `path` **MUST** be relative; absolute paths and URI schemes **MUST** be rejected. +- Path resolution is relative to the directory containing the HyperDoc source file. +- Path traversal outside the source directory (e.g., `../../etc/passwd`) **SHOULD** be rejected or restricted by implementations. + #### 9.3.6 Preformatted: `pre` - **Body:** inline text From 3a9ece8ac671d90bf94da88d956c45f9e81c2d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 00:52:15 +0100 Subject: [PATCH 104/116] Remove llvm requirement for hyperdoc exe --- AGENTS.md | 4 +- SPEC_TODO.md | 40 --- src/hyperdoc.zig | 335 ++++++++++++++---- src/render/html5.zig | 2 +- src/testsuite.zig | 18 +- test/conformance/accept/inline_escape.yaml | 2 +- .../reject/container_children.diag | 4 + test/conformance/reject/heading_sequence.diag | 9 +- .../reject/inline_identifier_dash.diag | 2 + test/conformance/reject/nested_top_level.diag | 2 + test/conformance/reject/ref_in_heading.diag | 2 + test/conformance/reject/string_cr_escape.diag | 2 + .../conformance/reject/time_relative_fmt.diag | 2 + 13 files changed, 304 insertions(+), 120 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5575ad0..f10cdca 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,6 +14,8 @@ - Treat `docs/specification.md` as the authoritative source of behavior; examples may be outdated or incorrect. - If the spec is unclear or conflicts with code/tests, ask before changing behavior. 
- Do not implement "just make it work" fallbacks that alter semantics to satisfy examples. +- Diagnostics must not store dynamic strings (e.g., slices to parsed source). Keep diagnostic payloads POD/small and avoid holding arena-backed text. +- Do not hide crashes by removing safety checks or switching off DebugAllocator; fix the root cause instead. A signal 6 from DebugAllocator indicates memory corruption or a similar misuse. ## Zig Programming Style @@ -25,4 +27,4 @@ - If you add a `hdoc` file to `test/snapshot`, also: - Generate the corresponding html and yaml file - Add the file inside build.zig to the snapshot_files global -- If you change behaviour, the snapshot tests will fail. Validate the failure against your expectations and see if you broke something unexpected. \ No newline at end of file +- If you change behaviour, the snapshot tests will fail. Validate the failure against your expectations and see if you broke something unexpected. diff --git a/SPEC_TODO.md b/SPEC_TODO.md index 77efb91..1e879a9 100644 --- a/SPEC_TODO.md +++ b/SPEC_TODO.md @@ -1,46 +1,6 @@ # Spec compliance TODOs -- Inline escape tokens remain undecoded in inline text construction. - - Expect: `\\`, `\{`, and `\}` tokens produced in inline bodies decode to literal `\`, `{`, and `}` during semantic processing (§6.1). - - Actual: Inline text spans keep the backslash sequences verbatim, so escapes render incorrectly. - - Proposed: Decode these three escape tokens before span merging while preserving locations. - -- String literal control character policy is incomplete. - - Expect: Resolved string values must reject control characters except LF and CR when immediately followed by LF (§7.1). - - Actual: `\r` escapes decode to lone CR codepoints without diagnostics, so invalid CR characters survive into resolved text. - - Proposed: Reject `\r` unless it participates in a CRLF sequence after escape decoding. - -- Identifier parsing permits extra characters. 
- - Expect: Node names use identifier characters limited to letters, digits, and `_`, with inline names beginning with `\`; attribute keys are hyphen-separated segments of the same identifier characters (§5.1, §4.3). - - Actual: Identifiers allow `-` and `\` in any position, so node and attribute names outside the grammar are accepted. - - Proposed: Align identifier character checks with the grammar and treat hyphens only as separators for attribute keys. - -- Heading sequencing rules are missing. - - Expect: `h2` must follow an `h1`, and `h3` must follow an `h2` without intervening `h1` (§9.2.3). - - Actual: Heading indices increment without validating the required ordering. - - Proposed: Track the last seen heading levels and emit errors when a heading appears without its required parent level. - - Title/header interplay lacks the required comparison. - Expect: When both `hdoc(title=...)` and `title { ... }` are present, their plaintext forms are compared and a redundancy hint is emitted if they match (§8.1). - Actual: The block title is used and the header title is ignored without any comparison or diagnostics. - Proposed: Compare the plaintext values, warn when redundant, and keep emitting hints when neither title form is present. - -- Top-level-only elements are allowed to nest. - - Expect: `h1`/`h2`/`h3`, `toc`, and `footnotes` may only appear as top-level blocks (§9.2). - - Actual: Nested blocks (e.g., `note { h1 ... }`) accept these nodes, so top-level elements render within other containers. - - Proposed: Reject top-level elements when they appear in nested block lists. - -- Containers do not restrict children to general text blocks. - - Expect: `li`, `td`, and admonition blocks contain general text block elements (with shorthand promotion) and may be empty for admonitions (§9.1.3, §9.3.2, §9.4.5). - - Actual: Block lists in these containers accept any block type (including headings and footnotes) and treat empty lists as errors. 
- - Proposed: Limit children to the allowed general text blocks and permit empty admonition bodies. - -- `\time` accepts an unsupported `fmt`. - - Expect: `\time(fmt=...)` supports only `iso`, `short`, `long`, and `rough` (§10.3.4). - - Actual: The `fmt` enum includes `relative`, so `fmt="relative"` is accepted. - - Proposed: Remove the unsupported variant and reject unknown `fmt` values. - -- `\ref` is permitted inside headings and titles. - - Expect: `\ref` must not appear inside `h1`/`h2`/`h3` or `title` bodies (§9.5.6). - - Actual: Inline translation allows references in these contexts without diagnostics. - - Proposed: Detect and reject `\ref` nodes while processing heading and title bodies. diff --git a/src/hyperdoc.zig b/src/hyperdoc.zig index 42919c7..1396b77 100644 --- a/src/hyperdoc.zig +++ b/src/hyperdoc.zig @@ -379,7 +379,6 @@ pub const Time = struct { long, short, rough, - relative, iso, }; @@ -584,13 +583,6 @@ pub fn parse( }; while (true) { - errdefer |err| { - std.log.debug("error at examples/demo.hdoc:{f}: {t}", .{ - parser.make_diagnostic_location(parser.offset), - err, - }); - } - const node = parser.accept_node(.top_level) catch |err| switch (err) { error.OutOfMemory => |e| return @as(error{OutOfMemory}!Document, e), // TODO: What the fuck? Bug report! 
@@ -1028,7 +1020,7 @@ pub const SemanticAnalyzer = struct { else => unreachable, }), .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space, .heading), }; return .{ heading, attrs.id }; @@ -1041,7 +1033,7 @@ pub const SemanticAnalyzer = struct { return .{ .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space, .title), }; } @@ -1053,7 +1045,7 @@ pub const SemanticAnalyzer = struct { const heading: Block.Paragraph = .{ .lang = attrs.lang, - .content = try sema.translate_inline(node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .one_space, .normal), }; return .{ heading, attrs.id }; @@ -1076,7 +1068,11 @@ pub const SemanticAnalyzer = struct { else => unreachable, }, .lang = attrs.lang, - .content = try sema.translate_block_list(node, .text_to_p), + .content = try sema.translate_block_list(node, .{ + .upgrade = .text_to_p, + .allow_empty = true, + .general_text_only = true, + }), }; return .{ admonition, attrs.id }; @@ -1161,7 +1157,7 @@ pub const SemanticAnalyzer = struct { .lang = attrs.lang, .alt = alt, .path = path, - .content = try sema.translate_inline(node, .allow_empty, .one_space), + .content = try sema.translate_inline(node, .allow_empty, .one_space, .normal), }; return .{ image, attrs.id }; @@ -1177,7 +1173,7 @@ pub const SemanticAnalyzer = struct { const preformatted: Block.Preformatted = .{ .lang = attrs.lang, .syntax = attrs.syntax, - .content = try sema.translate_inline(node, .emit_diagnostic, .keep_space), + .content = try sema.translate_inline(node, .emit_diagnostic, .keep_space, .normal), }; return .{ preformatted, attrs.id }; @@ -1342,7 +1338,7 @@ pub const SemanticAnalyzer = struct { rows.appendAssumeCapacity(.{ .group = .{ .lang = row_attrs.lang, - .content = try 
sema.translate_inline(child_node, .emit_diagnostic, .one_space), + .content = try sema.translate_inline(child_node, .emit_diagnostic, .one_space, .normal), }, }); }, @@ -1457,7 +1453,10 @@ pub const SemanticAnalyzer = struct { return .{ .lang = attrs.lang, .colspan = colspan, - .content = try sema.translate_block_list(node, .text_to_p), + .content = try sema.translate_block_list(node, .{ + .upgrade = .text_to_p, + .general_text_only = true, + }), }; } @@ -1473,13 +1472,48 @@ pub const SemanticAnalyzer = struct { return .{ .lang = attrs.lang, - .content = try sema.translate_block_list(node, .text_to_p), + .content = try sema.translate_block_list(node, .{ + .upgrade = .text_to_p, + .general_text_only = true, + }), }; } const BlockTextUpgrade = enum { no_upgrade, text_to_p }; + const BlockListOptions = struct { + upgrade: BlockTextUpgrade, + allow_empty: bool = false, + general_text_only: bool = false, + }; + + fn is_top_level_only_block(node_type: Parser.NodeType) bool { + return switch (node_type) { + .h1, .h2, .h3, .toc, .footnotes => true, + else => false, + }; + } + + fn is_general_text_block(node_type: Parser.NodeType) bool { + return switch (node_type) { + .p, + .note, + .warning, + .danger, + .tip, + .quote, + .spoiler, + .ul, + .ol, + .img, + .pre, + .table, + => true, + + else => false, + }; + } - fn translate_block_list(sema: *SemanticAnalyzer, node: Parser.Node, upgrade: BlockTextUpgrade) error{ Unimplemented, InvalidNodeType, OutOfMemory, BadAttributes }![]Block { + fn translate_block_list(sema: *SemanticAnalyzer, node: Parser.Node, options: BlockListOptions) error{ Unimplemented, InvalidNodeType, OutOfMemory, BadAttributes }![]Block { switch (node.body) { .list => |child_nodes| { var blocks: std.ArrayList(Block) = .empty; @@ -1488,7 +1522,12 @@ pub const SemanticAnalyzer = struct { try blocks.ensureTotalCapacityPrecise(sema.arena, child_nodes.len); for (child_nodes) |child_node| { - if (child_node.type == .toc) { + if 
(is_top_level_only_block(child_node.type)) { + try sema.emit_diagnostic(.illegal_child_item, child_node.location); + continue; + } + + if (options.general_text_only and !is_general_text_block(child_node.type)) { try sema.emit_diagnostic(.illegal_child_item, child_node.location); continue; } @@ -1500,16 +1539,26 @@ pub const SemanticAnalyzer = struct { blocks.appendAssumeCapacity(block); } + if (blocks.items.len == 0 and !options.allow_empty) { + try sema.emit_diagnostic(.list_body_required, node.location); + } + return try blocks.toOwnedSlice(sema.arena); }, - .empty, .string, .verbatim, .text_span => switch (upgrade) { + .empty, .string, .verbatim, .text_span => switch (options.upgrade) { .no_upgrade => { + if (options.allow_empty and node.body == .empty) + return &.{}; + try sema.emit_diagnostic(.{ .block_list_required = .{ .type = node.type } }, node.location); return &.{}; }, .text_to_p => { - const spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + if (options.allow_empty and node.body == .empty) + return &.{}; + + const spans = try sema.translate_inline(node, .emit_diagnostic, .one_space, .normal); const blocks = try sema.arena.alloc(Block, 1); blocks[0] = .{ @@ -1526,11 +1575,13 @@ pub const SemanticAnalyzer = struct { } /// Translates a node into a sequence of inline spans. 
- fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling, whitespace_handling: Whitespace) error{ OutOfMemory, BadAttributes }![]Span { + const InlineContext = enum { normal, heading, title }; + + fn translate_inline(sema: *SemanticAnalyzer, node: Parser.Node, empty_handling: EmptyHandling, whitespace_handling: Whitespace, context: InlineContext) error{ OutOfMemory, BadAttributes }![]Span { var spans: std.ArrayList(Span) = .empty; defer spans.deinit(sema.arena); - try sema.translate_inline_body(&spans, node.body, .{}, empty_handling); + try sema.translate_inline_body(&spans, node.body, .{}, empty_handling, context); return try sema.compact_spans(spans.items, whitespace_handling); } @@ -1723,11 +1774,11 @@ pub const SemanticAnalyzer = struct { return new; } - fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes) !void { + fn translate_inline_node(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), node: Parser.Node, attribs: Span.Attributes, context: InlineContext) !void { switch (node.type) { .unknown_inline, .text, - => try sema.translate_inline_body(spans, node.body, attribs, .emit_diagnostic), + => try sema.translate_inline_body(spans, node.body, attribs, .emit_diagnostic, context), .@"\\em" => { const props = try sema.get_attributes(node, struct { @@ -1737,7 +1788,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .em = true, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\strike" => { @@ -1748,7 +1799,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .strike = true, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\sub" => { @@ -1759,7 +1810,7 @@ pub const SemanticAnalyzer = struct { try 
sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .subscript, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\sup" => { @@ -1770,7 +1821,7 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .position = .superscript, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\link" => { @@ -1782,10 +1833,15 @@ pub const SemanticAnalyzer = struct { try sema.translate_inline_body(spans, node.body, try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang, .link = .{ .uri = props.uri }, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\ref" => { + if (context == .heading or context == .title) { + try sema.emit_diagnostic(.{ .inline_not_allowed = .{ .node_type = node.type } }, node.location); + return; + } + const props = try sema.get_attributes(node, struct { lang: LanguageTag = .inherit, ref: Reference, @@ -1812,7 +1868,7 @@ pub const SemanticAnalyzer = struct { .location = node.location, }); }, - else => try sema.translate_inline_body(spans, node.body, link_attribs, .emit_diagnostic), + else => try sema.translate_inline_body(spans, node.body, link_attribs, .emit_diagnostic, context), } }, @@ -1825,7 +1881,7 @@ pub const SemanticAnalyzer = struct { .mono = true, .lang = props.lang, .syntax = props.syntax, - }), .emit_diagnostic); + }), .emit_diagnostic, context); }, .@"\\date", @@ -1852,7 +1908,7 @@ pub const SemanticAnalyzer = struct { break :blk; } - const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space); + const content_spans = try sema.translate_inline(node, .emit_diagnostic, .one_space, context); // Convert the content_spans into a "rendered string". 
const content_text = (sema.render_spans_to_plaintext(content_spans, .reject_date_time) catch |err| switch (err) { @@ -1933,7 +1989,7 @@ pub const SemanticAnalyzer = struct { defer content_spans.deinit(sema.arena); const content_attribs = try sema.derive_attribute(node.location, attribs, .{ .lang = props.lang }); - try sema.translate_inline_body(&content_spans, node.body, content_attribs, .emit_diagnostic); + try sema.translate_inline_body(&content_spans, node.body, content_attribs, .emit_diagnostic, context); const compacted = try sema.compact_spans(content_spans.items, .one_space); if (compacted.len == 0) { @@ -2026,7 +2082,7 @@ pub const SemanticAnalyzer = struct { else if (std.meta.stringToEnum(Format, format_str)) |format| format else blk: { - try sema.emit_diagnostic(.{ .invalid_date_time_fmt = .{ .fmt = format_str } }, get_attribute_location(node, "fmt", .value) orelse node.location); + try sema.emit_diagnostic(.invalid_date_time_fmt, get_attribute_location(node, "fmt", .value) orelse node.location); break :blk .default; }; @@ -2195,7 +2251,14 @@ pub const SemanticAnalyzer = struct { allow_empty, emit_diagnostic, }; - fn translate_inline_body(sema: *SemanticAnalyzer, spans: *std.ArrayList(Span), body: Parser.Node.Body, attribs: Span.Attributes, empty_handling: EmptyHandling) error{ OutOfMemory, BadAttributes }!void { + fn translate_inline_body( + sema: *SemanticAnalyzer, + spans: *std.ArrayList(Span), + body: Parser.Node.Body, + attribs: Span.Attributes, + empty_handling: EmptyHandling, + context: InlineContext, + ) error{ OutOfMemory, BadAttributes }!void { switch (body) { .empty => |location| switch (empty_handling) { .allow_empty => {}, @@ -2255,13 +2318,22 @@ pub const SemanticAnalyzer = struct { .list => |list| { for (list) |child_node| { - try sema.translate_inline_node(spans, child_node, attribs); + try sema.translate_inline_node(spans, child_node, attribs, context); } }, .text_span => |text_span| { + const decoded_text = if (text_span.text.len == 2 
and text_span.text[0] == '\\') blk: { + switch (text_span.text[1]) { + '{' => break :blk "{", + '}' => break :blk "}", + '\\' => break :blk "\\", + else => break :blk text_span.text, + } + } else text_span.text; + try spans.append(sema.arena, .{ - .content = .{ .text = text_span.text }, + .content = .{ .text = decoded_text }, .attribs = attribs, .location = text_span.location, }); @@ -2683,6 +2755,15 @@ pub const SemanticAnalyzer = struct { fn compute_next_heading(sema: *SemanticAnalyzer, node: Parser.Node, level: Block.Heading.Level) !Block.Heading.Index { const index = @intFromEnum(level); + const missing_parent: ?Block.Heading.Level = switch (level) { + .h1 => null, + .h2 => if (sema.heading_counters[0] == 0) .h1 else null, + .h3 => if (sema.heading_counters[1] == 0) .h2 else null, + }; + if (missing_parent) |missing| { + try sema.emit_diagnostic(.{ .invalid_heading_sequence = .{ .level = level, .missing = missing } }, node.location); + } + sema.heading_counters[index] += 1; if (index > sema.current_heading_level + 1) { @@ -2694,7 +2775,6 @@ pub const SemanticAnalyzer = struct { for (sema.heading_counters[index + 1 ..]) |*val| { val.* = 0; } - _ = node; return switch (level) { .h1 => .{ .h1 = sema.heading_counters[0..1].* }, @@ -2882,6 +2962,19 @@ pub const SemanticAnalyzer = struct { var output = output_buffer.toOwnedSlice(); errdefer output.deinit(sema.arena); + const chars = output.items(.char); + for (chars, 0..) |ch, idx| { + if (ch == std.ascii.control_code.cr) { + const next_is_lf = idx + 1 < chars.len and chars[idx + 1] == std.ascii.control_code.lf; + if (!next_is_lf) { + try sema.emit_diagnostic( + .{ .illegal_character = .{ .codepoint = std.ascii.control_code.cr } }, + output.get(idx).location, + ); + } + } + } + const view = std.unicode.Utf8View.init(output.items(.char)) catch { std.log.err("invalid utf-8 input: \"{f}\"", .{std.zig.fmtString(output.items(.char))}); @panic("String unescape produced invalid UTF-8 sequence. 
This should not be possible."); @@ -2953,7 +3046,7 @@ pub const Parser = struct { return error.EndOfFile; } - const type_ident = parser.accept_identifier() catch |err| switch (err) { + const type_ident = parser.accept_identifier(.node) catch |err| switch (err) { error.UnexpectedEndOfFile => |e| switch (scope_type) { .nested => return e, .top_level => return error.EndOfFile, @@ -2978,7 +3071,7 @@ pub const Parser = struct { while (true) { if (parser.try_accept_char(')')) break; - const attr_name = try parser.accept_identifier(); + const attr_name = try parser.accept_identifier(.attribute); _ = try parser.accept_char('='); const attr_value = try parser.accept_string(); @@ -3333,7 +3426,56 @@ pub const Parser = struct { return error.UnterminatedStringLiteral; } - pub fn accept_identifier(parser: *Parser) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { + pub const IdentifierKind = enum { + node, + attribute, + }; + + fn is_identifier_char(c: u8) bool { + return switch (c) { + 'a'...'z', + 'A'...'Z', + '0'...'9', + '_', + => true, + else => false, + }; + } + + fn is_node_identifier_terminator(c: u8) bool { + return switch (c) { + ' ', + '\t', + '\n', + '\r', + '(', + ')', + '{', + '}', + ';', + ':', + '"', + ',', + => true, + else => false, + }; + } + + fn is_attribute_identifier_terminator(c: u8) bool { + return switch (c) { + ' ', + '\t', + '\n', + '\r', + ')', + '=', + ',', + => true, + else => false, + }; + } + + pub fn accept_identifier(parser: *Parser, kind: IdentifierKind) error{ UnexpectedEndOfFile, InvalidCharacter }!Token { parser.skip_whitespace(); if (parser.at_end()) { @@ -3342,17 +3484,76 @@ pub const Parser = struct { } const start = parser.offset; - const first = parser.code[start]; - if (!is_ident_char(first)) { - emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); - return error.InvalidCharacter; - } + switch (kind) { + .node => { + const first = parser.code[start]; + if (first == 
'\\') { + parser.offset += 1; + if (parser.offset >= parser.code.len or !is_identifier_char(parser.code[parser.offset])) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); + return error.InvalidCharacter; + } + } else if (!is_identifier_char(first)) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); + return error.InvalidCharacter; + } else { + parser.offset += 1; + } - while (parser.offset < parser.code.len) { - const c = parser.code[parser.offset]; - if (!is_ident_char(c)) - break; - parser.offset += 1; + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (is_identifier_char(c)) { + parser.offset += 1; + continue; + } + + if (is_node_identifier_terminator(c)) + break; + + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = c } }, parser.make_diagnostic_location(parser.offset)); + return error.InvalidCharacter; + } + }, + .attribute => { + const first = parser.code[start]; + if (!is_identifier_char(first)) { + emitDiagnostic(parser, .{ .invalid_identifier_start = .{ .char = first } }, parser.make_diagnostic_location(start)); + return error.InvalidCharacter; + } + + parser.offset += 1; + var prev_was_hyphen = false; + + while (parser.offset < parser.code.len) { + const c = parser.code[parser.offset]; + if (is_identifier_char(c)) { + prev_was_hyphen = false; + parser.offset += 1; + continue; + } + + if (c == '-') { + if (prev_was_hyphen) { + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = c } }, parser.make_diagnostic_location(parser.offset)); + return error.InvalidCharacter; + } + prev_was_hyphen = true; + parser.offset += 1; + continue; + } + + if (is_attribute_identifier_terminator(c)) + break; + + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = c } }, parser.make_diagnostic_location(parser.offset)); + return error.InvalidCharacter; 
+ } + + if (prev_was_hyphen) { + emitDiagnostic(parser, .{ .invalid_identifier_character = .{ .char = '-' } }, parser.make_diagnostic_location(parser.offset - 1)); + return error.InvalidCharacter; + } + }, } return parser.slice(start, parser.offset); @@ -3434,19 +3635,6 @@ pub const Parser = struct { }; } - pub fn is_ident_char(c: u8) bool { - return switch (c) { - 'a'...'z', - 'A'...'Z', - '0'...'9', - '_', - '-', - '\\', - => true, - else => false, - }; - } - pub const Token = struct { text: []const u8, location: Location, @@ -3639,6 +3827,7 @@ pub const Diagnostic = struct { pub const UnexpectedEof = struct { context: []const u8, expected_char: ?u8 = null }; pub const UnexpectedCharacter = struct { expected: u8, found: u8 }; pub const InvalidIdentifierStart = struct { char: u8 }; + pub const InvalidIdentifierCharacter = struct { char: u8 }; pub const DuplicateAttribute = struct { name: []const u8 }; pub const NodeAttributeError = struct { type: Parser.NodeType, name: []const u8 }; pub const NodeBodyError = struct { type: Parser.NodeType }; @@ -3647,12 +3836,13 @@ pub const Diagnostic = struct { pub const InvalidBlockError = struct { name: []const u8 }; pub const InlineUsageError = struct { attribute: InlineAttribute }; pub const InlineCombinationError = struct { first: InlineAttribute, second: InlineAttribute }; - pub const DateTimeFormatError = struct { fmt: []const u8 }; pub const InvalidStringEscape = struct { codepoint: u21 }; pub const ForbiddenControlCharacter = struct { codepoint: u21 }; pub const TableShapeError = struct { actual: usize, expected: usize }; pub const ReferenceError = struct { ref: []const u8 }; pub const AutomaticHeading = struct { level: Block.Heading.Level }; + pub const HeadingSequenceError = struct { level: Block.Heading.Level, missing: Block.Heading.Level }; + pub const InlineContextError = struct { node_type: Parser.NodeType }; pub const Code = union(enum) { // errors: @@ -3661,6 +3851,7 @@ pub const Diagnostic = struct { 
unexpected_character: UnexpectedCharacter, unterminated_string, invalid_identifier_start: InvalidIdentifierStart, + invalid_identifier_character: InvalidIdentifierCharacter, unterminated_block_list, missing_hdoc_header: MissingHdocHeader, duplicate_hdoc_header: DuplicateHdocHeader, @@ -3673,10 +3864,11 @@ pub const Diagnostic = struct { invalid_block_type: InvalidBlockError, block_list_required: NodeBodyError, invalid_inline_combination: InlineCombinationError, + inline_not_allowed: InlineContextError, link_not_nestable, invalid_date_time, invalid_date_time_body, - invalid_date_time_fmt: DateTimeFormatError, + invalid_date_time_fmt, missing_timezone, invalid_unicode_string_escape, invalid_string_escape: InvalidStringEscape, @@ -3700,6 +3892,7 @@ pub const Diagnostic = struct { footnote_missing_ref, footnote_missing_body, footnote_kind_on_reference, + invalid_heading_sequence: HeadingSequenceError, // warnings: document_starts_with_bom, @@ -3725,6 +3918,7 @@ pub const Diagnostic = struct { .unexpected_character, .unterminated_string, .invalid_identifier_start, + .invalid_identifier_character, .unterminated_block_list, .missing_hdoc_header, .duplicate_hdoc_header, @@ -3737,6 +3931,7 @@ pub const Diagnostic = struct { .invalid_block_type, .block_list_required, .invalid_inline_combination, + .inline_not_allowed, .link_not_nestable, .invalid_date_time, .invalid_date_time_fmt, @@ -3764,6 +3959,7 @@ pub const Diagnostic = struct { .footnote_missing_ref, .footnote_missing_body, .footnote_kind_on_reference, + .invalid_heading_sequence, => .@"error", .missing_document_language, @@ -3800,6 +3996,7 @@ pub const Diagnostic = struct { .unexpected_character => |ctx| try w.print("Expected '{c}' but found '{c}'.", .{ ctx.expected, ctx.found }), .unterminated_string => try w.writeAll("Unterminated string literal (missing closing \")."), .invalid_identifier_start => |ctx| try w.print("Invalid identifier start character: '{c}'.", .{ctx.char}), + .invalid_identifier_character => |ctx| 
try w.print("Invalid identifier character: '{c}'.", .{ctx.char}), .unterminated_block_list => try w.writeAll("Block list body is unterminated (missing '}' before end of file)."), .missing_hdoc_header => try w.writeAll("Document must start with an 'hdoc' header."), .duplicate_hdoc_header => try w.writeAll("Only one 'hdoc' header is allowed; additional header found."), @@ -3823,6 +4020,7 @@ pub const Diagnostic = struct { .redundant_inline => |ctx| try w.print("The inline \\{t} has no effect.", .{ctx.attribute}), .invalid_inline_combination => |ctx| try w.print("Cannot combine \\{t} with \\{t}.", .{ ctx.first, ctx.second }), + .inline_not_allowed => |ctx| try w.print("\\{t} is not allowed in this context.", .{ctx.node_type}), .link_not_nestable => try w.writeAll("Links are not nestable"), .attribute_leading_trailing_whitespace => try w.writeAll("Attribute value has invalid leading or trailing whitespace."), @@ -3831,7 +4029,7 @@ pub const Diagnostic = struct { .missing_timezone => try w.writeAll("Missing timezone offset; add a 'tz' header attribute or include a timezone in the value."), - .invalid_date_time_fmt => |ctx| try w.print("Invalid 'fmt' value '{s}' for date/time.", .{ctx.fmt}), + .invalid_date_time_fmt => try w.writeAll("Invalid 'fmt' value for date/time."), .invalid_string_escape => |ctx| if (ctx.codepoint > 0x20 and ctx.codepoint <= 0x7F) try w.print("\\{u} is not a valid escape sequence.", .{ctx.codepoint}) @@ -3866,6 +4064,7 @@ pub const Diagnostic = struct { .footnote_missing_ref => try w.writeAll("\\footnote without a body requires a ref=\"...\" attribute."), .footnote_missing_body => try w.writeAll("\\footnote definitions require a non-empty body."), .footnote_kind_on_reference => try w.writeAll("Attribute 'kind' is only valid on defining \\footnote entries."), + .invalid_heading_sequence => |ctx| try w.print("{t} requires a preceding {t}.", .{ ctx.level, ctx.missing }), .missing_document_language => try w.writeAll("Document language is missing; set 
lang on the hdoc header."), .tab_character => try w.writeAll("Tab character is not allowed; use spaces instead."), diff --git a/src/render/html5.zig b/src/render/html5.zig index 5aa9b97..a7acf35 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -973,7 +973,7 @@ fn formatTimeValue(value: hdoc.FormattedDateTime(hdoc.Time), buffer: []u8) Rende switch (value.format) { .short, .rough => try writer.print("{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute }), - .long, .relative => { + .long => { try writer.print("{d:0>2}:{d:0>2}:{d:0>2}", .{ value.value.hour, value.value.minute, value.value.second }); if (value.value.microsecond > 0) { try writer.print(".{d:0>6}", .{value.value.microsecond}); diff --git a/src/testsuite.zig b/src/testsuite.zig index e2003c8..7d98959 100644 --- a/src/testsuite.zig +++ b/src/testsuite.zig @@ -60,7 +60,7 @@ test "parser accept identifier and word tokens" { .diagnostics = null, }; - const ident = try parser.accept_identifier(); + const ident = try parser.accept_identifier(.node); try std.testing.expectEqualStrings("h1", ident.text); try std.testing.expectEqual(@as(usize, 0), ident.location.offset); try std.testing.expectEqual(@as(usize, 2), ident.location.length); @@ -82,7 +82,7 @@ test "parser rejects identifiers with invalid start characters" { .diagnostics = null, }; - try std.testing.expectError(error.InvalidCharacter, parser.accept_identifier()); + try std.testing.expectError(error.InvalidCharacter, parser.accept_identifier(.node)); } test "parser accept string literals and unescape" { @@ -563,10 +563,16 @@ test "table of contents inserts automatic headings when skipping levels" { var doc = try hdoc.parse(std.testing.allocator, source, &diagnostics); defer doc.deinit(); - try std.testing.expectEqual(@as(usize, 3), diagnostics.items.items.len); + try std.testing.expectEqual(@as(usize, 5), diagnostics.items.items.len); try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[0].code, 
.missing_document_language)); - try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[1].code, .{ .automatic_heading_insertion = .{ .level = .h1 } })); - try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[1].code, .{ + .invalid_heading_sequence = .{ .level = .h3, .missing = .h2 }, + })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[2].code, .{ + .invalid_heading_sequence = .{ .level = .h2, .missing = .h1 }, + })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[3].code, .{ .automatic_heading_insertion = .{ .level = .h1 } })); + try std.testing.expect(diagnosticCodesEqual(diagnostics.items.items[4].code, .{ .automatic_heading_insertion = .{ .level = .h2 } })); const toc = doc.toc; try std.testing.expectEqual(.h1, toc.level); @@ -879,7 +885,7 @@ test "diagnostic codes are emitted for expected samples" { try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); hdoc(version=\"2.0\",lang=\"en\");", &.{ .misplaced_hdoc_header, .duplicate_hdoc_header }); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\q\"", &.{.{ .invalid_string_escape = .{ .codepoint = 'q' } }}); try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); h1 \"bad\\u{9}\"", &.{.{ .illegal_character = .{ .codepoint = 0x9 } }}); - try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); ul{ li{ toc; } }", &.{.illegal_child_item}); + try validateDiagnostics(.{}, "hdoc(version=\"2.0\",lang=\"en\"); ul{ li{ toc; } }", &.{ .illegal_child_item, .list_body_required }); } test "table derives column count from first data row" { diff --git a/test/conformance/accept/inline_escape.yaml b/test/conformance/accept/inline_escape.yaml index c222dd3..4f58ab7 100644 --- a/test/conformance/accept/inline_escape.yaml +++ b/test/conformance/accept/inline_escape.yaml @@ 
-14,6 +14,6 @@ document: - paragraph: lang: "" content: - - [] "backslash \\\\ brace-open \\{ brace-close \\}" + - [] "backslash \\ brace-open { brace-close }" ids: - null diff --git a/test/conformance/reject/container_children.diag b/test/conformance/reject/container_children.diag index e69de29..d6354d0 100644 --- a/test/conformance/reject/container_children.diag +++ b/test/conformance/reject/container_children.diag @@ -0,0 +1,4 @@ +/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:5:5: Node not allowed here. +/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:4:3: Node requires list body. +/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:10:3: Node not allowed here. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/container_children.hdoc": InvalidFile diff --git a/test/conformance/reject/heading_sequence.diag b/test/conformance/reject/heading_sequence.diag index 31568cd..ecae9b9 100644 --- a/test/conformance/reject/heading_sequence.diag +++ b/test/conformance/reject/heading_sequence.diag @@ -1,3 +1,6 @@ -test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. -test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. -test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: h3 requires a preceding h2. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: h3 requires a preceding h2. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. +/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. 
+error: failed to parse "/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc": InvalidFile diff --git a/test/conformance/reject/inline_identifier_dash.diag b/test/conformance/reject/inline_identifier_dash.diag index e69de29..0528512 100644 --- a/test/conformance/reject/inline_identifier_dash.diag +++ b/test/conformance/reject/inline_identifier_dash.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc:3:9: Invalid identifier character: '-'. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc": SyntaxError diff --git a/test/conformance/reject/nested_top_level.diag b/test/conformance/reject/nested_top_level.diag index e69de29..064fdbe 100644 --- a/test/conformance/reject/nested_top_level.diag +++ b/test/conformance/reject/nested_top_level.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc:4:3: Node not allowed here. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc": InvalidFile diff --git a/test/conformance/reject/ref_in_heading.diag b/test/conformance/reject/ref_in_heading.diag index e69de29..60d0cd0 100644 --- a/test/conformance/reject/ref_in_heading.diag +++ b/test/conformance/reject/ref_in_heading.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc:5:14: \\ref is not allowed in this context. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc": InvalidFile diff --git a/test/conformance/reject/string_cr_escape.diag b/test/conformance/reject/string_cr_escape.diag index e69de29..f85f8c7 100644 --- a/test/conformance/reject/string_cr_escape.diag +++ b/test/conformance/reject/string_cr_escape.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc:3:8: Forbidden control character U+000D. 
+error: failed to parse "/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc": InvalidFile diff --git a/test/conformance/reject/time_relative_fmt.diag b/test/conformance/reject/time_relative_fmt.diag index e69de29..5cbffa2 100644 --- a/test/conformance/reject/time_relative_fmt.diag +++ b/test/conformance/reject/time_relative_fmt.diag @@ -0,0 +1,2 @@ +/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc:3:15: Invalid 'fmt' value for date/time. +error: failed to parse "/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc": InvalidFile From 53888e53935fff9657a0322a1fe6b504b3a01d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 07:07:57 +0100 Subject: [PATCH 105/116] Pretty-print JSON diagnostics in snapshots --- build.zig | 1 + src/main.zig | 28 ++++++-- .../reject/container_children.diag | 33 ++++++++-- test/conformance/reject/heading_sequence.diag | 65 +++++++++++++++++-- .../reject/inline_identifier_dash.diag | 15 ++++- test/conformance/reject/nested_top_level.diag | 13 +++- test/conformance/reject/ref_in_heading.diag | 15 ++++- test/conformance/reject/string_cr_escape.diag | 15 ++++- .../conformance/reject/time_relative_fmt.diag | 13 +++- 9 files changed, 171 insertions(+), 27 deletions(-) diff --git a/build.zig b/build.zig index 8d8607f..2570831 100644 --- a/build.zig +++ b/build.zig @@ -112,6 +112,7 @@ pub fn build(b: *std.Build) void { const diag_file = b.fmt("{s}.diag", .{path[0 .. 
path.len - 5]}); const test_run = b.addRunArtifact(exe); + test_run.addArgs(&.{"--json-diagnostics"}); test_run.addFileArg(b.path(path)); test_run.expectExitCode(1); const generated_diag = test_run.captureStdErr(); diff --git a/src/main.zig b/src/main.zig index 776d241..fffdc9d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -34,17 +34,25 @@ pub fn main() !u8 { options, ); - for (diagnostics.items.items) |diag| { - try stderr.interface.print("{s}:{f}: {f}\n", .{ - options.file_path, - diag.location, - diag.code, - }); + if (options.json_diagnostics) { + const json_options: std.json.Stringify.Options = .{ .whitespace = .indent_2 }; + try std.json.Stringify.value(diagnostics.items.items, json_options, &stderr.interface); + try stderr.interface.writeByte('\n'); + } else { + for (diagnostics.items.items) |diag| { + try stderr.interface.print("{s}:{f}: {f}\n", .{ + options.file_path, + diag.location, + diag.code, + }); + } } try stderr.interface.flush(); parse_result catch |err| { - std.log.err("failed to parse \"{s}\": {t}", .{ options.file_path, err }); + if (!options.json_diagnostics) { + std.log.err("failed to parse \"{s}\": {t}", .{ options.file_path, err }); + } return 1; }; @@ -73,6 +81,7 @@ fn parse_and_process(allocator: std.mem.Allocator, diagnostics: *hdoc.Diagnostic const CliOptions = struct { format: RenderFormat = .html, file_path: []const u8, + json_diagnostics: bool = false, }; const RenderFormat = enum { @@ -98,6 +107,11 @@ fn parse_options(stderr: *std.Io.Writer, argv: []const []const u8) !CliOptions { i += 1; continue; } + if (std.mem.eql(u8, value, "--json-diagnostics")) { + options.json_diagnostics = true; + i += 1; + continue; + } return error.InvalidCli; } diff --git a/test/conformance/reject/container_children.diag b/test/conformance/reject/container_children.diag index d6354d0..9d4bba4 100644 --- a/test/conformance/reject/container_children.diag +++ b/test/conformance/reject/container_children.diag @@ -1,4 +1,29 @@ 
-/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:5:5: Node not allowed here. -/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:4:3: Node requires list body. -/workspace/hyperdoc/test/conformance/reject/container_children.hdoc:10:3: Node not allowed here. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/container_children.hdoc": InvalidFile +[ + { + "code": { + "illegal_child_item": {} + }, + "location": { + "line": 5, + "column": 5 + } + }, + { + "code": { + "list_body_required": {} + }, + "location": { + "line": 4, + "column": 3 + } + }, + { + "code": { + "illegal_child_item": {} + }, + "location": { + "line": 10, + "column": 3 + } + } +] diff --git a/test/conformance/reject/heading_sequence.diag b/test/conformance/reject/heading_sequence.diag index ecae9b9..02e90f2 100644 --- a/test/conformance/reject/heading_sequence.diag +++ b/test/conformance/reject/heading_sequence.diag @@ -1,6 +1,59 @@ -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: h3 requires a preceding h2. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: h3 requires a preceding h2. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h1 to fill heading level gap. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:3:1: Inserted automatic h2 to fill heading level gap. -/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc:5:1: Inserted automatic h2 to fill heading level gap. 
-error: failed to parse "/workspace/hyperdoc/test/conformance/reject/heading_sequence.hdoc": InvalidFile +[ + { + "code": { + "invalid_heading_sequence": { + "level": "h3", + "missing": "h2" + } + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "invalid_heading_sequence": { + "level": "h3", + "missing": "h2" + } + }, + "location": { + "line": 5, + "column": 1 + } + }, + { + "code": { + "automatic_heading_insertion": { + "level": "h1" + } + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "automatic_heading_insertion": { + "level": "h2" + } + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "automatic_heading_insertion": { + "level": "h2" + } + }, + "location": { + "line": 5, + "column": 1 + } + } +] diff --git a/test/conformance/reject/inline_identifier_dash.diag b/test/conformance/reject/inline_identifier_dash.diag index 0528512..3c8dfb8 100644 --- a/test/conformance/reject/inline_identifier_dash.diag +++ b/test/conformance/reject/inline_identifier_dash.diag @@ -1,2 +1,13 @@ -/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc:3:9: Invalid identifier character: '-'. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/inline_identifier_dash.hdoc": SyntaxError +[ + { + "code": { + "invalid_identifier_character": { + "char": 45 + } + }, + "location": { + "line": 3, + "column": 9 + } + } +] diff --git a/test/conformance/reject/nested_top_level.diag b/test/conformance/reject/nested_top_level.diag index 064fdbe..76ea6e6 100644 --- a/test/conformance/reject/nested_top_level.diag +++ b/test/conformance/reject/nested_top_level.diag @@ -1,2 +1,11 @@ -/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc:4:3: Node not allowed here. 
-error: failed to parse "/workspace/hyperdoc/test/conformance/reject/nested_top_level.hdoc": InvalidFile +[ + { + "code": { + "illegal_child_item": {} + }, + "location": { + "line": 4, + "column": 3 + } + } +] diff --git a/test/conformance/reject/ref_in_heading.diag b/test/conformance/reject/ref_in_heading.diag index 60d0cd0..68602ea 100644 --- a/test/conformance/reject/ref_in_heading.diag +++ b/test/conformance/reject/ref_in_heading.diag @@ -1,2 +1,13 @@ -/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc:5:14: \\ref is not allowed in this context. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/ref_in_heading.hdoc": InvalidFile +[ + { + "code": { + "inline_not_allowed": { + "node_type": "\\ref" + } + }, + "location": { + "line": 5, + "column": 14 + } + } +] diff --git a/test/conformance/reject/string_cr_escape.diag b/test/conformance/reject/string_cr_escape.diag index f85f8c7..ac57a8b 100644 --- a/test/conformance/reject/string_cr_escape.diag +++ b/test/conformance/reject/string_cr_escape.diag @@ -1,2 +1,13 @@ -/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc:3:8: Forbidden control character U+000D. -error: failed to parse "/workspace/hyperdoc/test/conformance/reject/string_cr_escape.hdoc": InvalidFile +[ + { + "code": { + "illegal_character": { + "codepoint": 13 + } + }, + "location": { + "line": 3, + "column": 8 + } + } +] diff --git a/test/conformance/reject/time_relative_fmt.diag b/test/conformance/reject/time_relative_fmt.diag index 5cbffa2..decc5a5 100644 --- a/test/conformance/reject/time_relative_fmt.diag +++ b/test/conformance/reject/time_relative_fmt.diag @@ -1,2 +1,11 @@ -/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc:3:15: Invalid 'fmt' value for date/time. 
-error: failed to parse "/workspace/hyperdoc/test/conformance/reject/time_relative_fmt.hdoc": InvalidFile +[ + { + "code": { + "invalid_date_time_fmt": {} + }, + "location": { + "line": 3, + "column": 15 + } + } +] From 6f8fc9921ff3e1148c0d71632356d70a3903fa7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 07:39:10 +0100 Subject: [PATCH 106/116] Add conformance tests for document headers and images --- build.zig | 14 ++++++++--- .../accept/header_and_title_order.hdoc | 5 ++++ .../accept/header_and_title_order.yaml | 24 +++++++++++++++++++ .../accept/image_with_required_path.hdoc | 3 +++ .../accept/image_with_required_path.yaml | 21 ++++++++++++++++ .../conformance/accept/no_title_document.hdoc | 3 +++ .../conformance/accept/no_title_document.yaml | 19 +++++++++++++++ test/conformance/reject/duplicate_header.diag | 20 ++++++++++++++++ test/conformance/reject/duplicate_header.hdoc | 5 ++++ .../reject/hdoc_body_non_empty.diag | 11 +++++++++ .../reject/hdoc_body_non_empty.hdoc | 1 + .../reject/image_missing_path.diag | 14 +++++++++++ .../reject/image_missing_path.hdoc | 3 +++ test/conformance/reject/missing_header.diag | 11 +++++++++ test/conformance/reject/missing_header.hdoc | 1 + .../reject/title_after_content.diag | 11 +++++++++ .../reject/title_after_content.hdoc | 5 ++++ 17 files changed, 168 insertions(+), 3 deletions(-) create mode 100644 test/conformance/accept/header_and_title_order.hdoc create mode 100644 test/conformance/accept/header_and_title_order.yaml create mode 100644 test/conformance/accept/image_with_required_path.hdoc create mode 100644 test/conformance/accept/image_with_required_path.yaml create mode 100644 test/conformance/accept/no_title_document.hdoc create mode 100644 test/conformance/accept/no_title_document.yaml create mode 100644 test/conformance/reject/duplicate_header.diag create mode 100644 test/conformance/reject/duplicate_header.hdoc create mode 100644 
test/conformance/reject/hdoc_body_non_empty.diag create mode 100644 test/conformance/reject/hdoc_body_non_empty.hdoc create mode 100644 test/conformance/reject/image_missing_path.diag create mode 100644 test/conformance/reject/image_missing_path.hdoc create mode 100644 test/conformance/reject/missing_header.diag create mode 100644 test/conformance/reject/missing_header.hdoc create mode 100644 test/conformance/reject/title_after_content.diag create mode 100644 test/conformance/reject/title_after_content.hdoc diff --git a/build.zig b/build.zig index 2570831..626ffc0 100644 --- a/build.zig +++ b/build.zig @@ -11,18 +11,26 @@ const snapshot_files: []const []const u8 = &.{ }; const conformance_accept_files: []const []const u8 = &.{ + "test/conformance/accept/header_and_title_order.hdoc", + "test/conformance/accept/image_with_required_path.hdoc", "test/conformance/accept/inline_escape.hdoc", + "test/conformance/accept/no_title_document.hdoc", "test/conformance/accept/title_header_redundant.hdoc", }; const conformance_reject_files: []const []const u8 = &.{ - "test/conformance/reject/string_cr_escape.hdoc", - "test/conformance/reject/inline_identifier_dash.hdoc", + "test/conformance/reject/container_children.hdoc", + "test/conformance/reject/duplicate_header.hdoc", + "test/conformance/reject/hdoc_body_non_empty.hdoc", "test/conformance/reject/heading_sequence.hdoc", + "test/conformance/reject/image_missing_path.hdoc", + "test/conformance/reject/inline_identifier_dash.hdoc", + "test/conformance/reject/missing_header.hdoc", "test/conformance/reject/nested_top_level.hdoc", - "test/conformance/reject/container_children.hdoc", "test/conformance/reject/time_relative_fmt.hdoc", "test/conformance/reject/ref_in_heading.hdoc", + "test/conformance/reject/string_cr_escape.hdoc", + "test/conformance/reject/title_after_content.hdoc", }; pub fn build(b: *std.Build) void { diff --git a/test/conformance/accept/header_and_title_order.hdoc 
b/test/conformance/accept/header_and_title_order.hdoc new file mode 100644 index 0000000..3357233 --- /dev/null +++ b/test/conformance/accept/header_and_title_order.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +title { Proper Order } + +p "Body content" diff --git a/test/conformance/accept/header_and_title_order.yaml b/test/conformance/accept/header_and_title_order.yaml new file mode 100644 index 0000000..604bdd5 --- /dev/null +++ b/test/conformance/accept/header_and_title_order.yaml @@ -0,0 +1,24 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: + simple: "Proper Order" + full: + lang: "" + content: + - [] "Proper Order" + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "Body content" + ids: + - null diff --git a/test/conformance/accept/image_with_required_path.hdoc b/test/conformance/accept/image_with_required_path.hdoc new file mode 100644 index 0000000..5152870 --- /dev/null +++ b/test/conformance/accept/image_with_required_path.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +img(path="media/picture.png", alt="Example figure") { Figure caption } diff --git a/test/conformance/accept/image_with_required_path.yaml b/test/conformance/accept/image_with_required_path.yaml new file mode 100644 index 0000000..9376937 --- /dev/null +++ b/test/conformance/accept/image_with_required_path.yaml @@ -0,0 +1,21 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: null + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - image: + lang: "" + alt: "Example figure" + path: "media/picture.png" + content: + - [] "Figure caption" + ids: + - null diff --git a/test/conformance/accept/no_title_document.hdoc b/test/conformance/accept/no_title_document.hdoc new file mode 100644 index 0000000..1c046ef --- /dev/null +++ b/test/conformance/accept/no_title_document.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); 
+ +p "Untitled body" diff --git a/test/conformance/accept/no_title_document.yaml b/test/conformance/accept/no_title_document.yaml new file mode 100644 index 0000000..4be7da4 --- /dev/null +++ b/test/conformance/accept/no_title_document.yaml @@ -0,0 +1,19 @@ +document: + version: + major: 2 + minor: 0 + lang: "en" + title: null + author: null + date: null + toc: + level: h1 + headings: [] + children: [] + contents: + - paragraph: + lang: "" + content: + - [] "Untitled body" + ids: + - null diff --git a/test/conformance/reject/duplicate_header.diag b/test/conformance/reject/duplicate_header.diag new file mode 100644 index 0000000..79d5d3b --- /dev/null +++ b/test/conformance/reject/duplicate_header.diag @@ -0,0 +1,20 @@ +[ + { + "code": { + "misplaced_hdoc_header": {} + }, + "location": { + "line": 3, + "column": 1 + } + }, + { + "code": { + "duplicate_hdoc_header": {} + }, + "location": { + "line": 3, + "column": 1 + } + } +] diff --git a/test/conformance/reject/duplicate_header.hdoc b/test/conformance/reject/duplicate_header.hdoc new file mode 100644 index 0000000..faeb809 --- /dev/null +++ b/test/conformance/reject/duplicate_header.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +hdoc(version="2.0", lang="en"); + +p "Duplicate headers" diff --git a/test/conformance/reject/hdoc_body_non_empty.diag b/test/conformance/reject/hdoc_body_non_empty.diag new file mode 100644 index 0000000..1b0ff8b --- /dev/null +++ b/test/conformance/reject/hdoc_body_non_empty.diag @@ -0,0 +1,11 @@ +[ + { + "code": { + "non_empty_hdoc_body": {} + }, + "location": { + "line": 1, + "column": 1 + } + } +] diff --git a/test/conformance/reject/hdoc_body_non_empty.hdoc b/test/conformance/reject/hdoc_body_non_empty.hdoc new file mode 100644 index 0000000..cf1aa2a --- /dev/null +++ b/test/conformance/reject/hdoc_body_non_empty.hdoc @@ -0,0 +1 @@ +hdoc(version="2.0", lang="en") "not empty" diff --git a/test/conformance/reject/image_missing_path.diag 
b/test/conformance/reject/image_missing_path.diag new file mode 100644 index 0000000..9cc8cbe --- /dev/null +++ b/test/conformance/reject/image_missing_path.diag @@ -0,0 +1,14 @@ +[ + { + "code": { + "missing_attribute": { + "type": "img", + "name": "path" + } + }, + "location": { + "line": 3, + "column": 1 + } + } +] diff --git a/test/conformance/reject/image_missing_path.hdoc b/test/conformance/reject/image_missing_path.hdoc new file mode 100644 index 0000000..3051dc6 --- /dev/null +++ b/test/conformance/reject/image_missing_path.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0", lang="en"); + +img { Figure caption } diff --git a/test/conformance/reject/missing_header.diag b/test/conformance/reject/missing_header.diag new file mode 100644 index 0000000..901fdd5 --- /dev/null +++ b/test/conformance/reject/missing_header.diag @@ -0,0 +1,11 @@ +[ + { + "code": { + "missing_hdoc_header": {} + }, + "location": { + "line": 1, + "column": 1 + } + } +] diff --git a/test/conformance/reject/missing_header.hdoc b/test/conformance/reject/missing_header.hdoc new file mode 100644 index 0000000..f942349 --- /dev/null +++ b/test/conformance/reject/missing_header.hdoc @@ -0,0 +1 @@ +p "No header present" diff --git a/test/conformance/reject/title_after_content.diag b/test/conformance/reject/title_after_content.diag new file mode 100644 index 0000000..8d0abcb --- /dev/null +++ b/test/conformance/reject/title_after_content.diag @@ -0,0 +1,11 @@ +[ + { + "code": { + "misplaced_title_block": {} + }, + "location": { + "line": 5, + "column": 1 + } + } +] diff --git a/test/conformance/reject/title_after_content.hdoc b/test/conformance/reject/title_after_content.hdoc new file mode 100644 index 0000000..8aa7651 --- /dev/null +++ b/test/conformance/reject/title_after_content.hdoc @@ -0,0 +1,5 @@ +hdoc(version="2.0", lang="en"); + +p "First content" + +title { Late Title } From 27bd7c3be47abf93bddb7a6d43d36fd2afdea65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 
2026 11:13:38 +0100 Subject: [PATCH 107/116] Simplify wasm buffers and logging --- build.zig | 15 +++ src/playground.html | 284 ++++++++++++++++++++++++++++++++++++++++++++ src/wasm.zig | 221 ++++++++++++++++++++++++++++++++++ 3 files changed, 520 insertions(+) create mode 100644 src/playground.html create mode 100644 src/wasm.zig diff --git a/build.zig b/build.zig index 626ffc0..ab971a0 100644 --- a/build.zig +++ b/build.zig @@ -41,6 +41,7 @@ pub fn build(b: *std.Build) void { // Targets: const run_step = b.step("run", "Run the app"); const test_step = b.step("test", "Run unit tests"); + const wasm_target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }); // Build: const hyperdoc = b.addModule("hyperdoc", .{ @@ -60,6 +61,20 @@ pub fn build(b: *std.Build) void { }); b.installArtifact(exe); + const wasm_exe = b.addExecutable(.{ + .name = "hyperdoc_wasm", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/wasm.zig"), + .target = wasm_target, + .optimize = optimize, + .single_threaded = true, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, + }), + }); + b.installArtifact(wasm_exe); + const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); if (b.args) |arg| { diff --git a/src/playground.html b/src/playground.html new file mode 100644 index 0000000..6336eed --- /dev/null +++ b/src/playground.html @@ -0,0 +1,284 @@ + + + + + HyperDoc Playground + + + +
      HyperDoc Playground
      +
      +
      +
      + HyperDoc Source + Waiting for WASM… +
      + +
      +
      +
      + Preview + +
      +
      +
        +
        +
        + + + diff --git a/src/wasm.zig b/src/wasm.zig new file mode 100644 index 0000000..852768a --- /dev/null +++ b/src/wasm.zig @@ -0,0 +1,221 @@ +const std = @import("std"); +const hyperdoc = @import("hyperdoc"); + +const LogLevel = enum(u8) { err, warn, info, debug }; + +extern fn reset_log() void; +extern fn append_log(ptr: [*]const u8, len: usize) void; +extern fn flush_log(level: LogLevel) void; + +const LogWriter = struct { + fn appendWrite(self: LogWriter, chunk: []const u8) error{OutOfMemory}!usize { + _ = self; + append_log(chunk.ptr, chunk.len); + return chunk.len; + } + + fn writer(self: LogWriter) std.io.GenericWriter(LogWriter, error{OutOfMemory}, appendWrite) { + return .{ .context = self }; + } +}; + +fn log_to_host( + comptime level: std.log.Level, + comptime _scope: @TypeOf(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + _ = _scope; + + reset_log(); + + const log_writer = LogWriter{}; + const writer = log_writer.writer(); + _ = std.fmt.format(writer, format, args) catch {}; + + const mapped: LogLevel = switch (level) { + .err => .err, + .warn => .warn, + .info => .info, + .debug => .debug, + }; + + flush_log(mapped); +} + +fn fixedPageSize() usize { + return 4096; +} + +fn zeroRandom(buffer: []u8) void { + @memset(buffer, 0); +} + +pub const std_options: std.Options = .{ + .enable_segfault_handler = false, + .logFn = log_to_host, + .queryPageSize = fixedPageSize, + .cryptoRandomSeed = zeroRandom, +}; + +const allocator = std.heap.wasm_allocator; + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, ret_addr: ?usize) noreturn { + _ = message; + _ = stack_trace; + _ = ret_addr; + @breakpoint(); + unreachable; +} + +pub fn main() !void {} + +const DiagnosticView = struct { + line: u32, + column: u32, + message: []u8, +}; + +var document_buffer: std.array_list.Managed(u8) = std.array_list.Managed(u8).init(allocator); +var html_buffer: std.array_list.Managed(u8) = 
std.array_list.Managed(u8).init(allocator); +var diagnostic_views: std.array_list.Managed(DiagnosticView) = std.array_list.Managed(DiagnosticView).init(allocator); +var diagnostic_text: std.array_list.Managed(u8) = std.array_list.Managed(u8).init(allocator); + +const CountingWriter = struct { + count: usize = 0, + + fn write(self: *CountingWriter, bytes: []const u8) error{}!usize { + self.count += bytes.len; + return bytes.len; + } + + fn generic(self: *CountingWriter) std.Io.GenericWriter(*CountingWriter, error{}, write) { + return .{ .context = self }; + } +}; + +fn capture_diagnostics(source: *hyperdoc.Diagnostics) !void { + diagnostic_views.clearRetainingCapacity(); + diagnostic_text.clearRetainingCapacity(); + + if (source.items.items.len == 0) return; + + var total: usize = 0; + for (source.items.items) |diag| { + var cw: CountingWriter = .{}; + _ = diag.code.format(cw.generic()) catch {}; + total += cw.count; + } + + diagnostic_text.ensureTotalCapacityPrecise(total) catch return; + + var diag_writer = diagnostic_text.writer(); + var adapter_buffer: [256]u8 = undefined; + var adapter = diag_writer.any().adaptToNewApi(&adapter_buffer); + + for (source.items.items) |diag| { + const start = diagnostic_text.items.len; + diag.code.format(&adapter.new_interface) catch { + adapter.err = error.WriteFailed; + }; + if (adapter.err) |_| return; + + const rendered = diagnostic_text.items[start..]; + try diagnostic_views.append(.{ + .line = diag.location.line, + .column = diag.location.column, + .message = rendered, + }); + } +} + +export fn hdoc_set_document_len(len: usize) bool { + document_buffer.clearRetainingCapacity(); + document_buffer.items.len = 0; + + if (len == 0) return true; + + document_buffer.ensureTotalCapacityPrecise(len) catch return false; + document_buffer.items.len = len; + return true; +} + +export fn hdoc_document_ptr() [*]u8 { + return document_buffer.items.ptr; +} + +export fn hdoc_process() bool { + html_buffer.clearRetainingCapacity(); + 
diagnostic_views.clearRetainingCapacity(); + diagnostic_text.clearRetainingCapacity(); + + const source: []const u8 = document_buffer.items; + + var diagnostics = hyperdoc.Diagnostics.init(allocator); + defer diagnostics.deinit(); + + var parsed = hyperdoc.parse(allocator, source, &diagnostics) catch { + capture_diagnostics(&diagnostics) catch {}; + return false; + }; + defer parsed.deinit(); + + if (diagnostics.has_error()) { + capture_diagnostics(&diagnostics) catch {}; + return false; + } + + var html_writer = html_buffer.writer(); + var html_adapter_buffer: [256]u8 = undefined; + var html_adapter = html_writer.any().adaptToNewApi(&html_adapter_buffer); + + hyperdoc.render.html5(parsed, &html_adapter.new_interface) catch { + html_adapter.err = error.WriteFailed; + }; + if (html_adapter.err) |_| { + capture_diagnostics(&diagnostics) catch {}; + return false; + } + + capture_diagnostics(&diagnostics) catch {}; + return true; +} + +export fn hdoc_html_ptr() ?[*]const u8 { + if (html_buffer.items.len == 0) return null; + return html_buffer.items.ptr; +} + +export fn hdoc_html_len() usize { + return html_buffer.items.len; +} + +export fn hdoc_diagnostic_count() usize { + return diagnostic_views.items.len; +} + +export fn hdoc_diagnostic_line(index: usize) u32 { + if (index >= diagnostic_views.items.len) return 0; + + return diagnostic_views.items[index].line; +} + +export fn hdoc_diagnostic_column(index: usize) u32 { + if (index >= diagnostic_views.items.len) return 0; + + return diagnostic_views.items[index].column; +} + +export fn hdoc_diagnostic_message_ptr(index: usize) ?[*]const u8 { + if (index >= diagnostic_views.items.len) return null; + + if (diagnostic_views.items[index].message.len == 0) return null; + + return diagnostic_views.items[index].message.ptr; +} + +export fn hdoc_diagnostic_message_len(index: usize) usize { + if (index >= diagnostic_views.items.len) return 0; + + return diagnostic_views.items[index].message.len; +} From 
ccedc73b1dc61460500e505a9a0fa2c5bcccfabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 12:48:56 +0100 Subject: [PATCH 108/116] Gate WASM Node tests via build --- README.md | 2 + build.zig | 21 ++++ src/wasm.zig | 6 + test/wasm/diagnostic_accepted.hdoc | 3 + test/wasm/diagnostic_rejected.hdoc | 2 + test/wasm/diagnostics_expected.json | 16 +++ test/wasm/validate.js | 177 ++++++++++++++++++++++++++++ 7 files changed, 227 insertions(+) create mode 100644 test/wasm/diagnostic_accepted.hdoc create mode 100644 test/wasm/diagnostic_rejected.hdoc create mode 100644 test/wasm/diagnostics_expected.json create mode 100644 test/wasm/validate.js diff --git a/README.md b/README.md index 6806429..08847f7 100644 --- a/README.md +++ b/README.md @@ -29,3 +29,5 @@ Requires [Zig 0.15.2](https://ziglang.org/) installed. ```sh-session [user@host] hyperdoc$ zig build test ``` + +> Optional: installing Node.js enables the WASM integration tests that exercise the compiled `hyperdoc_wasm.wasm` via `node test/wasm/validate.js`. 
diff --git a/build.zig b/build.zig index ab971a0..5e35295 100644 --- a/build.zig +++ b/build.zig @@ -73,6 +73,18 @@ pub fn build(b: *std.Build) void { }, }), }); + wasm_exe.root_module.export_symbol_names = &.{ + "hdoc_set_document_len", + "hdoc_document_ptr", + "hdoc_process", + "hdoc_html_ptr", + "hdoc_html_len", + "hdoc_diagnostic_count", + "hdoc_diagnostic_line", + "hdoc_diagnostic_column", + "hdoc_diagnostic_message_ptr", + "hdoc_diagnostic_message_len", + }; b.installArtifact(wasm_exe); const run_cmd = b.addRunArtifact(exe); @@ -177,6 +189,15 @@ pub fn build(b: *std.Build) void { .use_llvm = true, }); test_step.dependOn(&b.addRunArtifact(main_tests).step); + + const node_path = b.findProgram(&.{"node"}, &.{}) catch null; + if (node_path) |node| { + const wasm_validate = b.addSystemCommand(&.{ node, "test/wasm/validate.js" }); + wasm_validate.step.dependOn(b.getInstallStep()); + test_step.dependOn(&wasm_validate.step); + } else { + std.debug.print("node not found; skipping WASM integration tests\n", .{}); + } } fn rawFileMod(b: *std.Build, path: []const u8) std.Build.Module.Import { diff --git a/src/wasm.zig b/src/wasm.zig index 852768a..8cc0627 100644 --- a/src/wasm.zig +++ b/src/wasm.zig @@ -118,6 +118,9 @@ fn capture_diagnostics(source: *hyperdoc.Diagnostics) !void { diag.code.format(&adapter.new_interface) catch { adapter.err = error.WriteFailed; }; + adapter.new_interface.flush() catch { + adapter.err = error.WriteFailed; + }; if (adapter.err) |_| return; const rendered = diagnostic_text.items[start..]; @@ -172,6 +175,9 @@ export fn hdoc_process() bool { hyperdoc.render.html5(parsed, &html_adapter.new_interface) catch { html_adapter.err = error.WriteFailed; }; + html_adapter.new_interface.flush() catch { + html_adapter.err = error.WriteFailed; + }; if (html_adapter.err) |_| { capture_diagnostics(&diagnostics) catch {}; return false; diff --git a/test/wasm/diagnostic_accepted.hdoc b/test/wasm/diagnostic_accepted.hdoc new file mode 100644 index 
0000000..fcd9f85 --- /dev/null +++ b/test/wasm/diagnostic_accepted.hdoc @@ -0,0 +1,3 @@ +hdoc(version="2.0"); +title "WASM Warning Coverage" +p { The header intentionally omits a lang attribute. } diff --git a/test/wasm/diagnostic_rejected.hdoc b/test/wasm/diagnostic_rejected.hdoc new file mode 100644 index 0000000..a43140b --- /dev/null +++ b/test/wasm/diagnostic_rejected.hdoc @@ -0,0 +1,2 @@ +h1 "Missing header" +p { This file lacks the required hdoc header. } diff --git a/test/wasm/diagnostics_expected.json b/test/wasm/diagnostics_expected.json new file mode 100644 index 0000000..703225c --- /dev/null +++ b/test/wasm/diagnostics_expected.json @@ -0,0 +1,16 @@ +{ + "accepted": [ + { + "line": 1, + "column": 1, + "message": "Document language is missing; set lang on the hdoc header." + } + ], + "rejected": [ + { + "line": 1, + "column": 1, + "message": "Document must start with an 'hdoc' header." + } + ] +} diff --git a/test/wasm/validate.js b/test/wasm/validate.js new file mode 100644 index 0000000..45ffd18 --- /dev/null +++ b/test/wasm/validate.js @@ -0,0 +1,177 @@ +#!/usr/bin/env node +'use strict'; + +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const path = require('node:path'); + +const textEncoder = new TextEncoder(); +const textDecoder = new TextDecoder(); + +const repoRoot = path.join(__dirname, '..', '..'); +const wasmPath = path.join(repoRoot, 'zig-out', 'bin', 'hyperdoc_wasm.wasm'); + +const htmlSnapshotTests = [ + { + name: 'document_header', + source: path.join(repoRoot, 'test', 'snapshot', 'document_header.hdoc'), + expected: path.join(repoRoot, 'test', 'snapshot', 'document_header.html'), + }, + { + name: 'paragraph_styles', + source: path.join(repoRoot, 'test', 'snapshot', 'paragraph_styles.hdoc'), + expected: path.join(repoRoot, 'test', 'snapshot', 'paragraph_styles.html'), + }, + { + name: 'tables', + source: path.join(repoRoot, 'test', 'snapshot', 'tables.hdoc'), + expected: path.join(repoRoot, 'test', 
'snapshot', 'tables.html'), + }, +]; + +const diagnosticsInput = { + accepted: path.join(__dirname, 'diagnostic_accepted.hdoc'), + rejected: path.join(__dirname, 'diagnostic_rejected.hdoc'), + expected: path.join(__dirname, 'diagnostics_expected.json'), +}; + +function assertFileExists(filePath) { + if (!fs.existsSync(filePath)) { + throw new Error(`Missing required file: ${filePath}`); + } +} + +function readUtf8(filePath) { + return fs.readFileSync(filePath, 'utf8'); +} + +function createLogImports(memoryRef) { + const state = { buffer: '' }; + return { + reset_log() { + state.buffer = ''; + }, + append_log(ptr, len) { + if (len === 0 || ptr === 0) return; + const memory = memoryRef.current; + if (!memory) return; + const view = new Uint8Array(memory.buffer, ptr, len); + state.buffer += textDecoder.decode(view); + }, + flush_log(level) { + if (state.buffer.length === 0) return; + const method = ['error', 'warn', 'info', 'debug'][level] || 'log'; + console[method](`[wasm ${method}] ${state.buffer}`); + state.buffer = ''; + }, + }; +} + +function getMemory(wasm, memoryRef) { + const memory = wasm.memory || memoryRef.current; + memoryRef.current = memory; + if (!memory) { + throw new Error('WASM memory is unavailable'); + } + return memory; +} + +async function instantiateWasm() { + assertFileExists(wasmPath); + const bytes = await fs.promises.readFile(wasmPath); + const memoryRef = { current: null }; + const env = createLogImports(memoryRef); + const { instance } = await WebAssembly.instantiate(bytes, { env }); + memoryRef.current = instance.exports.memory; + return { wasm: instance.exports, memoryRef }; +} + +function readString(memory, ptr, len) { + if (!ptr || len === 0) return ''; + const view = new Uint8Array(memory.buffer, ptr, len); + return textDecoder.decode(view); +} + +function processDocument(ctx, sourceText) { + const { wasm, memoryRef } = ctx; + const bytes = textEncoder.encode(sourceText); + + if (!wasm.hdoc_set_document_len(bytes.length)) { + throw 
new Error('Failed to allocate WASM document buffer'); + } + + const memoryForInput = getMemory(wasm, memoryRef); + const docPtr = wasm.hdoc_document_ptr(); + if (bytes.length > 0) { + new Uint8Array(memoryForInput.buffer, docPtr, bytes.length).set(bytes); + } + + const ok = wasm.hdoc_process() !== 0; + const memory = getMemory(wasm, memoryRef); + + const htmlPtr = wasm.hdoc_html_ptr(); + const htmlLen = wasm.hdoc_html_len(); + const html = readString(memory, htmlPtr ?? 0, htmlLen); + + const diagnostics = []; + const diagCount = wasm.hdoc_diagnostic_count(); + for (let i = 0; i < diagCount; i += 1) { + const msgPtr = wasm.hdoc_diagnostic_message_ptr(i) ?? 0; + const msgLen = wasm.hdoc_diagnostic_message_len(i); + diagnostics.push({ + line: wasm.hdoc_diagnostic_line(i), + column: wasm.hdoc_diagnostic_column(i), + message: readString(memory, msgPtr, msgLen), + }); + } + + return { ok, html, diagnostics }; +} + +function compareDiagnostics(actual, expected, label) { + assert.deepStrictEqual( + actual, + expected, + `${label} diagnostics differ.\nExpected: ${JSON.stringify(expected, null, 2)}\nActual: ${JSON.stringify(actual, null, 2)}`, + ); +} + +async function runHtmlTests(ctx) { + for (const test of htmlSnapshotTests) { + assertFileExists(test.source); + assertFileExists(test.expected); + const { ok, html, diagnostics } = processDocument(ctx, readUtf8(test.source)); + assert.equal(ok, true, `WASM processing failed for ${test.name}`); + assert.deepStrictEqual(diagnostics, [], `Expected no diagnostics for ${test.name}`); + const expectedHtml = readUtf8(test.expected); + assert.equal(html, expectedHtml, `Rendered HTML mismatch for ${test.name}`); + } +} + +async function runDiagnosticTests(ctx) { + assertFileExists(diagnosticsInput.accepted); + assertFileExists(diagnosticsInput.rejected); + assertFileExists(diagnosticsInput.expected); + + const expectations = JSON.parse(readUtf8(diagnosticsInput.expected)); + + const acceptedResult = processDocument(ctx, 
readUtf8(diagnosticsInput.accepted)); + assert.equal(acceptedResult.ok, true, 'Accepted diagnostic test should render successfully'); + compareDiagnostics(acceptedResult.diagnostics, expectations.accepted, 'Accepted'); + + const rejectedResult = processDocument(ctx, readUtf8(diagnosticsInput.rejected)); + assert.equal(rejectedResult.ok, false, 'Rejected diagnostic test should fail'); + compareDiagnostics(rejectedResult.diagnostics, expectations.rejected, 'Rejected'); +} + +async function main() { + const ctx = await instantiateWasm(); + await runHtmlTests(ctx); + await runDiagnosticTests(ctx); + console.log('WASM integration tests passed.'); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); From 1dec7142bf20a1ad697a4816b0f8231a277a2524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 13:34:55 +0100 Subject: [PATCH 109/116] Improves Wasm playground --- build.zig | 15 ++++++-- examples/guide.hdoc | 20 ++++++----- src/playground.html | 84 +++++++++++++++++++++++++++++++++---------- src/wasm.zig | 11 ++++++ test/wasm/validate.js | 2 +- 5 files changed, 102 insertions(+), 30 deletions(-) diff --git a/build.zig b/build.zig index 5e35295..fbbdf6a 100644 --- a/build.zig +++ b/build.zig @@ -33,6 +33,8 @@ const conformance_reject_files: []const []const u8 = &.{ "test/conformance/reject/title_after_content.hdoc", }; +const www_dir: std.Build.InstallDir = .{ .custom = "www" }; + pub fn build(b: *std.Build) void { // Options: const target = b.standardTargetOptions(.{}); @@ -73,7 +75,7 @@ pub fn build(b: *std.Build) void { }, }), }); - wasm_exe.root_module.export_symbol_names = &.{ + wasm_exe.root_module.export_symbol_names = comptime &.{ "hdoc_set_document_len", "hdoc_document_ptr", "hdoc_process", @@ -82,10 +84,17 @@ pub fn build(b: *std.Build) void { "hdoc_diagnostic_count", "hdoc_diagnostic_line", "hdoc_diagnostic_column", + "hdoc_diagnostic_fatal", "hdoc_diagnostic_message_ptr", 
"hdoc_diagnostic_message_len", }; - b.installArtifact(wasm_exe); + const install_wasm = b.addInstallArtifact(wasm_exe, .{ + .dest_dir = .{ .override = www_dir }, + }); + b.getInstallStep().dependOn(&install_wasm.step); + + const install_web = b.addInstallFileWithDir(b.path("src/playground.html"), www_dir, "index.html"); + b.getInstallStep().dependOn(&install_web.step); const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); @@ -193,7 +202,7 @@ pub fn build(b: *std.Build) void { const node_path = b.findProgram(&.{"node"}, &.{}) catch null; if (node_path) |node| { const wasm_validate = b.addSystemCommand(&.{ node, "test/wasm/validate.js" }); - wasm_validate.step.dependOn(b.getInstallStep()); + wasm_validate.step.dependOn(&install_wasm.step); test_step.dependOn(&wasm_validate.step); } else { std.debug.print("node not found; skipping WASM integration tests\n", .{}); diff --git a/examples/guide.hdoc b/examples/guide.hdoc index b102260..0e7e946 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -1,10 +1,10 @@ -hdoc(version="2.0"); +hdoc(version="2.0", lang="en", author="Felix \"xq\" Queißner", date="2025-12-17T13:45:00+01:00"); -h1(id="intro", lang="en") { HyperDoc 2.0 Examples } +title { HyperDoc 2.0 Examples } toc(depth="2") {} -h2(id="paragraphs") { Paragraphs and Inline Text } +h1(id="paragraphs") { Paragraphs and Inline Text } p(id="p-basic") { This paragraph shows plain text mixed with \em{emphasis}, \strike{strike-through}, and \mono{monospaced} spans. @@ -31,7 +31,7 @@ tip "Tips provide actionable hints." quote "Quotes include sourced or emphasized wording." spoiler "Spoilers hide key story information until revealed." -h2(id="literals") { Literal and Preformatted Blocks } +h1(id="literals") { Literal and Preformatted Blocks } p: | Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. 
@@ -45,7 +45,9 @@ test { } } -h2(id="lists") { Lists } +h1(id="lists") { Lists } + +h2 { Unordered Lists } ul { li { p { Apples } } @@ -53,13 +55,15 @@ ul { li { p { \em{Cucumbers} with inline markup. } } } +h2 { Ordered Lists } + ol(first="3") { li { p { Start counting at three. } } li "Continue with a string item." li { p { Finish the sequence. } } } -h2(id="media") { Figures } +h1(id="media") { Figures } p { The image below has a caption, alt text, and a relative asset path. @@ -69,14 +73,14 @@ img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/d HyperDoc is centered inside a rounded rectangle. } -h2(id="dates") { Dates and Times } +h1(id="dates") { Dates and Times } p { The event was announced on \date(fmt="long"){2025-12-17} at \time(fmt="short"){13:45:00Z}. A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. } -h2(id="table-ref") { Tables } +h1(id="table-ref") { Tables } p { See the dedicated tables example file for row groups and colspan usage. diff --git a/src/playground.html b/src/playground.html index 6336eed..325c6b7 100644 --- a/src/playground.html +++ b/src/playground.html @@ -1,11 +1,12 @@ - + + HyperDoc Playground +
        HyperDoc Playground
        @@ -124,6 +143,7 @@ Waiting for WASM…
        +
          @@ -131,7 +151,6 @@
          -
            - + + \ No newline at end of file diff --git a/src/wasm.zig b/src/wasm.zig index 8cc0627..fb6001c 100644 --- a/src/wasm.zig +++ b/src/wasm.zig @@ -74,6 +74,7 @@ const DiagnosticView = struct { line: u32, column: u32, message: []u8, + is_fatal: bool, }; var document_buffer: std.array_list.Managed(u8) = std.array_list.Managed(u8).init(allocator); @@ -128,6 +129,10 @@ fn capture_diagnostics(source: *hyperdoc.Diagnostics) !void { .line = diag.location.line, .column = diag.location.column, .message = rendered, + .is_fatal = switch (diag.code.severity()) { + .warning => false, + .@"error" => true, + }, }); } } @@ -212,6 +217,12 @@ export fn hdoc_diagnostic_column(index: usize) u32 { return diagnostic_views.items[index].column; } +export fn hdoc_diagnostic_fatal(index: usize) bool { + if (index >= diagnostic_views.items.len) return false; + + return diagnostic_views.items[index].is_fatal; +} + export fn hdoc_diagnostic_message_ptr(index: usize) ?[*]const u8 { if (index >= diagnostic_views.items.len) return null; diff --git a/test/wasm/validate.js b/test/wasm/validate.js index 45ffd18..5e4318f 100644 --- a/test/wasm/validate.js +++ b/test/wasm/validate.js @@ -9,7 +9,7 @@ const textEncoder = new TextEncoder(); const textDecoder = new TextDecoder(); const repoRoot = path.join(__dirname, '..', '..'); -const wasmPath = path.join(repoRoot, 'zig-out', 'bin', 'hyperdoc_wasm.wasm'); +const wasmPath = path.join(repoRoot, 'zig-out', 'www', 'hyperdoc_wasm.wasm'); const htmlSnapshotTests = [ { From aa26a71709b382997136bc29c25dc9635663a44d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 13:38:04 +0100 Subject: [PATCH 110/116] Fixes playground for long text --- src/playground.html | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/playground.html b/src/playground.html index 325c6b7..03aa96e 100644 --- a/src/playground.html +++ b/src/playground.html @@ -10,12 +10,19 @@ font-family: 
system-ui, -apple-system, "Segoe UI", sans-serif; } + html { + height: 100%; + } + body { margin: 0; - height: 100vh; + height: 100%; + min-height: 100vh; + min-height: 100dvh; display: flex; flex-direction: column; background: #f8f9fb; + overflow: hidden; } header { @@ -33,6 +40,8 @@ gap: 12px; padding: 12px; box-sizing: border-box; + min-height: 0; + overflow: hidden; } .pane { @@ -42,6 +51,7 @@ border-radius: 8px; background: #ffffff; overflow: hidden; + min-height: 0; } .pane-header { @@ -64,6 +74,8 @@ font-size: 14px; box-sizing: border-box; outline: none; + min-height: 0; + overflow: auto; } .preview { @@ -71,6 +83,7 @@ padding: 12px; overflow: auto; box-sizing: border-box; + min-height: 0; } .preview.outdated { @@ -329,4 +342,4 @@ - \ No newline at end of file + From 5d43542dde9c8f9276aa83065d27eeb4601543c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 14:44:14 +0100 Subject: [PATCH 111/116] Improves example documents and playground --- examples/featurematrix.hdoc | 10 +- examples/guide.hdoc | 109 ++++++++-- src/playground.html | 399 +++++++++++++++++++++++++++++++++++- src/render/html5.zig | 2 +- test/snapshot/tables.html | 4 +- 5 files changed, 490 insertions(+), 34 deletions(-) diff --git a/examples/featurematrix.hdoc b/examples/featurematrix.hdoc index d6dd2a3..bb2e5fc 100644 --- a/examples/featurematrix.hdoc +++ b/examples/featurematrix.hdoc @@ -1,6 +1,6 @@ hdoc(version="2.0"); -h1 { Small Computer Feature Matrix } +title { Small Computer Feature Matrix } table { columns { @@ -63,7 +63,7 @@ table { td "✅" td "✅" td "❌" - td { p { ❌\sup{1} } } + td { p { ❌\footnote{Neotron Pico uses PS/2 for mouse/keyboard and VGA for video.} } } td "❌" } row(title="Modular Design") { @@ -71,7 +71,7 @@ table { td "❌" td "❌" td "✅" - td { p { ✅\sup{2} } } + td { p { ✅\footnote{Cody Computer has a single cartridge that can be added.} } } } row(title="Full Documentation") { td "✅" @@ -96,6 +96,6 @@ table { } } -p { 
\sup{1}: Neotron Pico uses PS/2 for mouse/keyboard and VGA for video. } +footnotes; + -p { \sup{2}: Cody Computer has a single cartridge that can be added. } diff --git a/examples/guide.hdoc b/examples/guide.hdoc index 0e7e946..adac6bb 100644 --- a/examples/guide.hdoc +++ b/examples/guide.hdoc @@ -24,26 +24,7 @@ p { Links can target \ref(ref="fig-diagram"){other blocks} or external \link(uri="https://ashet.computer"){resources}. } -note "Notes highlight supportive information." -warning "Warnings call out risky behavior." -danger "Danger paragraphs emphasize critical hazards." -tip "Tips provide actionable hints." -quote "Quotes include sourced or emphasized wording." -spoiler "Spoilers hide key story information until revealed." - -h1(id="literals") { Literal and Preformatted Blocks } - -p: -| Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. -| They are introduced by a trailing colon. -| You can capture snippets without escaping anything. - -pre(id="code-sample", syntax="zig") { -test { - const message = "HyperDoc"; - const answer = \mono(syntax="zig"){42}; -} -} +footnotes; h1(id="lists") { Lists } @@ -55,7 +36,7 @@ ul { li { p { \em{Cucumbers} with inline markup. } } } -h2 { Ordered Lists } +h2(id="ol") { Ordered Lists } ol(first="3") { li { p { Start counting at three. } } @@ -63,6 +44,50 @@ ol(first="3") { li { p { Finish the sequence. } } } +h1(id="admonitions") "Admonitions" +tip { + p { Tips provide actionable hints\footnote{You can also write footnotes this way}. } + p { Tips can also include multiple paragraphs and even lists: } + ul { + li "Item 1" + li "Item 2" + } +} + +note "Notes highlight supportive information." + +warning { + p { Warnings call out risky behavior\footnote(key="shared"){You can even share the same footnote in multiple locations.,}. } +} + +danger { + p { Danger paragraphs emphasize critical hazards\footnote(ref="shared");. 
} +} + +quote { + p "Quotes include sourced or emphasized wording." + p { - Unknown author } +} + +spoiler "Spoilers hide key story information until revealed." + +footnotes; + +h1(id="literals") { Literal and Preformatted Blocks } + +p: +| Literal blocks keep exact content and skip parsing, so characters like \{ \} are not special. +| They are introduced by a trailing colon. +| You can capture snippets without escaping anything. + +pre(id="code-sample", syntax="zig") {test { + const message = "HyperDoc"; + const answer = \mono(syntax="zig"){42}; +} +} + + + h1(id="media") { Figures } p { @@ -80,8 +105,50 @@ p { A combined timestamp looks like \datetime(fmt="iso"){2025-12-17T13:45:00Z}. } +h1 "References" + +p { + In chapter \ref(ref="paragraphs"); we've read about how we can format paragraphs. + This chapter here teaches about references. +} + +p { + References to another chapter can use the numeric value (like \ref(ref="ol", fmt="index");), use only the name (like \ref(ref="table-ref",fmt="name");) or we can use a \ref(ref="table-ref"){custom name like with regular links}. +} + h1(id="table-ref") { Tables } p { See the dedicated tables example file for row groups and colspan usage. } + +table(id="inventory") { + columns { + td "Item" + td "Quantity" + td "Notes" + } + group { Fresh Produce } + row(title="Fruit") { + td "Apples" + td "12" + td { p { Delivered on \date(fmt="short"){2025-02-08}. } } + } + row(title="Vegetables") { + td "Carrots" + td "7" + td { p { Store at \time(fmt="rough"){08:00:00Z}. } } + } + group { Pantry } + row(title="Dry Goods") { + td "Rice" + td "3" + td { p { Packed on \datetime(fmt="relative"){2025-02-08T08:00:00Z}. } } + } + row(title="Bulk") { + td(colspan="2") { p { This cell spans two columns. 
} } + td "Requires label" + } +} + + diff --git a/src/playground.html b/src/playground.html index 03aa96e..c1000e0 100644 --- a/src/playground.html +++ b/src/playground.html @@ -144,6 +144,252 @@ color: #ecaa04; font-weight: 600; } + + #preview { + color: #111827; + font-size: 15px; + line-height: 1.55; + } + + #preview :is(h1, h2, h3, h4, h5, h6) { + margin: 0.9em 0 0.35em; + line-height: 1.25; + letter-spacing: -0.01em; + } + + #preview :is(p, ul, ol, figure, pre, blockquote) { + margin: 0 0 0.9em; + } + + #preview :is(ul, ol) { + padding-left: 1.35em; + } + + #preview li { + margin: 0.25em 0; + } + + #preview a { + color: #2563eb; + text-decoration-thickness: 1px; + text-underline-offset: 2px; + } + + #preview a:hover { + text-decoration-thickness: 2px; + } + + #preview img { + max-width: 100%; + height: auto; + } + + #preview .hdoc-doc-meta { + margin: 0.25em 0 0; + color: #6b7280; + font-size: 0.95em; + } + + #preview code.hdoc-code { + font-family: ui-monospace, "JetBrains Mono", Consolas, "Courier New", monospace; + font-size: 0.95em; + background: #f3f4f6; + border: 1px solid #e5e7eb; + padding: 0.08em 0.32em; + border-radius: 6px; + } + + #preview pre { + padding: 12px 14px; + border-radius: 10px; + background: #0b1220; + color: #e5e7eb; + border: 1px solid #111827; + overflow: auto; + } + + #preview pre code.hdoc-code { + white-space: pre; + background: transparent; + border: none; + padding: 0; + border-radius: 0; + color: inherit; + font-size: 13px; + } + + #preview pre code.hdoc-code code.hdoc-code { + background: rgba(148, 163, 184, 0.14); + border: 1px solid rgba(148, 163, 184, 0.18); + padding: 0.05em 0.25em; + } + + #preview :is(.hdoc-note, .hdoc-warning, .hdoc-danger, .hdoc-tip) { + margin: 1em 0; + border-radius: 12px; + padding: 12px 12px 12px 44px; + border: 1px solid var(--hdoc-callout-border); + background: var(--hdoc-callout-bg); + color: var(--hdoc-callout-fg); + position: relative; + } + + #preview :is(.hdoc-note, .hdoc-warning, 
.hdoc-danger, .hdoc-tip)::before { + content: var(--hdoc-callout-icon); + position: absolute; + left: 12px; + top: 12px; + line-height: 1; + } + + #preview .hdoc-note { + --hdoc-callout-icon: "ℹ️"; + --hdoc-callout-bg: #eff6ff; + --hdoc-callout-border: #bfdbfe; + --hdoc-callout-fg: #0f172a; + } + + #preview .hdoc-warning { + --hdoc-callout-icon: "⚠️"; + --hdoc-callout-bg: #fffbeb; + --hdoc-callout-border: #fcd34d; + --hdoc-callout-fg: #78350f; + } + + #preview .hdoc-danger { + --hdoc-callout-icon: "🛑"; + --hdoc-callout-bg: #fef2f2; + --hdoc-callout-border: #fecaca; + --hdoc-callout-fg: #7f1d1d; + } + + #preview .hdoc-tip { + --hdoc-callout-icon: "💡"; + --hdoc-callout-bg: #ecfdf5; + --hdoc-callout-border: #a7f3d0; + --hdoc-callout-fg: #064e3b; + } + + #preview :is(.hdoc-note, .hdoc-warning, .hdoc-danger, .hdoc-tip, .hdoc-quote, .hdoc-spoiler) > :first-child { + margin-top: 0; + } + + #preview :is(.hdoc-note, .hdoc-warning, .hdoc-danger, .hdoc-tip, .hdoc-quote, .hdoc-spoiler) > :last-child { + margin-bottom: 0; + } + + #preview .hdoc-quote { + margin: 1em 0; + border-radius: 12px; + padding: 12px 12px 12px 14px; + border-left: 4px solid #10b981; + background: #f0fdf4; + color: #064e3b; + } + + #preview .hdoc-spoiler { + margin: 1em 0; + border-radius: 12px; + padding: 12px; + border: 1px dashed #d1d5db; + background: #f3f4f6; + position: relative; + } + + #preview .hdoc-spoiler::before { + content: "Spoiler (hover to reveal)"; + display: block; + font-size: 12px; + color: #6b7280; + margin-bottom: 8px; + } + + #preview .hdoc-spoiler > * { + filter: blur(6px); + transition: filter 120ms ease; + } + + #preview .hdoc-spoiler:hover > *, + #preview .hdoc-spoiler:focus-within > * { + filter: none; + } + + #preview table { + width: 100%; + border-collapse: separate; + border-spacing: 0; + border: 1px solid #e5e7eb; + border-radius: 12px; + background: #ffffff; + overflow: hidden; + font-size: 14px; + } + + #preview :is(th, td) { + padding: 8px 10px; + border-bottom: 1px 
solid #e5e7eb; + border-right: 1px solid #e5e7eb; + vertical-align: top; + text-align: left; + } + + #preview tr > :last-child { + border-right: none; + } + + #preview tbody tr:last-child > :is(th, td) { + border-bottom: none; + } + + #preview thead th { + background: #f9fafb; + font-weight: 650; + color: #111827; + } + + #preview th[scope="row"] { + background: #f9fafb; + font-weight: 600; + white-space: nowrap; + } + + #preview th[scope="rowgroup"] { + background: #eef2ff; + color: #312e81; + font-weight: 700; + } + + #preview tbody tr:nth-child(even) td { + background: #fcfcfd; + } + + #preview :is(th, td):empty { + padding: 0; + } + + #preview table :is(p, ul, ol) { + margin: 0; + } + + #preview table :is(ul, ol) { + padding-left: 1.2em; + } + + #preview sup.hdoc-footnote-ref { + font-size: 0.75em; + } + + #preview .hdoc-footnotes { + margin: 1.2em 0 0; + padding-top: 0.8em; + border-top: 1px solid #e5e7eb; + color: #374151; + } + + #preview .hdoc-footnote-list { + margin: 0; + padding-left: 1.3em; + } @@ -177,14 +423,157 @@ const decoder = new TextDecoder(); const wasmUrl = "./hyperdoc_wasm.wasm"; - const initialText = `hdoc(version="2.0", lang="en"); -title { - HyperDoc Playground + const initialText = `hdoc(version="2.0", lang="en", author="Felix \\"xq\\" Queißner", date="2025-12-17T13:45:00+01:00"); + +title { HyperDoc 2.0 Examples } + +toc(depth="2") {} + +h1(id="paragraphs") { Paragraphs and Inline Text } + +p(id="p-basic") { + This paragraph shows plain text mixed with \\em{emphasis}, \\strike{strike-through}, and \\mono{monospaced} spans. +} + +p(lang="de") { + Dieser Absatz zeigt das Attribut \\mono{lang} auf Blockebene. +} + +p "This paragraph uses a string literal body instead of a list." + +p { + Inline scripts support \\mono(syntax="zig"){const version = "2.0";} as well as sub/superscripts like H\\sub{2}O and x\\sup{2}. } p { - Type HyperDoc content on the left to render HTML here. 
-}`; + Links can target \\ref(ref="fig-diagram"){other blocks} or external \\link(uri="https://ashet.computer"){resources}. +} + +footnotes; + +h1(id="lists") { Lists } + +h2 { Unordered Lists } + +ul { + li { p { Apples } } + li "Bananas" + li { p { \em{Cucumbers} with inline markup. } } +} + +h2(id="ol") { Ordered Lists } + +ol(first="3") { + li { p { Start counting at three. } } + li "Continue with a string item." + li { p { Finish the sequence. } } +} + +h1(id="admonitions") "Admonitions" +tip { + p { Tips provide actionable hints\\footnote{You can also write footnotes this way}. } + p { Tips can also include multiple paragraphs and even lists: } + ul { + li "Item 1" + li "Item 2" + } +} + +note "Notes highlight supportive information." + +warning { + p { Warnings call out risky behavior\\footnote(key="shared"){You can even share the same footnote in multiple locations.,}. } +} + +danger { + p { Danger paragraphs emphasize critical hazards\\footnote(ref="shared");. } +} + +quote { + p "Quotes include sourced or emphasized wording." + p { - Unknown author } +} + +spoiler "Spoilers hide key story information until revealed." + +footnotes; + +h1(id="literals") { Literal and Preformatted Blocks } + +p: +| Literal blocks keep exact content and skip parsing, so characters like \\{ \\} are not special. +| They are introduced by a trailing colon. +| You can capture snippets without escaping anything. + +pre(id="code-sample", syntax="zig") {test { + const message = "HyperDoc"; + const answer = \\mono(syntax="zig"){42}; +} +} + +h1(id="media") { Figures } + +p { + The image below has a caption, alt text, and a relative asset path. +} + +img(id="fig-diagram", alt="Diagram showing a HyperDoc example", path="./assets/diagram.svg") { + HyperDoc is centered inside a rounded rectangle. +} + +h1(id="dates") { Dates and Times } + +p { + The event was announced on \\date(fmt="long"){2025-12-17} at \\time(fmt="short"){13:45:00Z}. 
+ A combined timestamp looks like \\datetime(fmt="iso"){2025-12-17T13:45:00Z}. +} + +h1 "References" + +p { + In chapter \\ref(ref="paragraphs"); we've read about how we can format paragraphs. + This chapter here teaches about references. +} + +p { + References to another chapter can use the numeric value (like \\ref(ref="ol", fmt="index");), use only the name (like \\ref(ref="table-ref",fmt="name");) or we can use a \\ref(ref="table-ref"){custom name like with regular links}. +} + +h1(id="table-ref") { Tables } + +p { + See the dedicated tables example file for row groups and colspan usage. +} + +table(id="inventory") { + columns { + td "Item" + td "Quantity" + td "Notes" + } + group { Fresh Produce } + row(title="Fruit") { + td "Apples" + td "12" + td { p { Delivered on \\date(fmt="short"){2025-02-08}. } } + } + row(title="Vegetables") { + td "Carrots" + td "7" + td { p { Store at \\time(fmt="rough"){08:00:00Z}. } } + } + group { Pantry } + row(title="Dry Goods") { + td "Rice" + td "3" + td { p { Packed on \\datetime(fmt="relative"){2025-02-08T08:00:00Z}. } } + } + row(title="Bulk") { + td(colspan="2") { p { This cell spans two columns. } } + td "Requires label" + } +} +`; sourceField.value = initialText; diff --git a/src/render/html5.zig b/src/render/html5.zig index a7acf35..a7ac6e9 100644 --- a/src/render/html5.zig +++ b/src/render/html5.zig @@ -491,7 +491,7 @@ const RenderContext = struct { try writeIndent(ctx.writer, indent + indent_step); try writeStartTag(ctx.writer, "th", .regular, .{ - .scope = "colgroup", + .scope = "rowgroup", .colspan = @as(u32, @intCast(@max(@as(usize, 1), column_count))), }); try ctx.renderSpans(group.content); diff --git a/test/snapshot/tables.html b/test/snapshot/tables.html index 9384ec5..9ffe48c 100644 --- a/test/snapshot/tables.html +++ b/test/snapshot/tables.html @@ -21,7 +21,7 @@

            §1 Table Coverage

          - + @@ -43,7 +43,7 @@

          §1 Table Coverage

          - + From 4535359e2f1466401bb25548b85f9acb0b5a8853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Sun, 4 Jan 2026 14:48:04 +0100 Subject: [PATCH 112/116] Adds pages rendering workflow --- .github/workflows/pages.yml | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/pages.yml diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 0000000..7704611 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,42 @@ +name: Pages + +on: + push: + branches: [hdoc-2.0] + +jobs: + build: + runs-on: ubuntu-latest + # Grant GITHUB_TOKEN the permissions required to make a Pages deployment + permissions: + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source + environment: + name: github-pages + url: ${{steps.deployment.outputs.page_url}} + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Zig + uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + + - name: Build + run: | + zig build install + + - name: Test + run: | + zig build test + + - name: Upload static files as artifact + id: pages-upload + uses: actions/upload-pages-artifact@v3 + with: + path: zig-out/www + + - name: Deploy artifact + id: deployment + uses: actions/deploy-pages@v4 From 1668ff11d68881b3ab3a8f40dcfabec52b816f89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 10:10:09 +0100 Subject: [PATCH 113/116] Add wasm LSP stub and extension CI workflow --- .github/workflows/extension-ci.yml | 75 + build.zig | 17 + src/wasm-lsp.zig | 9 + vscode-ext/.gitignore | 6 + vscode-ext/justfile | 13 + vscode-ext/language-configuration.json | 44 + vscode-ext/package-lock.json | 3140 ++++++++++++++++++ vscode-ext/package.json | 76 + vscode-ext/src/extension.ts | 141 + vscode-ext/src/utils.ts | 98 + vscode-ext/syntaxes/hyperdoc.tmLanguage.json | 74 + 
vscode-ext/test/utils.test.ts | 79 + vscode-ext/tsconfig.json | 26 + 13 files changed, 3798 insertions(+) create mode 100644 .github/workflows/extension-ci.yml create mode 100644 src/wasm-lsp.zig create mode 100644 vscode-ext/.gitignore create mode 100644 vscode-ext/justfile create mode 100644 vscode-ext/language-configuration.json create mode 100644 vscode-ext/package-lock.json create mode 100644 vscode-ext/package.json create mode 100644 vscode-ext/src/extension.ts create mode 100644 vscode-ext/src/utils.ts create mode 100644 vscode-ext/syntaxes/hyperdoc.tmLanguage.json create mode 100644 vscode-ext/test/utils.test.ts create mode 100644 vscode-ext/tsconfig.json diff --git a/.github/workflows/extension-ci.yml b/.github/workflows/extension-ci.yml new file mode 100644 index 0000000..6393547 --- /dev/null +++ b/.github/workflows/extension-ci.yml @@ -0,0 +1,75 @@ +name: Extension CI + +on: + pull_request: + branches: [master, hdoc-2.0] + push: + branches: [master, hdoc-2.0] + +jobs: + build-and-package: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Zig + uses: mlugg/setup-zig@v2 + with: + version: 0.15.2 + + - name: Build Zig artifacts + run: zig build install + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 18 + + - name: Install extension dependencies + run: npm ci + working-directory: vscode-ext + + - name: Copy wasm artifacts into extension bundle + run: | + mkdir -p vscode-ext/wasm + cp zig-out/www/hyperdoc_wasm.wasm vscode-ext/wasm/ || true + cp zig-out/www/hyperdoc_wasm_lsp.wasm vscode-ext/wasm/ + + - name: Build extension + run: npm run compile + working-directory: vscode-ext + + - name: Test extension + run: npm test + working-directory: vscode-ext + + - name: Package extension + run: | + npm run package + mv *.vsix hyperdoc-vscode.vsix + working-directory: vscode-ext + + - name: Upload packaged extension + uses: actions/upload-artifact@v4 + with: + name: hyperdoc-vscode.vsix 
+ path: vscode-ext/hyperdoc-vscode.vsix + + publish: + needs: build-and-package + if: github.event_name == 'push' && github.ref == 'refs/heads/hdoc-2.0' + runs-on: ubuntu-latest + environment: + name: vscode-marketplace + steps: + - name: Download packaged extension + uses: actions/download-artifact@v4 + with: + name: hyperdoc-vscode.vsix + path: artifacts + + - name: Publish to VS Code Marketplace + run: npx --yes vsce publish --packagePath artifacts/hyperdoc-vscode.vsix -p "$VSCE_PAT" + env: + VSCE_PAT: ${{ secrets.VSCE_PAT }} diff --git a/build.zig b/build.zig index fbbdf6a..3c33a0f 100644 --- a/build.zig +++ b/build.zig @@ -93,6 +93,23 @@ pub fn build(b: *std.Build) void { }); b.getInstallStep().dependOn(&install_wasm.step); + const wasm_lsp_exe = b.addExecutable(.{ + .name = "hyperdoc_wasm_lsp", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/wasm-lsp.zig"), + .target = wasm_target, + .optimize = optimize, + .single_threaded = true, + .imports = &.{ + .{ .name = "hyperdoc", .module = hyperdoc }, + }, + }), + }); + const install_wasm_lsp = b.addInstallArtifact(wasm_lsp_exe, .{ + .dest_dir = .{ .override = www_dir }, + }); + b.getInstallStep().dependOn(&install_wasm_lsp.step); + const install_web = b.addInstallFileWithDir(b.path("src/playground.html"), www_dir, "index.html"); b.getInstallStep().dependOn(&install_web.step); diff --git a/src/wasm-lsp.zig b/src/wasm-lsp.zig new file mode 100644 index 0000000..0160f40 --- /dev/null +++ b/src/wasm-lsp.zig @@ -0,0 +1,9 @@ +const std = @import("std"); + +pub export fn _start() void {} + +pub export fn hyperdoc_lsp_ping() void { + // Placeholder entrypoint for a wasm-based language server. + // Real initialization will be wired once the wasm server is implemented. 
+ std.mem.doNotOptimizeAway(@as(u32, 0)); +} diff --git a/vscode-ext/.gitignore b/vscode-ext/.gitignore new file mode 100644 index 0000000..b5d8d59 --- /dev/null +++ b/vscode-ext/.gitignore @@ -0,0 +1,6 @@ +node_modules +out +*.vsix +.vscode-test +.DS_Store +wasm/ diff --git a/vscode-ext/justfile b/vscode-ext/justfile new file mode 100644 index 0000000..6cdaa8a --- /dev/null +++ b/vscode-ext/justfile @@ -0,0 +1,13 @@ +default: setup build test + +setup: + npm install + +build: + npm run compile + +test: + npm test + +package: + npm run package diff --git a/vscode-ext/language-configuration.json b/vscode-ext/language-configuration.json new file mode 100644 index 0000000..de94959 --- /dev/null +++ b/vscode-ext/language-configuration.json @@ -0,0 +1,44 @@ +{ + "wordPattern": "(-?\\d*\\.\\d\\w*)|([^\\`\\~\\!\\@\\#\\%\\^\\&\\*\\(\\)\\=\\+\\[\\{\\]\\}\\\\\\|\\;\\:\\'\\\"\\,\\.\\<\\>\\/?\\s]+)", + "brackets": [ + [ + "{", + "}" + ], + [ + "(", + ")" + ] + ], + "autoClosingPairs": [ + { + "open": "{", + "close": "}" + }, + { + "open": "(", + "close": ")" + }, + { + "open": "\"", + "close": "\"", + "notIn": [ + "string" + ] + } + ], + "surroundingPairs": [ + [ + "{", + "}" + ], + [ + "(", + ")" + ], + [ + "\"", + "\"" + ] + ] +} diff --git a/vscode-ext/package-lock.json b/vscode-ext/package-lock.json new file mode 100644 index 0000000..4bfd06c --- /dev/null +++ b/vscode-ext/package-lock.json @@ -0,0 +1,3140 @@ +{ + "name": "hyperdoc-vscode", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "hyperdoc-vscode", + "version": "0.0.1", + "license": "MIT", + "devDependencies": { + "@types/mocha": "^10.0.6", + "@types/node": "^18.19.0", + "@types/vscode": "1.85.0", + "mocha": "^10.4.0", + "ts-node": "^10.9.2", + "typescript": "^5.4.0", + "vsce": "^2.15.0", + "vscode": "^1.1.37", + "vscode-languageclient": "^9.0.1" + }, + "engines": { + "vscode": "^1.85.0" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": 
"0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@tootallnate/once": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-1.1.2.tgz", + "integrity": "sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", + "integrity": 
"sha512-UCYBaeFvM11aU2y3YPZ//O5Rhj+xKyzy7mvcIoAjASbigy8mHMryP5cK7dgjlz2hWxh1g5pLw084E0a/wlUSFQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/mocha": { + "version": "10.0.10", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.10.tgz", + "integrity": "sha512-xPyYSz1cMPnJQhl0CLMH68j3gprKZaTjG3s5Vi+fDgx+uhG9NOXwbVt52eFS8ECyXhyKcjDLCBEqBExKuiZb7Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/vscode": { + "version": "1.85.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.85.0.tgz", + "integrity": "sha512-CF/RBon/GXwdfmnjZj0WTUMZN5H6YITOfBCP4iEZlOtVQXuzw6t7Le7+cR+7JzdMrnlm7Mfp49Oj2TuSXIWo3g==", + "dev": true, + "license": "MIT" + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": 
"https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.4", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", + "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/ansi-colors": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", + "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^1.9.0" + 
}, + "engines": { + "node": ">=4" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, + "license": "Python-2.0" + }, + "node_modules/azure-devops-node-api": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/azure-devops-node-api/-/azure-devops-node-api-11.2.0.tgz", + "integrity": "sha512-XdiGPhrpaT5J8wdERRKs5g8E0Zy1pvOYTli7z9E8nmOn3YGp4FhtjhrOyFmX/8veWCwdI69mCHKJw6l+4J/bHA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tunnel": "0.0.6", + "typed-rest-client": "^1.8.4" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "dev": true, + "license": "ISC" + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + 
}, + "node_modules/browser-stdout": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz", + "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", + "dev": true, + "license": "ISC" + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + 
"resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/cheerio": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.1.2.tgz", + "integrity": "sha512-IkxPpb5rS/d1IiLbHMgfPuS0FgiWTtFIm/Nj+2woXDLTZ7fOT2eqzgYbdMlLweqlHbsZjxEChoVK+7iph7jyQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.0.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.12.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + 
"node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "dev": true, + "license": "ISC" + }, + "node_modules/cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": 
"sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "dev": true, + "license": "MIT" + }, + "node_modules/commander": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", + "integrity": "sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + 
"integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decamelize": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-4.0.0.tgz", + "integrity": "sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": 
"https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/diff": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", + "integrity": "sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dev": true, + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": 
"https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "dev": true, + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "dev": true, + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": 
"https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es6-promise": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.8.tgz", + "integrity": "sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==", + "dev": true, + "license": "MIT" + }, + "node_modules/es6-promisify": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/es6-promisify/-/es6-promisify-5.0.0.tgz", + "integrity": "sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"es6-promise": "^4.0.3" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "dev": true, + "license": "(MIT OR WTFPL)", + "engines": { + "node": ">=6" + } + }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/flat": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", + "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", + "dev": true, + "license": "BSD-3-Clause", + "bin": { + "flat": "cli.js" + } + }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "dev": true, + "license": "MIT" + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true, + "license": "ISC" + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": 
"https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dev": true, + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "dev": true, + "license": "MIT" + }, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Glob versions prior to v9 are no longer supported", + 
"dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/growl": { + "version": "1.10.5", + "resolved": "https://registry.npmjs.org/growl/-/growl-1.10.5.tgz", + "integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4.x" + } + }, + "node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + 
"funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/hosted-git-info": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-4.1.0.tgz", + "integrity": "sha512-kyCuEOWjJqZuDbRHzL8V93NzQhwIB71oFWSyzVo+KPZI+pnQPPxucdkrOZvkLRnrf5URsQM+IJ09Dw29cRALIA==", + "dev": true, + "license": "ISC", + "dependencies": { + "lru-cache": "^6.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/htmlparser2": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz", + "integrity": "sha512-TwAZM+zE5Tq3lrEHvOlvwgj1XLWQCtaaibSN11Q+gGBAS7Y1uZSWwXXRe4iF6OXnaq1riyQAPFOBtYc77Mxq0g==", + "dev": true, + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.1", + "entities": "^6.0.0" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + 
"node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/http-proxy-agent": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz", + "integrity": "sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@tootallnate/once": "1", + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dev": true, + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": 
"sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "dev": true, + "license": "ISC", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "dev": true, + "license": "ISC" + }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", 
+ "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-plain-obj": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", + "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-unicode-supported": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", + "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/keytar": { + "version": "7.9.0", + "resolved": "https://registry.npmjs.org/keytar/-/keytar-7.9.0.tgz", + "integrity": 
"sha512-VPD8mtVtm5JNtA2AErl6Chp06JBfy7diFQ7TQQhdpWOl6MrCRB+eRbvAZUsbGQS9kiMq0coJsy0W0vHpDCkWsQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^4.3.0", + "prebuild-install": "^7.0.1" + } + }, + "node_modules/leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/linkify-it": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.3.tgz", + "integrity": "sha512-ynTsyrFSdE5oZ/O9GEf00kPngmOfVwazR5GKDq6EYfhlpFug3J2zybX56a2PRRpc9P+FuSoGNAwjlbDs9jJBPQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "uc.micro": "^1.0.1" + } + }, + "node_modules/locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-locate": "^5.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-symbols": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", + "integrity": "sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^4.1.0", + "is-unicode-supported": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-symbols/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": 
"sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/log-symbols/node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/log-symbols/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/log-symbols/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/log-symbols/node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/log-symbols/node_modules/supports-color": { + "version": "7.2.0", + "resolved": 
"https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/markdown-it": { + "version": "12.3.2", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-12.3.2.tgz", + "integrity": "sha512-TchMembfxfNVpHkbtriWltGWc+m3xszaRD0CZup7GFFhzIgQqxIfn3eGj1yZpfuflzPvfkt611B2Q/Bsk1YnGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1", + "entities": "~2.1.0", + "linkify-it": "^3.0.1", + "mdurl": "^1.0.1", + "uc.micro": "^1.0.5" + }, + "bin": { + "markdown-it": "bin/markdown-it.js" + } + }, + "node_modules/markdown-it/node_modules/entities": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.1.0.tgz", + "integrity": "sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==", + "dev": true, + "license": "BSD-2-Clause", + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": 
"sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mdurl": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", + "integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g==", + "dev": true, + "license": "MIT" + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "dev": true, + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/mkdirp": { + "version": "0.5.1", + "resolved": 
"https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "integrity": "sha512-SknJC52obPfGQPnjIkXbmA6+5H15E+fR+E4iR2oQ3zzCLbd7/ONua69R/Gw7AgkTLsRG+r5fzksYwWe1AgTyWA==", + "deprecated": "Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)", + "dev": true, + "license": "MIT", + "dependencies": { + "minimist": "0.0.8" + }, + "bin": { + "mkdirp": "bin/cmd.js" + } + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "dev": true, + "license": "MIT" + }, + "node_modules/mkdirp/node_modules/minimist": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "integrity": "sha512-miQKw5Hv4NS1Psg2517mV4e4dYNaO3++hjAvLOAzKqZ61rH8NS1SK+vbfBWZ5PY/Me/bEWhUwqMghEW5Fb9T7Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/mocha": { + "version": "10.8.2", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.8.2.tgz", + "integrity": "sha512-VZlYo/WE8t1tstuRmqgeyBgCbJc/lEdopaa+axcKzTBJ+UIdlAB9XnmvTCAH4pwR4ElNInaedhEBmZD8iCSVEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-colors": "^4.1.3", + "browser-stdout": "^1.3.1", + "chokidar": "^3.5.3", + "debug": "^4.3.5", + "diff": "^5.2.0", + "escape-string-regexp": "^4.0.0", + "find-up": "^5.0.0", + "glob": "^8.1.0", + "he": "^1.2.0", + "js-yaml": "^4.1.0", + "log-symbols": "^4.1.0", + "minimatch": "^5.1.6", + "ms": "^2.1.3", + "serialize-javascript": "^6.0.2", + "strip-json-comments": "^3.1.1", + "supports-color": "^8.1.1", + "workerpool": "^6.5.1", + "yargs": "^16.2.0", + "yargs-parser": "^20.2.9", + "yargs-unparser": "^2.0.0" + }, + "bin": { + "_mocha": "bin/_mocha", + "mocha": "bin/mocha.js" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + 
"node_modules/mocha/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/mocha/node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mocha/node_modules/glob": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", + "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", + "deprecated": "Glob versions prior to v9 are no longer supported", + "dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^5.0.1", + "once": "^1.3.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/mocha/node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/mocha/node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": 
"sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/mocha/node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mocha/node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/mute-stream": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.8.tgz", + "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", + "dev": true, + "license": "ISC" + }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/node-abi": { + "version": "3.85.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.85.0.tgz", + "integrity": "sha512-zsFhmbkAzwhTft6nd3VxcG0cvJsT70rL+BIGHWVq5fi6MwGrHwzqKaxXE+Hl2GmnGItnDKPPkO5/LQqjVkIdFg==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-abi/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-addon-api": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz", + "integrity": "sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": 
"sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-limit": "^3.0.2" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parse-semver": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/parse-semver/-/parse-semver-1.1.1.tgz", + "integrity": "sha512-Eg1OuNntBMH0ojvEKSrvDSnwLmvVuUOSdylH/pSCPNMIspLlweJyIWXCE+k/5hm3cj/EBUYwmWkjhBALNP4LXQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^5.1.0" + } + }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "dev": 
true, + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "dev": true, + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": 
"sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "dev": true, + "license": "MIT" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "dev": true, + "license": "MIT", + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/pump": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", + "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/qs": { + "version": "6.14.1", + "resolved": 
"https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", + "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/randombytes": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "safe-buffer": "^5.1.0" + } + }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "dev": true, + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/read": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/read/-/read-1.0.7.tgz", + "integrity": "sha512-rSOKNYUmaxy0om1BNjMN4ezNT6VKK+2xF4GBhc81mkH7L60i6dp8qPYrkndNLT3QPphoII3maL9PVC9XmhHwVQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "mute-stream": "~0.0.4" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dev": true, + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/readdirp": { + "version": 
"3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true, + "license": "MIT" + }, + "node_modules/sax": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.4.3.tgz", + "integrity": "sha512-yqYn1JhPczigF94DMS+shiDMjDowYO6y9+wB/4WgO0Y19jWYk0lQ4tuG5KI7kj4FTp1wxPj5IFfcrz/s1c3jjQ==", + "dev": true, + "license": "BlueOak-1.0.0" + }, + "node_modules/semver": { + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", + "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==", + 
"dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver" + } + }, + "node_modules/serialize-javascript": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "randombytes": "^2.1.0" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + 
"funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": 
"sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.21", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz", + "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dev": true, + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": 
"sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/tmp": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz", + "integrity": "sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.14" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": 
true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/ts-node/node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/tunnel": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.6.11 <=0.7.0 || >=0.7.3" + } + }, + "node_modules/tunnel-agent": { + 
"version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, + "node_modules/typed-rest-client": { + "version": "1.8.11", + "resolved": "https://registry.npmjs.org/typed-rest-client/-/typed-rest-client-1.8.11.tgz", + "integrity": "sha512-5UvfMpd1oelmUPRbbaVnq+rHP7ng2cE4qoQkQeAqxRL6PklkxsM0g32/HL0yfvruK6ojQ5x8EE+HF4YV6DtuCA==", + "dev": true, + "license": "MIT", + "dependencies": { + "qs": "^6.9.1", + "tunnel": "0.0.6", + "underscore": "^1.12.1" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/uc.micro": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", + "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "dev": true, + "license": "MIT" + }, + "node_modules/underscore": { + "version": "1.13.7", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz", + "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==", + "dev": true, + "license": "MIT" + }, + "node_modules/undici": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.16.0.tgz", + "integrity": "sha512-QEg3HPMll0o3t2ourKwOeUAZ159Kn9mx5pnzHRQO8+Wixmh88YdZRiIwat0iNzNNXn0yoEtXJqFpyW7eM8BV7g==", + "dev": true, + "license": "MIT", + "engines": { + "node": 
">=20.18.1" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true, + "license": "MIT" + }, + "node_modules/url-join": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", + "integrity": "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==", + "dev": true, + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "dev": true, + "license": "MIT" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, + "node_modules/vsce": { + "version": "2.15.0", + "resolved": "https://registry.npmjs.org/vsce/-/vsce-2.15.0.tgz", + "integrity": "sha512-P8E9LAZvBCQnoGoizw65JfGvyMqNGlHdlUXD1VAuxtvYAaHBKLBdKPnpy60XKVDAkQCfmMu53g+gq9FM+ydepw==", + "deprecated": "vsce has been renamed to @vscode/vsce. 
Install using @vscode/vsce instead.", + "dev": true, + "license": "MIT", + "dependencies": { + "azure-devops-node-api": "^11.0.1", + "chalk": "^2.4.2", + "cheerio": "^1.0.0-rc.9", + "commander": "^6.1.0", + "glob": "^7.0.6", + "hosted-git-info": "^4.0.2", + "keytar": "^7.7.0", + "leven": "^3.1.0", + "markdown-it": "^12.3.2", + "mime": "^1.3.4", + "minimatch": "^3.0.3", + "parse-semver": "^1.1.1", + "read": "^1.0.7", + "semver": "^5.1.0", + "tmp": "^0.2.1", + "typed-rest-client": "^1.8.4", + "url-join": "^4.0.1", + "xml2js": "^0.4.23", + "yauzl": "^2.3.1", + "yazl": "^2.2.2" + }, + "bin": { + "vsce": "vsce" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/vscode": { + "version": "1.1.37", + "resolved": "https://registry.npmjs.org/vscode/-/vscode-1.1.37.tgz", + "integrity": "sha512-vJNj6IlN7IJPdMavlQa1KoFB3Ihn06q1AiN3ZFI/HfzPNzbKZWPPuiU+XkpNOfGU5k15m4r80nxNPlM7wcc0wg==", + "deprecated": "This package is deprecated in favor of @types/vscode and vscode-test. For more information please read: https://code.visualstudio.com/updates/v1_36#_splitting-vscode-package-into-typesvscode-and-vscodetest", + "dev": true, + "license": "MIT", + "dependencies": { + "glob": "^7.1.2", + "http-proxy-agent": "^4.0.1", + "https-proxy-agent": "^5.0.0", + "mocha": "^5.2.0", + "semver": "^5.4.1", + "source-map-support": "^0.5.0", + "vscode-test": "^0.4.1" + }, + "bin": { + "vscode-install": "bin/install" + }, + "engines": { + "node": ">=8.9.3" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": 
"sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "dev": true, + "license": "MIT", + "dependencies": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + }, + "engines": { + "vscode": "^1.82.0" + } + }, + "node_modules/vscode-languageclient/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/vscode-languageclient/node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-languageclient/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "dev": true, + "license": "MIT", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + 
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode-test": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/vscode-test/-/vscode-test-0.4.3.tgz", + "integrity": "sha512-EkMGqBSefZH2MgW65nY05rdRSko15uvzq4VAPM5jVmwYuFQKE7eikKXNJDRxL+OITXHB6pI+a3XqqD32Y3KC5w==", + "deprecated": "This package has been renamed to @vscode/test-electron, please update to the new name", + "dev": true, + "license": "MIT", + "dependencies": { + "http-proxy-agent": "^2.1.0", + "https-proxy-agent": "^2.2.1" + }, + "engines": { + "node": ">=8.9.3" + } + }, + "node_modules/vscode-test/node_modules/agent-base": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-4.3.0.tgz", + "integrity": "sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==", + "dev": true, + "license": "MIT", + "dependencies": { + "es6-promisify": "^5.0.0" + }, + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/vscode-test/node_modules/debug": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", + "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/vscode-test/node_modules/http-proxy-agent": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-2.1.0.tgz", + "integrity": "sha512-qwHbBLV7WviBl0rQsOzH6o5lwyOIvwp/BdFnvVxXORldu5TmjFfjzBcWUWS5kWAZhmv+JtiDhSuQCp4sBfbIgg==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "4", + "debug": "3.1.0" + }, + "engines": { + "node": ">= 4.5.0" + } + }, + 
"node_modules/vscode-test/node_modules/https-proxy-agent": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz", + "integrity": "sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^4.3.0", + "debug": "^3.1.0" + }, + "engines": { + "node": ">= 4.5.0" + } + }, + "node_modules/vscode-test/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode/node_modules/commander": { + "version": "2.15.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.15.1.tgz", + "integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode/node_modules/debug": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", + "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/vscode/node_modules/diff": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-3.5.0.tgz", + "integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/vscode/node_modules/glob": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.2.tgz", + "integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==", + "deprecated": "Glob versions prior to v9 are no longer 
supported", + "dev": true, + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + } + }, + "node_modules/vscode/node_modules/he": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/he/-/he-1.1.1.tgz", + "integrity": "sha512-z/GDPjlRMNOa2XJiB4em8wJpuuBfrFOlYKTZxtpkdr1uPdibHI8rYA3MY0KDObpVyaes0e/aunid/t88ZI2EKA==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/vscode/node_modules/minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/vscode/node_modules/mocha": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-5.2.0.tgz", + "integrity": "sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "browser-stdout": "1.3.1", + "commander": "2.15.1", + "debug": "3.1.0", + "diff": "3.5.0", + "escape-string-regexp": "1.0.5", + "glob": "7.1.2", + "growl": "1.10.5", + "he": "1.1.1", + "minimatch": "3.0.4", + "mkdirp": "0.5.1", + "supports-color": "5.4.0" + }, + "bin": { + "_mocha": "bin/_mocha", + "mocha": "bin/mocha" + }, + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/vscode/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode/node_modules/supports-color": { + "version": "5.4.0", + 
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.4.0.tgz", + "integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "dev": true, + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/workerpool": { + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.5.1.tgz", + "integrity": "sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-styles": { + "version": 
"4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/wrap-ansi/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/xml2js": { + "version": "0.4.23", + "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz", + "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==", + "dev": true, + "license": "MIT", + "dependencies": { + "sax": ">=0.6.0", + "xmlbuilder": "~11.0.0" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/xmlbuilder": { + "version": "11.0.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", + "integrity": 
"sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true, + "license": "ISC" + }, + "node_modules/yargs": { + "version": "16.2.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", + "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs-parser": { + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs-unparser": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/yargs-unparser/-/yargs-unparser-2.0.0.tgz", + "integrity": "sha512-7pRTIA9Qc1caZ0bZ6RYRGbHJthJWuakf+WmHK0rVeLkNrrGhfoabBNdue6kdINI6r4if7ocq9aD/n7xwKOdzOA==", + "dev": true, + "license": "MIT", + "dependencies": { + "camelcase": "^6.0.0", + "decamelize": "^4.0.0", + "flat": "^5.0.2", + 
"is-plain-obj": "^2.1.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, + "node_modules/yazl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/yazl/-/yazl-2.5.1.tgz", + "integrity": "sha512-phENi2PLiHnHb6QBVot+dJnaAZ0xosj7p3fWl+znIjBDlnMI2PsZCJZ306BPTFOaHf5qdDEI8x5qFrSOBN5vrw==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3" + } + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/vscode-ext/package.json b/vscode-ext/package.json new file mode 100644 index 0000000..64d354a --- /dev/null +++ b/vscode-ext/package.json @@ -0,0 +1,76 @@ +{ + "name": "hyperdoc-vscode", + "displayName": "HyperDoc", + "description": "HyperDoc language basics, highlighting, and completion.", + "version": "0.0.1", + "publisher": "hyperdoc", + "license": "MIT", + "engines": { + "vscode": "^1.85.0" + }, + "categories": [ + "Programming Languages" + ], + "activationEvents": [ + "onLanguage:hyperdoc", + "onCommand:hyperdoc.startWasmLanguageServer" + 
], + "main": "./out/extension.js", + "contributes": { + "languages": [ + { + "id": "hyperdoc", + "aliases": [ + "HyperDoc", + "hyperdoc" + ], + "extensions": [ + ".hdoc" + ], + "configuration": "./language-configuration.json" + } + ], + "grammars": [ + { + "language": "hyperdoc", + "scopeName": "source.hyperdoc", + "path": "./syntaxes/hyperdoc.tmLanguage.json" + } + ], + "commands": [ + { + "command": "hyperdoc.startWasmLanguageServer", + "title": "HyperDoc: Start Wasm Language Server" + } + ], + "configuration": { + "title": "HyperDoc", + "properties": { + "hyperdoc.languageServer.wasmPath": { + "type": "string", + "default": "", + "description": "Path to a HyperDoc language server WebAssembly binary. Leave empty to disable the wasm language server stub." + } + } + } + }, + "scripts": { + "vscode:prepublish": "npm run compile", + "compile": "tsc -p ./", + "watch": "tsc -watch -p ./", + "lint": "echo \"No lint configured\"", + "test": "mocha -r ts-node/register test/**/*.test.ts", + "package": "vsce package" + }, + "devDependencies": { + "@types/node": "^18.19.0", + "@types/vscode": "1.85.0", + "@types/mocha": "^10.0.6", + "mocha": "^10.4.0", + "ts-node": "^10.9.2", + "typescript": "^5.4.0", + "vsce": "^2.15.0", + "vscode": "^1.1.37", + "vscode-languageclient": "^9.0.1" + } +} diff --git a/vscode-ext/src/extension.ts b/vscode-ext/src/extension.ts new file mode 100644 index 0000000..59da2fc --- /dev/null +++ b/vscode-ext/src/extension.ts @@ -0,0 +1,141 @@ +import * as fs from "fs/promises"; +import * as vscode from "vscode"; +import { + ATTRIBUTE_SUGGESTIONS, + ELEMENT_SUGGESTIONS, + Suggestion, + computeIsInAttributeList, + mapSuggestionKind, + resolveWasmPath +} from "./utils"; + +class HyperdocCompletionProvider implements vscode.CompletionItemProvider { + provideCompletionItems( + document: vscode.TextDocument, + position: vscode.Position + ): vscode.ProviderResult { + const inAttributeList = isInAttributeList(document, position); + const pool = inAttributeList 
? ATTRIBUTE_SUGGESTIONS : ELEMENT_SUGGESTIONS; + + return pool.map((item) => createCompletionItem(item)); + } +} + +function createCompletionItem(item: Suggestion): vscode.CompletionItem { + const completion = new vscode.CompletionItem( + item.label, + mapSuggestionKind(item.kind) + ); + completion.detail = item.detail; + return completion; +} + +export function isInAttributeList( + document: vscode.TextDocument, + position: vscode.Position +): boolean { + const text = document.getText( + new vscode.Range(new vscode.Position(0, 0), position) + ); + return computeIsInAttributeList(text); +} + +class WasmLanguageServerController { + private wasmModule: WebAssembly.Module | undefined; + private readonly output: vscode.OutputChannel; + + constructor(private readonly context: vscode.ExtensionContext) { + this.output = vscode.window.createOutputChannel("HyperDoc"); + } + + async prepareFromConfiguration(): Promise { + const configuredPath = vscode.workspace + .getConfiguration("hyperdoc") + .get("languageServer.wasmPath") + ?.trim(); + + if (!configuredPath) { + this.wasmModule = undefined; + this.output.appendLine( + "HyperDoc wasm language server is disabled (no path configured)." + ); + return; + } + + await this.loadWasmModule(configuredPath); + } + + dispose(): void { + this.wasmModule = undefined; + this.output.dispose(); + } + + private async loadWasmModule(rawPath: string): Promise { + const resolvedPath = resolveWasmPath(rawPath, { + extensionPath: this.context.extensionPath, + workspaceFolders: vscode.workspace.workspaceFolders?.map( + (folder) => folder.uri.fsPath + ) + }); + this.output.appendLine( + `Preparing HyperDoc wasm language server stub from: ${resolvedPath}` + ); + + try { + const bytes = await fs.readFile(resolvedPath); + const wasmBytes = Uint8Array.from(bytes); + this.wasmModule = await WebAssembly.compile(wasmBytes); + this.output.appendLine( + "Wasm module compiled. 
Language client wiring is intentionally disabled until the server shim is available." + ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + vscode.window.showWarningMessage( + `HyperDoc: failed to load wasm language server (${message}).` + ); + } + } +} + +export async function activate(context: vscode.ExtensionContext): Promise { + const completionProvider = vscode.languages.registerCompletionItemProvider( + { language: "hyperdoc" }, + new HyperdocCompletionProvider(), + "\\", + "{", + "(" + ); + + const wasmController = new WasmLanguageServerController(context); + + const startWasmCommand = vscode.commands.registerCommand( + "hyperdoc.startWasmLanguageServer", + async () => { + await wasmController.prepareFromConfiguration(); + vscode.window.showInformationMessage( + "HyperDoc wasm language server stub prepared (when configured)." + ); + } + ); + + const configChangeListener = vscode.workspace.onDidChangeConfiguration( + async (event) => { + if (event.affectsConfiguration("hyperdoc.languageServer.wasmPath")) { + await wasmController.prepareFromConfiguration(); + } + } + ); + + context.subscriptions.push( + completionProvider, + wasmController, + startWasmCommand, + configChangeListener + ); + + await wasmController.prepareFromConfiguration(); +} + +export function deactivate(): void { + // No-op +} diff --git a/vscode-ext/src/utils.ts b/vscode-ext/src/utils.ts new file mode 100644 index 0000000..9d896be --- /dev/null +++ b/vscode-ext/src/utils.ts @@ -0,0 +1,98 @@ +import * as path from "path"; + +export type Suggestion = { + label: string; + detail: string; + kind: "class" | "function" | "property"; +}; + +export const ELEMENT_SUGGESTIONS: Suggestion[] = [ + { label: "hdoc", detail: "Document header", kind: "class" }, + { label: "title", detail: "Document title", kind: "class" }, + { label: "h1", detail: "Heading level 1", kind: "class" }, + { label: "h2", detail: "Heading level 2", kind: "class" }, + { label: "h3", 
detail: "Heading level 3", kind: "class" }, + { label: "toc", detail: "Table of contents", kind: "class" }, + { label: "footnotes", detail: "Footnote dump", kind: "class" }, + { label: "p", detail: "Paragraph", kind: "class" }, + { label: "note", detail: "Admonition block: note", kind: "class" }, + { label: "warning", detail: "Admonition block: warning", kind: "class" }, + { label: "danger", detail: "Admonition block: danger", kind: "class" }, + { label: "tip", detail: "Admonition block: tip", kind: "class" }, + { label: "quote", detail: "Admonition block: quote", kind: "class" }, + { label: "spoiler", detail: "Admonition block: spoiler", kind: "class" }, + { label: "ul", detail: "Unordered list", kind: "class" }, + { label: "ol", detail: "Ordered list", kind: "class" }, + { label: "li", detail: "List item", kind: "class" }, + { label: "img", detail: "Figure/image", kind: "class" }, + { label: "pre", detail: "Preformatted block", kind: "class" }, + { label: "table", detail: "Table", kind: "class" }, + { label: "columns", detail: "Table columns header", kind: "class" }, + { label: "row", detail: "Table row", kind: "class" }, + { label: "group", detail: "Table row group", kind: "class" }, + { label: "td", detail: "Table cell", kind: "class" }, + { label: "\\em", detail: "Inline emphasis", kind: "function" }, + { label: "\\mono", detail: "Inline monospace", kind: "function" }, + { label: "\\strike", detail: "Inline strikethrough", kind: "function" }, + { label: "\\sub", detail: "Inline subscript", kind: "function" }, + { label: "\\sup", detail: "Inline superscript", kind: "function" }, + { label: "\\link", detail: "Inline link", kind: "function" }, + { label: "\\date", detail: "Inline date", kind: "function" }, + { label: "\\time", detail: "Inline time", kind: "function" }, + { label: "\\datetime", detail: "Inline datetime", kind: "function" }, + { label: "\\ref", detail: "Inline reference", kind: "function" }, + { label: "\\footnote", detail: "Inline footnote", kind: 
"function" } +]; + +export const ATTRIBUTE_SUGGESTIONS: Suggestion[] = [ + { label: "id", detail: "Block identifier", kind: "property" }, + { label: "title", detail: "Title attribute", kind: "property" }, + { label: "lang", detail: "Language override", kind: "property" }, + { label: "fmt", detail: "Format selection", kind: "property" }, + { label: "ref", detail: "Reference target", kind: "property" }, + { label: "key", detail: "Footnote key", kind: "property" } +]; + +export function computeIsInAttributeList(text: string): boolean { + const lastOpen = text.lastIndexOf("("); + if (lastOpen === -1) { + return false; + } + + const lastClose = text.lastIndexOf(")"); + if (lastClose > lastOpen) { + return false; + } + + const afterOpen = text.slice(lastOpen + 1); + return !afterOpen.includes("{") && !afterOpen.includes("}"); +} + +export function mapSuggestionKind(kind: Suggestion["kind"]): number { + switch (kind) { + case "class": + return 6; + case "function": + return 3; + case "property": + return 10; + default: + return 9; + } +} + +export function resolveWasmPath( + rawPath: string, + context: { extensionPath: string; workspaceFolders?: string[] } +): string { + if (path.isAbsolute(rawPath)) { + return rawPath; + } + + const workspaceFolder = context.workspaceFolders?.[0]; + if (workspaceFolder) { + return path.join(workspaceFolder, rawPath); + } + + return path.join(context.extensionPath, rawPath); +} diff --git a/vscode-ext/syntaxes/hyperdoc.tmLanguage.json b/vscode-ext/syntaxes/hyperdoc.tmLanguage.json new file mode 100644 index 0000000..0fa088e --- /dev/null +++ b/vscode-ext/syntaxes/hyperdoc.tmLanguage.json @@ -0,0 +1,74 @@ +{ + "name": "HyperDoc", + "scopeName": "source.hyperdoc", + "patterns": [ + { + "name": "entity.name.type.hyperdoc", + "match": "^(\\s*)(\\\\?[A-Za-z0-9_]+)", + "captures": { + "1": { + "name": "punctuation.whitespace.leading.hyperdoc" + }, + "2": { + "name": "entity.name.tag.hyperdoc" + } + } + }, + { + "name": 
"support.function.inline.hyperdoc", + "match": "\\\\[A-Za-z0-9_]+" + }, + { + "name": "variable.parameter.attribute.hyperdoc", + "match": "([A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)?)(\\s*)(=)", + "captures": { + "1": { + "name": "variable.parameter.attribute.hyperdoc" + }, + "2": { + "name": "punctuation.separator.attribute.hyperdoc" + }, + "3": { + "name": "keyword.operator.assignment.hyperdoc" + } + } + }, + { + "name": "string.quoted.double.hyperdoc", + "match": "\"(?:\\\\.|[^\"\\\\])*\"" + }, + { + "name": "punctuation.section.braces.hyperdoc", + "match": "[{}]" + }, + { + "name": "punctuation.section.parens.hyperdoc", + "match": "[()]" + }, + { + "name": "punctuation.terminator.empty-body.hyperdoc", + "match": ";" + }, + { + "name": "punctuation.definition.verbatim.hyperdoc", + "match": "(:)(?=\\s*(?:$|\\n|\\|))", + "captures": { + "1": { + "name": "punctuation.definition.verbatim.start.hyperdoc" + } + } + }, + { + "name": "meta.verbatim.line.hyperdoc", + "match": "^\\s*(\\|)(.*)$", + "captures": { + "1": { + "name": "punctuation.definition.verbatim.bar.hyperdoc" + }, + "2": { + "name": "string.unquoted.verbatim.hyperdoc" + } + } + } + ] +} diff --git a/vscode-ext/test/utils.test.ts b/vscode-ext/test/utils.test.ts new file mode 100644 index 0000000..e201bd8 --- /dev/null +++ b/vscode-ext/test/utils.test.ts @@ -0,0 +1,79 @@ +import { strict as assert } from "node:assert"; +import path from "path"; +import { + ATTRIBUTE_SUGGESTIONS, + ELEMENT_SUGGESTIONS, + computeIsInAttributeList, + mapSuggestionKind, + resolveWasmPath +} from "../src/utils"; + +describe("computeIsInAttributeList", () => { + it("returns false when no opening paren is present", () => { + assert.equal(computeIsInAttributeList("hdoc "), false); + }); + + it("returns true between parentheses before closing", () => { + const text = 'node(attr="1"'; + assert.equal(computeIsInAttributeList(text), true); + }); + + it("returns false after the closing parenthesis", () => { + const text = 'node(attr="1") '; + 
assert.equal(computeIsInAttributeList(text), false); + }); + + it("returns false if a block brace appears after the last open paren", () => { + const text = 'node(attr="1"{'; + assert.equal(computeIsInAttributeList(text), false); + }); +}); + +describe("completion suggestions", () => { + it("exposes element suggestions with both block and inline names", () => { + const labels = ELEMENT_SUGGESTIONS.map((s) => s.label); + assert(labels.includes("hdoc")); + assert(labels.includes("\\em")); + }); + + it("exposes attribute suggestions", () => { + const labels = ATTRIBUTE_SUGGESTIONS.map((s) => s.label); + assert(labels.includes("id")); + assert(labels.includes("fmt")); + }); +}); + +describe("mapSuggestionKind", () => { + it("maps to completion item kinds", () => { + assert.equal(mapSuggestionKind("class"), 6); + assert.equal(mapSuggestionKind("function"), 3); + assert.equal(mapSuggestionKind("property"), 10); + }); +}); + +describe("resolveWasmPath", () => { + const extPath = "/extension"; + + it("returns absolute paths unchanged", () => { + const input = "/tmp/server.wasm"; + assert.equal( + resolveWasmPath(input, { extensionPath: extPath }), + input + ); + }); + + it("uses workspace folder when available", () => { + const output = resolveWasmPath("server.wasm", { + extensionPath: extPath, + workspaceFolders: ["/workspace/project"] + }); + assert.equal(output, path.join("/workspace/project", "server.wasm")); + }); + + it("falls back to the extension path", () => { + const output = resolveWasmPath("server.wasm", { + extensionPath: extPath + }); + assert.equal(output, path.join(extPath, "server.wasm")); + }); +}); diff --git a/vscode-ext/tsconfig.json b/vscode-ext/tsconfig.json new file mode 100644 index 0000000..c24d51b --- /dev/null +++ b/vscode-ext/tsconfig.json @@ -0,0 +1,26 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "ES2020", + "outDir": "out", + "lib": [ + "ES2020", + "DOM" + ], + "sourceMap": true, + "rootDir": ".", + "strict": true, + 
"moduleResolution": "node", + "esModuleInterop": true, + "skipLibCheck": true, + "types": [ + "node", + "vscode", + "mocha" + ] + }, + "include": [ + "src", + "test" + ] +} From 7a07e4692cc20357bc1767c3c192cf7068c5637d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 10:16:30 +0100 Subject: [PATCH 114/116] Use Node 20 for extension packaging workflow --- .github/workflows/extension-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/extension-ci.yml b/.github/workflows/extension-ci.yml index 6393547..a4b123c 100644 --- a/.github/workflows/extension-ci.yml +++ b/.github/workflows/extension-ci.yml @@ -24,7 +24,7 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: 18 + node-version: 20 - name: Install extension dependencies run: npm ci From a4e2b0cbe668bb4c3f22ab1fd14aeb8a953525ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 11:01:35 +0100 Subject: [PATCH 115/116] Point extension main to compiled output --- vscode-ext/package.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vscode-ext/package.json b/vscode-ext/package.json index 64d354a..84b47d0 100644 --- a/vscode-ext/package.json +++ b/vscode-ext/package.json @@ -15,7 +15,11 @@ "onLanguage:hyperdoc", "onCommand:hyperdoc.startWasmLanguageServer" ], - "main": "./out/extension.js", + "main": "./out/src/extension.js", + "repository": { + "type": "git", + "url": "https://github.com/ashet-hypertext/hyperdoc.git" + }, "contributes": { "languages": [ { From cc5e7206784153a2b32c6d72249e72ed331c7967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Mon, 5 Jan 2026 11:01:39 +0100 Subject: [PATCH 116/116] Add repository link and license for extension packaging --- LICENSE | 21 +++++++++++++++++++++ vscode-ext/package.json | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE 
new file mode 100644 index 0000000..637c23a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 HyperDoc Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vscode-ext/package.json b/vscode-ext/package.json index 84b47d0..a72ce19 100644 --- a/vscode-ext/package.json +++ b/vscode-ext/package.json @@ -18,7 +18,7 @@ "main": "./out/src/extension.js", "repository": { "type": "git", - "url": "https://github.com/ashet-hypertext/hyperdoc.git" + "url": "https://github.com/Ashet-Technologies/hyperdoc.git" }, "contributes": { "languages": [
          "Section One""Section One"
          Row 1
          "Section Two""Section Two"
          Row 3