From 6ad6ee380ee4d6b151532a49b89aca5ace2b5707 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 14 Jul 2025 17:57:53 +0800
Subject: [PATCH 01/76] init segment

---
 packages/poml/reader/segment.ts     | 209 +++++++++++++++-
 packages/poml/tests/segment.test.ts | 353 ++++++++++++++++++++++++++++
 2 files changed, 560 insertions(+), 2 deletions(-)
 create mode 100644 packages/poml/tests/segment.test.ts
diff --git a/packages/poml/reader/segment.ts b/packages/poml/reader/segment.ts
index cf4e4425..2218c086 100644
--- a/packages/poml/reader/segment.ts
+++ b/packages/poml/reader/segment.ts
@@ -1,3 +1,5 @@
+import componentDocs from '../assets/componentDocs.json';
+
 export interface Segment {
   // Unique ID for caching and React keys
   id: string;
@@ -16,6 +18,209 @@ export interface Segment {
   tagName?: string;
 }
 
-export function createSegments(content: string, path?: string): Segment[] {
-  throw new Error('createSegments is not implemented yet');
+class Segmenter {
+  private nextId: number;
+  private sourcePath: string | undefined;
+
+  constructor(sourcePath: string | undefined) {
+    this.nextId = 0;
+    this.sourcePath = sourcePath;
+  }
+
+  private generateId(): string {
+    return `segment_${this.nextId++}`;
+  }
+
+  private isValidPomlTag(tagName: string): boolean {
+    const validTags = new Set<string>();
+    
+    for (const doc of componentDocs) {
+      if (doc.name) {
+        validTags.add(doc.name.toLowerCase());
+        validTags.add(doc.name.toLowerCase().replace(/([A-Z])/g, '-$1').toLowerCase());
+      }
+    }
+    
+    validTags.add('poml');
+    validTags.add('text');
+    validTags.add('meta');
+    
+    return validTags.has(tagName.toLowerCase());
+  }
+
+  private parseSegments(text: string, start: number = 0, parent?: Segment): Segment[] {
+    const segments: Segment[] = [];
+    let currentPos = start;
+    
+    while (currentPos < text.length) {
+      const nextOpenTag = text.indexOf('<', currentPos);
+      
+      if (nextOpenTag === -1) {
+        if (currentPos < text.length) {
+          const textContent = text.substring(currentPos);
+          if (textContent.trim()) {
+            segments.push({
+              id: this.generateId(),
+              kind: 'TEXT',
+              start: currentPos,
+              end: text.length,
+              content: textContent,
+              path: this.sourcePath,
+              parent,
+              children: []
+            });
+          }
+        }
+        break;
+      }
+      
+      if (nextOpenTag > currentPos) {
+        const textContent = text.substring(currentPos, nextOpenTag);
+        if (textContent.trim()) {
+          segments.push({
+            id: this.generateId(),
+            kind: 'TEXT',
+            start: currentPos,
+            end: nextOpenTag,
+            content: textContent,
+            path: this.sourcePath,
+            parent,
+            children: []
+          });
+        }
+      }
+      
+      const tagEndPos = text.indexOf('>', nextOpenTag);
+      if (tagEndPos === -1) {
+        currentPos = nextOpenTag + 1;
+        continue;
+      }
+      
+      const tagContent = text.substring(nextOpenTag + 1, tagEndPos);
+      const tagName = tagContent.trim().split(/\s+/)[0];
+      
+      if (tagName.startsWith('/')) {
+        currentPos = tagEndPos + 1;
+        continue;
+      }
+      
+      if (tagContent.endsWith('/')) {
+        currentPos = tagEndPos + 1;
+        continue;
+      }
+      
+      if (!this.isValidPomlTag(tagName)) {
+        currentPos = tagEndPos + 1;
+        continue;
+      }
+      
+      const closingTag = `</${tagName}>`;
+      const closingTagPos = this.findClosingTag(text, tagName, tagEndPos + 1);
+      
+      if (closingTagPos === -1) {
+        currentPos = tagEndPos + 1;
+        continue;
+      }
+      
+      const segmentContent = text.substring(nextOpenTag, closingTagPos + closingTag.length);
+      const innerContent = text.substring(tagEndPos + 1, closingTagPos);
+      
+      const segment: Segment = {
+        id: this.generateId(),
+        kind: tagName.toLowerCase() === 'meta' ? 'META' : 'POML',
+        start: nextOpenTag,
+        end: closingTagPos + closingTag.length,
+        content: segmentContent,
+        path: this.sourcePath,
+        parent,
+        children: [],
+        tagName: tagName.toLowerCase()
+      };
+      
+      if (tagName.toLowerCase() === 'text') {
+        segment.children = this.parseSegments(innerContent, tagEndPos + 1, segment);
+      } else if (tagName.toLowerCase() !== 'meta') {
+        const childSegments = this.parseSegments(innerContent, tagEndPos + 1, segment);
+        segment.children = childSegments;
+      }
+      
+      segments.push(segment);
+      currentPos = closingTagPos + closingTag.length;
+    }
+    
+    return segments;
+  }
+
+  private findClosingTag(text: string, tagName: string, startPos: number): number {
+    let depth = 1;
+    let pos = startPos;
+    
+    while (pos < text.length && depth > 0) {
+      const nextTag = text.indexOf('<', pos);
+      if (nextTag === -1) {
+        break;
+      }
+      
+      const tagEndPos = text.indexOf('>', nextTag);
+      if (tagEndPos === -1) {
+        break;
+      }
+      
+      const tagContent = text.substring(nextTag + 1, tagEndPos);
+      const currentTagName = tagContent.trim().split(/\s+/)[0];
+      
+      if (currentTagName === tagName) {
+        depth++;
+      } else if (currentTagName === `/${tagName}`) {
+        depth--;
+      }
+      
+      pos = tagEndPos + 1;
+    }
+    
+    return depth === 0 ? pos - (`</${tagName}>`.length) : -1;
+  }
+
+  public createSegments(content: string): Segment {
+    const rootSegments = this.parseSegments(content);
+    
+    if (rootSegments.length === 1 && rootSegments[0].kind === 'POML') {
+      return rootSegments[0];
+    }
+    
+    if (rootSegments.length === 0) {
+      return {
+        id: this.generateId(),
+        kind: 'TEXT',
+        start: 0,
+        end: content.length,
+        content: content,
+        path: this.sourcePath,
+        children: [],
+        parent: undefined
+      };
+    }
+    
+    const rootSegment: Segment = {
+      id: this.generateId(),
+      kind: 'TEXT',
+      start: 0,
+      end: content.length,
+      content: content,
+      path: this.sourcePath,
+      children: rootSegments,
+      parent: undefined
+    };
+    
+    rootSegments.forEach(segment => {
+      segment.parent = rootSegment;
+    });
+    
+    return rootSegment;
+  }
+}
+
+export function createSegments(content: string, sourcePath?: string): Segment {
+  const segmenter = new Segmenter(sourcePath);
+  return segmenter.createSegments(content);
 }
diff --git a/packages/poml/tests/segment.test.ts b/packages/poml/tests/segment.test.ts
new file mode 100644
index 00000000..9c5d850e
--- /dev/null
+++ b/packages/poml/tests/segment.test.ts
@@ -0,0 +1,353 @@
+import { describe, expect, test } from '@jest/globals';
+import { createSegments, Segment } from '../reader/segment';
+
+describe('createSegments', () => {
+  test('pure text content', () => {
+    const content = 'This is pure text content with no POML tags.';
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.content).toBe(content);
+    expect(segment.start).toBe(0);
+    expect(segment.end).toBe(content.length);
+    expect(segment.children).toHaveLength(0);
+  });
+
+  test('single POML tag', () => {
+    const content = '<task>Analyze the data</task>';
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('POML');
+    expect(segment.tagName).toBe('task');
+    expect(segment.content).toBe(content);
+    expect(segment.start).toBe(0);
+    expect(segment.end).toBe(content.length);
+  });
+
+  test('mixed content with text and POML', () => {
+    const content = `# My Analysis Document
+
+This is a regular markdown document that explains the task.
+
+<task>
+  Analyze the following data and provide insights.
+</task>
+
+Here are some key points to consider:
+
+- Data quality
+- Statistical significance  
+- Business impact`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(4);
+    
+    const children = segment.children;
+    expect(children[0].kind).toBe('TEXT');
+    expect(children[0].content).toContain('# My Analysis Document');
+    
+    expect(children[1].kind).toBe('POML');
+    expect(children[1].tagName).toBe('task');
+    expect(children[1].content).toBe(`<task>
+  Analyze the following data and provide insights.
+</task>`);
+    
+    expect(children[2].kind).toBe('TEXT');
+    expect(children[2].content).toContain('Here are some key points');
+    
+    expect(children[3].kind).toBe('TEXT');
+    expect(children[3].content).toContain('- Data quality');
+  });
+
+  test('nested POML segments', () => {
+    const content = `<examples>
+  <example>
+    <input>Sample data point 1</input>
+    <output>Analysis result 1</output>
+  </example>
+</examples>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('POML');
+    expect(segment.tagName).toBe('examples');
+    expect(segment.children).toHaveLength(2);
+    
+    const exampleSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'example');
+    expect(exampleSegment).toBeDefined();
+    expect(exampleSegment!.children).toHaveLength(3);
+    
+    const inputSegment = exampleSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'input');
+    const outputSegment = exampleSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'output');
+    
+    expect(inputSegment).toBeDefined();
+    expect(outputSegment).toBeDefined();
+  });
+
+  test('text tag with nested content', () => {
+    const content = `<poml>
+  <task>Process the following data</task>
+  <text>
+    This is **markdown** content that will be processed as pure text.
+    
+    - Item 1
+    - Item 2
+
+    <cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>
+
+    No POML processing happens here.
+  </text>
+  <hint>Remember to check the format</hint>
+</poml>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('POML');
+    expect(segment.tagName).toBe('poml');
+    expect(segment.children).toHaveLength(4);
+    
+    const textSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'text');
+    expect(textSegment).toBeDefined();
+    expect(textSegment!.children).toHaveLength(3);
+    
+    const nestedCpSegment = textSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
+    expect(nestedCpSegment).toBeDefined();
+    expect(nestedCpSegment!.content).toBe('<cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>');
+  });
+
+  test('meta tags', () => {
+    const content = `<meta name="author">John Doe</variable>
+  <stylesheet>
+    { "task": { "captionStyle": "bold" } }
+  </stylesheet>
+</meta>
+
+<task>Complete the analysis</task>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(3);
+    
+    const metaSegment = segment.children.find(c => c.kind === 'META');
+    expect(metaSegment).toBeDefined();
+    expect(metaSegment!.tagName).toBe('meta');
+    expect(metaSegment!.children).toHaveLength(0);
+    
+    const taskSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'task');
+    expect(taskSegment).toBeDefined();
+  });
+
+  test('invalid tags are ignored', () => {
+    const content = `<invalid-tag>This should be ignored</invalid-tag>
+<task>This should be processed</task>
+<random>This should also be ignored</random>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(3);
+    
+    const taskSegment = segment.children.find(c => c.kind === 'POML');
+    expect(taskSegment).toBeDefined();
+    expect(taskSegment!.tagName).toBe('task');
+    
+    const textSegments = segment.children.filter(c => c.kind === 'TEXT');
+    expect(textSegments).toHaveLength(2);
+    expect(textSegments[0].content).toContain('<invalid-tag>This should be ignored</invalid-tag>');
+    expect(textSegments[1].content).toContain('<random>This should also be ignored</random>');
+  });
+
+  test('self-closing tags are ignored', () => {
+    const content = `<task>Valid task</task>
+<br />
+<img src="test.jpg" />
+<hint>Valid hint</hint>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(4);
+    
+    const pomlSegments = segment.children.filter(c => c.kind === 'POML');
+    expect(pomlSegments).toHaveLength(3);
+    expect(pomlSegments[0].tagName).toBe('task');
+    expect(pomlSegments[2].tagName).toBe('hint');
+  });
+
+  test('malformed tags are handled gracefully', () => {
+    const content = `<task>Incomplete tag
+<hint>Complete hint</hint>
+<unclosed>This has no closing tag`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(3);
+    
+    const hintSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
+    expect(hintSegment).toBeDefined();
+    expect(hintSegment!.content).toBe('<hint>Complete hint</hint>');
+    
+    const textSegments = segment.children.filter(c => c.kind === 'TEXT');
+    expect(textSegments).toHaveLength(2);
+    expect(textSegments[0].content).toBe('<task>Incomplete tag\n');
+    expect(textSegments[1].content).toBe('\n<unclosed>This has no closing tag');
+  });
+
+  test('malformed POML tags are ignored', () => {
+    const content = `<task>Valid task`;
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(0);
+  });
+
+  test('empty content', () => {
+    const content = '';
+    const segment = createSegments(content);
+    
+  });
+
+  test('whitespace-only content', () => {
+    const content = '   \n\n\t  \n  ';
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.content).toBe(content);
+    expect(segment.children).toHaveLength(0);
+  });
+
+  test('hyphenated tag names', () => {
+    const content = `<output-format>JSON format</output-format>
+<system-msg>System message</system-msg>
+<user-msg>User message</user-msg>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(4);
+    
+    const pomlSegments = segment.children.filter(c => c.kind === 'POML');
+    expect(pomlSegments).toHaveLength(3);
+    expect(pomlSegments[0].tagName).toBe('output-format');
+    expect(pomlSegments[1].tagName).toBe('system-msg');
+    expect(pomlSegments[2].tagName).toBe('user-msg');
+  });
+
+  test('parent-child relationships', () => {
+    const content = `<task>
+  <hint>This is a hint</hint>
+  Some text
+  <examples>
+    <example>Example 1</example>
+  </examples>
+</task>`;
+
+    const segment = createSegments(content);
+    
+    const taskSegment = segment;
+    expect(taskSegment.kind).toBe('POML');
+    expect(taskSegment.tagName).toBe('task');
+    expect(taskSegment.parent).toBeUndefined();
+    
+    const hintSegment = taskSegment.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
+    expect(hintSegment).toBeDefined();
+    expect(hintSegment!.parent).toBe(taskSegment);
+    
+    const examplesSegment = taskSegment.children.find(c => c.kind === 'POML' && c.tagName === 'examples');
+    expect(examplesSegment).toBeDefined();
+    expect(examplesSegment!.parent).toBe(taskSegment);
+    
+    const exampleSegment = examplesSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'example');
+    expect(exampleSegment).toBeDefined();
+    expect(exampleSegment!.parent).toBe(examplesSegment);
+  });
+
+  test('segment IDs are unique', () => {
+    const content = `<task>First task</task>
+<task>Second task</task>
+<hint>A hint</hint>`;
+
+    const segment = createSegments(content);
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(5);
+    
+    function collectAllSegments(segment: Segment): Segment[] {
+      const all = [segment];
+      segment.children.forEach(child => {
+        all.push(...collectAllSegments(child));
+      });
+      return all;
+    }
+    
+    const allSegments = collectAllSegments(segment);
+    const ids = allSegments.map(s => s.id);
+    const uniqueIds = new Set(ids);
+    
+    expect(uniqueIds.size).toBe(ids.length);
+  });
+
+  test('path parameter is preserved', () => {
+    const content = '<task>Test task</task>';
+    const path = '/test/path/file.poml';
+    const segment = createSegments(content, path);
+    
+    expect(segment.path).toBe(path);
+    expect(segment.children[0].path).toBe(path);
+  });
+
+  test('complex example from specification', () => {
+    const content = `<poml>
+  <task>Process the following data</task>
+  <text>
+    This is **markdown** content that will be processed as pure text.
+    
+    - Item 1
+    - Item 2
+
+    {{ VARIABLES_WILL_ALSO_SHOWN_AS_IS }}
+    <cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>
+
+    No POML processing happens here.
+  </text>
+  <hint>Remember to check the format</hint>
+</poml>
+
+There can be some intervening text here as well.
+
+<poml>
+  <p>You can add another POML segment here: {{variable_will_be_substituted}}</p>
+</poml>
+
+<p>POML elements do not necessarily reside in a poml element.</p>`;
+
+    const segment = createSegments(content);
+    
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.children).toHaveLength(5);
+    
+    const firstPomlSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'poml');
+    expect(firstPomlSegment).toBeDefined();
+    expect(firstPomlSegment!.children).toHaveLength(4);
+    
+    const textSegment = firstPomlSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'text');
+    expect(textSegment).toBeDefined();
+    expect(textSegment!.children).toHaveLength(3);
+    
+    const cpSegment = textSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
+    expect(cpSegment).toBeDefined();
+    
+    const secondPomlSegment = segment.children.filter(c => c.kind === 'POML' && c.tagName === 'poml')[1];
+    expect(secondPomlSegment).toBeDefined();
+
+    const lineBreakSegment = segment.children[3];
+    expect(lineBreakSegment.kind).toBe('TEXT');
+    expect(lineBreakSegment.content).toBe('\n\n');
+
+    const pSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'p');
+    expect(pSegment).toBeDefined();
+  });
+});
\ No newline at end of file

From c602a27bcb537578947995e34af3e0ff24ee9d28 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 14 Jul 2025 23:10:38 +0800
Subject: [PATCH 02/76] add tests

---
 packages/poml/tests/segment.test.ts | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/packages/poml/tests/segment.test.ts b/packages/poml/tests/segment.test.ts
index 9c5d850e..dad79c64 100644
--- a/packages/poml/tests/segment.test.ts
+++ b/packages/poml/tests/segment.test.ts
@@ -86,6 +86,34 @@ Here are some key points to consider:
     expect(outputSegment).toBeDefined();
   });
 
+  test('text in text', () => {
+    const content = `<text>This is a text<text> with nested text content.</text></text>`;
+    const segment = createSegments(content);
+    expect(segment.kind).toBe('TEXT');
+    expect(segment.content).toBe(content);
+    expect(segment.children).toHaveLength(0);
+  });
+
+  test('text in text in POML', () => {
+    const content = `<poml><text>This is a text<text> with nested text content.</text></text></poml>`;
+    const segment = createSegments(content);
+    expect(segment.kind).toBe('POML');  
+    expect(segment.tagName).toBe('poml');
+    expect(segment.children).toHaveLength(1);
+    const textSegment = segment.children[0];
+    expect(textSegment.kind).toBe('TEXT');
+    expect(textSegment.content).toBe('This is a text<text> with nested text content.</text>');
+  });
+
+  test('nested tag in POML', () => {
+    const content = `<task>Process data<task> with nested task content.</task></task>`;
+    const segment = createSegments(content);
+    expect(segment.kind).toBe('POML');
+    expect(segment.tagName).toBe('poml');
+    expect(segment.children).toHaveLength(0);
+    expect(segment.content).toBe('<task>Process data<task> with nested task content.</task></task>');
+  });
+
   test('text tag with nested content', () => {
     const content = `<poml>
   <task>Process the following data</task>

From 6e855ba98559916f720bb4c4f066e24e1de0e444 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 14 Jul 2025 23:15:03 +0800
Subject: [PATCH 03/76] test: add syntax attribute to nested POML segments fixture

---
 packages/poml/tests/segment.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/poml/tests/segment.test.ts b/packages/poml/tests/segment.test.ts
index dad79c64..45b9e082 100644
--- a/packages/poml/tests/segment.test.ts
+++ b/packages/poml/tests/segment.test.ts
@@ -62,7 +62,7 @@ Here are some key points to consider:
   });
 
   test('nested POML segments', () => {
-    const content = `<examples>
+    const content = `<examples syntax="json">
   <example>
     <input>Sample data point 1</input>
     <output>Analysis result 1</output>

From 9d3484b1e067aa7fb914ee5e77c2961463b714d3 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 14 Jul 2025 23:15:47 +0800
Subject: [PATCH 04/76] test: expect no children for nested POML segments; rename nested same-tag test

---
 packages/poml/tests/segment.test.ts | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/packages/poml/tests/segment.test.ts b/packages/poml/tests/segment.test.ts
index 45b9e082..72a6f867 100644
--- a/packages/poml/tests/segment.test.ts
+++ b/packages/poml/tests/segment.test.ts
@@ -73,17 +73,8 @@ Here are some key points to consider:
     
     expect(segment.kind).toBe('POML');
     expect(segment.tagName).toBe('examples');
-    expect(segment.children).toHaveLength(2);
-    
-    const exampleSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'example');
-    expect(exampleSegment).toBeDefined();
-    expect(exampleSegment!.children).toHaveLength(3);
-    
-    const inputSegment = exampleSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'input');
-    const outputSegment = exampleSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'output');
-    
-    expect(inputSegment).toBeDefined();
-    expect(outputSegment).toBeDefined();
+    expect(segment.children).toHaveLength(0);
+    expect(segment.content).toBe(content);
   });
 
   test('text in text', () => {
@@ -105,7 +96,7 @@ Here are some key points to consider:
     expect(textSegment.content).toBe('This is a text<text> with nested text content.</text>');
   });
 
-  test('nested tag in POML', () => {
+  test('nested same tag in POML', () => {
     const content = `<task>Process data<task> with nested task content.</task></task>`;
     const segment = createSegments(content);
     expect(segment.kind).toBe('POML');

From 4575d3ad215558e88190eb0c45e28deb80c41820 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 15 Jul 2025 10:14:39 +0800
Subject: [PATCH 05/76] docs: replace segmentation pass with tokenization and AST parsing in proposal

---
 docs/proposals/poml_extended.md | 103 +++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 34 deletions(-)

diff --git a/docs/proposals/poml_extended.md b/docs/proposals/poml_extended.md
index b2a2f000..491a8949 100644
--- a/docs/proposals/poml_extended.md
+++ b/docs/proposals/poml_extended.md
@@ -107,53 +107,89 @@ File-level metadata can be included at any place of the file in a special `<meta
 
 ### High-level Processing Pipeline
 
-The core of the new architecture is a three-pass process: Segmentation, Metadata Extraction, and Recursive Rendering.
+The core of the new architecture is a three-pass process: Tokenization and AST Parsing, Metadata Extraction, and Recursive Rendering.
 
-#### I. Segmentation Pass
+#### I. Tokenization and AST Parsing
 
-This initial pass is a crucial preprocessing step that scans the raw file content and partitions it into a hierarchical tree of segments. It does **not** parse the full XML structure of POML blocks; it only identifies their boundaries.
+This phase processes the raw file content through a standard compilation workflow: tokenization followed by parsing into an Abstract Syntax Tree (AST).
 
-* **Objective**: To classify every part of the file as `META`, `POML`, or `TEXT` and build a nested structure.
-* **Algorithm**:
-  1. Load all valid POML component tag names (including aliases) from `componentDocs.json`. This set of tags will be used for detection.
-  2. Initialize the root of the segment tree as a single, top-level `TEXT` segment spanning the entire file, unless the root segment is a single `<poml>...</poml>` block spanning the whole file (in which case it will be treated as a `POML` segment).
-  3. Use a stack-based algorithm to scan the text.
-    * When an opening tag (e.g., `<task>`) that matches a known POML component is found, push its name and start position onto the stack. This marks the beginning of a potential `POML` segment.
-    * When a closing tag (e.g., `</task>`) is found that matches the tag at the top of the stack, pop the stack. This marks a complete `POML` segment. This new segment is added as a child to the current parent segment in the tree.
-    * The special `<text>` tag is handled recursively. If a `<text>` tag is found *inside* a `POML` segment, the scanner will treat its content as a nested `TEXT` segment. This `TEXT` segment can, in turn, contain more `POML` children.
-    * Any content not enclosed within identified `POML` tags remains part of its parent `TEXT` segment.
-  4. `<meta>` tags are treated specially. They are identified and parsed into `META` segments at any level but are logically hoisted and processed first. They should not have children.
-* **Output**: A `Segment` tree. For backward compatibility, if the root segment is a single `<poml>...</poml>` block spanning the whole file, the system can revert to the original, simpler parsing model.
+* **Tokenization**: Standard XML tokenization logic is used to break the input into tokens (tags, text content, attributes, etc.).
 
-**`Segment` Interface**: The `children` property is key to representing the nested structure of mixed-content files.
+* **AST Parsing Algorithm**:
+  1. Scan forward until a `<` followed by a tag name is found.
+  2. If the tag name is `text`, create a text node and scan until the corresponding `</text>` is found (handling nested POML if present).
+  3. If the tag name matches any POML tag from `componentDocs.json`, create a node with the tag name and attributes.
+  4. Within POML tags, if another `text` tag is found, follow the same logic as step 2.
+  5. Close the node when the corresponding closing tag `</tagname>` is found.
+
+* **Error Tolerance**: The parser is designed to be error-tolerant, gracefully handling malformed markup while preserving as much structure as possible.
+
+* **Source Mapping**: The parser retains source mapping information for each AST node, enabling code intelligence features such as hover, go to definition, find references, and auto-completion.
+
+* **Output**: An AST representing the hierarchical structure of the document, where each node contains source position information and type metadata.
+
+**`ASTNode` Interface**: The AST nodes represent the parsed structure with source mapping.
 
 ```typescript
-interface Segment {
-  id: string;                      // Unique ID for caching and React keys
-  kind: 'META' | 'TEXT' | 'POML';
+interface SourceRange {
   start: number;
   end: number;
-  content: string;                 // The raw string content of the segment
-  parent?: Segment;                 // Reference to the parent segment
-  children: Segment[];             // Nested segments (e.g., a POML block within text)
-  tagName?: string;                 // For POML segments, the name of the root tag (e.g., 'task')
+}
+
+interface AttributeInfo {
+  key: string;
+  value: string;
+  keyRange: SourceRange;      // Position of attribute name
+  valueRange: SourceRange;    // Position of attribute value (excluding quotes)
+  fullRange: SourceRange;     // Full attribute including key="value"
+}
+
+interface ASTNode {
+  id: string;                      // Unique ID for caching and React keys
+  kind: 'META' | 'TEXT' | 'POML';
+  start: number;                   // Source position start of entire node
+  end: number;                     // Source position end of entire node
+  content: string;                 // The raw string content
+  parent?: ASTNode;                // Reference to the parent node
+  children: ASTNode[];             // Child nodes
+  
+  // For POML and META nodes
+  tagName?: string;                // Tag name (e.g., 'task', 'meta')
+  attributes?: AttributeInfo[];    // Detailed attribute information
+  
+  // Detailed source positions
+  openingTag?: {
+    start: number;                 // Position of '<'
+    end: number;                   // Position after '>'
+    nameRange: SourceRange;        // Position of tag name
+  };
+  
+  closingTag?: {
+    start: number;                 // Position of '</'
+    end: number;                   // Position after '>'
+    nameRange: SourceRange;        // Position of tag name in closing tag
+  };
+  
+  contentRange?: SourceRange;      // Position of content between tags (excluding nested tags)
+  
+  // For TEXT nodes
+  textSegments?: SourceRange[];    // Multiple ranges for text content (excluding nested POML)
 }
 ```
 
 #### II. Metadata Processing
 
-Once the segment tree is built, all `META` segments are processed.
+Once the AST is built, all `META` nodes are processed.
 
-  * **Extraction**: Traverse the tree to find all `META` segments.
+  * **Extraction**: Traverse the AST to find all `META` nodes.
   * **Population**: Parse the content of each `<meta>` tag and populate the global `PomlContext` object.
-  * **Removal**: After processing, `META` segments are removed from the tree to prevent them from being rendered.
+  * **Removal**: After processing, `META` nodes are removed from the AST to prevent them from being rendered.
 
 **`PomlContext` Interface**: This context object is the single source of truth for the entire file, passed through all readers. It's mutable, allowing stateful operations like `<let>` to have a file-wide effect.
 
 ```typescript
 interface PomlContext {
   variables: { [key: string]: any }; // For {{ substitutions }} and <let> (Read/Write)
-  texts: { [key: string]: React.ReactElement }; // Maps TEXT_ID to content for <text> replacement (Read/Write)
   stylesheet: { [key: string]: string }; // Merged styles from all <meta> tags (Read-Only during render)
   minimalPomlVersion?: string;      // From <meta> (Read-Only)
   sourcePath: string;                // File path for resolving includes (Read-Only)
@@ -162,22 +198,21 @@ interface PomlContext {
 
 #### III. Text/POML Dispatching (Recursive Rendering)
 
-Rendering starts at the root of the segment tree and proceeds recursively. A controller dispatches segments to the appropriate reader.
+Rendering starts at the root of the AST and proceeds recursively. A controller dispatches AST nodes to the appropriate reader.
 
-* **`PureTextReader`**: Handles `TEXT` segments.
+* **`PureTextReader`**: Handles `TEXT` nodes.
 
   * Currently we directly render the pure-text contents as a single React element. In future, we can:
     * Renders the text content, potentially using a Markdown processor.
     * Performs variable substitutions (`{{...}}`) using the `variables` from `PomlContext`. The logic from `handleText` in the original `PomlFile` should be extracted into a shared utility for this.
-  * Iterates through its `children` segments. For each child `POML` segment, it calls the `PomlReader`.
+  * Iterates through its `children` nodes. For each child `POML` node, it calls the `PomlReader`.
 
-* **`PomlReader`**: Handles `POML` segments.
+* **`PomlReader`**: Handles `POML` nodes.
 
-  * **Pre-processing**: Before parsing, it replaces any direct child `<text>` regions with a self-closing placeholder tag containing a unique ID: `<text ref="TEXT_ID_123" />`. The original content of the `<text>` segment is stored in `context.texts`. This ensures the XML parser inside `PomlFile` doesn't fail on non-XML content (like Markdown).
-  * **Delegation**: Instantiates a modified `PomlFile` class with the processed segment content and the shared `PomlContext`.
-  * **Rendering**: Calls the `pomlFile.react(context)` method to render the segment.
+  * **Delegation**: Instantiates a modified `PomlFile` class with the processed node content and the shared `PomlContext`.
+  * **Rendering**: Calls the `pomlFile.react(context)` method to render the node.
 
-* **`IntelliSense Layer`**: The segment tree makes it easy to provide context-aware IntelliSense. By checking the `kind` of the segment at the cursor's offset, the request can be routed to the correct provider—either the `PomlReader`'s XML-aware completion logic or a simpler text/variable completion provider for `TEXT` segments.
+* **`IntelliSense Layer`**: The AST makes it easy to provide context-aware IntelliSense. By checking the `kind` of the node at the cursor's offset, the request can be routed to the correct provider—either the `PomlReader`'s XML-aware completion logic or a simpler text/variable completion provider for `TEXT` nodes.
 
 **`Reader` Interface**: This interface defines the contract for both `PureTextReader` and `PomlReader`.
 

From d201013c7e332e65e7814d44abdcf326e63ef8c4 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 15 Jul 2025 11:07:03 +0800
Subject: [PATCH 06/76] .

---
 .claude/settings.json           |  9 +++++++++
 docs/proposals/poml_extended.md | 16 ++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)
 create mode 100644 .claude/settings.json

diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 00000000..02e05248
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,9 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(npm run lint)",
+      "Bash(npm run test*)",
+      "Read(~/.zshrc)"
+    ]
+  }
+}
\ No newline at end of file
diff --git a/docs/proposals/poml_extended.md b/docs/proposals/poml_extended.md
index 491a8949..4f6d3e11 100644
--- a/docs/proposals/poml_extended.md
+++ b/docs/proposals/poml_extended.md
@@ -113,14 +113,15 @@ The core of the new architecture is a three-pass process: Tokenization and AST P
 
 This phase processes the raw file content through a standard compiling workflow: tokenization followed by parsing to an Abstract Syntax Tree (AST).
 
-* **Tokenization**: Standard XML tokenization logic is used to break the input into tokens (tags, text content, attributes, etc.).
+* **Tokenization**: Standard XML tokenization logic is used to break the input into tokens (tags, text content, attributes, etc.). Additionally, template variables in `{{}}` format are identified and tokenized as special tokens to enable proper parsing and variable substitution.
 
 * **AST Parsing Algorithm**:
   1. Scan until `<` and tag name is found.
-  2. If the tag name is `text`, create a text node and scan until the corresponding `</text>` is found (handling nested POML if present).
-  3. If the tag name matches any POML tag from `componentDocs.json`, create a node with the tag name and attributes.
+  2. If the tag name is `text`, create a text node and scan until the corresponding `</text>` is found (handling nested POML if present; template variables are not considered here).
+  3. If the tag name matches any POML tag from `componentDocs.json`, create a node with the tag name and attributes (template variables `{{}}` in attribute values are parsed as child template nodes).
   4. Within POML tags, if another `text` tag is found, follow the same logic as step 2.
-  5. Close the node when the corresponding closing tag `</tagname>` is found.
+  5. Template variables `{{}}` found within text content or attribute values create TEMPLATE nodes as children.
+  6. Close the node when the corresponding closing tag `</tagname>` is found.
 
 * **Error Tolerance**: The parser is designed to be error-tolerant, gracefully handling malformed markup while preserving as much structure as possible.
 
@@ -138,7 +139,7 @@ interface SourceRange {
 
 interface AttributeInfo {
   key: string;
-  value: string;
+  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[];  // Mixed content: array of text/template nodes
   keyRange: SourceRange;      // Position of attribute name
   valueRange: SourceRange;    // Position of attribute value (excluding quotes)
   fullRange: SourceRange;     // Full attribute including key="value"
@@ -146,7 +147,7 @@ interface AttributeInfo {
 
 interface ASTNode {
   id: string;                      // Unique ID for caching and React keys
-  kind: 'META' | 'TEXT' | 'POML';
+  kind: 'META' | 'TEXT' | 'POML' | 'TEMPLATE';
   start: number;                   // Source position start of entire node
   end: number;                     // Source position end of entire node
   content: string;                 // The raw string content
@@ -174,6 +175,9 @@ interface ASTNode {
   
   // For TEXT nodes
   textSegments?: SourceRange[];    // Multiple ranges for text content (excluding nested POML)
+  
+  // For TEMPLATE nodes
+  expression?: string;             // The full expression content between {{}}
 }
 ```
 

From 66e7a2cc1cff8034c7d8b76ccc4767152bb69e67 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 15 Jul 2025 11:21:41 +0800
Subject: [PATCH 07/76] update to ast implementation

---
 packages/poml/reader/ast.ts         | 543 ++++++++++++++++++++++++++++
 packages/poml/tests/segment.test.ts | 353 ++++++++++--------
 2 files changed, 744 insertions(+), 152 deletions(-)
 create mode 100644 packages/poml/reader/ast.ts

diff --git a/packages/poml/reader/ast.ts b/packages/poml/reader/ast.ts
new file mode 100644
index 00000000..4ec9fb2e
--- /dev/null
+++ b/packages/poml/reader/ast.ts
@@ -0,0 +1,543 @@
+import componentDocs from '../assets/componentDocs.json';
+
+// Character-offset span; producers in this file set `end` to the offset just past the last character.
+export interface SourceRange {
+  start: number;
+  end: number;
+}
+
+export interface AttributeInfo {
+  key: string;                // Attribute name as written in the tag
+  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[];  // Mixed content: array of text/template nodes
+  keyRange: SourceRange;      // Position of attribute name (parseAttributes yields offsets into the tag text)
+  valueRange: SourceRange;    // Position of attribute value (excluding quotes)
+  fullRange: SourceRange;     // Full attribute including key="value"
+}
+
+// Tree node produced by ASTParser; which optional fields are set depends on `kind` (see the section comments).
+export interface ASTNode {
+  id: string;                      // Unique ID for caching and React keys
+  kind: 'META' | 'TEXT' | 'POML' | 'TEMPLATE';
+  start: number;                   // Source position start of entire node
+  end: number;                     // Source position end of entire node
+  content: string;                 // The raw string content
+  parent?: ASTNode;                // Reference to the parent node
+  children: ASTNode[];             // Child nodes
+  
+  // For POML and META nodes
+  tagName?: string;                // Tag name (e.g., 'task', 'meta')
+  attributes?: AttributeInfo[];    // Detailed attribute information
+  
+  // Detailed source positions
+  openingTag?: {
+    start: number;                 // Position of '<'
+    end: number;                   // Position after '>'
+    nameRange: SourceRange;        // Position of tag name
+  };
+  
+  closingTag?: {
+    start: number;                 // Position of '</'
+    end: number;                   // Position after '>'
+    nameRange: SourceRange;        // Position of tag name in closing tag
+  };
+  
+  contentRange?: SourceRange;      // Position of content between tags (excluding nested tags)
+  
+  // For TEXT nodes
+  textSegments?: SourceRange[];    // Multiple ranges for text content (excluding nested POML)
+  
+  // For TEMPLATE nodes
+  expression?: string;             // The full expression content between {{}}
+}
+
+// Lexical token: `value` is the exact source slice from `start` up to (not including) `end`.
+interface Token {
+  type: 'TEXT' | 'TAG_OPEN' | 'TAG_CLOSE' | 'TAG_SELF_CLOSE' | 'TEMPLATE_VAR' | 'ATTRIBUTE';  // the Tokenizer below never emits 'ATTRIBUTE'
+  value: string;
+  start: number;
+  end: number;
+}
+
+// Lexer: splits raw source into TEXT, tag and {{template}} tokens; attributes stay inside the tag token.
+class Tokenizer {
+  private input: string;
+  private position: number;
+
+  constructor(input: string) {
+    this.input = input;
+    this.position = 0;
+  }
+
+  tokenize(): Token[] {
+    const tokens: Token[] = [];
+    // Every character of the input ends up in exactly one token, in source order.
+    while (this.position < this.input.length) {
+      // Check for template variables first
+      if (this.peek() === '{' && this.peek(1) === '{') {
+        tokens.push(this.readTemplateVariable());
+        continue;
+      }
+      
+      // Check for XML tags
+      if (this.peek() === '<') {
+        const tagToken = this.readTag();
+        if (tagToken) {
+          tokens.push(tagToken);
+          continue;
+        }
+      }
+      
+      // Read text content
+      const textToken = this.readText();
+      if (textToken.value.length > 0) {
+        tokens.push(textToken);
+      }
+    }
+    
+    return tokens;
+  }
+
+  private peek(offset: number = 0): string {
+    return this.input[this.position + offset] || '';
+  }
+
+  private advance(): string {
+    return this.input[this.position++] || '';
+  }
+
+  private readTemplateVariable(): Token {
+    // Called with the cursor on '{{'. Returns the whole '{{ ... }}' span as one TEMPLATE_VAR token.
+    const start = this.position;
+    const closeIndex = this.input.indexOf('}}', start + 2);
+    
+    if (closeIndex === -1) {
+      // Unterminated '{{': emit only the braces as literal text instead of swallowing the
+      // rest of the input (and every tag in it) into one bogus template token.
+      this.position = start + 2;
+      return { type: 'TEXT', value: '{{', start, end: this.position };
+    }
+    
+    this.position = closeIndex + 2;
+    
+    return {
+      type: 'TEMPLATE_VAR',
+      value: this.input.substring(start, this.position),
+      start,
+      end: this.position
+    };
+  }
+
+  private readTag(): Token | null {
+    // Called with the cursor on '<'. Always yields a token; the nullable type is kept for callers that null-check.
+    const start = this.position;
+    this.advance(); // <
+    // Skip whitespace
+    while (this.peek() === ' ' || this.peek() === '\t' || this.peek() === '\n') {
+      this.advance();
+    }
+    
+    // Check for closing tag
+    const isClosing = this.peek() === '/';
+    if (isClosing) {
+      this.advance();
+    }
+    
+    // Skip over the tag name. It is not captured here: the parser re-derives the
+    // name and attributes from the token text.
+    while (this.position < this.input.length &&
+           this.peek() !== '>' &&
+           this.peek() !== ' ' &&
+           this.peek() !== '\t' &&
+           this.peek() !== '\n') {
+      this.advance();
+    }
+    
+    // Skip the attributes (parsed later). NOTE: a '>' inside a quoted value ends the tag early.
+    while (this.position < this.input.length && this.peek() !== '>') {
+      this.advance();
+    }
+    
+    if (this.peek() === '>') {
+      this.advance(); // >
+      
+      // Check if self-closing
+      const content = this.input.substring(start, this.position);
+      const isSelfClosing = content.endsWith('/>');
+      
+      return {
+        type: isSelfClosing ? 'TAG_SELF_CLOSE' : (isClosing ? 'TAG_CLOSE' : 'TAG_OPEN'),
+        value: content,
+        start,
+        end: this.position
+      };
+    }
+    
+    // No '>' before end of input: rewind and emit the '<' itself as literal text so it is not lost.
+    this.position = start + 1;
+    return { type: 'TEXT', value: '<', start, end: start + 1 };
+  }
+
+  private readText(): Token {
+    const start = this.position;
+    // Consume up to (not including) the next '<' or '{{'; the token is empty if the cursor is already on one.
+    while (this.position < this.input.length && 
+           this.peek() !== '<' && 
+           !(this.peek() === '{' && this.peek(1) === '{')) {
+      this.advance();
+    }
+    
+    return {
+      type: 'TEXT',
+      value: this.input.substring(start, this.position),
+      start,
+      end: this.position
+    };
+  }
+}
+
+// AST Parser class
+class ASTParser {
+  private tokens: Token[];
+  private position: number;
+  private nextId: number;
+  private validPomlTags: Set<string>;
+
+  constructor(tokens: Token[]) {
+    this.tokens = tokens;
+    this.position = 0;
+    this.nextId = 0;
+    this.validPomlTags = this.buildValidTagsSet();
+  }
+
+  private buildValidTagsSet(): Set<string> {
+    // Lower-cased names accepted as POML tags: each documented component (plain and kebab-case) plus built-ins.
+    const validTags = new Set<string>();
+    for (const doc of componentDocs) {
+      if (doc.name) {
+        validTags.add(doc.name.toLowerCase());
+        // Kebab-case alias: hyphenate at lower→upper boundaries BEFORE lower-casing (FooBar → foo-bar).
+        validTags.add(doc.name.replace(/([a-z0-9])([A-Z])/g, '$1-$2').toLowerCase());
+      }
+    }
+    
+    // Add special tags
+    validTags.add('poml');
+    validTags.add('text');
+    validTags.add('meta');
+    
+    return validTags;
+  }
+
+  private generateId(): string {
+    return `ast_${this.nextId++}`;
+  }
+
+  private peek(): Token | undefined {
+    return this.tokens[this.position];
+  }
+
+  private advance(): Token | undefined {
+    return this.tokens[this.position++];
+  }
+
+  private extractTagName(tagContent: string): string {
+    // Drop the surrounding '<' and '>' and read the leading name, allowing an optional '/' first.
+    const inner = tagContent.slice(1, -1);
+    const found = /^\/?\s*([a-zA-Z][\w-]*)/.exec(inner);
+    return found === null ? '' : found[1];
+  }
+
+  private parseAttributeValue(value: string): (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] {
+    // Split an attribute value into TEXT and TEMPLATE ({{...}}) nodes. NOTE: start/end are offsets into `value`, not the source.
+    const result: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] = [];
+    let currentPos = 0;
+    
+    while (currentPos < value.length) {
+      const templateStart = value.indexOf('{{', currentPos);
+      
+      if (templateStart === -1) {
+        // No more template variables, add remaining text
+        if (currentPos < value.length) {
+          result.push({
+            id: this.generateId(),
+            kind: 'TEXT',
+            start: currentPos,
+            end: value.length,
+            content: value.substring(currentPos),
+            children: []
+          });
+        }
+        break;
+      }
+      
+      // Add text before template variable
+      if (templateStart > currentPos) {
+        result.push({
+          id: this.generateId(),
+          kind: 'TEXT',
+          start: currentPos,
+          end: templateStart,
+          content: value.substring(currentPos, templateStart),
+          children: []
+        });
+      }
+      
+      // Find end of template variable
+      const templateEnd = value.indexOf('}}', templateStart + 2);
+      if (templateEnd === -1) {
+        // Malformed template, treat as text
+        result.push({
+          id: this.generateId(),
+          kind: 'TEXT',
+          start: templateStart,
+          end: value.length,
+          content: value.substring(templateStart),
+          children: []
+        });
+        break;
+      }
+      
+      // Add template variable
+      const templateContent = value.substring(templateStart + 2, templateEnd);
+      result.push({
+        id: this.generateId(),
+        kind: 'TEMPLATE',
+        start: templateStart,
+        end: templateEnd + 2,
+        content: value.substring(templateStart, templateEnd + 2),
+        expression: templateContent.trim(),
+        children: []
+      });
+      
+      currentPos = templateEnd + 2;
+    }
+    
+    return result;
+  }
+
+  private parseAttributes(tagContent: string): AttributeInfo[] {
+    // Extract key="value" / key='value' pairs from a tag's text. All ranges are offsets into `tagContent`.
+    const attributes: AttributeInfo[] = [];
+    // Names may contain '-', ':' and '.'; the value runs to the SAME quote that opened it (\2),
+    // so caption="it's" keeps its apostrophe. Whitespace around '=' is tolerated.
+    const attrRegex = /([A-Za-z_][\w:.-]*)\s*=\s*(["'])([\s\S]*?)\2/g;
+    let match: RegExpExecArray | null;
+    while ((match = attrRegex.exec(tagContent)) !== null) {
+      const key = match[1];
+      const value = match[3];
+      const fullMatch = match[0];
+      const matchStart = match.index;
+      const valueStart = matchStart + fullMatch.length - 1 - value.length; // just after the opening quote
+      attributes.push({
+        key,
+        value: this.parseAttributeValue(value),
+        keyRange: { start: matchStart, end: matchStart + key.length },
+        valueRange: { start: valueStart, end: valueStart + value.length },
+        fullRange: { start: matchStart, end: matchStart + fullMatch.length }
+      });
+    }
+    
+    return attributes;
+  }
+
+  parse(): ASTNode {
+    // Entry point. A document that is exactly one POML element is returned as that node;
+    // anything else is wrapped in a synthetic root TEXT node spanning all tokens.
+    const topLevel = this.parseNodes();
+    if (topLevel.length === 1 && topLevel[0].kind === 'POML') {
+      return topLevel[0];
+    }
+    
+    const lastToken = this.tokens[this.tokens.length - 1];
+    const root: ASTNode = {
+      id: this.generateId(),
+      kind: 'TEXT',
+      start: 0,
+      end: lastToken ? lastToken.end : 0,
+      content: this.tokens.map(t => t.value).join(''),
+      children: topLevel,
+      textSegments: []
+    };
+    
+    // Point every top-level child back at the synthetic root.
+    for (const child of topLevel) {
+      child.parent = root;
+    }
+    return root;
+  }
+
+  private parseNodes(): ASTNode[] {
+    // Top-level loop: turns the token stream into sibling nodes until the tokens run out.
+    const nodes: ASTNode[] = [];
+    while (this.position < this.tokens.length) {
+      const token = this.peek();
+      if (!token) break;
+      
+      if (token.type === 'TEMPLATE_VAR') {
+        nodes.push(this.parseTemplateVariable());
+      } else if (token.type === 'TAG_OPEN') {
+        const tagName = this.extractTagName(token.value);
+        
+        if (this.validPomlTags.has(tagName.toLowerCase())) {
+          const node = this.parsePomlNode();
+          if (node) {
+            nodes.push(node);
+          }
+        } else {
+          // Invalid tag, treat as text
+          nodes.push(this.parseTextFromToken());
+        }
+      } else {
+        // TEXT tokens, plus closing / self-closing tags that no open element claimed (e.g. the
+        // `</foo>` of an unknown `<foo>`): keep the source verbatim as text rather than dropping
+        // it. Self-closing POML elements are not modeled as element nodes yet.
+        nodes.push(this.parseTextFromToken());
+      }
+    }
+    
+    return nodes;
+  }
+
+  private parseTemplateVariable(): ASTNode {
+    // Consume one TEMPLATE_VAR token; strip '}}' only if present so an unterminated `{{ name` keeps its tail.
+    const token = this.advance()!;
+    const terminated = token.value.length >= 4 && token.value.endsWith('}}');
+    return {
+      id: this.generateId(),
+      kind: 'TEMPLATE',
+      start: token.start,
+      end: token.end,
+      content: token.value,
+      expression: (terminated ? token.value.slice(2, -2) : token.value.slice(2)).trim(),
+      children: []
+    };
+  }
+
+  private parseTextFromToken(): ASTNode {
+    // Consume the current token, whatever its type, and wrap its raw text in a leaf TEXT node.
+    const { start, end, value } = this.advance()!;
+    return {
+      id: this.generateId(),
+      kind: 'TEXT',
+      start,
+      end,
+      content: value,
+      children: [],
+      textSegments: [{ start, end }]
+    };
+  }
+
+  private parsePomlNode(): ASTNode | null {
+    // Parse one element whose TAG_OPEN token is current: consume it, its children, and the
+    // matching closing tag when present. An unclosed element takes every remaining token as a
+    // child while its own `end`/`content` stay at the opening tag. Always returns a node.
+    const openIndex = this.position;
+    const openToken = this.advance()!;
+    const tagName = this.extractTagName(openToken.value);
+    const lowerName = tagName.toLowerCase();
+    const isRawText = lowerName === 'text'; // <text> bodies are kept as raw text, not parsed as POML
+    const attributes = this.parseAttributes(openToken.value);
+    
+    const node: ASTNode = {
+      id: this.generateId(),
+      kind: lowerName === 'meta' ? 'META' : 'POML',
+      start: openToken.start,
+      end: openToken.end, // Will be updated when we find closing tag
+      content: openToken.value, // Will be updated
+      tagName: lowerName,
+      attributes,
+      children: [],
+      openingTag: {
+        start: openToken.start,
+        end: openToken.end,
+        nameRange: {
+          start: openToken.start + 1,
+          end: openToken.start + 1 + tagName.length
+        }
+      }
+    };
+    
+    // Parse children until we find the closing tag
+    const children: ASTNode[] = [];
+    let depth = 1; // raw nesting of same-name tags; only ever exceeds 1 inside <text>
+    
+    while (this.position < this.tokens.length && depth > 0) {
+      const token = this.peek();
+      if (!token) break;
+      
+      if (token.type === 'TAG_OPEN') {
+        const childName = this.extractTagName(token.value).toLowerCase();
+        if (!isRawText && this.validPomlTags.has(childName)) {
+          // The recursive call consumes the child through its own closing tag, so it must
+          // not be counted in `depth` (counting it would leave this element unable to close).
+          const childNode = this.parsePomlNode();
+          if (childNode) {
+            childNode.parent = node;
+            children.push(childNode);
+          }
+        } else {
+          // Raw <text> body or unknown tag: keep the tag's source as a text child. A nested
+          // same-name tag is not recursed into, so count it to pair up its closing tag.
+          if (childName === lowerName) {
+            depth++;
+          }
+          const rawTag = this.parseTextFromToken();
+          rawTag.parent = node;
+          children.push(rawTag);
+        }
+      } else if (token.type === 'TAG_CLOSE') {
+        const closeName = this.extractTagName(token.value).toLowerCase();
+        if (closeName === lowerName && --depth === 0) {
+          // Found our closing tag
+          const closeToken = this.advance()!;
+          node.end = closeToken.end;
+          node.closingTag = {
+            start: closeToken.start,
+            end: closeToken.end,
+            nameRange: {
+              start: closeToken.start + 2,
+              end: closeToken.start + 2 + tagName.length
+            }
+          };
+          break;
+        }
+        // Any other closing tag (a nested raw </text>, or the end of an unknown tag) is kept
+        // verbatim as a text child instead of being dropped from the tree.
+        const strayClose = this.parseTextFromToken();
+        strayClose.parent = node;
+        children.push(strayClose);
+      } else if (token.type === 'TEMPLATE_VAR' && !isRawText) {
+        // Only parse template variables outside of text tags
+        const templateNode = this.parseTemplateVariable();
+        templateNode.parent = node;
+        children.push(templateNode);
+      } else {
+        const textNode = this.parseTextFromToken();
+        textNode.parent = node;
+        children.push(textNode);
+      }
+    }
+    node.children = children;
+    
+    // Update content to include full tag: tokens from the opening tag through the closing tag just consumed
+    if (node.closingTag) {
+      node.content = this.tokens.slice(openIndex, this.position).map(t => t.value).join('');
+    }
+    
+    return node;
+  }
+}
+
+// Public entry point: tokenizes `content`, then builds and returns the root AST node.
+export function parseAST(content: string): ASTNode {
+  // Two passes: lex the raw text into tokens, then fold the tokens into a tree.
+  const tokenStream = new Tokenizer(content).tokenize();
+  const root = new ASTParser(tokenStream).parse();
+  return root;
+}
+// Class-style facade: PomlAstParser.parse(content) simply forwards to parseAST(content).
+export class PomlAstParser {
+  static parse(content: string): ASTNode {
+    return parseAST(content);
+  }
+}
\ No newline at end of file
diff --git a/packages/poml/tests/segment.test.ts b/packages/poml/tests/segment.test.ts
index 72a6f867..b23e16ef 100644
--- a/packages/poml/tests/segment.test.ts
+++ b/packages/poml/tests/segment.test.ts
@@ -1,27 +1,27 @@
 import { describe, expect, test } from '@jest/globals';
-import { createSegments, Segment } from '../reader/segment';
+import { parseAST, ASTNode } from '../reader/ast';
 
-describe('createSegments', () => {
+describe('parseAST', () => {
   test('pure text content', () => {
     const content = 'This is pure text content with no POML tags.';
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.content).toBe(content);
-    expect(segment.start).toBe(0);
-    expect(segment.end).toBe(content.length);
-    expect(segment.children).toHaveLength(0);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.content).toBe(content);
+    expect(ast.start).toBe(0);
+    expect(ast.end).toBe(content.length);
+    expect(ast.children).toHaveLength(0);
   });
 
   test('single POML tag', () => {
     const content = '<task>Analyze the data</task>';
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('POML');
-    expect(segment.tagName).toBe('task');
-    expect(segment.content).toBe(content);
-    expect(segment.start).toBe(0);
-    expect(segment.end).toBe(content.length);
+    expect(ast.kind).toBe('POML');
+    expect(ast.tagName).toBe('task');
+    expect(ast.content).toBe(content);
+    expect(ast.start).toBe(0);
+    expect(ast.end).toBe(content.length);
   });
 
   test('mixed content with text and POML', () => {
@@ -39,12 +39,12 @@ Here are some key points to consider:
 - Statistical significance  
 - Business impact`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(4);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(4);
     
-    const children = segment.children;
+    const children = ast.children;
     expect(children[0].kind).toBe('TEXT');
     expect(children[0].content).toContain('# My Analysis Document');
     
@@ -69,40 +69,40 @@ Here are some key points to consider:
   </example>
 </examples>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('POML');
-    expect(segment.tagName).toBe('examples');
-    expect(segment.children).toHaveLength(0);
-    expect(segment.content).toBe(content);
+    expect(ast.kind).toBe('POML');
+    expect(ast.tagName).toBe('examples');
+    expect(ast.children).toHaveLength(0);
+    expect(ast.content).toBe(content);
   });
 
   test('text in text', () => {
     const content = `<text>This is a text<text> with nested text content.</text></text>`;
-    const segment = createSegments(content);
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.content).toBe(content);
-    expect(segment.children).toHaveLength(0);
+    const ast = parseAST(content);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.content).toBe(content);
+    expect(ast.children).toHaveLength(0);
   });
 
   test('text in text in POML', () => {
     const content = `<poml><text>This is a text<text> with nested text content.</text></text></poml>`;
-    const segment = createSegments(content);
-    expect(segment.kind).toBe('POML');  
-    expect(segment.tagName).toBe('poml');
-    expect(segment.children).toHaveLength(1);
-    const textSegment = segment.children[0];
-    expect(textSegment.kind).toBe('TEXT');
-    expect(textSegment.content).toBe('This is a text<text> with nested text content.</text>');
+    const ast = parseAST(content);
+    expect(ast.kind).toBe('POML');  
+    expect(ast.tagName).toBe('poml');
+    expect(ast.children).toHaveLength(1);
+    const textNode = ast.children[0];
+    expect(textNode.kind).toBe('TEXT');
+    expect(textNode.content).toBe('This is a text<text> with nested text content.</text>');
   });
 
   test('nested same tag in POML', () => {
     const content = `<task>Process data<task> with nested task content.</task></task>`;
-    const segment = createSegments(content);
-    expect(segment.kind).toBe('POML');
-    expect(segment.tagName).toBe('poml');
-    expect(segment.children).toHaveLength(0);
-    expect(segment.content).toBe('<task>Process data<task> with nested task content.</task></task>');
+    const ast = parseAST(content);
+    expect(ast.kind).toBe('POML');
+    expect(ast.tagName).toBe('task');
+    expect(ast.children).toHaveLength(0);
+    expect(ast.content).toBe('<task>Process data<task> with nested task content.</task></task>');
   });
 
   test('text tag with nested content', () => {
@@ -121,19 +121,19 @@ Here are some key points to consider:
   <hint>Remember to check the format</hint>
 </poml>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('POML');
-    expect(segment.tagName).toBe('poml');
-    expect(segment.children).toHaveLength(4);
+    expect(ast.kind).toBe('POML');
+    expect(ast.tagName).toBe('poml');
+    expect(ast.children).toHaveLength(4);
     
-    const textSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'text');
-    expect(textSegment).toBeDefined();
-    expect(textSegment!.children).toHaveLength(3);
+    const textNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'text');
+    expect(textNode).toBeDefined();
+    expect(textNode!.children).toHaveLength(3);
     
-    const nestedCpSegment = textSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
-    expect(nestedCpSegment).toBeDefined();
-    expect(nestedCpSegment!.content).toBe('<cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>');
+    const nestedCpNode = textNode!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
+    expect(nestedCpNode).toBeDefined();
+    expect(nestedCpNode!.content).toBe('<cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>');
   });
 
   test('meta tags', () => {
@@ -145,18 +145,18 @@ Here are some key points to consider:
 
 <task>Complete the analysis</task>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(3);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(3);
     
-    const metaSegment = segment.children.find(c => c.kind === 'META');
-    expect(metaSegment).toBeDefined();
-    expect(metaSegment!.tagName).toBe('meta');
-    expect(metaSegment!.children).toHaveLength(0);
+    const metaNode = ast.children.find(c => c.kind === 'META');
+    expect(metaNode).toBeDefined();
+    expect(metaNode!.tagName).toBe('meta');
+    expect(metaNode!.children).toHaveLength(0);
     
-    const taskSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'task');
-    expect(taskSegment).toBeDefined();
+    const taskNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'task');
+    expect(taskNode).toBeDefined();
   });
 
   test('invalid tags are ignored', () => {
@@ -164,19 +164,19 @@ Here are some key points to consider:
 <task>This should be processed</task>
 <random>This should also be ignored</random>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(3);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(3);
     
-    const taskSegment = segment.children.find(c => c.kind === 'POML');
-    expect(taskSegment).toBeDefined();
-    expect(taskSegment!.tagName).toBe('task');
+    const taskNode = ast.children.find(c => c.kind === 'POML');
+    expect(taskNode).toBeDefined();
+    expect(taskNode!.tagName).toBe('task');
     
-    const textSegments = segment.children.filter(c => c.kind === 'TEXT');
-    expect(textSegments).toHaveLength(2);
-    expect(textSegments[0].content).toContain('<invalid-tag>This should be ignored</invalid-tag>');
-    expect(textSegments[1].content).toContain('<random>This should also be ignored</random>');
+    const textNodes = ast.children.filter(c => c.kind === 'TEXT');
+    expect(textNodes).toHaveLength(2);
+    expect(textNodes[0].content).toContain('<invalid-tag>This should be ignored</invalid-tag>');
+    expect(textNodes[1].content).toContain('<random>This should also be ignored</random>');
   });
 
   test('self-closing tags are ignored', () => {
@@ -185,15 +185,15 @@ Here are some key points to consider:
 <img src="test.jpg" />
 <hint>Valid hint</hint>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(4);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(4);
     
-    const pomlSegments = segment.children.filter(c => c.kind === 'POML');
-    expect(pomlSegments).toHaveLength(3);
-    expect(pomlSegments[0].tagName).toBe('task');
-    expect(pomlSegments[2].tagName).toBe('hint');
+    const pomlNodes = ast.children.filter(c => c.kind === 'POML');
+    expect(pomlNodes).toHaveLength(3);
+    expect(pomlNodes[0].tagName).toBe('task');
+    expect(pomlNodes[2].tagName).toBe('hint');
   });
 
   test('malformed tags are handled gracefully', () => {
@@ -201,42 +201,45 @@ Here are some key points to consider:
 <hint>Complete hint</hint>
 <unclosed>This has no closing tag`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(3);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(3);
     
-    const hintSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
-    expect(hintSegment).toBeDefined();
-    expect(hintSegment!.content).toBe('<hint>Complete hint</hint>');
+    const hintNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
+    expect(hintNode).toBeDefined();
+    expect(hintNode!.content).toBe('<hint>Complete hint</hint>');
     
-    const textSegments = segment.children.filter(c => c.kind === 'TEXT');
-    expect(textSegments).toHaveLength(2);
-    expect(textSegments[0].content).toBe('<task>Incomplete tag\n');
-    expect(textSegments[1].content).toBe('\n<unclosed>This has no closing tag');
+    const textNodes = ast.children.filter(c => c.kind === 'TEXT');
+    expect(textNodes).toHaveLength(2);
+    expect(textNodes[0].content).toBe('<task>Incomplete tag\n');
+    expect(textNodes[1].content).toBe('\n<unclosed>This has no closing tag');
   });
 
   test('malformed POML tags are ignored', () => {
     const content = `<task>Valid task`;
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(0);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(0);
   });
 
   test('empty content', () => {
     const content = '';
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.content).toBe('');
+    expect(ast.children).toHaveLength(0);
   });
 
   test('whitespace-only content', () => {
     const content = '   \n\n\t  \n  ';
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.content).toBe(content);
-    expect(segment.children).toHaveLength(0);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.content).toBe(content);
+    expect(ast.children).toHaveLength(0);
   });
 
   test('hyphenated tag names', () => {
@@ -244,16 +247,16 @@ Here are some key points to consider:
 <system-msg>System message</system-msg>
 <user-msg>User message</user-msg>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(4);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(4);
     
-    const pomlSegments = segment.children.filter(c => c.kind === 'POML');
-    expect(pomlSegments).toHaveLength(3);
-    expect(pomlSegments[0].tagName).toBe('output-format');
-    expect(pomlSegments[1].tagName).toBe('system-msg');
-    expect(pomlSegments[2].tagName).toBe('user-msg');
+    const pomlNodes = ast.children.filter(c => c.kind === 'POML');
+    expect(pomlNodes).toHaveLength(3);
+    expect(pomlNodes[0].tagName).toBe('output-format');
+    expect(pomlNodes[1].tagName).toBe('system-msg');
+    expect(pomlNodes[2].tagName).toBe('user-msg');
   });
 
   test('parent-child relationships', () => {
@@ -265,59 +268,50 @@ Here are some key points to consider:
   </examples>
 </task>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
     
-    const taskSegment = segment;
-    expect(taskSegment.kind).toBe('POML');
-    expect(taskSegment.tagName).toBe('task');
-    expect(taskSegment.parent).toBeUndefined();
+    const taskNode = ast;
+    expect(taskNode.kind).toBe('POML');
+    expect(taskNode.tagName).toBe('task');
+    expect(taskNode.parent).toBeUndefined();
     
-    const hintSegment = taskSegment.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
-    expect(hintSegment).toBeDefined();
-    expect(hintSegment!.parent).toBe(taskSegment);
+    const hintNode = taskNode.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
+    expect(hintNode).toBeDefined();
+    expect(hintNode!.parent).toBe(taskNode);
     
-    const examplesSegment = taskSegment.children.find(c => c.kind === 'POML' && c.tagName === 'examples');
-    expect(examplesSegment).toBeDefined();
-    expect(examplesSegment!.parent).toBe(taskSegment);
+    const examplesNode = taskNode.children.find(c => c.kind === 'POML' && c.tagName === 'examples');
+    expect(examplesNode).toBeDefined();
+    expect(examplesNode!.parent).toBe(taskNode);
     
-    const exampleSegment = examplesSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'example');
-    expect(exampleSegment).toBeDefined();
-    expect(exampleSegment!.parent).toBe(examplesSegment);
+    const exampleNode = examplesNode!.children.find(c => c.kind === 'POML' && c.tagName === 'example');
+    expect(exampleNode).toBeDefined();
+    expect(exampleNode!.parent).toBe(examplesNode);
   });
 
-  test('segment IDs are unique', () => {
+  test('node IDs are unique', () => {
     const content = `<task>First task</task>
 <task>Second task</task>
 <hint>A hint</hint>`;
 
-    const segment = createSegments(content);
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(5);
+    const ast = parseAST(content);
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(5);
     
-    function collectAllSegments(segment: Segment): Segment[] {
-      const all = [segment];
-      segment.children.forEach(child => {
-        all.push(...collectAllSegments(child));
+    function collectAllNodes(node: ASTNode): ASTNode[] {
+      const all = [node];
+      node.children.forEach(child => {
+        all.push(...collectAllNodes(child));
       });
       return all;
     }
     
-    const allSegments = collectAllSegments(segment);
-    const ids = allSegments.map(s => s.id);
+    const allNodes = collectAllNodes(ast);
+    const ids = allNodes.map(s => s.id);
     const uniqueIds = new Set(ids);
     
     expect(uniqueIds.size).toBe(ids.length);
   });
 
-  test('path parameter is preserved', () => {
-    const content = '<task>Test task</task>';
-    const path = '/test/path/file.poml';
-    const segment = createSegments(content, path);
-    
-    expect(segment.path).toBe(path);
-    expect(segment.children[0].path).toBe(path);
-  });
-
   test('complex example from specification', () => {
     const content = `<poml>
   <task>Process the following data</task>
@@ -343,30 +337,85 @@ There can be some intervening text here as well.
 
 <p>POML elements do not necessarily reside in a poml element.</p>`;
 
-    const segment = createSegments(content);
+    const ast = parseAST(content);
+    
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.children).toHaveLength(5);
     
-    expect(segment.kind).toBe('TEXT');
-    expect(segment.children).toHaveLength(5);
+    const firstPomlNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'poml');
+    expect(firstPomlNode).toBeDefined();
+    expect(firstPomlNode!.children).toHaveLength(4);
     
-    const firstPomlSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'poml');
-    expect(firstPomlSegment).toBeDefined();
-    expect(firstPomlSegment!.children).toHaveLength(4);
+    const textNode = firstPomlNode!.children.find(c => c.kind === 'POML' && c.tagName === 'text');
+    expect(textNode).toBeDefined();
+    expect(textNode!.children).toHaveLength(3);
+    
+    const cpNode = textNode!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
+    expect(cpNode).toBeDefined();
+    
+    const secondPomlNode = ast.children.filter(c => c.kind === 'POML' && c.tagName === 'poml')[1];
+    expect(secondPomlNode).toBeDefined();
+
+    const lineBreakNode = ast.children[3];
+    expect(lineBreakNode.kind).toBe('TEXT');
+    expect(lineBreakNode.content).toBe('\n\n');
+
+    const pNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'p');
+    expect(pNode).toBeDefined();
+  });
+
+  test('template variables in content', () => {
+    const content = `<task>Process {{variable}} with {{another_variable}}</task>`;
+    const ast = parseAST(content);
     
-    const textSegment = firstPomlSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'text');
-    expect(textSegment).toBeDefined();
-    expect(textSegment!.children).toHaveLength(3);
+    expect(ast.kind).toBe('POML');
+    expect(ast.tagName).toBe('task');
+    expect(ast.children).toHaveLength(4); // text, template, text, template
     
-    const cpSegment = textSegment!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
-    expect(cpSegment).toBeDefined();
+    const templateNodes = ast.children.filter(c => c.kind === 'TEMPLATE');
+    expect(templateNodes).toHaveLength(2);
+    expect(templateNodes[0].expression).toBe('variable');
+    expect(templateNodes[1].expression).toBe('another_variable');
+  });
+
+  test('template variables in text nodes are treated as literal', () => {
+    const content = `<text>Variables like {{this}} are shown as-is</text>`;
+    const ast = parseAST(content);
     
-    const secondPomlSegment = segment.children.filter(c => c.kind === 'POML' && c.tagName === 'poml')[1];
-    expect(secondPomlSegment).toBeDefined();
+    expect(ast.kind).toBe('TEXT');
+    expect(ast.content).toBe(content);
+    expect(ast.children).toHaveLength(0);
+  });
 
-    const lineBreakSegment = segment.children[3];
-    expect(lineBreakSegment.kind).toBe('TEXT');
-    expect(lineBreakSegment.content).toBe('\n\n');
+  test('template variables in attribute values', () => {
+    const content = `<task caption="Process {{variable}}">Content</task>`;
+    const ast = parseAST(content);
+    
+    expect(ast.kind).toBe('POML');
+    expect(ast.tagName).toBe('task');
+    expect(ast.attributes).toHaveLength(1);
+    
+    const attr = ast.attributes![0];
+    expect(attr.key).toBe('caption');
+    expect(attr.value).toHaveLength(2); // text + template
+    expect(attr.value[0].kind).toBe('TEXT');
+    expect(attr.value[0].content).toBe('Process ');
+    expect(attr.value[1].kind).toBe('TEMPLATE');
+    expect(attr.value[1].expression).toBe('variable');
+  });
 
-    const pSegment = segment.children.find(c => c.kind === 'POML' && c.tagName === 'p');
-    expect(pSegment).toBeDefined();
+  test('mixed template variables and text in attributes', () => {
+    const content = `<task title="Hello {{name}}, process {{data}} please">Content</task>`;
+    const ast = parseAST(content);
+    
+    expect(ast.kind).toBe('POML');
+    expect(ast.attributes).toHaveLength(1);
+    
+    const attr = ast.attributes![0];
+    expect(attr.value).toHaveLength(4); // text, template, text, template
+    expect(attr.value[0].content).toBe('Hello ');
+    expect(attr.value[1].expression).toBe('name');
+    expect(attr.value[2].content).toBe(', process ');
+    expect(attr.value[3].expression).toBe('data');
   });
 });
\ No newline at end of file

From dffbc805b7760c4b0263bd3400549459a13dde6c Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 15 Jul 2025 11:36:17 +0800
Subject: [PATCH 08/76] .

---
 packages/poml/reader/ast.ts                   | 145 +-----------------
 packages/poml/reader/tokenizer.ts             | 142 +++++++++++++++++
 .../{segment.test.ts => reader/ast.test.ts}   |   2 +-
 3 files changed, 144 insertions(+), 145 deletions(-)
 create mode 100644 packages/poml/reader/tokenizer.ts
 rename packages/poml/tests/{segment.test.ts => reader/ast.test.ts} (99%)

diff --git a/packages/poml/reader/ast.ts b/packages/poml/reader/ast.ts
index 4ec9fb2e..3b758e1f 100644
--- a/packages/poml/reader/ast.ts
+++ b/packages/poml/reader/ast.ts
@@ -1,3 +1,4 @@
+import { Tokenizer, Token } from './tokenizer';
 import componentDocs from '../assets/componentDocs.json';
 
 // Source position and attribute interfaces
@@ -50,150 +51,6 @@ export interface ASTNode {
   expression?: string;             // The full expression content between {{}}
 }
 
-// Token types for tokenization
-interface Token {
-  type: 'TEXT' | 'TAG_OPEN' | 'TAG_CLOSE' | 'TAG_SELF_CLOSE' | 'TEMPLATE_VAR' | 'ATTRIBUTE';
-  value: string;
-  start: number;
-  end: number;
-}
-
-// Tokenizer class
-class Tokenizer {
-  private input: string;
-  private position: number;
-
-  constructor(input: string) {
-    this.input = input;
-    this.position = 0;
-  }
-
-  tokenize(): Token[] {
-    const tokens: Token[] = [];
-    
-    while (this.position < this.input.length) {
-      // Check for template variables first
-      if (this.peek() === '{' && this.peek(1) === '{') {
-        tokens.push(this.readTemplateVariable());
-        continue;
-      }
-      
-      // Check for XML tags
-      if (this.peek() === '<') {
-        const tagToken = this.readTag();
-        if (tagToken) {
-          tokens.push(tagToken);
-          continue;
-        }
-      }
-      
-      // Read text content
-      const textToken = this.readText();
-      if (textToken.value.length > 0) {
-        tokens.push(textToken);
-      }
-    }
-    
-    return tokens;
-  }
-
-  private peek(offset: number = 0): string {
-    return this.input[this.position + offset] || '';
-  }
-
-  private advance(): string {
-    return this.input[this.position++] || '';
-  }
-
-  private readTemplateVariable(): Token {
-    const start = this.position;
-    this.advance(); // {
-    this.advance(); // {
-    
-    while (this.position < this.input.length && !(this.peek() === '}' && this.peek(1) === '}')) {
-      this.advance();
-    }
-    
-    if (this.peek() === '}' && this.peek(1) === '}') {
-      this.advance(); // }
-      this.advance(); // }
-    }
-    
-    return {
-      type: 'TEMPLATE_VAR',
-      value: this.input.substring(start, this.position),
-      start,
-      end: this.position
-    };
-  }
-
-  private readTag(): Token | null {
-    const start = this.position;
-    this.advance(); // <
-    
-    // Skip whitespace
-    while (this.peek() === ' ' || this.peek() === '\t' || this.peek() === '\n') {
-      this.advance();
-    }
-    
-    // Check for closing tag
-    const isClosing = this.peek() === '/';
-    if (isClosing) {
-      this.advance();
-    }
-    
-    // Read tag name
-    let tagName = '';
-    while (this.position < this.input.length && 
-           this.peek() !== '>' && 
-           this.peek() !== ' ' && 
-           this.peek() !== '\t' && 
-           this.peek() !== '\n') {
-      tagName += this.advance();
-    }
-    
-    // Skip attributes for now (will be parsed separately)
-    while (this.position < this.input.length && this.peek() !== '>') {
-      this.advance();
-    }
-    
-    if (this.peek() === '>') {
-      this.advance(); // >
-      
-      // Check if self-closing
-      const content = this.input.substring(start, this.position);
-      const isSelfClosing = content.endsWith('/>');
-      
-      return {
-        type: isSelfClosing ? 'TAG_SELF_CLOSE' : (isClosing ? 'TAG_CLOSE' : 'TAG_OPEN'),
-        value: content,
-        start,
-        end: this.position
-      };
-    }
-    
-    // Invalid tag, backtrack
-    this.position = start + 1;
-    return null;
-  }
-
-  private readText(): Token {
-    const start = this.position;
-    
-    while (this.position < this.input.length && 
-           this.peek() !== '<' && 
-           !(this.peek() === '{' && this.peek(1) === '{')) {
-      this.advance();
-    }
-    
-    return {
-      type: 'TEXT',
-      value: this.input.substring(start, this.position),
-      start,
-      end: this.position
-    };
-  }
-}
 
 // AST Parser class
 class ASTParser {
diff --git a/packages/poml/reader/tokenizer.ts b/packages/poml/reader/tokenizer.ts
new file mode 100644
index 00000000..a8e166d1
--- /dev/null
+++ b/packages/poml/reader/tokenizer.ts
@@ -0,0 +1,142 @@
+export interface Token {
+  type: 'TEXT' | 'TAG_OPEN' | 'TAG_CLOSE' | 'TAG_SELF_CLOSE' | 'TEMPLATE_VAR' | 'ATTRIBUTE';
+  value: string;
+  start: number;
+  end: number;
+}
+
+export class Tokenizer {
+  private input: string;
+  private position: number;
+
+  constructor(input: string) {
+    this.input = input;
+    this.position = 0;
+  }
+
+  tokenize(): Token[] {
+    const tokens: Token[] = [];
+    
+    while (this.position < this.input.length) {
+      // Check for template variables first
+      if (this.peek() === '{' && this.peek(1) === '{') {
+        tokens.push(this.readTemplateVariable());
+        continue;
+      }
+      
+      // Check for XML tags
+      if (this.peek() === '<') {
+        const tagToken = this.readTag();
+        if (tagToken) {
+          tokens.push(tagToken);
+          continue;
+        }
+      }
+      
+      // Read text content
+      const textToken = this.readText();
+      if (textToken.value.length > 0) {
+        tokens.push(textToken);
+      }
+    }
+    
+    return tokens;
+  }
+
+  private peek(offset: number = 0): string {
+    return this.input[this.position + offset] || '';
+  }
+
+  private advance(): string {
+    return this.input[this.position++] || '';
+  }
+
+  private readTemplateVariable(): Token {
+    const start = this.position;
+    this.advance(); // {
+    this.advance(); // {
+    
+    while (this.position < this.input.length && !(this.peek() === '}' && this.peek(1) === '}')) {
+      this.advance();
+    }
+    
+    if (this.peek() === '}' && this.peek(1) === '}') {
+      this.advance(); // }
+      this.advance(); // }
+    }
+    
+    return {
+      type: 'TEMPLATE_VAR',
+      value: this.input.substring(start, this.position),
+      start,
+      end: this.position
+    };
+  }
+
+  private readTag(): Token | null {
+    const start = this.position;
+    this.advance(); // <
+    
+    // Skip whitespace
+    while (this.peek() === ' ' || this.peek() === '\t' || this.peek() === '\n') {
+      this.advance();
+    }
+    
+    // Check for closing tag
+    const isClosing = this.peek() === '/';
+    if (isClosing) {
+      this.advance();
+    }
+    
+    // Read tag name
+    let tagName = '';
+    while (this.position < this.input.length && 
+           this.peek() !== '>' && 
+           this.peek() !== ' ' && 
+           this.peek() !== '\t' && 
+           this.peek() !== '\n') {
+      tagName += this.advance();
+    }
+    
+    // Skip attributes for now (will be parsed separately)
+    while (this.position < this.input.length && this.peek() !== '>') {
+      this.advance();
+    }
+    
+    if (this.peek() === '>') {
+      this.advance(); // >
+      
+      // Check if self-closing
+      const content = this.input.substring(start, this.position);
+      const isSelfClosing = content.endsWith('/>');
+      
+      return {
+        type: isSelfClosing ? 'TAG_SELF_CLOSE' : (isClosing ? 'TAG_CLOSE' : 'TAG_OPEN'),
+        value: content,
+        start,
+        end: this.position
+      };
+    }
+    
+    // Invalid tag, backtrack
+    this.position = start + 1;
+    return null;
+  }
+
+  private readText(): Token {
+    const start = this.position;
+    
+    while (this.position < this.input.length && 
+           this.peek() !== '<' && 
+           !(this.peek() === '{' && this.peek(1) === '{')) {
+      this.advance();
+    }
+    
+    return {
+      type: 'TEXT',
+      value: this.input.substring(start, this.position),
+      start,
+      end: this.position
+    };
+  }
+}
diff --git a/packages/poml/tests/segment.test.ts b/packages/poml/tests/reader/ast.test.ts
similarity index 99%
rename from packages/poml/tests/segment.test.ts
rename to packages/poml/tests/reader/ast.test.ts
index b23e16ef..9921e210 100644
--- a/packages/poml/tests/segment.test.ts
+++ b/packages/poml/tests/reader/ast.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, test } from '@jest/globals';
-import { parseAST, ASTNode } from '../reader/ast';
+import { parseAST, ASTNode } from 'poml/reader/ast';
 
 describe('parseAST', () => {
   test('pure text content', () => {

From 5fe7dd3f424ee3b043c3ba0229ba2755ad728ce3 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <Yuge.Zhang@microsoft.com>
Date: Tue, 15 Jul 2025 14:23:26 +0800
Subject: [PATCH 09/76] Update poml_extended.md

---
 docs/proposals/poml_extended.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/docs/proposals/poml_extended.md b/docs/proposals/poml_extended.md
index 4f6d3e11..8564b681 100644
--- a/docs/proposals/poml_extended.md
+++ b/docs/proposals/poml_extended.md
@@ -17,7 +17,7 @@ The current POML implementation requires files to be fully enclosed within `<pom
 1. **Backward Compatibility**: Most of existing POML files should continue to work without changes
 2. **Flexibility**: Support pure text files with embedded POML elements
 3. **Seamless Integration**: Allow switching between text and POML modes within a single file
-<!-- 4. **Component Discovery**: Automatically detect POML elements from `componentDocs.json` -->
+4. **Controlled Evolution of Tags**: behaviour of new/experimental tags is opt‑in via `<meta enable="...">`, preventing accidental breakage when upgrading the tool‑chain.
 
 ## File Format Specification
 
@@ -36,6 +36,7 @@ The system will assume the whole file is a pure text file and detects certain pa
 1. Loading component definitions from `componentDocs.json` and extracting valid POML component names and their aliases.
 2. Scanning for opening tags that match these components, and scanning until the corresponding closing tag is found.
 3. If a special tag `<text>...</text>` is found within a POML segment, it will be treated as pure text content and processed following the rules above (step 1 and 2).
+4. Unknown or disabled tags are treated as literal text and, by default, raise a diagnostic warning.
 
 An example is shown below:
 
@@ -103,6 +104,14 @@ There can be some intervening text here as well.
 Metadatas are information that is useful when parsing and rendering the file, such as context variables, stylesheets, version information, file paths, etc.
 File-level metadata can be included at any place of the file in a special `<meta>` tag. This metadata will be processed before any content parsing.
 
+**Example:**
+
+```xml
+<meta minimalPomlVersion="0.3" />
+<meta stylesheet="/path/to/stylesheet.json" />
+<meta enableTags="reference,table" unknownTags="warning" />
+```
+
 ## Architecture Design
 
 ### High-level Processing Pipeline

From 4a970a53d497a8cb08a79713caa19c79d0da2bc3 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 15 Jul 2025 17:58:00 +0800
Subject: [PATCH 10/76] Add cst

---
 packages/poml/reader/cst.ts | 233 ++++++++++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 packages/poml/reader/cst.ts

diff --git a/packages/poml/reader/cst.ts b/packages/poml/reader/cst.ts
new file mode 100644
index 00000000..43265cd9
--- /dev/null
+++ b/packages/poml/reader/cst.ts
@@ -0,0 +1,233 @@
+/*
+  Extended‑POML Lexer & CST Parser (Chevrotain)
+  ------------------------------------------------
+  • Implements a two‑phase scanning strategy (lex + parse) for the mixed‑content
+    POML format described in the design spec.
+  • Produces a Concrete‑Syntax‑Tree (CST) that preserves the complete source
+    structure – suitable for later AST conversion, code‑intel, or pretty‑printing.
+
+  Author: ChatGPT (o3) · Jul 15 2025
+*/
+
+import {
+  createToken,
+  Lexer,
+  CstParser,
+  IToken,
+  CstNode,
+  tokenMatcher,
+  EmbeddedActionsParser
+} from "chevrotain";
+
+/*───────────────────────────────────────────────────────────────────────────┐
+│ 1.  Token Definitions                                                     │
+└───────────────────────────────────────────────────────────────────────────*/
+// Helpers -----------------------------------------------------------------
+const makeRegexSafe = (re: RegExp) => {
+  return new RegExp(re.source, re.flags);
+};
+
+/** Matches valid XML / POML element or attribute names.      */
+const nameRegex = /[A-Za-z_](?:[A-Za-z0-9_.-]*)/;
+/** Rejects names that start with "xml" (case‑insensitive). */
+function validName(text: string) {
+  return !/^xml/i.test(text);
+}
+
+/* Longest tokens first – Chevrotain uses sequential matching order.
+   Pay attention to shared prefixes like "</" vs "<".               */
+// Comments  <!--  ....  --> (greedy, including line‑breaks)
+export const Comment = createToken({
+  name: "Comment",
+  pattern: /<!--[\s\S]*?-->/,
+  line_breaks: true
+});
+
+// Template delimiters {{ ... }} -------------------------------------------
+export const TmplStart = createToken({ name: "TmplStart", pattern: /{{/ });
+export const TmplEnd   = createToken({ name: "TmplEnd",   pattern: /}}/ });
+export const TmplBody  = createToken({
+  name: "TmplBody",
+  pattern: /[^{}]+/,
+  // will be pushed onto the stack between {{ ... }}
+  line_breaks: true
+});
+
+// Tag delimiters -----------------------------------------------------------
+export const CloseTagStart = createToken({
+  name: "CloseTagStart",
+  pattern: /<\//
+});
+export const SelfClose = createToken({ name: "SelfClose", pattern: /\/>/ });
+export const OpenTagStart = createToken({ name: "OpenTagStart", pattern: /</ });
+export const GT   = createToken({ name: "GT", pattern: />/ });
+
+// Misc tokens --------------------------------------------------------------
+export const Equals = createToken({ name: "Equals", pattern: /=/ });
+export const Quote  = createToken({ name: "Quote",  pattern: /"/ });
+
+// Identifiers (tag & attribute names) -------------------------------------
+export const Identifier = createToken({
+  name: "Identifier",
+  pattern: nameRegex,
+  line_breaks: false,
+  longer_alt: undefined,
+  // custom validator to reject names starting with XML
+  // Chevrotain v11 supports "validate" callback – fallback to pattern check
+});
+
+// Attribute value – everything inside double quotes (lazy, allows linebreaks)
+export const AttrText = createToken({
+  name: "AttrText",
+  pattern: /[^\"]+/,
+  line_breaks: true
+});
+
+// Raw text between tags – stop at the first "<" or "{{"
+export const RawText = createToken({
+  name: "RawText",
+  pattern: /[^<{]+/,
+  line_breaks: true
+});
+
+// Whitespace (skipped)
+export const WS = createToken({
+  name: "WS",
+  pattern: /[ \t\r\n]+/,
+  group: Lexer.SKIPPED
+});
+
+export const allTokens = [
+  // order matters!
+  Comment,
+  TmplStart,
+  TmplEnd,
+  CloseTagStart,
+  SelfClose,
+  OpenTagStart,
+  GT,
+  Equals,
+  Quote,
+  Identifier,
+  TmplBody, // must come after Identifier so {{name}} splits correctly
+  AttrText,
+  RawText,
+  WS
+];
+
+export const PomlLexer = new Lexer(allTokens, {
+  positionTracking: "full"
+});
+
+/*───────────────────────────────────────────────────────────────────────────┐
+│ 2.  CST Parser                                                            │
+└───────────────────────────────────────────────────────────────────────────*/
+class PomlCstParser extends CstParser {
+  constructor() {
+    super(allTokens, { recoveryEnabled: true });
+
+    const $ = this;
+
+    $.RULE("document", () => {
+      $.MANY(() => {
+        $.SUBRULE($.content);
+      });
+    });
+
+    $.RULE("content", () => {
+      $.OR([
+        { ALT: () => $.SUBRULE($.element) },
+        { ALT: () => $.SUBRULE($.template) },
+        { ALT: () => $.CONSUME(RawText) },
+        { ALT: () => $.CONSUME(Comment) }
+      ]);
+    });
+
+    // <tag ...> content* </tag>   |   <tag ... />
+    $.RULE("element", () => {
+      $.CONSUME(OpenTagStart);
+      const nameToken = $.CONSUME(Identifier);
+
+      $.MANY(() => {
+        $.SUBRULE($.attribute);
+      });
+
+      $.OR([
+        { ALT: () => {
+            $.CONSUME(SelfClose);
+          }
+        },
+        { ALT: () => {
+            $.CONSUME(GT);
+            $.MANY2(() => {
+              $.SUBRULE2($.content);
+            });
+            $.CONSUME(CloseTagStart);
+            $.CONSUME2(Identifier, { LABEL: "closingName" });
+            $.CONSUME2(GT);
+          }
+        }
+      ]);
+    });
+
+    // attrName = "value"
+    $.RULE("attribute", () => {
+      $.CONSUME(Identifier, { LABEL: "attrName" });
+      $.CONSUME(Equals);
+      $.CONSUME(Quote);
+      $.MANY(() => {
+        $.OR([
+          { ALT: () => $.SUBRULE($.template) },
+          { ALT: () => $.CONSUME(AttrText) }
+        ]);
+      });
+      $.CONSUME2(Quote);
+    });
+
+    // {{ expression }}
+    $.RULE("template", () => {
+      $.CONSUME(TmplStart);
+      $.CONSUME(TmplBody, { LABEL: "expr" });
+      $.CONSUME(TmplEnd);
+    });
+
+    this.performSelfAnalysis();
+  }
+}
+
+export const parser = new PomlCstParser();
+
+/*───────────────────────────────────────────────────────────────────────────┐
+│ 3.  Convenience API                                                       │
+└───────────────────────────────────────────────────────────────────────────*/
+export interface ParseResult {
+  cst: CstNode | undefined;
+  lexErrors: any[];
+  parseErrors: any[];
+}
+
+/**
+ * Parses a given POML / mixed‑content string and returns the CST & diagnostics.
+ */
+export function parsePoml(input: string): ParseResult {
+  const lexResult = PomlLexer.tokenize(input);
+  parser.input = lexResult.tokens;
+  const cst = parser.document();
+
+  return {
+    cst,
+    lexErrors: lexResult.errors,
+    parseErrors: parser.errors
+  };
+}
+
+/*───────────────────────────────────────────────────────────────────────────┐
+│ 4.  Quick demo                                                            │
+└───────────────────────────────────────────────────────────────────────────*/
+if (require.main === module) {
+  const sample = `\n# Hello\n<task foo="bar">Do something</task>\n`; // eslint‑disable‑line  no-console
+  const result = parsePoml(sample);
+  console.log("Lex errors:", result.lexErrors);
+  console.log("Parse errors:", result.parseErrors);
+  console.dir(result.cst, { depth: 10, colors: true });
+}

From f77b2e052dce6bca013d542abaef4191d523dc73 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Wed, 16 Jul 2025 20:24:32 +0800
Subject: [PATCH 11/76] Add lexer

---
 packages/poml/reader/cst.ts              | 233 ---------
 packages/poml/reader/lexer.ts            |  92 ++++
 packages/poml/tests/reader/lexer.test.ts | 633 +++++++++++++++++++++++
 3 files changed, 725 insertions(+), 233 deletions(-)
 create mode 100644 packages/poml/reader/lexer.ts
 create mode 100644 packages/poml/tests/reader/lexer.test.ts

diff --git a/packages/poml/reader/cst.ts b/packages/poml/reader/cst.ts
index 43265cd9..e69de29b 100644
--- a/packages/poml/reader/cst.ts
+++ b/packages/poml/reader/cst.ts
@@ -1,233 +0,0 @@
-/*
-  Extended‑POML Lexer & CST Parser (Chevrotain)
-  ------------------------------------------------
-  • Implements a two‑phase scanning strategy (lex + parse) for the mixed‑content
-    POML format described in the design spec.
-  • Produces a Concrete‑Syntax‑Tree (CST) that preserves the complete source
-    structure – suitable for later AST conversion, code‑intel, or pretty‑printing.
-
-  Author: ChatGPT (o3) · Jul 15 2025
-*/
-
-import {
-  createToken,
-  Lexer,
-  CstParser,
-  IToken,
-  CstNode,
-  tokenMatcher,
-  EmbeddedActionsParser
-} from "chevrotain";
-
-/*───────────────────────────────────────────────────────────────────────────┐
-│ 1.  Token Definitions                                                     │
-└───────────────────────────────────────────────────────────────────────────*/
-// Helpers -----------------------------------------------------------------
-const makeRegexSafe = (re: RegExp) => {
-  return new RegExp(re.source, re.flags);
-};
-
-/** Matches valid XML / POML element or attribute names.      */
-const nameRegex = /[A-Za-z_](?:[A-Za-z0-9_.-]*)/;
-/** Rejects names that start with "xml" (case‑insensitive). */
-function validName(text: string) {
-  return !/^xml/i.test(text);
-}
-
-/* Longest tokens first – Chevrotain uses sequential matching order.
-   Pay attention to shared prefixes like "</" vs "<".               */
-// Comments  <!--  ....  --> (greedy, including line‑breaks)
-export const Comment = createToken({
-  name: "Comment",
-  pattern: /<!--[\s\S]*?-->/,
-  line_breaks: true
-});
-
-// Template delimiters {{ ... }} -------------------------------------------
-export const TmplStart = createToken({ name: "TmplStart", pattern: /{{/ });
-export const TmplEnd   = createToken({ name: "TmplEnd",   pattern: /}}/ });
-export const TmplBody  = createToken({
-  name: "TmplBody",
-  pattern: /[^{}]+/,
-  // will be pushed onto the stack between {{ ... }}
-  line_breaks: true
-});
-
-// Tag delimiters -----------------------------------------------------------
-export const CloseTagStart = createToken({
-  name: "CloseTagStart",
-  pattern: /<\//
-});
-export const SelfClose = createToken({ name: "SelfClose", pattern: /\/>/ });
-export const OpenTagStart = createToken({ name: "OpenTagStart", pattern: /</ });
-export const GT   = createToken({ name: "GT", pattern: />/ });
-
-// Misc tokens --------------------------------------------------------------
-export const Equals = createToken({ name: "Equals", pattern: /=/ });
-export const Quote  = createToken({ name: "Quote",  pattern: /"/ });
-
-// Identifiers (tag & attribute names) -------------------------------------
-export const Identifier = createToken({
-  name: "Identifier",
-  pattern: nameRegex,
-  line_breaks: false,
-  longer_alt: undefined,
-  // custom validator to reject names starting with XML
-  // Chevrotain v11 supports "validate" callback – fallback to pattern check
-});
-
-// Attribute value – everything inside double quotes (lazy, allows linebreaks)
-export const AttrText = createToken({
-  name: "AttrText",
-  pattern: /[^\"]+/,
-  line_breaks: true
-});
-
-// Raw text between tags – stop at the first "<" or "{{"
-export const RawText = createToken({
-  name: "RawText",
-  pattern: /[^<{]+/,
-  line_breaks: true
-});
-
-// Whitespace (skipped)
-export const WS = createToken({
-  name: "WS",
-  pattern: /[ \t\r\n]+/,
-  group: Lexer.SKIPPED
-});
-
-export const allTokens = [
-  // order matters!
-  Comment,
-  TmplStart,
-  TmplEnd,
-  CloseTagStart,
-  SelfClose,
-  OpenTagStart,
-  GT,
-  Equals,
-  Quote,
-  Identifier,
-  TmplBody, // must come after Identifier so {{name}} splits correctly
-  AttrText,
-  RawText,
-  WS
-];
-
-export const PomlLexer = new Lexer(allTokens, {
-  positionTracking: "full"
-});
-
-/*───────────────────────────────────────────────────────────────────────────┐
-│ 2.  CST Parser                                                            │
-└───────────────────────────────────────────────────────────────────────────*/
-class PomlCstParser extends CstParser {
-  constructor() {
-    super(allTokens, { recoveryEnabled: true });
-
-    const $ = this;
-
-    $.RULE("document", () => {
-      $.MANY(() => {
-        $.SUBRULE($.content);
-      });
-    });
-
-    $.RULE("content", () => {
-      $.OR([
-        { ALT: () => $.SUBRULE($.element) },
-        { ALT: () => $.SUBRULE($.template) },
-        { ALT: () => $.CONSUME(RawText) },
-        { ALT: () => $.CONSUME(Comment) }
-      ]);
-    });
-
-    // <tag ...> content* </tag>   |   <tag ... />
-    $.RULE("element", () => {
-      $.CONSUME(OpenTagStart);
-      const nameToken = $.CONSUME(Identifier);
-
-      $.MANY(() => {
-        $.SUBRULE($.attribute);
-      });
-
-      $.OR([
-        { ALT: () => {
-            $.CONSUME(SelfClose);
-          }
-        },
-        { ALT: () => {
-            $.CONSUME(GT);
-            $.MANY2(() => {
-              $.SUBRULE2($.content);
-            });
-            $.CONSUME(CloseTagStart);
-            $.CONSUME2(Identifier, { LABEL: "closingName" });
-            $.CONSUME2(GT);
-          }
-        }
-      ]);
-    });
-
-    // attrName = "value"
-    $.RULE("attribute", () => {
-      $.CONSUME(Identifier, { LABEL: "attrName" });
-      $.CONSUME(Equals);
-      $.CONSUME(Quote);
-      $.MANY(() => {
-        $.OR([
-          { ALT: () => $.SUBRULE($.template) },
-          { ALT: () => $.CONSUME(AttrText) }
-        ]);
-      });
-      $.CONSUME2(Quote);
-    });
-
-    // {{ expression }}
-    $.RULE("template", () => {
-      $.CONSUME(TmplStart);
-      $.CONSUME(TmplBody, { LABEL: "expr" });
-      $.CONSUME(TmplEnd);
-    });
-
-    this.performSelfAnalysis();
-  }
-}
-
-export const parser = new PomlCstParser();
-
-/*───────────────────────────────────────────────────────────────────────────┐
-│ 3.  Convenience API                                                       │
-└───────────────────────────────────────────────────────────────────────────*/
-export interface ParseResult {
-  cst: CstNode | undefined;
-  lexErrors: any[];
-  parseErrors: any[];
-}
-
-/**
- * Parses a given POML / mixed‑content string and returns the CST & diagnostics.
- */
-export function parsePoml(input: string): ParseResult {
-  const lexResult = PomlLexer.tokenize(input);
-  parser.input = lexResult.tokens;
-  const cst = parser.document();
-
-  return {
-    cst,
-    lexErrors: lexResult.errors,
-    parseErrors: parser.errors
-  };
-}
-
-/*───────────────────────────────────────────────────────────────────────────┐
-│ 4.  Quick demo                                                            │
-└───────────────────────────────────────────────────────────────────────────*/
-if (require.main === module) {
-  const sample = `\n# Hello\n<task foo="bar">Do something</task>\n`; // eslint‑disable‑line  no-console
-  const result = parsePoml(sample);
-  console.log("Lex errors:", result.lexErrors);
-  console.log("Parse errors:", result.parseErrors);
-  console.dir(result.cst, { depth: 10, colors: true });
-}
diff --git a/packages/poml/reader/lexer.ts b/packages/poml/reader/lexer.ts
new file mode 100644
index 00000000..292647ed
--- /dev/null
+++ b/packages/poml/reader/lexer.ts
@@ -0,0 +1,92 @@
+import { createToken, Lexer } from 'chevrotain';
+
+// Define token types for extended POML
+export const Comment = createToken({ name: 'Comment', pattern: /<!--[\s\S]*?-->/ });
+export const TemplateOpen = createToken({ name: 'TemplateOpen', pattern: /{{/ });
+export const TemplateClose = createToken({ name: 'TemplateClose', pattern: /}}/ });
+export const TagClosingOpen = createToken({ name: 'TagClosingOpen', pattern: /<\// });
+export const TagSelfClose = createToken({ name: 'TagSelfClose', pattern: /\/>/ });
+export const TagOpen = createToken({ name: 'TagOpen', pattern: /</ });
+export const TagClose = createToken({ name: 'TagClose', pattern: />/ });
+export const Equals = createToken({ name: 'Equals', pattern: /=/ });
+
+// Individual character tokens for quotes and backslash - CST parser will handle semantics
+export const DoubleQuote = createToken({ name: 'DoubleQuote', pattern: /"/ });
+export const SingleQuote = createToken({ name: 'SingleQuote', pattern: /'/ });
+export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
+
+export const Identifier = createToken({ 
+  name: 'Identifier', 
+  pattern: /[a-zA-Z_][a-zA-Z0-9_-]*/ 
+});
+
+export const Whitespace = createToken({ 
+  name: 'Whitespace', 
+  pattern: /[ \t\r\n]+/,
+  line_breaks: true
+});
+
+export const TemplateContent = createToken({ 
+  name: 'TemplateContent', 
+  pattern: /[^}]+/,
+  line_breaks: true
+});
+
+// Text content - should not consume quotes, backslashes, or tag/template delimiters
+export const TextContent = createToken({ 
+  name: 'TextContent', 
+  pattern: /[^<{}"'\\]+/,
+  line_breaks: true
+});
+
+// Define token order - more specific patterns first
+export const allTokens = [
+  Comment,
+  TemplateOpen,
+  TemplateClose,
+  TagClosingOpen, // Must come before TagOpen
+  TagSelfClose,   // Must come before TagClose
+  TagOpen,
+  TagClose,
+  Equals,
+  DoubleQuote,
+  SingleQuote,
+  Backslash,
+  Identifier,
+  Whitespace,
+  TemplateContent,
+  TextContent
+];
+
+// Extended POML Lexer class
+export class ExtendedPomlLexer {
+  private lexer: Lexer;
+  
+  constructor() {
+    this.lexer = new Lexer(allTokens);
+  }
+
+  public tokenize(text: string) {
+    const lexingResult = this.lexer.tokenize(text);
+    
+    if (lexingResult.errors.length > 0) {
+      console.warn('Lexing errors:', lexingResult.errors);
+    }
+    
+    return {
+      tokens: lexingResult.tokens,
+      errors: lexingResult.errors,
+      groups: lexingResult.groups
+    };
+  }
+}
+
+// Create a single instance to export
+export const extendedPomlLexer = new ExtendedPomlLexer();
+
+// Export token types for use in parser
+export type {
+  IToken,
+  ILexingError,
+  ILexingResult
+} from 'chevrotain';
\ No newline at end of file
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
new file mode 100644
index 00000000..8d5c0488
--- /dev/null
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -0,0 +1,633 @@
+import { describe, expect, test } from '@jest/globals';
+import { 
+  extendedPomlLexer, 
+  Comment, 
+  TemplateOpen, 
+  TemplateClose, 
+  TagOpen, 
+  TagClose, 
+  TagClosingOpen, 
+  TagSelfClose, 
+  Equals, 
+  DoubleQuote, 
+  SingleQuote, 
+  Backslash, 
+  Identifier, 
+  Whitespace, 
+  TemplateContent, 
+  TextContent 
+} from '../../reader/lexer';
+
+describe('ExtendedPomlLexer', () => {
+  
+  describe('Comments', () => {
+    test('should tokenize HTML comments', () => {
+      const input = '<!-- This is a comment -->';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(Comment);
+      expect(result.tokens[0].image).toBe('<!-- This is a comment -->');
+    });
+
+    test('should tokenize multiline comments', () => {
+      const input = `<!-- 
+        This is a 
+        multiline comment 
+      -->`;
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(Comment);
+    });
+
+    test('should tokenize comments with content after', () => {
+      const input = '<!-- comment -->Some text';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(2);
+      expect(result.tokens[0].tokenType).toBe(Comment);
+      expect(result.tokens[1].tokenType).toBe(TextContent);
+      expect(result.tokens[1].image).toBe('Some text');
+    });
+  });
+
+  describe('Template Variables', () => {
+    test('should tokenize template variable delimiters', () => {
+      const input = '{{variable}}';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(3);
+      expect(result.tokens[0].tokenType).toBe(TemplateOpen);
+      expect(result.tokens[0].image).toBe('{{');
+      expect(result.tokens[1].tokenType).toBe(TemplateContent);
+      expect(result.tokens[1].image).toBe('variable');
+      expect(result.tokens[2].tokenType).toBe(TemplateClose);
+      expect(result.tokens[2].image).toBe('}}');
+    });
+
+    test('should tokenize template variables with complex expressions', () => {
+      const input = '{{user.name || "Anonymous"}}';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(3);
+      expect(result.tokens[0].tokenType).toBe(TemplateOpen);
+      expect(result.tokens[1].tokenType).toBe(TemplateContent);
+      expect(result.tokens[1].image).toBe('user.name || "Anonymous"');
+      expect(result.tokens[2].tokenType).toBe(TemplateClose);
+    });
+
+    test('should tokenize multiple template variables', () => {
+      const input = '{{first}} and {{second}}';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(6);
+      expect(result.tokens[0].tokenType).toBe(TemplateOpen);
+      expect(result.tokens[1].tokenType).toBe(TemplateContent);
+      expect(result.tokens[1].image).toBe('first');
+      expect(result.tokens[2].tokenType).toBe(TemplateClose);
+      expect(result.tokens[3].tokenType).toBe(TextContent);
+      expect(result.tokens[3].image).toBe(' and ');
+      expect(result.tokens[4].tokenType).toBe(TemplateOpen);
+      expect(result.tokens[5].tokenType).toBe(TemplateContent);
+    });
+  });
+
+  describe('XML Tags', () => {
+    test('should tokenize opening tags', () => {
+      const input = '<task>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(3);
+      expect(result.tokens[0].tokenType).toBe(TagOpen);
+      expect(result.tokens[0].image).toBe('<');
+      expect(result.tokens[1].tokenType).toBe(Identifier);
+      expect(result.tokens[1].image).toBe('task');
+      expect(result.tokens[2].tokenType).toBe(TagClose);
+      expect(result.tokens[2].image).toBe('>');
+    });
+
+    test('should tokenize closing tags', () => {
+      const input = '</task>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(3);
+      expect(result.tokens[0].tokenType).toBe(TagClosingOpen);
+      expect(result.tokens[0].image).toBe('</');
+      expect(result.tokens[1].tokenType).toBe(Identifier);
+      expect(result.tokens[1].image).toBe('task');
+      expect(result.tokens[2].tokenType).toBe(TagClose);
+      expect(result.tokens[2].image).toBe('>');
+    });
+
+    test('should tokenize self-closing tags', () => {
+      const input = '<meta />';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(3);
+      expect(result.tokens[0].tokenType).toBe(TagOpen);
+      expect(result.tokens[0].image).toBe('<');
+      expect(result.tokens[1].tokenType).toBe(Identifier);
+      expect(result.tokens[1].image).toBe('meta');
+      expect(result.tokens[2].tokenType).toBe(TagSelfClose);
+      expect(result.tokens[2].image).toBe('/>');
+    });
+
+    test('should tokenize tags with attributes', () => {
+      const input = '<task id="123" class="important">';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(TagOpen);
+      expect(tokenTypes).toContain(Identifier);
+      expect(tokenTypes).toContain(Equals);
+      expect(tokenTypes).toContain(DoubleQuote);
+      expect(tokenTypes).toContain(TagClose);
+      
+      // Verify specific tokens exist
+      expect(result.tokens[0].tokenType).toBe(TagOpen);
+      expect(result.tokens[0].image).toBe('<');
+      
+      const identifierTokens = result.tokens.filter(t => t.tokenType === Identifier);
+      expect(identifierTokens.length).toBeGreaterThanOrEqual(3); // task, id, class
+      expect(identifierTokens[0].image).toBe('task');
+      expect(identifierTokens[1].image).toBe('id');
+    });
+  });
+
+  describe('Quote and Escape Characters', () => {
+    test('should tokenize double quotes as individual tokens', () => {
+      const input = '"Hello world"';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(DoubleQuote);
+      expect(tokenTypes).toContain(TextContent);
+      
+      // First and last tokens should be quotes
+      expect(result.tokens[0].tokenType).toBe(DoubleQuote);
+      expect(result.tokens[0].image).toBe('"');
+      expect(result.tokens[result.tokens.length - 1].tokenType).toBe(DoubleQuote);
+      expect(result.tokens[result.tokens.length - 1].image).toBe('"');
+    });
+
+    test('should tokenize single quotes as individual tokens', () => {
+      const input = "'Hello world'";
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(SingleQuote);
+      expect(tokenTypes).toContain(TextContent);
+      
+      // First and last tokens should be quotes
+      expect(result.tokens[0].tokenType).toBe(SingleQuote);
+      expect(result.tokens[0].image).toBe("'");
+      expect(result.tokens[result.tokens.length - 1].tokenType).toBe(SingleQuote);
+      expect(result.tokens[result.tokens.length - 1].image).toBe("'");
+    });
+
+    test('should tokenize backslashes as individual tokens', () => {
+      const input = 'text\\with\\backslashes';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(Backslash);
+      expect(tokenTypes).toContain(TextContent);
+      
+      // Should have backslash tokens
+      const backslashTokens = result.tokens.filter(t => t.tokenType === Backslash);
+      expect(backslashTokens.length).toBe(2);
+      expect(backslashTokens[0].image).toBe('\\');
+      expect(backslashTokens[1].image).toBe('\\');
+    });
+
+    test('should handle mixed quotes and backslashes', () => {
+      const input = 'text "with \\"escaped\\" quotes"';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(DoubleQuote);
+      expect(tokenTypes).toContain(Backslash);
+      expect(tokenTypes).toContain(TextContent);
+    });
+  });
+
+  describe('Identifiers', () => {
+    test('should tokenize simple identifiers', () => {
+      const input = 'task';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(Identifier);
+      expect(result.tokens[0].image).toBe('task');
+    });
+
+    test('should tokenize identifiers with hyphens', () => {
+      const input = 'my-component';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(Identifier);
+      expect(result.tokens[0].image).toBe('my-component');
+    });
+
+    test('should tokenize identifiers with underscores', () => {
+      const input = 'my_component';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(Identifier);
+      expect(result.tokens[0].image).toBe('my_component');
+    });
+
+    test('should tokenize identifiers with numbers', () => {
+      const input = 'component123';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(Identifier);
+      expect(result.tokens[0].image).toBe('component123');
+    });
+  });
+
+  describe('Text Content', () => {
+    test('should tokenize plain text', () => {
+      const input = 'This is some plain text';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(TextContent);
+      expect(result.tokens[0].image).toBe('This is some plain text');
+    });
+
+    test('should tokenize text with newlines', () => {
+      const input = `Line 1
+Line 2
+Line 3`;
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1);
+      expect(result.tokens[0].tokenType).toBe(TextContent);
+      expect(result.tokens[0].image).toBe(`Line 1
+Line 2
+Line 3`);
+    });
+
+    test('should stop text content at tags', () => {
+      const input = 'Some text <tag>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(4);
+      expect(result.tokens[0].tokenType).toBe(TextContent);
+      expect(result.tokens[0].image).toBe('Some text ');
+      expect(result.tokens[1].tokenType).toBe(TagOpen);
+      expect(result.tokens[2].tokenType).toBe(Identifier);
+      expect(result.tokens[3].tokenType).toBe(TagClose);
+    });
+
+    test('should stop text content at template variables', () => {
+      const input = 'Some text {{variable}}';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(4);
+      expect(result.tokens[0].tokenType).toBe(TextContent);
+      expect(result.tokens[0].image).toBe('Some text ');
+      expect(result.tokens[1].tokenType).toBe(TemplateOpen);
+      expect(result.tokens[2].tokenType).toBe(TemplateContent);
+      expect(result.tokens[3].tokenType).toBe(TemplateClose);
+    });
+  });
+
+  describe('Complex Mixed Content', () => {
+    test('should tokenize extended POML example from specification', () => {
+      const input = `# My Analysis Document
+
+This is a regular markdown document.
+
+<task>
+  Analyze the following data and provide insights.
+</task>
+
+Here are some key points:
+- Data quality
+- Statistical significance
+
+{{variable_will_be_substituted}}`;
+
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Check for presence of different token types
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(TextContent);
+      expect(tokenTypes).toContain(TagOpen);
+      expect(tokenTypes).toContain(TagClose);
+      expect(tokenTypes).toContain(Identifier);
+      expect(tokenTypes).toContain(TemplateOpen);
+      expect(tokenTypes).toContain(TemplateClose);
+      expect(tokenTypes).toContain(TemplateContent);
+    });
+
+    test('should tokenize comments with tags and templates', () => {
+      const input = '<!-- comment --><task id="{{id}}">{{content}}</task>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // First token should be comment
+      expect(result.tokens[0].tokenType).toBe(Comment);
+    });
+
+    test('should handle self-closing tags with attributes', () => {
+      const input = '<meta stylesheet="{{stylePath}}" />';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(TagOpen);
+      expect(tokenTypes).toContain(Identifier);
+      expect(tokenTypes).toContain(Equals);
+      expect(tokenTypes).toContain(DoubleQuote);
+      expect(tokenTypes).toContain(TemplateOpen);
+      expect(tokenTypes).toContain(TemplateClose);
+      expect(tokenTypes).toContain(TagSelfClose);
+    });
+
+    test('should handle the specific case: "abc<poml>def</poml>ghi"', () => {
+      const input = '"abc<poml>def</poml>ghi"';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Should tokenize as: " abc < poml > def </ poml > ghi "
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(DoubleQuote);
+      expect(tokenTypes).toContain(TextContent);
+      expect(tokenTypes).toContain(TagOpen);
+      expect(tokenTypes).toContain(Identifier);
+      expect(tokenTypes).toContain(TagClose);
+      expect(tokenTypes).toContain(TagClosingOpen);
+      
+      // First and last tokens should be quotes
+      expect(result.tokens[0].tokenType).toBe(DoubleQuote);
+      expect(result.tokens[0].image).toBe('"');
+      expect(result.tokens[result.tokens.length - 1].tokenType).toBe(DoubleQuote);
+      expect(result.tokens[result.tokens.length - 1].image).toBe('"');
+    });
+
+    test('should handle the attribute case: <poml abc="def">ghi</poml>', () => {
+      const input = '<poml abc="def">ghi</poml>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Should tokenize as: < poml abc = " def " > ghi </ poml >
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(TagOpen);
+      expect(tokenTypes).toContain(Identifier);
+      expect(tokenTypes).toContain(Equals);
+      expect(tokenTypes).toContain(DoubleQuote);
+      expect(tokenTypes).toContain(TextContent);
+      expect(tokenTypes).toContain(TagClose);
+      expect(tokenTypes).toContain(TagClosingOpen);
+      
+      // Verify the structure
+      expect(result.tokens[0].tokenType).toBe(TagOpen);
+      expect(result.tokens[0].image).toBe('<');
+      
+      const identifierTokens = result.tokens.filter(t => t.tokenType === Identifier);
+      expect(identifierTokens.length).toBeGreaterThanOrEqual(3); // poml, abc, def, poml
+      expect(identifierTokens[0].image).toBe('poml');
+      expect(identifierTokens[1].image).toBe('abc');
+    });
+  });
+
+  describe('Whitespace Handling', () => {
+    test('should preserve whitespace tokens', () => {
+      const input = '  \t\n  <task>  \t\n  </task>  \t\n  ';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      
+      // Whitespace should be preserved
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(Whitespace);
+      expect(tokenTypes).toContain(TagOpen);
+      expect(tokenTypes).toContain(TagClose);
+      expect(tokenTypes).toContain(TagClosingOpen);
+      expect(tokenTypes).toContain(Identifier);
+      
+      // Should start with whitespace
+      expect(result.tokens[0].tokenType).toBe(Whitespace);
+      expect(result.tokens[0].image).toBe('  \t\n  ');
+    });
+  });
+
+  describe('Error Handling and Source Index Verification', () => {
+    test('should handle malformed input gracefully with correct source positions', () => {
+      const input = '<task id="unclosed string';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      // Should not crash, should tokenize what it can
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Verify source positions are correct
+      let expectedOffset = 0;
+      for (const token of result.tokens) {
+        expect(token.startOffset).toBeGreaterThanOrEqual(expectedOffset);
+        expect(token.endOffset).toBeGreaterThan(token.startOffset);
+        expect(token.startOffset).toBeLessThan(input.length);
+        expect(token.endOffset).toBeLessThanOrEqual(input.length);
+        expectedOffset = token.startOffset;
+      }
+    });
+
+    test('should handle empty input', () => {
+      const input = '';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(0);
+    });
+
+    test('should handle input with only whitespace and preserve it', () => {
+      const input = '   \t\n   ';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens).toHaveLength(1); // Whitespace is preserved
+      expect(result.tokens[0].tokenType).toBe(Whitespace);
+      expect(result.tokens[0].startOffset).toBe(0);
+      expect(result.tokens[0].endOffset).toBe(input.length);
+    });
+
+    test('should handle unclosed comments gracefully', () => {
+      const input = '<!-- This comment is not closed\nSome text after';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      // Should tokenize as text content since comment pattern requires -->
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Verify source positions
+      for (const token of result.tokens) {
+        expect(token.startOffset).toBeLessThan(input.length);
+        expect(token.endOffset).toBeLessThanOrEqual(input.length);
+      }
+    });
+
+    test('should handle mixed valid and invalid tokens with correct positions', () => {
+      const input = '<valid>{{template}}</valid>invalid@#$%^content';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Verify all tokens have valid source positions
+      for (let i = 0; i < result.tokens.length; i++) {
+        const token = result.tokens[i];
+        if (token.startOffset !== undefined && token.endOffset !== undefined) {
+          expect(token.startOffset).toBeLessThan(input.length);
+          expect(token.endOffset).toBeLessThanOrEqual(input.length);
+          expect(token.startOffset).toBeLessThan(token.endOffset);
+          
+          // Verify token content matches input at specified positions
+          const tokenContent = input.substring(token.startOffset, token.endOffset);
+          expect(token.image).toBe(tokenContent);
+        }
+      }
+    });
+
+    test('should handle incomplete template variables', () => {
+      const input = 'text {{incomplete_template';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Should tokenize text and template open, but template content extends to end
+      const tokenTypes = result.tokens.map(t => t.tokenType);
+      expect(tokenTypes).toContain(TextContent);
+      expect(tokenTypes).toContain(TemplateOpen);
+      expect(tokenTypes).toContain(TemplateContent);
+    });
+
+    test('should handle nested incomplete structures', () => {
+      const input = '<task>{{variable<inner>content</inner>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Verify continuous coverage of input
+      let coveredLength = 0;
+      for (const token of result.tokens) {
+        if (token.tokenType !== Whitespace) {
+          coveredLength += token.image.length;
+        }
+      }
+      expect(coveredLength).toBeLessThanOrEqual(input.length);
+    });
+
+    test('should handle line and column tracking correctly', () => {
+      const input = `line 1
+line 2 <tag>
+line 3 {{var}}`;
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Find tag on line 2
+      const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
+      expect(tagToken).toBeDefined();
+      if (tagToken!.startLine !== undefined) {
+        expect(tagToken!.startLine).toBe(2);
+        expect(tagToken!.startColumn).toBe(8); // After "line 2 "
+      }
+      
+      // Find template on line 3
+      const templateToken = result.tokens.find(t => t.tokenType === TemplateOpen);
+      expect(templateToken).toBeDefined();
+      if (templateToken!.startLine !== undefined) {
+        expect(templateToken!.startLine).toBe(3);
+        expect(templateToken!.startColumn).toBe(8); // After "line 3 "
+      }
+    });
+
+    test('should verify token boundaries do not overlap', () => {
+      const input = '<task id="value">content</task>';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Sort tokens by start position
+      const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset - b.startOffset);
+      
+      // Verify no overlaps
+      for (let i = 0; i < sortedTokens.length - 1; i++) {
+        const current = sortedTokens[i];
+        const next = sortedTokens[i + 1];
+        expect(current.endOffset).toBeLessThanOrEqual(next.startOffset);
+      }
+    });
+
+    test('should handle special characters in text content', () => {
+      const input = 'text with @#$%^&*()[]{}|;:,.<>?/~`';
+      const result = extendedPomlLexer.tokenize(input);
+      
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+      
+      // Should tokenize as text content
+      const textTokens = result.tokens.filter(t => t.tokenType === TextContent);
+      expect(textTokens.length).toBeGreaterThan(0);
+      
+      // Verify positions are correct
+      for (const token of textTokens) {
+        expect(token.startOffset).toBeLessThan(input.length);
+        expect(token.endOffset).toBeLessThanOrEqual(input.length);
+      }
+    });
+  });
+});
\ No newline at end of file

From 106b1a591bcbbf15916645229c9ae91c9f97c890 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Wed, 16 Jul 2025 21:03:22 +0800
Subject: [PATCH 12/76] .

---
 packages/poml/tests/reader/lexer.test.ts | 693 ++++++-----------------
 1 file changed, 173 insertions(+), 520 deletions(-)

diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 8d5c0488..a2107a49 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -18,616 +18,269 @@ import {
   TextContent 
 } from '../../reader/lexer';
 
+// Helper function to extract token images
+function tokenImages(input: string): string[] {
+  const result = extendedPomlLexer.tokenize(input);
+  return result.tokens.map(t => t.image);
+}
+
+// Helper function to extract token types
+function tokenTypes(input: string): any[] {
+  const result = extendedPomlLexer.tokenize(input);
+  return result.tokens.map(t => t.tokenType);
+}
+
+// Helper function to get full tokenization result
+function tokenize(input: string) {
+  return extendedPomlLexer.tokenize(input);
+}
+
 describe('ExtendedPomlLexer', () => {
   
-  describe('Comments', () => {
+  describe('Basic Token Images', () => {
     test('should tokenize HTML comments', () => {
-      const input = '<!-- This is a comment -->';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Comment);
-      expect(result.tokens[0].image).toBe('<!-- This is a comment -->');
+      expect(tokenImages('<!-- comment -->')).toEqual(['<!-- comment -->']);
     });
 
-    test('should tokenize multiline comments', () => {
-      const input = `<!-- 
-        This is a 
-        multiline comment 
-      -->`;
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Comment);
+    test('should tokenize template variables', () => {
+      expect(tokenImages('{{variable}}')).toEqual(['{{', 'variable', '}}']);
     });
 
-    test('should tokenize comments with content after', () => {
-      const input = '<!-- comment -->Some text';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(2);
-      expect(result.tokens[0].tokenType).toBe(Comment);
-      expect(result.tokens[1].tokenType).toBe(TextContent);
-      expect(result.tokens[1].image).toBe('Some text');
+    test('should tokenize XML tags', () => {
+      expect(tokenImages('<task>')).toEqual(['<', 'task', '>']);
+      expect(tokenImages('</task>')).toEqual(['</', 'task', '>']);
+      expect(tokenImages('<meta />')).toEqual(['<', 'meta', ' ', '/>']);
     });
-  });
 
-  describe('Template Variables', () => {
-    test('should tokenize template variable delimiters', () => {
-      const input = '{{variable}}';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(3);
-      expect(result.tokens[0].tokenType).toBe(TemplateOpen);
-      expect(result.tokens[0].image).toBe('{{');
-      expect(result.tokens[1].tokenType).toBe(TemplateContent);
-      expect(result.tokens[1].image).toBe('variable');
-      expect(result.tokens[2].tokenType).toBe(TemplateClose);
-      expect(result.tokens[2].image).toBe('}}');
-    });
-
-    test('should tokenize template variables with complex expressions', () => {
-      const input = '{{user.name || "Anonymous"}}';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(3);
-      expect(result.tokens[0].tokenType).toBe(TemplateOpen);
-      expect(result.tokens[1].tokenType).toBe(TemplateContent);
-      expect(result.tokens[1].image).toBe('user.name || "Anonymous"');
-      expect(result.tokens[2].tokenType).toBe(TemplateClose);
+    test('should tokenize quotes and backslashes individually', () => {
+      expect(tokenImages('"hello"')).toEqual(['"', 'hello', '"']);
+      expect(tokenImages("'world'")).toEqual(["'", 'world', "'"]);
+      expect(tokenImages('text\\escape')).toEqual(['text', '\\', 'escape']);
     });
 
-    test('should tokenize multiple template variables', () => {
-      const input = '{{first}} and {{second}}';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(6);
-      expect(result.tokens[0].tokenType).toBe(TemplateOpen);
-      expect(result.tokens[1].tokenType).toBe(TemplateContent);
-      expect(result.tokens[1].image).toBe('first');
-      expect(result.tokens[2].tokenType).toBe(TemplateClose);
-      expect(result.tokens[3].tokenType).toBe(TextContent);
-      expect(result.tokens[3].image).toBe(' and ');
-      expect(result.tokens[4].tokenType).toBe(TemplateOpen);
-      expect(result.tokens[5].tokenType).toBe(TemplateContent);
+    test('should tokenize attributes', () => {
+      expect(tokenImages('id="value"')).toEqual(['id', '=', '"', 'value', '"']);
     });
-  });
 
-  describe('XML Tags', () => {
-    test('should tokenize opening tags', () => {
-      const input = '<task>';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(3);
-      expect(result.tokens[0].tokenType).toBe(TagOpen);
-      expect(result.tokens[0].image).toBe('<');
-      expect(result.tokens[1].tokenType).toBe(Identifier);
-      expect(result.tokens[1].image).toBe('task');
-      expect(result.tokens[2].tokenType).toBe(TagClose);
-      expect(result.tokens[2].image).toBe('>');
-    });
-
-    test('should tokenize closing tags', () => {
-      const input = '</task>';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(3);
-      expect(result.tokens[0].tokenType).toBe(TagClosingOpen);
-      expect(result.tokens[0].image).toBe('</');
-      expect(result.tokens[1].tokenType).toBe(Identifier);
-      expect(result.tokens[1].image).toBe('task');
-      expect(result.tokens[2].tokenType).toBe(TagClose);
-      expect(result.tokens[2].image).toBe('>');
-    });
-
-    test('should tokenize self-closing tags', () => {
-      const input = '<meta />';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(3);
-      expect(result.tokens[0].tokenType).toBe(TagOpen);
-      expect(result.tokens[0].image).toBe('<');
-      expect(result.tokens[1].tokenType).toBe(Identifier);
-      expect(result.tokens[1].image).toBe('meta');
-      expect(result.tokens[2].tokenType).toBe(TagSelfClose);
-      expect(result.tokens[2].image).toBe('/>');
-    });
-
-    test('should tokenize tags with attributes', () => {
-      const input = '<task id="123" class="important">';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(TagOpen);
-      expect(tokenTypes).toContain(Identifier);
-      expect(tokenTypes).toContain(Equals);
-      expect(tokenTypes).toContain(DoubleQuote);
-      expect(tokenTypes).toContain(TagClose);
-      
-      // Verify specific tokens exist
-      expect(result.tokens[0].tokenType).toBe(TagOpen);
-      expect(result.tokens[0].image).toBe('<');
-      
-      const identifierTokens = result.tokens.filter(t => t.tokenType === Identifier);
-      expect(identifierTokens.length).toBeGreaterThanOrEqual(3); // task, id, class
-      expect(identifierTokens[0].image).toBe('task');
-      expect(identifierTokens[1].image).toBe('id');
+    test('should tokenize whitespace', () => {
+      expect(tokenImages('  \t\n  ')).toEqual(['  \t\n  ']);
     });
-  });
 
-  describe('Quote and Escape Characters', () => {
-    test('should tokenize double quotes as individual tokens', () => {
-      const input = '"Hello world"';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(DoubleQuote);
-      expect(tokenTypes).toContain(TextContent);
-      
-      // First and last tokens should be quotes
-      expect(result.tokens[0].tokenType).toBe(DoubleQuote);
-      expect(result.tokens[0].image).toBe('"');
-      expect(result.tokens[result.tokens.length - 1].tokenType).toBe(DoubleQuote);
-      expect(result.tokens[result.tokens.length - 1].image).toBe('"');
+    test('should tokenize identifiers', () => {
+      expect(tokenImages('simple-name_123')).toEqual(['simple-name_123']);
     });
 
-    test('should tokenize single quotes as individual tokens', () => {
-      const input = "'Hello world'";
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(SingleQuote);
-      expect(tokenTypes).toContain(TextContent);
-      
-      // First and last tokens should be quotes
-      expect(result.tokens[0].tokenType).toBe(SingleQuote);
-      expect(result.tokens[0].image).toBe("'");
-      expect(result.tokens[result.tokens.length - 1].tokenType).toBe(SingleQuote);
-      expect(result.tokens[result.tokens.length - 1].image).toBe("'");
+    test('should tokenize text content', () => {
+      expect(tokenImages('plain text here')).toEqual(['plain text here']);
     });
+  });
 
-    test('should tokenize backslashes as individual tokens', () => {
-      const input = 'text\\with\\backslashes';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(Backslash);
-      expect(tokenTypes).toContain(TextContent);
-      
-      // Should have backslash tokens
-      const backslashTokens = result.tokens.filter(t => t.tokenType === Backslash);
-      expect(backslashTokens.length).toBe(2);
-      expect(backslashTokens[0].image).toBe('\\');
-      expect(backslashTokens[1].image).toBe('\\');
+  describe('Specific Cases from Requirements', () => {
+    test('should handle "abc<poml>def</poml>ghi"', () => {
+      expect(tokenImages('"abc<poml>def</poml>ghi"')).toEqual([
+        '"', 'abc', '<', 'poml', '>', 'def', '</', 'poml', '>', 'ghi', '"'
+      ]);
     });
 
-    test('should handle mixed quotes and backslashes', () => {
-      const input = 'text "with \\"escaped\\" quotes"';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(DoubleQuote);
-      expect(tokenTypes).toContain(Backslash);
-      expect(tokenTypes).toContain(TextContent);
+    test('should handle <poml abc="def">ghi</poml>', () => {
+      expect(tokenImages('<poml abc="def">ghi</poml>')).toEqual([
+        '<', 'poml', ' ', 'abc', '=', '"', 'def', '"', '>', 'ghi', '</', 'poml', '>'
+      ]);
     });
-  });
 
-  describe('Identifiers', () => {
-    test('should tokenize simple identifiers', () => {
-      const input = 'task';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Identifier);
-      expect(result.tokens[0].image).toBe('task');
+    test('should handle mixed content', () => {
+      expect(tokenImages('text {{var}} more')).toEqual([
+        'text ', '{{', 'var', '}}', ' more'
+      ]);
     });
 
-    test('should tokenize identifiers with hyphens', () => {
-      const input = 'my-component';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Identifier);
-      expect(result.tokens[0].image).toBe('my-component');
+    test('should handle complex attributes', () => {
+      expect(tokenImages('<task id="{{value}}" class="test">')).toEqual([
+        '<', 'task', ' ', 'id', '=', '"', '{{', 'value', '}}', '"', ' ', 'class', '=', '"', 'test', '"', '>'
+      ]);
     });
 
-    test('should tokenize identifiers with underscores', () => {
-      const input = 'my_component';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Identifier);
-      expect(result.tokens[0].image).toBe('my_component');
+    test('should handle escaped quotes', () => {
+      expect(tokenImages('text "with \\"escaped\\" quotes"')).toEqual([
+        'text ', '"', 'with ', '\\', '"', 'escaped', '\\', '"', ' quotes', '"'
+      ]);
     });
+  });
 
-    test('should tokenize identifiers with numbers', () => {
-      const input = 'component123';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Identifier);
-      expect(result.tokens[0].image).toBe('component123');
+  describe('Token Types', () => {
+    test('should identify correct token types for basic elements', () => {
+      expect(tokenTypes('<task>')).toEqual([TagOpen, Identifier, TagClose]);
+      expect(tokenTypes('</task>')).toEqual([TagClosingOpen, Identifier, TagClose]);
+      expect(tokenTypes('<meta />')).toEqual([TagOpen, Identifier, Whitespace, TagSelfClose]);
     });
-  });
 
-  describe('Text Content', () => {
-    test('should tokenize plain text', () => {
-      const input = 'This is some plain text';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(TextContent);
-      expect(result.tokens[0].image).toBe('This is some plain text');
+    test('should identify quotes and backslashes', () => {
+      expect(tokenTypes('"text"')).toEqual([DoubleQuote, TextContent, DoubleQuote]);
+      expect(tokenTypes("'text'")).toEqual([SingleQuote, TextContent, SingleQuote]);
+      expect(tokenTypes('text\\escape')).toEqual([TextContent, Backslash, TextContent]);
     });
 
-    test('should tokenize text with newlines', () => {
-      const input = `Line 1
-Line 2
-Line 3`;
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(TextContent);
-      expect(result.tokens[0].image).toBe(`Line 1
-Line 2
-Line 3`);
+    test('should identify template variables', () => {
+      expect(tokenTypes('{{variable}}')).toEqual([TemplateOpen, TemplateContent, TemplateClose]);
     });
 
-    test('should stop text content at tags', () => {
-      const input = 'Some text <tag>';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(4);
-      expect(result.tokens[0].tokenType).toBe(TextContent);
-      expect(result.tokens[0].image).toBe('Some text ');
-      expect(result.tokens[1].tokenType).toBe(TagOpen);
-      expect(result.tokens[2].tokenType).toBe(Identifier);
-      expect(result.tokens[3].tokenType).toBe(TagClose);
+    test('should identify comments', () => {
+      expect(tokenTypes('<!-- comment -->')).toEqual([Comment]);
     });
 
-    test('should stop text content at template variables', () => {
-      const input = 'Some text {{variable}}';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(4);
-      expect(result.tokens[0].tokenType).toBe(TextContent);
-      expect(result.tokens[0].image).toBe('Some text ');
-      expect(result.tokens[1].tokenType).toBe(TemplateOpen);
-      expect(result.tokens[2].tokenType).toBe(TemplateContent);
-      expect(result.tokens[3].tokenType).toBe(TemplateClose);
+    test('should identify whitespace', () => {
+      expect(tokenTypes('  \t\n  ')).toEqual([Whitespace]);
     });
   });
 
-  describe('Complex Mixed Content', () => {
-    test('should tokenize extended POML example from specification', () => {
-      const input = `# My Analysis Document
-
-This is a regular markdown document.
-
-<task>
-  Analyze the following data and provide insights.
-</task>
-
-Here are some key points:
-- Data quality
-- Statistical significance
-
-{{variable_will_be_substituted}}`;
-
-      const result = extendedPomlLexer.tokenize(input);
-      
+  describe('Source Position and Error Tests', () => {
+    test('should provide correct source positions', () => {
+      const result = tokenize('<task>content</task>');
       expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
       
-      // Check for presence of different token types
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(TextContent);
-      expect(tokenTypes).toContain(TagOpen);
-      expect(tokenTypes).toContain(TagClose);
-      expect(tokenTypes).toContain(Identifier);
-      expect(tokenTypes).toContain(TemplateOpen);
-      expect(tokenTypes).toContain(TemplateClose);
-      expect(tokenTypes).toContain(TemplateContent);
-    });
-
-    test('should tokenize comments with tags and templates', () => {
-      const input = '<!-- comment --><task id="{{id}}">{{content}}</task>';
-      const result = extendedPomlLexer.tokenize(input);
+      const tokens = result.tokens;
+      expect(tokens[0].startOffset).toBe(0);
+      expect(tokens[0].endOffset).toBe(1);
+      expect(tokens[0].image).toBe('<');
       
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
+      expect(tokens[1].startOffset).toBe(1);
+      expect(tokens[1].endOffset).toBe(5);
+      expect(tokens[1].image).toBe('task');
       
-      // First token should be comment
-      expect(result.tokens[0].tokenType).toBe(Comment);
+      expect(tokens[2].startOffset).toBe(5);
+      expect(tokens[2].endOffset).toBe(6);
+      expect(tokens[2].image).toBe('>');
     });
 
-    test('should handle self-closing tags with attributes', () => {
-      const input = '<meta stylesheet="{{stylePath}}" />';
-      const result = extendedPomlLexer.tokenize(input);
+    test('should handle line and column tracking', () => {
+      const input = `line1
+line2 <tag>
+line3`;
+      const result = tokenize(input);
       
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(TagOpen);
-      expect(tokenTypes).toContain(Identifier);
-      expect(tokenTypes).toContain(Equals);
-      expect(tokenTypes).toContain(DoubleQuote);
-      expect(tokenTypes).toContain(TemplateOpen);
-      expect(tokenTypes).toContain(TemplateClose);
-      expect(tokenTypes).toContain(TagSelfClose);
-    });
-
-    test('should handle the specific case: "abc<poml>def</poml>ghi"', () => {
-      const input = '"abc<poml>def</poml>ghi"';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      // Should tokenize as: " abc < poml > def </ poml > ghi "
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(DoubleQuote);
-      expect(tokenTypes).toContain(TextContent);
-      expect(tokenTypes).toContain(TagOpen);
-      expect(tokenTypes).toContain(Identifier);
-      expect(tokenTypes).toContain(TagClose);
-      expect(tokenTypes).toContain(TagClosingOpen);
-      
-      // First and last tokens should be quotes
-      expect(result.tokens[0].tokenType).toBe(DoubleQuote);
-      expect(result.tokens[0].image).toBe('"');
-      expect(result.tokens[result.tokens.length - 1].tokenType).toBe(DoubleQuote);
-      expect(result.tokens[result.tokens.length - 1].image).toBe('"');
+      const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
+      expect(tagToken).toBeDefined();
+      expect(tagToken!.startLine).toBe(2);
+      expect(tagToken!.startColumn).toBe(7); // After "line2 "
     });
 
-    test('should handle the attribute case: <poml abc="def">ghi</poml>', () => {
-      const input = '<poml abc="def">ghi</poml>';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
+    test('should handle malformed input gracefully', () => {
+      const result = tokenize('<task id="unclosed');
+      expect(result.errors).toHaveLength(0); // Should not error, just tokenize what it can
       expect(result.tokens.length).toBeGreaterThan(0);
       
-      // Should tokenize as: < poml abc = " def " > ghi </ poml >
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(TagOpen);
-      expect(tokenTypes).toContain(Identifier);
-      expect(tokenTypes).toContain(Equals);
-      expect(tokenTypes).toContain(DoubleQuote);
-      expect(tokenTypes).toContain(TextContent);
-      expect(tokenTypes).toContain(TagClose);
-      expect(tokenTypes).toContain(TagClosingOpen);
-      
-      // Verify the structure
-      expect(result.tokens[0].tokenType).toBe(TagOpen);
-      expect(result.tokens[0].image).toBe('<');
-      
-      const identifierTokens = result.tokens.filter(t => t.tokenType === Identifier);
-      expect(identifierTokens.length).toBeGreaterThanOrEqual(3); // poml, abc, def, poml
-      expect(identifierTokens[0].image).toBe('poml');
-      expect(identifierTokens[1].image).toBe('abc');
+      // Verify token positions are valid
+      for (const token of result.tokens) {
+        expect(token.startOffset).toBeLessThanOrEqual(token.endOffset);
+        expect(token.startOffset).toBeGreaterThanOrEqual(0);
+        expect(token.endOffset).toBeLessThanOrEqual(18);
+      }
     });
-  });
 
-  describe('Whitespace Handling', () => {
-    test('should preserve whitespace tokens', () => {
-      const input = '  \t\n  <task>  \t\n  </task>  \t\n  ';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      
-      // Whitespace should be preserved
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(Whitespace);
-      expect(tokenTypes).toContain(TagOpen);
-      expect(tokenTypes).toContain(TagClose);
-      expect(tokenTypes).toContain(TagClosingOpen);
-      expect(tokenTypes).toContain(Identifier);
+    test('should handle special characters with errors', () => {
+      const result = tokenize('text with @#$%^&*()[]{}|;:,.<>?/~`');
+      // Some special characters might cause lexing errors
+      expect(result.tokens.length).toBeGreaterThan(0);
       
-      // Should start with whitespace
-      expect(result.tokens[0].tokenType).toBe(Whitespace);
-      expect(result.tokens[0].image).toBe('  \t\n  ');
+      // All tokens should have valid positions
+      for (const token of result.tokens) {
+        expect(token.startOffset).toBeLessThan(token.endOffset);
+        expect(token.image).toBeTruthy();
+      }
     });
-  });
 
-  describe('Error Handling and Source Index Verification', () => {
-    test('should handle malformed input gracefully with correct source positions', () => {
-      const input = '<task id="unclosed string';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      // Should not crash, should tokenize what it can
-      expect(result.tokens.length).toBeGreaterThan(0);
+    test('should verify token boundaries do not overlap', () => {
+      const result = tokenize('<task id="value">content</task>');
+      const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset - b.startOffset);
       
-      // Verify source positions are correct
-      let expectedOffset = 0;
-      for (const token of result.tokens) {
-        expect(token.startOffset).toBeGreaterThanOrEqual(expectedOffset);
-        expect(token.endOffset).toBeGreaterThan(token.startOffset);
-        expect(token.startOffset).toBeLessThan(input.length);
-        expect(token.endOffset).toBeLessThanOrEqual(input.length);
-        expectedOffset = token.startOffset;
+      for (let i = 0; i < sortedTokens.length - 1; i++) {
+        const current = sortedTokens[i];
+        const next = sortedTokens[i + 1];
+        expect(current.endOffset).toBeLessThanOrEqual(next.startOffset);
       }
     });
 
     test('should handle empty input', () => {
-      const input = '';
-      const result = extendedPomlLexer.tokenize(input);
-      
+      const result = tokenize('');
       expect(result.errors).toHaveLength(0);
       expect(result.tokens).toHaveLength(0);
     });
 
-    test('should handle input with only whitespace and preserve it', () => {
-      const input = '   \t\n   ';
-      const result = extendedPomlLexer.tokenize(input);
-      
+    test('should handle whitespace only input', () => {
+      const result = tokenize('   \t\n   ');
       expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1); // Whitespace is preserved
+      expect(result.tokens).toHaveLength(1);
       expect(result.tokens[0].tokenType).toBe(Whitespace);
-      expect(result.tokens[0].startOffset).toBe(0);
-      expect(result.tokens[0].endOffset).toBe(input.length);
     });
+  });
 
-    test('should handle unclosed comments gracefully', () => {
-      const input = '<!-- This comment is not closed\nSome text after';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      // Should tokenize as text content since comment pattern requires -->
-      expect(result.tokens.length).toBeGreaterThan(0);
+  describe('Complex Mixed Content', () => {
+    test('should handle extended POML specification example', () => {
+      const input = `# My Analysis
+
+<task>
+  Analyze data
+</task>
+
+{{variable}}`;
       
-      // Verify source positions
-      for (const token of result.tokens) {
-        expect(token.startOffset).toBeLessThan(input.length);
-        expect(token.endOffset).toBeLessThanOrEqual(input.length);
-      }
+      const images = tokenImages(input);
+      expect(images).toContain('# My Analysis\n\n');
+      expect(images).toContain('<');
+      expect(images).toContain('task');
+      expect(images).toContain('>');
+      expect(images).toContain('{{');
+      expect(images).toContain('variable');
+      expect(images).toContain('}}');
     });
 
-    test('should handle mixed valid and invalid tokens with correct positions', () => {
-      const input = '<valid>{{template}}</valid>invalid@#$%^content';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      // Verify all tokens have valid source positions
-      for (let i = 0; i < result.tokens.length; i++) {
-        const token = result.tokens[i];
-        if (token.startOffset !== undefined && token.endOffset !== undefined) {
-          expect(token.startOffset).toBeLessThan(input.length);
-          expect(token.endOffset).toBeLessThanOrEqual(input.length);
-          expect(token.startOffset).toBeLessThan(token.endOffset);
-          
-          // Verify token content matches input at specified positions
-          const tokenContent = input.substring(token.startOffset, token.endOffset);
-          expect(token.image).toBe(tokenContent);
-        }
-      }
+    test('should handle comments with mixed content', () => {
+      expect(tokenImages('<!-- comment --><task>content</task>')).toEqual([
+        '<!-- comment -->', '<', 'task', '>', 'content', '</', 'task', '>'
+      ]);
     });
 
-    test('should handle incomplete template variables', () => {
-      const input = 'text {{incomplete_template';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      // Should tokenize text and template open, but template content extends to end
-      const tokenTypes = result.tokens.map(t => t.tokenType);
-      expect(tokenTypes).toContain(TextContent);
-      expect(tokenTypes).toContain(TemplateOpen);
-      expect(tokenTypes).toContain(TemplateContent);
+    test('should handle nested quotes and templates', () => {
+      expect(tokenImages('<meta value="{{path}}/file.txt">')).toEqual([
+        '<', 'meta', ' ', 'value', '=', '"', '{{', 'path', '}}', '/file.txt', '"', '>'
+      ]);
     });
+  });
 
-    test('should handle nested incomplete structures', () => {
-      const input = '<task>{{variable<inner>content</inner>';
-      const result = extendedPomlLexer.tokenize(input);
-      
+  describe('Error Recovery', () => {
+    test('should handle incomplete template variables', () => {
+      const result = tokenize('text {{incomplete');
       expect(result.errors).toHaveLength(0);
       expect(result.tokens.length).toBeGreaterThan(0);
       
-      // Verify continuous coverage of input
-      let coveredLength = 0;
-      for (const token of result.tokens) {
-        if (token.tokenType !== Whitespace) {
-          coveredLength += token.image.length;
-        }
-      }
-      expect(coveredLength).toBeLessThanOrEqual(input.length);
+      const types = result.tokens.map(t => t.tokenType);
+      expect(types).toContain(TextContent);
+      expect(types).toContain(TemplateOpen);
+      expect(types).toContain(TemplateContent);
     });
 
-    test('should handle line and column tracking correctly', () => {
-      const input = `line 1
-line 2 <tag>
-line 3 {{var}}`;
-      const result = extendedPomlLexer.tokenize(input);
-      
+    test('should handle unclosed comments', () => {
+      const result = tokenize('<!-- unclosed comment\nmore text');
       expect(result.errors).toHaveLength(0);
       expect(result.tokens.length).toBeGreaterThan(0);
-      
-      // Find tag on line 2
-      const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
-      expect(tagToken).toBeDefined();
-      if (tagToken!.startLine !== undefined) {
-        expect(tagToken!.startLine).toBe(2);
-        expect(tagToken!.startColumn).toBe(8); // After "line 2 "
-      }
-      
-      // Find template on line 3
-      const templateToken = result.tokens.find(t => t.tokenType === TemplateOpen);
-      expect(templateToken).toBeDefined();
-      if (templateToken!.startLine !== undefined) {
-        expect(templateToken!.startLine).toBe(3);
-        expect(templateToken!.startColumn).toBe(8); // After "line 3 "
-      }
     });
 
-    test('should verify token boundaries do not overlap', () => {
-      const input = '<task id="value">content</task>';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
+    test('should handle mixed valid and invalid content', () => {
+      const result = tokenize('<valid>content</valid>@#$invalid');
       expect(result.tokens.length).toBeGreaterThan(0);
       
-      // Sort tokens by start position
-      const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset - b.startOffset);
-      
-      // Verify no overlaps
-      for (let i = 0; i < sortedTokens.length - 1; i++) {
-        const current = sortedTokens[i];
-        const next = sortedTokens[i + 1];
-        expect(current.endOffset).toBeLessThanOrEqual(next.startOffset);
-      }
+      // Should tokenize the valid parts
+      const images = result.tokens.map(t => t.image);
+      expect(images).toContain('<');
+      expect(images).toContain('valid');
+      expect(images).toContain('>');
+      expect(images).toContain('content');
     });
 
     test('should handle special characters in text content', () => {
       const input = 'text with @#$%^&*()[]{}|;:,.<>?/~`';
-      const result = extendedPomlLexer.tokenize(input);
-      
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      // Should tokenize as text content
-      const textTokens = result.tokens.filter(t => t.tokenType === TextContent);
-      expect(textTokens.length).toBeGreaterThan(0);
-      
-      // Verify positions are correct
-      for (const token of textTokens) {
-        expect(token.startOffset).toBeLessThan(input.length);
-        expect(token.endOffset).toBeLessThanOrEqual(input.length);
-      }
+      const images = tokenImages(input);
+      expect(images).toEqual(['text with @#$%^&*()[]{}|;:,.<>?/~`']);
     });
   });
-});
\ No newline at end of file
+});

From 9409e75800a1d3a5f2e6ec65d4daa11d13814b35 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Wed, 16 Jul 2025 21:49:42 +0800
Subject: [PATCH 13/76] pass all tests

---
 packages/poml/reader/lexer.ts            |  52 +++---
 packages/poml/tests/reader/lexer.test.ts | 193 ++++++++++++++++-------
 2 files changed, 161 insertions(+), 84 deletions(-)

diff --git a/packages/poml/reader/lexer.ts b/packages/poml/reader/lexer.ts
index 292647ed..cfa886b7 100644
--- a/packages/poml/reader/lexer.ts
+++ b/packages/poml/reader/lexer.ts
@@ -15,27 +15,34 @@ export const DoubleQuote = createToken({ name: 'DoubleQuote', pattern: /"/ });
 export const SingleQuote = createToken({ name: 'SingleQuote', pattern: /'/ });
 export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 
-export const Identifier = createToken({ 
-  name: 'Identifier', 
-  pattern: /[a-zA-Z_][a-zA-Z0-9_-]*/ 
-});
+/* Identifier is one of the following:
+   - XML tag names
+   - XML attribute names
+   - TextContent incorrectly parsed as identifiers
 
-export const Whitespace = createToken({ 
-  name: 'Whitespace', 
-  pattern: /[ \t\r\n]+/,
-  line_breaks: true
+   Case 3 is handled later by CST parser.
+*/
+export const Identifier = createToken({
+  name: 'Identifier',
+  pattern: /[a-zA-Z_][a-zA-Z0-9_-]*/
 });
 
-export const TemplateContent = createToken({ 
-  name: 'TemplateContent', 
-  pattern: /[^}]+/,
+export const Whitespace = createToken({
+  name: 'Whitespace',
+  pattern: /[ \t\r\n]+/,
   line_breaks: true
 });
 
-// Text content - should not consume quotes, backslashes, or tag/template delimiters
-export const TextContent = createToken({ 
-  name: 'TextContent', 
-  pattern: /[^<{}"'\\]+/,
+/* Catch-all for arbitrary text content
+   - Match any char except:
+       <          — starts a tag
+       {{  or }}  — template delimiters
+       " or '     — start/end of string literals
+   - Single { or } are OK because they are not followed by another brace
+*/
+export const TextContent = createToken({
+  name: 'TextContent',
+  pattern: /(?:[^<"'{}]|{(?!{)|}(?!}))+/,
   line_breaks: true
 });
 
@@ -45,7 +52,7 @@ export const allTokens = [
   TemplateOpen,
   TemplateClose,
   TagClosingOpen, // Must come before TagOpen
-  TagSelfClose,   // Must come before TagClose
+  TagSelfClose, // Must come before TagClose
   TagOpen,
   TagClose,
   Equals,
@@ -54,25 +61,24 @@ export const allTokens = [
   Backslash,
   Identifier,
   Whitespace,
-  TemplateContent,
   TextContent
 ];
 
 // Extended POML Lexer class
 export class ExtendedPomlLexer {
   private lexer: Lexer;
-  
+
   constructor() {
     this.lexer = new Lexer(allTokens);
   }
 
   public tokenize(text: string) {
     const lexingResult = this.lexer.tokenize(text);
-    
+
     if (lexingResult.errors.length > 0) {
       console.warn('Lexing errors:', lexingResult.errors);
     }
-    
+
     return {
       tokens: lexingResult.tokens,
       errors: lexingResult.errors,
@@ -85,8 +91,4 @@ export class ExtendedPomlLexer {
 export const extendedPomlLexer = new ExtendedPomlLexer();
 
 // Export token types for use in parser
-export type {
-  IToken,
-  ILexingError,
-  ILexingResult
-} from 'chevrotain';
\ No newline at end of file
+export type { IToken, ILexingError, ILexingResult } from 'chevrotain';
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index a2107a49..4e72e763 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -1,21 +1,20 @@
 import { describe, expect, test } from '@jest/globals';
-import { 
-  extendedPomlLexer, 
-  Comment, 
-  TemplateOpen, 
-  TemplateClose, 
-  TagOpen, 
-  TagClose, 
-  TagClosingOpen, 
-  TagSelfClose, 
-  Equals, 
-  DoubleQuote, 
-  SingleQuote, 
-  Backslash, 
-  Identifier, 
-  Whitespace, 
-  TemplateContent, 
-  TextContent 
+import {
+  extendedPomlLexer,
+  Comment,
+  TemplateOpen,
+  TemplateClose,
+  TagOpen,
+  TagClose,
+  TagClosingOpen,
+  TagSelfClose,
+  Equals,
+  DoubleQuote,
+  SingleQuote,
+  Backslash,
+  Identifier,
+  Whitespace,
+  TextContent
 } from '../../reader/lexer';
 
 // Helper function to extract token images
@@ -36,7 +35,6 @@ function tokenize(input: string) {
 }
 
 describe('ExtendedPomlLexer', () => {
-  
   describe('Basic Token Images', () => {
     test('should tokenize HTML comments', () => {
       expect(tokenImages('<!-- comment -->')).toEqual(['<!-- comment -->']);
@@ -71,38 +69,108 @@ describe('ExtendedPomlLexer', () => {
     });
 
     test('should tokenize text content', () => {
-      expect(tokenImages('plain text here')).toEqual(['plain text here']);
+      expect(tokenImages('plain text here')).toEqual(['plain', ' ', 'text', ' ', 'here']);
     });
   });
 
   describe('Specific Cases from Requirements', () => {
     test('should handle "abc<poml>def</poml>ghi"', () => {
       expect(tokenImages('"abc<poml>def</poml>ghi"')).toEqual([
-        '"', 'abc', '<', 'poml', '>', 'def', '</', 'poml', '>', 'ghi', '"'
+        '"',
+        'abc',
+        '<',
+        'poml',
+        '>',
+        'def',
+        '</',
+        'poml',
+        '>',
+        'ghi',
+        '"'
       ]);
     });
 
     test('should handle <poml abc="def">ghi</poml>', () => {
       expect(tokenImages('<poml abc="def">ghi</poml>')).toEqual([
-        '<', 'poml', ' ', 'abc', '=', '"', 'def', '"', '>', 'ghi', '</', 'poml', '>'
+        '<',
+        'poml',
+        ' ',
+        'abc',
+        '=',
+        '"',
+        'def',
+        '"',
+        '>',
+        'ghi',
+        '</',
+        'poml',
+        '>'
       ]);
     });
 
     test('should handle mixed content', () => {
       expect(tokenImages('text {{var}} more')).toEqual([
-        'text ', '{{', 'var', '}}', ' more'
+        'text',
+        ' ',
+        '{{',
+        'var',
+        '}}',
+        ' ',
+        'more'
+      ]);
+    });
+
+    test('chinese characters', () => {
+      expect(tokenImages('中文 {{ 文本 }}内容< 标签>')).toEqual([
+        '中文 ',
+        '{{',
+        ' ',
+        '文本 ',
+        '}}',
+        '内容',
+        '<',
+        ' ',
+        '标签>'
       ]);
     });
 
     test('should handle complex attributes', () => {
       expect(tokenImages('<task id="{{value}}" class="test">')).toEqual([
-        '<', 'task', ' ', 'id', '=', '"', '{{', 'value', '}}', '"', ' ', 'class', '=', '"', 'test', '"', '>'
+        '<',
+        'task',
+        ' ',
+        'id',
+        '=',
+        '"',
+        '{{',
+        'value',
+        '}}',
+        '"',
+        ' ',
+        'class',
+        '=',
+        '"',
+        'test',
+        '"',
+        '>'
       ]);
     });
 
     test('should handle escaped quotes', () => {
       expect(tokenImages('text "with \\"escaped\\" quotes"')).toEqual([
-        'text ', '"', 'with ', '\\', '"', 'escaped', '\\', '"', ' quotes', '"'
+        'text',
+        ' ',
+        '"',
+        'with',
+        ' ',
+        '\\',
+        '"',
+        'escaped',
+        '\\',
+        '"',
+        ' ',
+        'quotes',
+        '"'
       ]);
     });
   });
@@ -115,13 +183,13 @@ describe('ExtendedPomlLexer', () => {
     });
 
     test('should identify quotes and backslashes', () => {
-      expect(tokenTypes('"text"')).toEqual([DoubleQuote, TextContent, DoubleQuote]);
-      expect(tokenTypes("'text'")).toEqual([SingleQuote, TextContent, SingleQuote]);
-      expect(tokenTypes('text\\escape')).toEqual([TextContent, Backslash, TextContent]);
+      expect(tokenTypes('"text"')).toEqual([DoubleQuote, Identifier, DoubleQuote]);
+      expect(tokenTypes("'text'")).toEqual([SingleQuote, Identifier, SingleQuote]);
+      expect(tokenTypes('text\\escape')).toEqual([Identifier, Backslash, Identifier]);
     });
 
     test('should identify template variables', () => {
-      expect(tokenTypes('{{variable}}')).toEqual([TemplateOpen, TemplateContent, TemplateClose]);
+      expect(tokenTypes('{{variable}}')).toEqual([TemplateOpen, Identifier, TemplateClose]);
     });
 
     test('should identify comments', () => {
@@ -137,18 +205,18 @@ describe('ExtendedPomlLexer', () => {
     test('should provide correct source positions', () => {
       const result = tokenize('<task>content</task>');
       expect(result.errors).toHaveLength(0);
-      
+
       const tokens = result.tokens;
       expect(tokens[0].startOffset).toBe(0);
-      expect(tokens[0].endOffset).toBe(1);
+      expect(tokens[0].endOffset).toBe(0);
       expect(tokens[0].image).toBe('<');
-      
+
       expect(tokens[1].startOffset).toBe(1);
-      expect(tokens[1].endOffset).toBe(5);
+      expect(tokens[1].endOffset).toBe(4);
       expect(tokens[1].image).toBe('task');
-      
+
       expect(tokens[2].startOffset).toBe(5);
-      expect(tokens[2].endOffset).toBe(6);
+      expect(tokens[2].endOffset).toBe(5);
       expect(tokens[2].image).toBe('>');
     });
 
@@ -157,7 +225,7 @@ describe('ExtendedPomlLexer', () => {
 line2 <tag>
 line3`;
       const result = tokenize(input);
-      
+
       const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
       expect(tagToken).toBeDefined();
       expect(tagToken!.startLine).toBe(2);
@@ -168,31 +236,19 @@ line3`;
       const result = tokenize('<task id="unclosed');
       expect(result.errors).toHaveLength(0); // Should not error, just tokenize what it can
       expect(result.tokens.length).toBeGreaterThan(0);
-      
+
       // Verify token positions are valid
       for (const token of result.tokens) {
-        expect(token.startOffset).toBeLessThanOrEqual(token.endOffset);
+        expect(token.startOffset).toBeLessThanOrEqual(token.endOffset!);
         expect(token.startOffset).toBeGreaterThanOrEqual(0);
         expect(token.endOffset).toBeLessThanOrEqual(18);
       }
     });
 
-    test('should handle special characters with errors', () => {
-      const result = tokenize('text with @#$%^&*()[]{}|;:,.<>?/~`');
-      // Some special characters might cause lexing errors
-      expect(result.tokens.length).toBeGreaterThan(0);
-      
-      // All tokens should have valid positions
-      for (const token of result.tokens) {
-        expect(token.startOffset).toBeLessThan(token.endOffset);
-        expect(token.image).toBeTruthy();
-      }
-    });
-
     test('should verify token boundaries do not overlap', () => {
       const result = tokenize('<task id="value">content</task>');
       const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset - b.startOffset);
-      
+
       for (let i = 0; i < sortedTokens.length - 1; i++) {
         const current = sortedTokens[i];
         const next = sortedTokens[i + 1];
@@ -223,7 +279,7 @@ line3`;
 </task>
 
 {{variable}}`;
-      
+
       const images = tokenImages(input);
       expect(images).toContain('# My Analysis\n\n');
       expect(images).toContain('<');
@@ -236,13 +292,31 @@ line3`;
 
     test('should handle comments with mixed content', () => {
       expect(tokenImages('<!-- comment --><task>content</task>')).toEqual([
-        '<!-- comment -->', '<', 'task', '>', 'content', '</', 'task', '>'
+        '<!-- comment -->',
+        '<',
+        'task',
+        '>',
+        'content',
+        '</',
+        'task',
+        '>'
       ]);
     });
 
     test('should handle nested quotes and templates', () => {
       expect(tokenImages('<meta value="{{path}}/file.txt">')).toEqual([
-        '<', 'meta', ' ', 'value', '=', '"', '{{', 'path', '}}', '/file.txt', '"', '>'
+        '<',
+        'meta',
+        ' ',
+        'value',
+        '=',
+        '"',
+        '{{',
+        'path',
+        '}}',
+        '/file.txt',
+        '"',
+        '>'
       ]);
     });
   });
@@ -252,11 +326,10 @@ line3`;
       const result = tokenize('text {{incomplete');
       expect(result.errors).toHaveLength(0);
       expect(result.tokens.length).toBeGreaterThan(0);
-      
+
       const types = result.tokens.map(t => t.tokenType);
-      expect(types).toContain(TextContent);
+      expect(types).toContain(Identifier);
       expect(types).toContain(TemplateOpen);
-      expect(types).toContain(TemplateContent);
     });
 
     test('should handle unclosed comments', () => {
@@ -268,7 +341,7 @@ line3`;
     test('should handle mixed valid and invalid content', () => {
       const result = tokenize('<valid>content</valid>@#$invalid');
       expect(result.tokens.length).toBeGreaterThan(0);
-      
+
       // Should tokenize the valid parts
       const images = result.tokens.map(t => t.image);
       expect(images).toContain('<');
@@ -279,8 +352,10 @@ line3`;
 
     test('should handle special characters in text content', () => {
       const input = 'text with @#$%^&*()[]{}|;:,.<>?/~`';
-      const images = tokenImages(input);
-      expect(images).toEqual(['text with @#$%^&*()[]{}|;:,.<>?/~`']);
+      const result = tokenize(input);
+      expect(result.errors).toHaveLength(0);
+      const images = result.tokens.map(t => t.image);
+      expect(images).toEqual(['text', ' ', 'with', ' ', '@#$%^&*()[]{}|;:,.', '<', '>', '?/~`']);
     });
   });
 });

From 3c3b226b60ec54a69556282d42ed68cb4a9d25ff Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Wed, 16 Jul 2025 23:50:03 +0800
Subject: [PATCH 14/76] .

---
 packages/poml/tests/reader/lexer.test.ts | 628 +++++++++++------------
 1 file changed, 313 insertions(+), 315 deletions(-)

diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 4e72e763..15da7907 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -34,328 +34,326 @@ function tokenize(input: string) {
   return extendedPomlLexer.tokenize(input);
 }
 
-describe('ExtendedPomlLexer', () => {
-  describe('Basic Token Images', () => {
-    test('should tokenize HTML comments', () => {
-      expect(tokenImages('<!-- comment -->')).toEqual(['<!-- comment -->']);
-    });
-
-    test('should tokenize template variables', () => {
-      expect(tokenImages('{{variable}}')).toEqual(['{{', 'variable', '}}']);
-    });
-
-    test('should tokenize XML tags', () => {
-      expect(tokenImages('<task>')).toEqual(['<', 'task', '>']);
-      expect(tokenImages('</task>')).toEqual(['</', 'task', '>']);
-      expect(tokenImages('<meta />')).toEqual(['<', 'meta', ' ', '/>']);
-    });
-
-    test('should tokenize quotes and backslashes individually', () => {
-      expect(tokenImages('"hello"')).toEqual(['"', 'hello', '"']);
-      expect(tokenImages("'world'")).toEqual(["'", 'world', "'"]);
-      expect(tokenImages('text\\escape')).toEqual(['text', '\\', 'escape']);
-    });
-
-    test('should tokenize attributes', () => {
-      expect(tokenImages('id="value"')).toEqual(['id', '=', '"', 'value', '"']);
-    });
-
-    test('should tokenize whitespace', () => {
-      expect(tokenImages('  \t\n  ')).toEqual(['  \t\n  ']);
-    });
-
-    test('should tokenize identifiers', () => {
-      expect(tokenImages('simple-name_123')).toEqual(['simple-name_123']);
-    });
-
-    test('should tokenize text content', () => {
-      expect(tokenImages('plain text here')).toEqual(['plain', ' ', 'text', ' ', 'here']);
-    });
-  });
-
-  describe('Specific Cases from Requirements', () => {
-    test('should handle "abc<poml>def</poml>ghi"', () => {
-      expect(tokenImages('"abc<poml>def</poml>ghi"')).toEqual([
-        '"',
-        'abc',
-        '<',
-        'poml',
-        '>',
-        'def',
-        '</',
-        'poml',
-        '>',
-        'ghi',
-        '"'
-      ]);
-    });
-
-    test('should handle <poml abc="def">ghi</poml>', () => {
-      expect(tokenImages('<poml abc="def">ghi</poml>')).toEqual([
-        '<',
-        'poml',
-        ' ',
-        'abc',
-        '=',
-        '"',
-        'def',
-        '"',
-        '>',
-        'ghi',
-        '</',
-        'poml',
-        '>'
-      ]);
-    });
-
-    test('should handle mixed content', () => {
-      expect(tokenImages('text {{var}} more')).toEqual([
-        'text',
-        ' ',
-        '{{',
-        'var',
-        '}}',
-        ' ',
-        'more'
-      ]);
-    });
-
-    test('chinese characters', () => {
-      expect(tokenImages('中文 {{ 文本 }}内容< 标签>')).toEqual([
-        '中文 ',
-        '{{',
-        ' ',
-        '文本 ',
-        '}}',
-        '内容',
-        '<',
-        ' ',
-        '标签>'
-      ]);
-    });
-
-    test('should handle complex attributes', () => {
-      expect(tokenImages('<task id="{{value}}" class="test">')).toEqual([
-        '<',
-        'task',
-        ' ',
-        'id',
-        '=',
-        '"',
-        '{{',
-        'value',
-        '}}',
-        '"',
-        ' ',
-        'class',
-        '=',
-        '"',
-        'test',
-        '"',
-        '>'
-      ]);
-    });
-
-    test('should handle escaped quotes', () => {
-      expect(tokenImages('text "with \\"escaped\\" quotes"')).toEqual([
-        'text',
-        ' ',
-        '"',
-        'with',
-        ' ',
-        '\\',
-        '"',
-        'escaped',
-        '\\',
-        '"',
-        ' ',
-        'quotes',
-        '"'
-      ]);
-    });
-  });
-
-  describe('Token Types', () => {
-    test('should identify correct token types for basic elements', () => {
-      expect(tokenTypes('<task>')).toEqual([TagOpen, Identifier, TagClose]);
-      expect(tokenTypes('</task>')).toEqual([TagClosingOpen, Identifier, TagClose]);
-      expect(tokenTypes('<meta />')).toEqual([TagOpen, Identifier, Whitespace, TagSelfClose]);
-    });
-
-    test('should identify quotes and backslashes', () => {
-      expect(tokenTypes('"text"')).toEqual([DoubleQuote, Identifier, DoubleQuote]);
-      expect(tokenTypes("'text'")).toEqual([SingleQuote, Identifier, SingleQuote]);
-      expect(tokenTypes('text\\escape')).toEqual([Identifier, Backslash, Identifier]);
-    });
-
-    test('should identify template variables', () => {
-      expect(tokenTypes('{{variable}}')).toEqual([TemplateOpen, Identifier, TemplateClose]);
-    });
-
-    test('should identify comments', () => {
-      expect(tokenTypes('<!-- comment -->')).toEqual([Comment]);
-    });
-
-    test('should identify whitespace', () => {
-      expect(tokenTypes('  \t\n  ')).toEqual([Whitespace]);
-    });
-  });
-
-  describe('Source Position and Error Tests', () => {
-    test('should provide correct source positions', () => {
-      const result = tokenize('<task>content</task>');
-      expect(result.errors).toHaveLength(0);
-
-      const tokens = result.tokens;
-      expect(tokens[0].startOffset).toBe(0);
-      expect(tokens[0].endOffset).toBe(0);
-      expect(tokens[0].image).toBe('<');
-
-      expect(tokens[1].startOffset).toBe(1);
-      expect(tokens[1].endOffset).toBe(4);
-      expect(tokens[1].image).toBe('task');
-
-      expect(tokens[2].startOffset).toBe(5);
-      expect(tokens[2].endOffset).toBe(5);
-      expect(tokens[2].image).toBe('>');
-    });
-
-    test('should handle line and column tracking', () => {
-      const input = `line1
+describe('Basic Token Images', () => {
+  test('should tokenize HTML comments', () => {
+    expect(tokenImages('<!-- comment -->')).toEqual(['<!-- comment -->']);
+  });
+
+  test('should tokenize template variables', () => {
+    expect(tokenImages('{{variable}}')).toEqual(['{{', 'variable', '}}']);
+  });
+
+  test('should tokenize XML tags', () => {
+    expect(tokenImages('<task>')).toEqual(['<', 'task', '>']);
+    expect(tokenImages('</task>')).toEqual(['</', 'task', '>']);
+    expect(tokenImages('<meta />')).toEqual(['<', 'meta', ' ', '/>']);
+  });
+
+  test('should tokenize quotes and backslashes individually', () => {
+    expect(tokenImages('"hello"')).toEqual(['"', 'hello', '"']);
+    expect(tokenImages("'world'")).toEqual(["'", 'world', "'"]);
+    expect(tokenImages('text\\escape')).toEqual(['text', '\\', 'escape']);
+  });
+
+  test('should tokenize attributes', () => {
+    expect(tokenImages('id="value"')).toEqual(['id', '=', '"', 'value', '"']);
+  });
+
+  test('should tokenize whitespace', () => {
+    expect(tokenImages('  \t\n  ')).toEqual(['  \t\n  ']);
+  });
+
+  test('should tokenize identifiers', () => {
+    expect(tokenImages('simple-name_123')).toEqual(['simple-name_123']);
+  });
+
+  test('should tokenize text content', () => {
+    expect(tokenImages('plain text here')).toEqual(['plain', ' ', 'text', ' ', 'here']);
+  });
+});
+
+describe('Edge Cases', () => {
+  test('should handle "abc<poml>def</poml>ghi"', () => {
+    expect(tokenImages('"abc<poml>def</poml>ghi"')).toEqual([
+      '"',
+      'abc',
+      '<',
+      'poml',
+      '>',
+      'def',
+      '</',
+      'poml',
+      '>',
+      'ghi',
+      '"'
+    ]);
+  });
+
+  test('should handle <poml abc="def">ghi</poml>', () => {
+    expect(tokenImages('<poml abc="def">ghi</poml>')).toEqual([
+      '<',
+      'poml',
+      ' ',
+      'abc',
+      '=',
+      '"',
+      'def',
+      '"',
+      '>',
+      'ghi',
+      '</',
+      'poml',
+      '>'
+    ]);
+  });
+
+  test('should handle mixed content', () => {
+    expect(tokenImages('text {{var}} more')).toEqual([
+      'text',
+      ' ',
+      '{{',
+      'var',
+      '}}',
+      ' ',
+      'more'
+    ]);
+  });
+
+  test('chinese characters', () => {
+    expect(tokenImages('中文 {{ 文本 }}内容< 标签>')).toEqual([
+      '中文 ',
+      '{{',
+      ' ',
+      '文本 ',
+      '}}',
+      '内容',
+      '<',
+      ' ',
+      '标签>'
+    ]);
+  });
+
+  test('should handle complex attributes', () => {
+    expect(tokenImages('<task id="{{value}}" class="test">')).toEqual([
+      '<',
+      'task',
+      ' ',
+      'id',
+      '=',
+      '"',
+      '{{',
+      'value',
+      '}}',
+      '"',
+      ' ',
+      'class',
+      '=',
+      '"',
+      'test',
+      '"',
+      '>'
+    ]);
+  });
+
+  test('should handle escaped quotes', () => {
+    expect(tokenImages('text "with \\"escaped\\" quotes"')).toEqual([
+      'text',
+      ' ',
+      '"',
+      'with',
+      ' ',
+      '\\',
+      '"',
+      'escaped',
+      '\\',
+      '"',
+      ' ',
+      'quotes',
+      '"'
+    ]);
+  });
+});
+
+describe('Token Types', () => {
+  test('should identify correct token types for basic elements', () => {
+    expect(tokenTypes('<task>')).toEqual([TagOpen, Identifier, TagClose]);
+    expect(tokenTypes('</task>')).toEqual([TagClosingOpen, Identifier, TagClose]);
+    expect(tokenTypes('<meta />')).toEqual([TagOpen, Identifier, Whitespace, TagSelfClose]);
+  });
+
+  test('should identify quotes and backslashes', () => {
+    expect(tokenTypes('"text"')).toEqual([DoubleQuote, Identifier, DoubleQuote]);
+    expect(tokenTypes("'text'")).toEqual([SingleQuote, Identifier, SingleQuote]);
+    expect(tokenTypes('text\\escape')).toEqual([Identifier, Backslash, Identifier]);
+  });
+
+  test('should identify template variables', () => {
+    expect(tokenTypes('{{variable}}')).toEqual([TemplateOpen, Identifier, TemplateClose]);
+  });
+
+  test('should identify comments', () => {
+    expect(tokenTypes('<!-- comment -->')).toEqual([Comment]);
+  });
+
+  test('should identify whitespace', () => {
+    expect(tokenTypes('  \t\n  ')).toEqual([Whitespace]);
+  });
+});
+
+describe('Source Position and Error Tests', () => {
+  test('should provide correct source positions', () => {
+    const result = tokenize('<task>content</task>');
+    expect(result.errors).toHaveLength(0);
+
+    const tokens = result.tokens;
+    expect(tokens[0].startOffset).toBe(0);
+    expect(tokens[0].endOffset).toBe(0);
+    expect(tokens[0].image).toBe('<');
+
+    expect(tokens[1].startOffset).toBe(1);
+    expect(tokens[1].endOffset).toBe(4);
+    expect(tokens[1].image).toBe('task');
+
+    expect(tokens[2].startOffset).toBe(5);
+    expect(tokens[2].endOffset).toBe(5);
+    expect(tokens[2].image).toBe('>');
+  });
+
+  test('should handle line and column tracking', () => {
+    const input = `line1
 line2 <tag>
 line3`;
-      const result = tokenize(input);
-
-      const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
-      expect(tagToken).toBeDefined();
-      expect(tagToken!.startLine).toBe(2);
-      expect(tagToken!.startColumn).toBe(7); // After "line2 "
-    });
-
-    test('should handle malformed input gracefully', () => {
-      const result = tokenize('<task id="unclosed');
-      expect(result.errors).toHaveLength(0); // Should not error, just tokenize what it can
-      expect(result.tokens.length).toBeGreaterThan(0);
-
-      // Verify token positions are valid
-      for (const token of result.tokens) {
-        expect(token.startOffset).toBeLessThanOrEqual(token.endOffset!);
-        expect(token.startOffset).toBeGreaterThanOrEqual(0);
-        expect(token.endOffset).toBeLessThanOrEqual(18);
-      }
-    });
-
-    test('should verify token boundaries do not overlap', () => {
-      const result = tokenize('<task id="value">content</task>');
-      const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset - b.startOffset);
-
-      for (let i = 0; i < sortedTokens.length - 1; i++) {
-        const current = sortedTokens[i];
-        const next = sortedTokens[i + 1];
-        expect(current.endOffset).toBeLessThanOrEqual(next.startOffset);
-      }
-    });
-
-    test('should handle empty input', () => {
-      const result = tokenize('');
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(0);
-    });
-
-    test('should handle whitespace only input', () => {
-      const result = tokenize('   \t\n   ');
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens).toHaveLength(1);
-      expect(result.tokens[0].tokenType).toBe(Whitespace);
-    });
-  });
-
-  describe('Complex Mixed Content', () => {
-    test('should handle extended POML specification example', () => {
-      const input = `# My Analysis
+    const result = tokenize(input);
+
+    const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
+    expect(tagToken).toBeDefined();
+    expect(tagToken!.startLine).toBe(2);
+    expect(tagToken!.startColumn).toBe(7); // After "line2 "
+  });
+
+  test('should handle malformed input gracefully', () => {
+    const result = tokenize('<task id="unclosed');
+    expect(result.errors).toHaveLength(0); // Should not error, just tokenize what it can
+    expect(result.tokens.length).toBeGreaterThan(0);
+
+    // Verify token positions are valid; endOffset is inclusive and the input is 18 characters long
+    for (const token of result.tokens) {
+      expect(token.startOffset).toBeLessThanOrEqual(token.endOffset!);
+      expect(token.startOffset).toBeGreaterThanOrEqual(0);
+      expect(token.endOffset).toBeLessThan(18);
+    }
+  });
+
+  test('should verify token boundaries do not overlap', () => {
+    const result = tokenize('<task id="value">content</task>');
+    const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset - b.startOffset);
+    // endOffset is inclusive, so a token must end strictly before the next one starts
+    for (let i = 0; i < sortedTokens.length - 1; i++) {
+      const current = sortedTokens[i];
+      const next = sortedTokens[i + 1];
+      expect(current.endOffset).toBeLessThan(next.startOffset);
+    }
+  });
+
+  test('should handle empty input', () => {
+    const result = tokenize('');
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(0);
+  });
+
+  test('should handle whitespace only input', () => {
+    const result = tokenize('   \t\n   ');
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(1);
+    expect(result.tokens[0].tokenType).toBe(Whitespace);
+  });
+});
+
+describe('Complex Mixed Content', () => {
+  test('should handle extended POML specification example', () => {
+    const input = `# My Analysis
 
 <task>
-  Analyze data
+Analyze data
 </task>
 
 {{variable}}`;
 
-      const images = tokenImages(input);
-      expect(images).toContain('# My Analysis\n\n');
-      expect(images).toContain('<');
-      expect(images).toContain('task');
-      expect(images).toContain('>');
-      expect(images).toContain('{{');
-      expect(images).toContain('variable');
-      expect(images).toContain('}}');
-    });
-
-    test('should handle comments with mixed content', () => {
-      expect(tokenImages('<!-- comment --><task>content</task>')).toEqual([
-        '<!-- comment -->',
-        '<',
-        'task',
-        '>',
-        'content',
-        '</',
-        'task',
-        '>'
-      ]);
-    });
-
-    test('should handle nested quotes and templates', () => {
-      expect(tokenImages('<meta value="{{path}}/file.txt">')).toEqual([
-        '<',
-        'meta',
-        ' ',
-        'value',
-        '=',
-        '"',
-        '{{',
-        'path',
-        '}}',
-        '/file.txt',
-        '"',
-        '>'
-      ]);
-    });
-  });
-
-  describe('Error Recovery', () => {
-    test('should handle incomplete template variables', () => {
-      const result = tokenize('text {{incomplete');
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-
-      const types = result.tokens.map(t => t.tokenType);
-      expect(types).toContain(Identifier);
-      expect(types).toContain(TemplateOpen);
-    });
-
-    test('should handle unclosed comments', () => {
-      const result = tokenize('<!-- unclosed comment\nmore text');
-      expect(result.errors).toHaveLength(0);
-      expect(result.tokens.length).toBeGreaterThan(0);
-    });
-
-    test('should handle mixed valid and invalid content', () => {
-      const result = tokenize('<valid>content</valid>@#$invalid');
-      expect(result.tokens.length).toBeGreaterThan(0);
-
-      // Should tokenize the valid parts
-      const images = result.tokens.map(t => t.image);
-      expect(images).toContain('<');
-      expect(images).toContain('valid');
-      expect(images).toContain('>');
-      expect(images).toContain('content');
-    });
-
-    test('should handle special characters in text content', () => {
-      const input = 'text with @#$%^&*()[]{}|;:,.<>?/~`';
-      const result = tokenize(input);
-      expect(result.errors).toHaveLength(0);
-      const images = result.tokens.map(t => t.image);
-      expect(images).toEqual(['text', ' ', 'with', ' ', '@#$%^&*()[]{}|;:,.', '<', '>', '?/~`']);
-    });
+    const images = tokenImages(input);
+    expect(images).toContain('# My Analysis\n\n');
+    expect(images).toContain('<');
+    expect(images).toContain('task');
+    expect(images).toContain('>');
+    expect(images).toContain('{{');
+    expect(images).toContain('variable');
+    expect(images).toContain('}}');
+  });
+
+  test('should handle comments with mixed content', () => {
+    expect(tokenImages('<!-- comment --><task>content</task>')).toEqual([
+      '<!-- comment -->',
+      '<',
+      'task',
+      '>',
+      'content',
+      '</',
+      'task',
+      '>'
+    ]);
+  });
+
+  test('should handle nested quotes and templates', () => {
+    expect(tokenImages('<meta value="{{path}}/file.txt">')).toEqual([
+      '<',
+      'meta',
+      ' ',
+      'value',
+      '=',
+      '"',
+      '{{',
+      'path',
+      '}}',
+      '/file.txt',
+      '"',
+      '>'
+    ]);
+  });
+});
+
+describe('Error Recovery', () => {
+  test('should handle incomplete template variables', () => {
+    const result = tokenize('text {{incomplete');
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens.length).toBeGreaterThan(0);
+
+    const types = result.tokens.map(t => t.tokenType);
+    expect(types).toContain(Identifier);
+    expect(types).toContain(TemplateOpen);
+  });
+
+  test('should handle unclosed comments', () => {
+    const result = tokenize('<!-- unclosed comment\nmore text');
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens.length).toBeGreaterThan(0);
+  });
+
+  test('should handle mixed valid and invalid content', () => {
+    const result = tokenize('<valid>content</valid>@#$invalid');
+    expect(result.tokens.length).toBeGreaterThan(0);
+
+    // Should tokenize the valid parts
+    const images = result.tokens.map(t => t.image);
+    expect(images).toContain('<');
+    expect(images).toContain('valid');
+    expect(images).toContain('>');
+    expect(images).toContain('content');
+  });
+
+  test('should handle special characters in text content', () => {
+    const input = 'text with @#$%^&*()[]{}|;:,.<>?/~`';
+    const result = tokenize(input);
+    expect(result.errors).toHaveLength(0);
+    const images = result.tokens.map(t => t.image);
+    expect(images).toEqual(['text', ' ', 'with', ' ', '@#$%^&*()[]{}|;:,.', '<', '>', '?/~`']);
   });
 });

From 321a3428e3878702946269244e192543b56df2f6 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Wed, 16 Jul 2025 23:51:52 +0800
Subject: [PATCH 15/76] .

---
 packages/poml/tests/reader/lexer.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 15da7907..51cbe333 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -15,7 +15,7 @@ import {
   Identifier,
   Whitespace,
   TextContent
-} from '../../reader/lexer';
+} from 'poml/reader/lexer';
 
 // Helper function to extract token images
 function tokenImages(input: string): string[] {

From 35e478ff19482c7dc2cbdb8028fe6e4f30ce2094 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 17 Jul 2025 09:23:17 +0800
Subject: [PATCH 16/76] add more tests

---
 packages/poml/tests/reader/lexer.test.ts | 606 ++++++++++++++++++++++-
 1 file changed, 597 insertions(+), 9 deletions(-)

diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 51cbe333..bbd8f976 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -108,15 +108,7 @@ describe('Edge Cases', () => {
   });
 
   test('should handle mixed content', () => {
-    expect(tokenImages('text {{var}} more')).toEqual([
-      'text',
-      ' ',
-      '{{',
-      'var',
-      '}}',
-      ' ',
-      'more'
-    ]);
+    expect(tokenImages('text {{var}} more')).toEqual(['text', ' ', '{{', 'var', '}}', ' ', 'more']);
   });
 
   test('chinese characters', () => {
@@ -172,6 +164,84 @@ describe('Edge Cases', () => {
       '"'
     ]);
   });
+
+  test('should handle complex real-world scenarios', () => {
+    const realWorldTests = [
+      `<!-- Header comment -->
+<html>
+  <head>
+    <title>{{page.title}}</title>
+    <meta charset="utf-8" />
+  </head>
+  <body class="{{theme}}">
+    <div id="content">
+      {{content}}
+    </div>
+  </body>
+</html>`,
+
+      `<task priority="{{urgency}}" due="{{deadline}}">
+  {{description}}
+  <!-- Status: {{status}} -->
+</task>`,
+
+      `"Complex string with {{variables}} and <tags attr='{{nested}}'> inside"`,
+
+      `{{#each items}}
+  <li class="item-{{@index}}">
+    <span title="{{description}}">{{name}}</span>
+  </li>
+{{/each}}`
+    ];
+
+    realWorldTests.forEach(test => {
+      const result = tokenize(test);
+      expect(result.errors).toHaveLength(0);
+      expect(result.tokens.length).toBeGreaterThan(0);
+
+      // Verify position integrity
+      result.tokens.forEach(token => {
+        expect(token.startOffset).toBeGreaterThanOrEqual(0);
+        expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
+      });
+    });
+  });
+
+  test('should handle equals sign in various contexts', () => {
+    const equalsTests = [
+      'attr=value',
+      'attr="value"',
+      "attr='value'",
+      'attr={{value}}',
+      'first=one second=two',
+      '=standalone',
+      'text=content',
+      'a=b=c'
+    ];
+
+    equalsTests.forEach(test => {
+      const result = tokenize(test);
+      expect(result.errors).toHaveLength(0);
+
+      const equalsTokens = result.tokens.filter(t => t.tokenType.name === 'Equals');
+      expect(equalsTokens.length).toBeGreaterThan(0);
+    });
+  });
+
+  test('should handle edge cases with zero-length matches', () => {
+    const edgeCases = ['', ' ', '\n', '\t', '\r', '{{}}', '<!---->', '<>', '""', "''", '\\'];
+
+    edgeCases.forEach(test => {
+      const result = tokenize(test);
+      expect(result.errors).toHaveLength(0);
+
+      if (test === '') {
+        expect(result.tokens).toHaveLength(0);
+      } else {
+        expect(result.tokens.length).toBeGreaterThan(0);
+      }
+    });
+  });
 });
 
 describe('Token Types', () => {
@@ -320,6 +390,524 @@ Analyze data
   });
 });
 
+describe('Boundary Conditions', () => {
+  test('should handle single character inputs', () => {
+    expect(tokenize('<').tokens).toHaveLength(1);
+    expect(tokenize('>').tokens).toHaveLength(1);
+    expect(tokenize('"').tokens).toHaveLength(1);
+    expect(tokenize("'").tokens).toHaveLength(1);
+    expect(tokenize('\\').tokens).toHaveLength(1);
+    expect(tokenize('=').tokens).toHaveLength(1);
+    expect(tokenize(' ').tokens).toHaveLength(1);
+    expect(tokenize('\t').tokens).toHaveLength(1);
+    expect(tokenize('\n').tokens).toHaveLength(1);
+    expect(tokenize('a').tokens).toHaveLength(1);
+    expect(tokenize('_').tokens).toHaveLength(1);
+    expect(tokenize('1').tokens).toHaveLength(1);
+    expect(tokenize('@').tokens).toHaveLength(1);
+  });
+
+  test('should handle two character edge cases', () => {
+    expect(tokenImages('{{')).toEqual(['{{']);
+    expect(tokenImages('}}')).toEqual(['}}']);
+    expect(tokenImages('</')).toEqual(['</']);
+    expect(tokenImages('/>')).toEqual(['/>']);
+    expect(tokenImages('{}')).toEqual(['{}']);
+    expect(tokenImages('}{')).toEqual(['}{']);
+    expect(tokenImages('""')).toEqual(['"', '"']);
+    expect(tokenImages("''")).toEqual(["'", "'"]);
+    expect(tokenImages('<>')).toEqual(['<', '>']);
+  });
+
+  test('should handle minimum valid patterns', () => {
+    expect(tokenImages('<!---->')).toEqual(['<!---->']);
+    expect(tokenImages('<a>')).toEqual(['<', 'a', '>']);
+    expect(tokenImages('</a>')).toEqual(['</', 'a', '>']);
+    expect(tokenImages('<a/>')).toEqual(['<', 'a', '/>']);
+  });
+
+  test('should handle very long inputs without crashes', () => {
+    const longText = 'a'.repeat(100000);
+    const result = tokenize(longText);
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(1);
+    expect(result.tokens[0].image).toBe(longText);
+
+    const longComment = `<!--${'x'.repeat(100000)}-->`;
+    const commentResult = tokenize(longComment);
+    expect(commentResult.errors).toHaveLength(0);
+    expect(commentResult.tokens).toHaveLength(1);
+
+    const longIdentifier = 'a' + 'b'.repeat(10000);
+    const identifierResult = tokenize(longIdentifier);
+    expect(identifierResult.errors).toHaveLength(0);
+    expect(identifierResult.tokens).toHaveLength(1);
+  });
+
+  test('should handle maximum practical complexity', () => {
+    const complexInput =
+      '<' +
+      'tag'.repeat(1000) +
+      ' attr="' +
+      'value'.repeat(1000) +
+      '">' +
+      'content'.repeat(1000) +
+      '</' +
+      'tag'.repeat(1000) +
+      '>';
+    const result = tokenize(complexInput);
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(13);
+  });
+
+  test('should handle deeply nested structures', () => {
+    let nested = '';
+    for (let i = 0; i < 100; i++) {
+      nested += `<tag${i}>`;
+    }
+    nested += 'content';
+    for (let i = 99; i >= 0; i--) {
+      nested += `</tag${i}>`;
+    }
+    const result = tokenize(nested);
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(601);
+  });
+});
+
+describe('Unicode and Special Characters', () => {
+  test('should handle CJK characters', () => {
+    expect(tokenImages('你好世界')).toEqual(['你好世界']);
+    expect(tokenImages('こんにちは')).toEqual(['こんにちは']);
+    expect(tokenImages('안녕하세요')).toEqual(['안녕하세요']);
+  });
+
+  test('should handle emoji and symbols', () => {
+    expect(tokenImages('Hello 👋 World 🌍')).toEqual(['Hello', ' ', '👋 World 🌍']);
+    expect(tokenImages('Math: ∑∞π≠∅')).toEqual(['Math', ': ∑∞π≠∅']);
+    expect(tokenImages('Arrows: ←→↑↓')).toEqual(['Arrows', ': ←→↑↓']);
+  });
+
+  test('should handle unicode', () => {
+    expect(tokenImages('<こんにちは>')).toEqual(['<', 'こんにちは>']);
+    expect(tokenImages('{{你好}}')).toEqual(['{{', '你好', '}}']);
+    expect(tokenImages('<tag attr="café">')).toEqual([
+      '<',
+      'tag',
+      ' ',
+      'attr',
+      '=',
+      '"',
+      'caf',
+      'é',
+      '"',
+      '>'
+    ]);
+  });
+
+  test('should maintain lexer stability with all edge cases', () => {
+    // Combination of many edge cases
+    const stressTest =
+      '\uFEFF\x00\x01\x02<!-- \uD800 comment -->\x03<tag\x04 attr="\uDFFF{{value\x05}}"\x06>\x07content\x08</tag>\x09\x0A';
+
+    const result = tokenize(stressTest);
+    expect(result.tokens.length).toBeGreaterThan(0);
+
+    // Verify token integrity
+    result.tokens.forEach(token => {
+      expect(token.startOffset).toBeGreaterThanOrEqual(0);
+      if (token.endOffset !== undefined) {
+        expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset);
+      }
+    });
+  });
+});
+
+describe('Malformed Patterns', () => {
+  test('should handle incomplete comments', () => {
+    expect(tokenize('<!--').tokens.length).toBeGreaterThan(0);
+    expect(tokenize('<!-- comment').tokens.length).toBeGreaterThan(0);
+    expect(tokenize('<!-- comment -').tokens.length).toBeGreaterThan(0);
+    expect(tokenize('<!-- comment --').tokens.length).toBeGreaterThan(0);
+    expect(tokenize('<!--\nunclosed\nover\nmultiple\nlines').tokens.length).toBeGreaterThan(0);
+  });
+
+  test('should handle incomplete template variables', () => {
+    expect(tokenImages('text {{')).toEqual(['text', ' ', '{{']);
+    expect(tokenImages('text {{variable')).toEqual(['text', ' ', '{{', 'variable']);
+    expect(tokenImages('{{ var }{ not closed')).toEqual(['{{', ' ', 'var', ' ', '}{ not closed']);
+    expect(tokenImages('{{nested {{inside')).toEqual(['{{', 'nested', ' ', '{{', 'inside']);
+  });
+
+  test('should handle incomplete tags', () => {
+    expect(tokenImages('<')).toEqual(['<']);
+    expect(tokenImages('<tag')).toEqual(['<', 'tag']);
+    expect(tokenImages('<tag attr="value')).toEqual(['<', 'tag', ' ', 'attr', '=', '"', 'value']);
+    expect(tokenImages('</')).toEqual(['</']);
+    expect(tokenImages('</tag')).toEqual(['</', 'tag']);
+    expect(tokenImages('<tag /no-close')).toEqual(['<', 'tag', ' ', '/no-close']);
+  });
+
+  test('should handle malformed attributes', () => {
+    expect(tokenImages('attr=')).toEqual(['attr', '=']);
+    expect(tokenImages('attr="')).toEqual(['attr', '=', '"']);
+    expect(tokenImages("attr='unclosed")).toEqual(['attr', '=', "'", 'unclosed']);
+    expect(tokenImages('attr="value')).toEqual(['attr', '=', '"', 'value']);
+    expect(tokenImages('attr=no-quotes value')).toEqual(['attr', '=', 'no-quotes', ' ', 'value']);
+  });
+
+  test('should handle broken template syntax', () => {
+    expect(tokenImages('}')).toEqual(['}']);
+    expect(tokenImages('}}')).toEqual(['}}']);
+    expect(tokenImages('{ single brace }')).toEqual(['{ single brace }']);
+    expect(tokenImages('{not a template}')).toEqual(['{not a template}']);
+  });
+
+  test('should handle nested malformed patterns', () => {
+    expect(tokenImages('<!-- <tag> -->')).toEqual(['<!-- <tag> -->']);
+    expect(tokenImages('<!-- {{template}} -->')).toEqual(['<!-- {{template}} -->']);
+    expect(tokenImages('<tag><!-- comment</tag>')).toEqual([
+      '<',
+      'tag',
+      '>',
+      '<',
+      '!-- comment',
+      '</',
+      'tag',
+      '>'
+    ]);
+    expect(tokenImages('{{<tag>}}')).toEqual(['{{', '<', 'tag', '>', '}}']);
+  });
+
+  test('should handle quotes without proper pairing', () => {
+    expect(tokenImages('"orphan quote')).toEqual(['"', 'orphan', ' ', 'quote']);
+    expect(tokenImages("'another orphan")).toEqual(["'", 'another', ' ', 'orphan']);
+    expect(tokenImages('mixed "quote\' types')).toEqual([
+      'mixed',
+      ' ',
+      '"',
+      'quote',
+      "'",
+      ' ',
+      'types'
+    ]);
+    expect(tokenImages('escaped \\"quote\\" in text')).toEqual([
+      'escaped',
+      ' ',
+      '\\',
+      '"',
+      'quote',
+      '\\',
+      '"',
+      ' ',
+      'in',
+      ' ',
+      'text'
+    ]);
+  });
+
+  test('should handle self-closing tag syntax edge cases', () => {
+    expect(tokenImages('/>')).toEqual(['/>']);
+    expect(tokenImages('text/>')).toEqual(['text', '/>']);
+    expect(tokenImages('<tag attr=value/>')).toEqual(['<', 'tag', ' ', 'attr', '=', 'value', '/>']);
+    expect(tokenImages('</ self-close>')).toEqual(['</', ' ', 'self-close', '>']);
+  });
+
+  test('should handle whitespace in unexpected places', () => {
+    expect(tokenImages('< tag >')).toEqual(['<', ' ', 'tag', ' ', '>']);
+    expect(tokenImages('</ tag >')).toEqual(['</', ' ', 'tag', ' ', '>']);
+    expect(tokenImages('{ { template } }')).toEqual(['{ { template } }']);
+    expect(tokenImages('attr = "value"')).toEqual(['attr', ' ', '=', ' ', '"', 'value', '"']);
+  });
+
+  test('should handle multiple consecutive special characters', () => {
+    expect(tokenImages('<<>>')).toEqual(['<', '<', '>', '>']);
+    expect(tokenImages('"""')).toEqual(['"', '"', '"']);
+    expect(tokenImages("'''")).toEqual(["'", "'", "'"]);
+    expect(tokenImages('\\\\\\')).toEqual(['\\', '\\', '\\']);
+    expect(tokenImages('===')).toEqual(['=', '=', '=']);
+  });
+
+  test('should handle mixed broken and valid syntax', () => {
+    expect(tokenImages('<valid>content</valid>{{ broken')).toEqual([
+      '<',
+      'valid',
+      '>',
+      'content',
+      '</',
+      'valid',
+      '>',
+      '{{',
+      ' ',
+      'broken'
+    ]);
+    expect(tokenImages('<!--comment--><tag>more{{ content')).toEqual([
+      '<!--comment-->',
+      '<',
+      'tag',
+      '>',
+      'more',
+      '{{',
+      ' ',
+      'content'
+    ]);
+    expect(tokenImages("\"quoted text<tag attr='mixed'>end")).toEqual([
+      '"',
+      'quoted',
+      ' ',
+      'text',
+      '<',
+      'tag',
+      ' ',
+      'attr',
+      '=',
+      "'",
+      'mixed',
+      "'",
+      '>',
+      'end'
+    ]);
+  });
+
+  test('should handle lookahead boundary cases for single braces', () => {
+    expect(tokenImages('{nottemplate}')).toEqual(['{nottemplate}']);
+    expect(tokenImages('}notclosing{')).toEqual(['}notclosing{']);
+    expect(tokenImages('text{more}text')).toEqual(['text', '{more}text']);
+    expect(tokenImages('before}after')).toEqual(['before', '}after']);
+    expect(tokenImages('before{after')).toEqual(['before', '{after']);
+    expect(tokenImages('text } { more')).toEqual(['text', ' ', '} { more']);
+  });
+
+  test('should handle greedy vs non-greedy matching', () => {
+    expect(tokenImages('<!--first--><!--second-->')).toEqual(['<!--first-->', '<!--second-->']);
+    expect(tokenImages('{{first}}{{second}}')).toEqual(['{{', 'first', '}}', '{{', 'second', '}}']);
+    expect(tokenImages('text<!--comment-->more')).toEqual(['text', '<!--comment-->', 'more']);
+  });
+});
+
+describe('Position Tracking Accuracy', () => {
+  test('should track positions accurately across multiple lines', () => {
+    const input = `line1
+<tag>content</tag>
+{{variable}}
+final line`;
+    const result = tokenize(input);
+
+    const tagOpenToken = result.tokens.find(t => t.image === '<' && t.startLine === 2);
+    expect(tagOpenToken).toBeDefined();
+    expect(tagOpenToken!.startColumn).toBe(1);
+
+    const variableToken = result.tokens.find(t => t.image === 'variable');
+    expect(variableToken).toBeDefined();
+    expect(variableToken!.startLine).toBe(3);
+  });
+
+  test('should track positions accurately with mixed line endings', () => {
+    const input = 'line1\r\nline2\nline3\r';
+    const result = tokenize(input);
+
+    expect(result.tokens.length).toBeGreaterThan(0);
+    result.tokens.forEach(token => {
+      expect(token.startOffset).toBeGreaterThanOrEqual(0);
+      expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
+      expect(token.startLine).toBeGreaterThanOrEqual(1);
+      expect(token.startColumn).toBeGreaterThanOrEqual(1);
+    });
+  });
+
+  test('should handle position tracking with empty tokens', () => {
+    const input = '<>""\'\'{{}}<!---->< >';
+    const result = tokenize(input);
+
+    // Verify all tokens have valid positions
+    result.tokens.forEach(token => {
+      expect(token.startOffset).toBeGreaterThanOrEqual(0);
+      expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
+      expect(token.startLine).toBeGreaterThanOrEqual(1);
+      expect(token.startColumn).toBeGreaterThanOrEqual(1);
+    });
+  });
+
+  test('should track positions accurately with tabs and mixed whitespace', () => {
+    const input = '\t<tag>\n\t\t<inner>\t\tcontent\t</inner>\n</tag>';
+    const result = tokenize(input);
+
+    // Find tokens and verify their positions make sense
+    const tagOpen = result.tokens.find(t => t.image === '<' && t.startLine === 1);
+    const innerOpen = result.tokens.find(t => t.image === '<' && t.startLine === 2);
+
+    expect(tagOpen).toBeDefined();
+    expect(innerOpen).toBeDefined();
+    expect(tagOpen!.startColumn).toBe(2); // After tab
+    expect(innerOpen!.startColumn).toBe(3); // After two tabs
+  });
+
+  test('should verify complete coverage with no gaps', () => {
+    const input = '<tag attr="value">content{{var}}</tag>';
+    const result = tokenize(input);
+
+    // Sort tokens by start position
+    const sortedTokens = [...result.tokens].sort((a, b) => a.startOffset! - b.startOffset!);
+
+    // Verify complete coverage
+    let expectedOffset = 0;
+    sortedTokens.forEach(token => {
+      expect(token.startOffset).toBeGreaterThanOrEqual(expectedOffset);
+      expectedOffset = token.endOffset! + 1;
+    });
+
+    // Should cover the entire input
+    expect(expectedOffset).toBeGreaterThanOrEqual(input.length);
+  });
+
+  test('should handle position tracking with comments spanning multiple lines', () => {
+    const input = `text
+<!-- this is a
+multi-line
+comment -->
+more text`;
+
+    const result = tokenize(input);
+    const commentToken = result.tokens.find(t => t.tokenType.name === 'Comment');
+
+    expect(commentToken).toBeDefined();
+    expect(commentToken!.startLine).toBe(2);
+    expect(commentToken!.endLine).toBe(4);
+  });
+
+  test('should handle position tracking with carriage returns', () => {
+    const input = 'line1\r<tag>\rcontent\r</tag>';
+    const result = tokenize(input);
+
+    // Check that line numbers increase correctly
+    const lines = new Set(result.tokens.map(t => t.startLine));
+    expect(lines.size).toBeGreaterThan(1);
+
+    // Verify positions are sequential
+    result.tokens.forEach(token => {
+      expect(token.startOffset).toBeGreaterThanOrEqual(0);
+      expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
+    });
+  });
+});
+
+describe('Performance and Stress Tests', () => {
+  test('should handle extremely long text content without performance degradation', () => {
+    const longText = 'a'.repeat(1000000); // 1MB of text
+    const start = performance.now();
+    const result = tokenize(longText);
+    const end = performance.now();
+
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(1);
+    expect(result.tokens[0].image).toBe(longText);
+    expect(end - start).toBeLessThan(1000); // Should complete in under 1 second
+  });
+
+  test('should handle very long comments efficiently', () => {
+    const longComment = `<!--${'x'.repeat(500000)}-->`;
+    const start = performance.now();
+    const result = tokenize(longComment);
+    const end = performance.now();
+
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens).toHaveLength(1);
+    expect(end - start).toBeLessThan(500); // Should be fast
+  });
+
+  test('should handle many small tokens efficiently', () => {
+    const manyTokens = Array(10000).fill('<tag>content</tag>').join(' ');
+    const start = performance.now();
+    const result = tokenize(manyTokens);
+    const end = performance.now();
+
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens.length).toBeGreaterThan(10000);
+    expect(end - start).toBeLessThan(2000); // Should handle many tokens
+  });
+
+  test('should handle deeply nested template variables', () => {
+    let nested = '';
+    for (let i = 0; i < 1000; i++) {
+      nested += `{{var${i}}}`;
+    }
+
+    const start = performance.now();
+    const result = tokenize(nested);
+    const end = performance.now();
+
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens.length).toBe(3000); // 1000 * (open + content + close)
+    expect(end - start).toBeLessThan(1000);
+  });
+
+  test('should handle memory efficiently with large repetitive content', () => {
+    const pattern = '<tag attr="value">{{content}}</tag>';
+    const repeated = Array(1000).fill(pattern).join('\n');
+
+    const start = performance.now();
+    const result = tokenize(repeated);
+    const end = performance.now();
+
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens.length).toBeGreaterThan(5000);
+    expect(end - start).toBeLessThan(1500);
+  });
+
+  test('should handle worst-case regex backtracking scenarios', () => {
+    // Patterns that could cause regex catastrophic backtracking
+    const backtrackingTests = [
+      'a'.repeat(10000) + 'b',
+      '{'.repeat(5000) + '}',
+      '<'.repeat(1000) + '>',
+      '"'.repeat(2000),
+      '<!--' + 'x'.repeat(10000) + '-->',
+      Array(1000).fill('{{}}').join('')
+    ];
+
+    backtrackingTests.forEach(test => {
+      const start = performance.now();
+      const result = tokenize(test);
+      const end = performance.now();
+
+      expect(result.errors).toHaveLength(0);
+      expect(end - start).toBeLessThan(1000); // Should not hang
+    });
+  });
+
+  test('should maintain linear performance with input size', () => {
+    const sizes = [1000, 5000, 10000, 20000];
+    const times: number[] = [];
+
+    sizes.forEach(size => {
+      const content = 'x'.repeat(size);
+      const start = performance.now();
+      tokenize(content);
+      const end = performance.now();
+      times.push(end - start);
+    });
+
+    // Performance should scale roughly linearly
+    expect(times[1]).toBeLessThan(times[0] * 10);
+    expect(times[2]).toBeLessThan(times[1] * 5);
+    expect(times[3]).toBeLessThan(times[2] * 3);
+  });
+
+  test('should handle maximum practical input sizes', () => {
+    // Test with ~190 KB of content (10,000 repeated tag snippets)
+    const hugeContent = Array(10000).fill('<tag>content</tag>').join(' ');
+    expect(hugeContent.length).toBe(10000 * 19 - 1);
+
+    const start = performance.now();
+    const result = tokenize(hugeContent);
+    const end = performance.now();
+
+    expect(result.errors).toHaveLength(0);
+    expect(result.tokens.length).toBeGreaterThan(0);
+    expect(end - start).toBeLessThan(5000); // 5 second max for ~190 KB
+  });
+});
+
 describe('Error Recovery', () => {
   test('should handle incomplete template variables', () => {
     const result = tokenize('text {{incomplete');

From c4a946660d5218c5e624ce6f86d514bb0683d47f Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 21 Jul 2025 17:01:05 +0800
Subject: [PATCH 17/76] update poml extended proposal

---
 docs/proposals/poml_extended.md | 52 +++++++++++++--------------------
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/docs/proposals/poml_extended.md b/docs/proposals/poml_extended.md
index 8564b681..95f9dffb 100644
--- a/docs/proposals/poml_extended.md
+++ b/docs/proposals/poml_extended.md
@@ -103,38 +103,44 @@ There can be some intervening text here as well.
 
 Metadatas are information that is useful when parsing and rendering the file, such as context variables, stylesheets, version information, file paths, etc.
 File-level metadata can be included at any place of the file in a special `<meta>` tag. This metadata will be processed before any content parsing.
+By default, a `<meta>` tag has no child content. When child content is present, the `<meta>` tag must carry a `type` attribute specifying what kind of content is provided.
 
 **Example:**
 
 ```xml
 <meta minimalPomlVersion="0.3" />
-<meta stylesheet="/path/to/stylesheet.json />
-<meta enableTags="reference,table" unknownTags="warning" />
+<meta stylesheet="/path/to/stylesheet.json" />
+<meta components="+reference,-table" unknownComponents="warning" />
+<meta context="/path/to/contextFile.json" />
+<meta type="context">
+{ "foo": "bar" }
+</meta>
 ```
 
 ## Architecture Design
 
 ### High-level Processing Pipeline
 
-The core of the new architecture is a three-pass process: Tokenization and AST Parsing, Metadata Extraction, and Recursive Rendering.
+The core of the new architecture is a two-pass process: Tokenization and AST Parsing, followed by Recursive Rendering.
 
 #### I. Tokenization and AST Parsing
 
-This phase processes the raw file content through a standard compiling workflow: tokenization followed by parsing to an Abstract Syntax Tree (AST).
+This phase processes the raw file content into an Abstract Syntax Tree (AST). It leverages the provided ExtendedPomlLexer.
 
-* **Tokenization**: Standard XML tokenization logic is used to break the input into tokens (tags, text content, attributes, etc.). Additionally, template variables in `{{}}` format are identified and tokenized as special tokens to enable proper parsing and variable substitution.
+* **Tokenization**: The ExtendedPomlLexer (using chevrotain) scans the entire input string and breaks it into a flat stream of tokens (TagOpen, Identifier, TextContent, TemplateOpen, etc.). This single lexing pass is sufficient for the entire mixed-content file. The distinction between "text" and "POML" is not made at this stage; it's simply a stream of tokens.
+* **AST Parsing Algorithm**: A CST (Concrete Syntax Tree) or AST parser will consume the token stream from the lexer. The parser is stateful, using a `PomlContext` object to track parsing configurations.
 
-* **AST Parsing Algorithm**:
-  1. Scan until `<` and tag name is found.
-  2. If the tag name is `text`, create a text node and scan until the corresponding `</text>` is found (handling nested POML if present; template variables are not considered here).
-  3. If the tag name matches any POML tag from `componentDocs.json`, create a node with the tag name and attributes (template variables `{{}}` in attribute values are parsed as child template nodes).
-  4. Within POML tags, if another `text` tag is found, follow the same logic as step 2.
-  5. Template variables `{{}}` found within text content or attribute values create TEMPLATE nodes as children.
-  6. Close the node when the corresponding closing tag `</tagname>` is found.
+  1. The parser starts in "text mode". It consumes TextContent, TemplateOpen/TemplateClose, and other non-tag tokens, bundling them into TEXT or TEMPLATE nodes.
+  2. When a TagOpen (`<`) token is followed by the Identifier "meta", a META node is created. Its attributes are immediately parsed to populate the `PomlContext`. This allows metadata to control the parsing of the remainder of the file (e.g., by enabling new tags). The META node is added to the AST but will be ignored during rendering.
+  3. When a TagOpen (`<`) token is followed by an Identifier that matches a known POML component (from componentDocs.json and enabled via PomlContext), the parser switches to "POML mode" and creates a POML node.
+  4. In "POML mode," it parses attributes (Identifier, Equals, DoubleQuote/SingleQuote), nested tags, and content until it finds a matching TagClosingOpen (`</`) token. Template variables `{{}}` within attribute values or content are parsed into child TEMPLATE nodes.
+  5. If the tag is `<text>`, it creates a POML node for `<text>` itself, but its *children* are parsed by recursively applying the "text mode" logic (step 1), allowing for nested POML within `<text>`.
+  6. If a TagOpen is followed by an Identifier that is *not* a known POML component, the parser treats these tokens (`<`, tagname, `>`) as literal text and reverts to "text mode".
+  7. The parser closes the current POML node when the corresponding TagClosingOpen (`</`) and Identifier are found. After closing the top-level POML tag, it reverts to "text mode".
 
-* **Error Tolerance**: The parser is designed to be error-tolerant, gracefully handling malformed markup while preserving as much structure as possible.
+* **Error Tolerance**: The parser will be designed to be error-tolerant. If a closing tag is missing, it can infer closure at the end of the file or when a new top-level tag begins, logging a diagnostic warning.
 
-* **Source Mapping**: The parser retains source mapping information for each AST node, enabling code intelligence features like hover, go to definition, find references, and auto completion.
+* **Source Mapping**: The chevrotain tokens inherently contain offset, line, and column information. This data is directly transferred to the ASTNode during parsing, enabling robust code intelligence features.
 
 * **Output**: An AST representing the hierarchical structure of the document, where each node contains source position information and type metadata.
 
@@ -190,14 +196,6 @@ interface ASTNode {
 }
 ```
 
-#### II. Metadata Processing
-
-Once the AST is built, all `META` nodes are processed.
-
-  * **Extraction**: Traverse the AST to find all `META` nodes.
-  * **Population**: Parse the content of each `<meta>` tag and populate the global `PomlContext` object.
-  * **Removal**: After processing, `META` nodes are removed from the AST to prevent them from being rendered.
-
 **`PomlContext` Interface**: This context object is the single source of truth for the entire file, passed through all readers. It's mutable, allowing stateful operations like `<let>` to have a file-wide effect.
 
 ```typescript
@@ -209,7 +207,7 @@ interface PomlContext {
 }
 ```
 
-#### III. Text/POML Dispatching (Recursive Rendering)
+#### II. Text/POML Dispatching (Recursive Rendering)
 
 Rendering starts at the root of the AST and proceeds recursively. A controller dispatches AST nodes to the appropriate reader.
 
@@ -255,11 +253,3 @@ To achieve this design, the existing `PomlFile` class needs significant refactor
 3. **Handling `<include>`**:
 
   * The `handleInclude` method should be **removed** from `PomlFile`. Inclusion is now handled at a higher level by the main processing pipeline. When the `PomlReader` encounters an `<include>` tag, it will invoke the entire pipeline (Segmentation, Metadata, Rendering) on the included file and insert the resulting React elements.
-
-4. **Parsing `TEXT` Placeholders**:
-
-  * The core `parseXmlElement` method needs a new branch to handle the `<text ref="..." />` placeholder.
-  * When it encounters this element:
-    1. It extracts the `ref` attribute (e.g., `"TEXT_ID_123"`).
-    2. It looks up the corresponding raw text from `context.texts`.
-    3. It fetches from the `context.texts` map and returns a React element containing the pure text content.

From 756f9f859ca5f3c1135fff5ebe0011ffbdd756f2 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 24 Jul 2025 18:40:28 +0800
Subject: [PATCH 18/76] unverified cst impl

---
 packages/poml/base.tsx                 |   4 +
 packages/poml/reader/cst.ts            | 730 +++++++++++++++++++++++++
 packages/poml/tests/reader/cst.test.ts | 129 +++++
 3 files changed, 863 insertions(+)
 create mode 100644 packages/poml/tests/reader/cst.test.ts

diff --git a/packages/poml/base.tsx b/packages/poml/base.tsx
index e2a54d5c..8d87788a 100644
--- a/packages/poml/base.tsx
+++ b/packages/poml/base.tsx
@@ -873,3 +873,7 @@ export function findComponentByAliasOrUndefined(alias: string): PomlComponent |
 export function listComponents() {
   return ComponentRegistry.instance.listComponents();
 }
+
+export function listComponentAliases() {
+  return listComponents().map(c => c.getAliases()).flat();
+}
diff --git a/packages/poml/reader/cst.ts b/packages/poml/reader/cst.ts
index e69de29b..e97d6d80 100644
--- a/packages/poml/reader/cst.ts
+++ b/packages/poml/reader/cst.ts
@@ -0,0 +1,730 @@
+import { IToken } from 'chevrotain';
+import { 
+  extendedPomlLexer, 
+  TemplateOpen, TemplateClose, TagClosingOpen, TagSelfClose, 
+  TagOpen, TagClose, Equals, DoubleQuote, SingleQuote, 
+  Identifier, Whitespace, TextContent 
+} from './lexer';
+
+import { listComponentAliases } from '../base';
+
+// Source position interfaces
+export interface SourceRange {
+  start: number; // offset of the first character of the span
+  end: number; // offset one past the last character, so text.slice(start, end) yields the span
+}
+
+export interface AttributeInfo {
+  key: string; // attribute name as written in the source
+  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[]; // value split into text and {{template}} parts, in source order
+  keyRange: SourceRange; // span of the attribute name
+  valueRange: SourceRange; // span between the quotes; for a valueless (boolean) attribute, the same as keyRange
+  fullRange: SourceRange; // from the name to the last token consumed for the value; for a boolean attribute, keyRange
+}
+
+// Core AST node interface
+export interface ASTNode {
+  id: string; // parser-assigned identifier of the form "node_<n>"
+  kind: 'META' | 'TEXT' | 'POML' | 'TEMPLATE'; // node category
+  start: number; // offset where the node begins in the parsed text
+  end: number; // offset one past where the node ends
+  content: string; // raw source text of the node, normally text.slice(start, end)
+  parent?: ASTNode; // set when the node is pushed onto a parent's children
+  children: ASTNode[]; // nested nodes in source order
+  
+  // For POML and META nodes
+  tagName?: string; // element name as written, e.g. 'task' or 'meta'
+  attributes?: AttributeInfo[]; // attributes in source order
+  
+  // Detailed source positions
+  openingTag?: {
+    start: number; // offset of '<'
+    end: number; // offset one past the '>' or '/>' that ends the opening tag
+    nameRange: SourceRange; // span of the tag name
+  };
+  
+  closingTag?: {
+    start: number; // offset of '</'
+    end: number; // offset one past the final '>'
+    nameRange: SourceRange; // span of the tag name inside the closing tag
+  };
+  
+  contentRange?: SourceRange; // NOTE(review): never assigned by CSTParser in this file; confirm intended use
+  
+  // For TEXT nodes
+  textSegments?: SourceRange[]; // spans of the text; one [start, end) span for parsed text, empty for the root
+  
+  // For TEMPLATE nodes
+  expression?: string; // trimmed text between '{{' and '}}'
+}
+
+// Context for parsing configuration
+export interface PomlContext {
+  variables: { [key: string]: any }; // not read by the parser in this file; presumably template variables, verify with consumers
+  stylesheet: { [key: string]: string }; // not read by the parser in this file; TODO confirm meaning with consumers
+  minimalPomlVersion?: string; // set from <meta minimalPomlVersion="...">
+  sourcePath: string; // not read by the parser in this file; presumably the path of the document, verify with consumers
+  enabledComponents: Set<string>; // tag names parsed as POML elements; adjusted by <meta components="+a,-b">
+  unknownComponentBehavior: 'error' | 'warning' | 'ignore'; // on an unrecognised top-level tag: throw, console.warn, or nothing
+}
+
+// CST Parser class
+export class CSTParser {
+  private tokens: IToken[];
+  private position: number;
+  private text: string;
+  private context: PomlContext;
+  private nodeIdCounter: number;
+
+  constructor(context: PomlContext) {
+    this.tokens = [];
+    this.position = 0;
+    this.text = '';
+    this.context = context;
+    this.nodeIdCounter = 0;
+    // Start from every registered component alias; <meta components="..."> can add or remove names later.
+    this.context.enabledComponents = new Set(listComponentAliases());
+    // Keep the caller's choice (e.g. 'error') instead of overwriting it; default to 'warning' only when unset.
+    this.context.unknownComponentBehavior = context.unknownComponentBehavior ?? 'warning';
+  }
+
+  private generateId(): string { // next id in the sequence node_1, node_2, ...
+    return `node_${++this.nodeIdCounter}`;
+  }
+
+  private currentToken(): IToken | undefined { // token under the cursor, or undefined once the cursor is past the end
+    return this.tokens[this.position];
+  }
+
+  private peekToken(offset: number = 1): IToken | undefined { // token `offset` places ahead of the cursor; does not move it
+    return this.tokens[this.position + offset];
+  }
+
+  private consumeToken(): IToken | undefined {
+    // Hand back the token under the cursor and step past it; at end of input return undefined and stay put.
+    const token = this.currentToken();
+    if (token !== undefined) this.position++;
+    return token;
+  }
+
+  private skipWhitespace(): void {
+    // Advance the cursor past any run of consecutive Whitespace tokens.
+    for (let token = this.currentToken(); token?.tokenType === Whitespace; token = this.currentToken()) {
+      this.position++;
+    }
+  }
+
+  public parse(text: string): ASTNode {
+    // Tokenize `text` and build the tree. Only `tokens` is read from the lexer result; the id counter is not reset.
+    this.text = text;
+    this.position = 0;
+    this.tokens = extendedPomlLexer.tokenize(text).tokens;
+
+    // The root is a TEXT node covering the whole input; everything parsed hangs off its children.
+    const root: ASTNode = {
+      id: this.generateId(),
+      kind: 'TEXT',
+      start: 0,
+      end: text.length,
+      content: text,
+      children: [],
+      textSegments: []
+    };
+    this.parseDocument(root);
+    return root;
+  }
+
+  /**
+   * Top-level loop: turn the token stream into children of `rootNode`.
+   *
+   * - `{{ ... }}`            -> TEMPLATE node
+   * - `<meta ...>`           -> META node; its settings are applied immediately
+   * - `<name ...>` (enabled) -> POML element
+   * - anything else          -> TEXT node (unknown tag names go through handleUnknownTag first)
+   */
+  private parseDocument(rootNode: ASTNode): void {
+    while (this.position < this.tokens.length) {
+      const token = this.currentToken();
+      if (!token) {
+        break;
+      }
+
+      let child: ASTNode | null;
+      let isMeta = false;
+
+      if (token.tokenType === TemplateOpen) {
+        // Templates are valid at document level too: parse '{{ expr }}' into a TEMPLATE node
+        // instead of letting it fall through to the plain-text path.
+        child = this.parseTemplate();
+      } else if (token.tokenType === TagOpen) {
+        const nextToken = this.peekToken();
+        const tagName = nextToken?.tokenType === Identifier ? nextToken.image : undefined;
+        if (tagName === undefined) {
+          // Malformed tag - treat as text
+          child = this.parseTextContent();
+        } else if (tagName === 'meta') {
+          child = this.parseMetaTag();
+          isMeta = true;
+        } else if (this.context.enabledComponents.has(tagName)) {
+          child = this.parsePomlElement();
+        } else {
+          // Unknown tag - report it, then treat as text
+          this.handleUnknownTag(tagName);
+          child = this.parseTextContent();
+        }
+      } else {
+        child = this.parseTextContent();
+      }
+
+      if (child) {
+        rootNode.children.push(child);
+        child.parent = rootNode;
+        if (isMeta) {
+          this.processMeta(child); // takes effect for the rest of the document
+        }
+      }
+    }
+  }
+
+  /**
+   * Parse `<meta ... />` or `<meta ...> ... </meta>` starting at the current `<` token. The body of
+   * the long form is skipped without creating child nodes. Returns null if the name is not `meta`.
+   */
+  private parseMetaTag(): ASTNode | null {
+    const openTagStart = this.currentToken()?.startOffset || 0;
+    this.consumeToken(); // consume '<'
+    this.skipWhitespace();
+    const nameToken = this.consumeToken(); // consume 'meta'
+    if (!nameToken || nameToken.image !== 'meta') {
+      return null;
+    }
+    const nameRange: SourceRange = {
+      start: nameToken.startOffset || 0,
+      end: (nameToken.endOffset || 0) + 1
+    };
+
+    this.skipWhitespace();
+    const attributes = this.parseAttributes();
+    this.skipWhitespace();
+
+    // Check for self-closing or regular closing
+    const closeToken = this.currentToken();
+    let openTagEnd: number;
+    let hasContent = false;
+
+    if (closeToken?.tokenType === TagSelfClose) {
+      this.consumeToken(); // consume '/>'
+      openTagEnd = (closeToken.endOffset || 0) + 1;
+    } else if (closeToken?.tokenType === TagClose) {
+      this.consumeToken(); // consume '>'
+      openTagEnd = (closeToken.endOffset || 0) + 1;
+      hasContent = true;
+    } else {
+      // Unterminated tag: end the node after the last token consumed instead of at offset 0.
+      openTagEnd = (this.tokens[this.position - 1]?.endOffset || 0) + 1;
+    }
+
+    const metaNode: ASTNode = {
+      id: this.generateId(),
+      kind: 'META',
+      start: openTagStart,
+      end: openTagEnd, // Will be updated if there's content
+      content: '',
+      children: [],
+      tagName: 'meta',
+      attributes,
+      openingTag: {
+        start: openTagStart,
+        end: openTagEnd,
+        nameRange
+      }
+    };
+
+    if (hasContent) {
+      // Skip the body: advance until the cursor is on the '</' of '</meta' or the input ends
+      while (this.position < this.tokens.length) {
+        const token = this.currentToken();
+        if (token?.tokenType === TagClosingOpen) {
+          const nextToken = this.peekToken();
+          if (nextToken?.tokenType === Identifier && nextToken.image === 'meta') {
+            break;
+          }
+        }
+        this.position++;
+      }
+
+      // Parse closing tag
+      if (this.currentToken()?.tokenType === TagClosingOpen) {
+        const closingTagStart = this.currentToken()?.startOffset || 0;
+        this.consumeToken(); // consume '</'
+        const closingNameToken = this.consumeToken(); // consume 'meta'
+        this.skipWhitespace();
+        const finalClose = this.consumeToken(); // consume '>'
+        if (closingNameToken && finalClose) {
+          metaNode.closingTag = {
+            start: closingTagStart,
+            end: (finalClose.endOffset || 0) + 1,
+            nameRange: {
+              start: closingNameToken.startOffset || 0,
+              end: (closingNameToken.endOffset || 0) + 1
+            }
+          };
+          metaNode.end = (finalClose.endOffset || 0) + 1;
+        }
+      }
+    }
+
+    metaNode.content = this.text.slice(metaNode.start, metaNode.end);
+    return metaNode;
+  }
+
+  /**
+   * Parse an element `<name ...>` starting at the current `<` token, together with its content and
+   * closing tag when present. Returns null only when no token follows the `<`.
+   */
+  private parsePomlElement(): ASTNode | null {
+    const openTagStart = this.currentToken()?.startOffset || 0;
+    this.consumeToken(); // consume '<'
+    this.skipWhitespace();
+    const nameToken = this.consumeToken();
+    if (!nameToken) {
+      return null;
+    }
+    const tagName = nameToken.image;
+    const nameRange: SourceRange = {
+      start: nameToken.startOffset || 0,
+      end: (nameToken.endOffset || 0) + 1
+    };
+
+    this.skipWhitespace();
+    const attributes = this.parseAttributes();
+    this.skipWhitespace();
+
+    // Check for self-closing or regular closing
+    const closeToken = this.currentToken();
+    let openTagEnd: number;
+    let hasContent = false;
+    if (closeToken?.tokenType === TagSelfClose) {
+      this.consumeToken(); // consume '/>'
+      openTagEnd = (closeToken.endOffset || 0) + 1;
+    } else if (closeToken?.tokenType === TagClose) {
+      this.consumeToken(); // consume '>'
+      openTagEnd = (closeToken.endOffset || 0) + 1;
+      hasContent = true;
+    } else {
+      // Unterminated tag: end the node after the last token consumed instead of at offset 0.
+      openTagEnd = (this.tokens[this.position - 1]?.endOffset || 0) + 1;
+    }
+
+    const pomlNode: ASTNode = {
+      id: this.generateId(),
+      kind: 'POML',
+      start: openTagStart,
+      end: openTagEnd, // Will be updated if there's content
+      content: '',
+      children: [],
+      tagName,
+      attributes,
+      openingTag: {
+        start: openTagStart,
+        end: openTagEnd,
+        nameRange
+      }
+    };
+
+    if (hasContent) {
+      if (tagName === 'text') {
+        // <text>: nested enabled elements are still parsed, but '{{ }}' is not turned into a template
+        this.parseTextContentForTextTag(pomlNode);
+      } else {
+        // Parse mixed content (POML, templates and text)
+        this.parseMixedContent(pomlNode);
+      }
+
+      // Parse closing tag
+      if (this.currentToken()?.tokenType === TagClosingOpen) {
+        const closingTagStart = this.currentToken()?.startOffset || 0;
+        this.consumeToken(); // consume '</'
+        const closingNameToken = this.consumeToken();
+        this.skipWhitespace();
+        const finalClose = this.consumeToken(); // consume '>'
+        if (closingNameToken && finalClose) {
+          pomlNode.closingTag = {
+            start: closingTagStart,
+            end: (finalClose.endOffset || 0) + 1,
+            nameRange: {
+              start: closingNameToken.startOffset || 0,
+              end: (closingNameToken.endOffset || 0) + 1
+            }
+          };
+          pomlNode.end = (finalClose.endOffset || 0) + 1;
+        }
+      }
+    }
+
+    pomlNode.content = this.text.slice(pomlNode.start, pomlNode.end);
+    return pomlNode;
+  }
+
+  /**
+   * Fill `parentNode` (a `<text>` element) with children until its own closing tag or the end of
+   * input. A `<` followed by the name of an enabled component starts a nested element; everything
+   * else is handed to parseTextContent. Unlike parseMixedContent, there is no branch that turns
+   * `{{ ... }}` into a TEMPLATE node.
+   */
+  private parseTextContentForTextTag(parentNode: ASTNode): void {
+    while (this.position < this.tokens.length) {
+      const token = this.currentToken();
+      if (!token) {
+        break;
+      }
+
+      // Name of the identifier right after the current token, if there is one.
+      const lookahead = this.peekToken();
+      const nameAhead = lookahead?.tokenType === Identifier ? lookahead.image : undefined;
+
+      // '</' followed by this element's own name ends the content; the caller consumes it.
+      const closesParent =
+        token.tokenType === TagClosingOpen && nameAhead !== undefined && nameAhead === parentNode.tagName;
+      if (closesParent) {
+        break;
+      }
+
+      const opensElement =
+        token.tokenType === TagOpen && nameAhead !== undefined && this.context.enabledComponents.has(nameAhead);
+      let child: ASTNode | null;
+      if (opensElement) {
+        // Found nested POML element
+        child = this.parsePomlElement();
+      } else {
+        // Treat as text
+        child = this.parseTextContent();
+      }
+
+      if (child) {
+        parentNode.children.push(child);
+        child.parent = parentNode;
+      }
+    }
+  }
+
+  /**
+   * Fill `parentNode` with children until its own closing tag or the end of input. A `<` followed
+   * by the name of an enabled component starts a nested element, `{{` starts a template, and
+   * everything else (unknown or malformed tags included) is handed to parseTextContent.
+   *
+   * The closing tag itself is not consumed here; the loop stops with the cursor on its `</`,
+   * which parsePomlElement then reads.
+   */
+  private parseMixedContent(parentNode: ASTNode): void {
+    while (this.position < this.tokens.length) {
+      const token = this.currentToken();
+      if (!token) {
+        break;
+      }
+
+      // Name of the identifier right after the current token, if there is one.
+      const lookahead = this.peekToken();
+      const nameAhead = lookahead?.tokenType === Identifier ? lookahead.image : undefined;
+
+      // '</' followed by this element's own name ends the content.
+      const closesParent =
+        token.tokenType === TagClosingOpen && nameAhead !== undefined && nameAhead === parentNode.tagName;
+      if (closesParent) {
+        break;
+      }
+
+      const opensElement =
+        token.tokenType === TagOpen && nameAhead !== undefined && this.context.enabledComponents.has(nameAhead);
+
+      let child: ASTNode | null;
+      if (opensElement) {
+        // Found nested POML element
+        child = this.parsePomlElement();
+      } else if (token.tokenType === TemplateOpen) {
+        // Parse template expression
+        child = this.parseTemplate();
+      } else {
+        // Plain text, or an unknown / malformed tag treated as text
+        child = this.parseTextContent();
+      }
+
+      if (child) {
+        parentNode.children.push(child);
+        child.parent = parentNode;
+      }
+    }
+  }
+
+  /**
+   * Collect a run of tokens into a TEXT node.
+   *
+   * The token under the cursor is always taken, whatever its type; the run then continues until
+   * the next '<', '</' or '{{'. Callers use this as the "treat it as text" fallback for unknown
+   * or malformed tags and stray closing tags, so it must make progress even when the cursor sits
+   * on one of those delimiters. Returning null there without advancing would leave the calling
+   * loop stuck on the same token forever.
+   *
+   * @returns the TEXT node, or null when there is no token left.
+   */
+  private parseTextContent(): ASTNode | null {
+    const firstToken = this.consumeToken();
+    if (!firstToken) {
+      return null;
+    }
+
+    const startOffset = firstToken.startOffset || 0;
+    let endOffset = (firstToken.endOffset || 0) + 1;
+
+    // Extend the run up to, but not including, the next delimiter.
+    for (let token = this.currentToken(); token; token = this.currentToken()) {
+      if (token.tokenType === TagOpen || token.tokenType === TemplateOpen || token.tokenType === TagClosingOpen) {
+        break;
+      }
+      endOffset = (token.endOffset || 0) + 1;
+      this.position++;
+    }
+
+    return {
+      id: this.generateId(),
+      kind: 'TEXT',
+      start: startOffset,
+      end: endOffset,
+      content: this.text.slice(startOffset, endOffset),
+      children: [],
+      textSegments: [{ start: startOffset, end: endOffset }]
+    };
+  }
+
+  /**
+   * Parse a `{{ expression }}` template starting at the current TemplateOpen token.
+   *
+   * The image of every token before the closing `}}` is appended to the expression, which is
+   * stored trimmed. If the input ends before a `}}` is found, the node extends to the last token
+   * read.
+   *
+   * @returns the TEMPLATE node, or null when the cursor is not on a TemplateOpen token.
+   */
+  private parseTemplate(): ASTNode | null {
+    const opener = this.currentToken();
+    if (!opener || opener.tokenType !== TemplateOpen) {
+      return null;
+    }
+
+    this.consumeToken(); // consume '{{'
+
+    const startOffset = opener.startOffset || 0;
+    const pieces: string[] = [];
+    let endOffset = startOffset + 2; // just past '{{', used when no token follows
+
+    // Take tokens one by one; the closing '}}' updates the end offset but adds no text.
+    for (let token = this.consumeToken(); token; token = this.consumeToken()) {
+      endOffset = (token.endOffset || 0) + 1;
+      if (token.tokenType === TemplateClose) {
+        break;
+      }
+      pieces.push(token.image);
+    }
+
+    const templateNode: ASTNode = {
+      id: this.generateId(),
+      kind: 'TEMPLATE',
+      start: startOffset,
+      end: endOffset,
+      content: this.text.slice(startOffset, endOffset),
+      children: [],
+      expression: pieces.join('').trim()
+    };
+
+    return templateNode;
+  }
+
+  private parseAttributes(): AttributeInfo[] { // reads attributes at the cursor until a token that is not an Identifier
+    const attributes: AttributeInfo[] = [];
+
+    while (this.position < this.tokens.length) {
+      this.skipWhitespace();
+      
+      const token = this.currentToken();
+      if (!token || token.tokenType !== Identifier) {
+        break;
+      }
+
+      const keyToken = this.consumeToken()!;
+      const keyRange: SourceRange = {
+        start: keyToken.startOffset || 0,
+        end: (keyToken.endOffset || 0) + 1
+      };
+
+      this.skipWhitespace();
+
+      if (this.currentToken()?.tokenType !== Equals) {
+        // Boolean attribute: the value is the literal text 'true' and all three ranges reuse the key's span
+        attributes.push({
+          key: keyToken.image,
+          value: [{
+            id: this.generateId(),
+            kind: 'TEXT',
+            start: keyRange.start,
+            end: keyRange.end,
+            content: 'true',
+            children: []
+          }],
+          keyRange,
+          valueRange: keyRange,
+          fullRange: keyRange
+        });
+        continue;
+      }
+
+      this.consumeToken(); // consume '='
+      this.skipWhitespace();
+
+      const quoteToken = this.currentToken();
+      if (!quoteToken || (quoteToken.tokenType !== DoubleQuote && quoteToken.tokenType !== SingleQuote)) {
+        break; // Invalid attribute (no opening quote): stop; the key and '=' already consumed are not recorded
+      }
+
+      const isDoubleQuote = quoteToken.tokenType === DoubleQuote;
+      const valueStart = (quoteToken.endOffset || 0) + 1; // offset just after the opening quote
+      this.consumeToken(); // consume opening quote
+
+      const valueNodes: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] = [];
+      let valueEnd = valueStart;
+
+      // Parse attribute value content
+      while (this.position < this.tokens.length) {
+        const token = this.currentToken();
+        if (!token) {
+        break;
+      }
+
+        if ((isDoubleQuote && token.tokenType === DoubleQuote) || 
+            (!isDoubleQuote && token.tokenType === SingleQuote)) {
+          valueEnd = token.startOffset || valueEnd; // offset of the closing quote, i.e. exclusive end of the value
+          this.consumeToken(); // consume closing quote
+          break;
+        } else if (token.tokenType === TemplateOpen) {
+          const templateNode = this.parseTemplate();
+          if (templateNode && (templateNode.kind === 'TEXT' || templateNode.kind === 'TEMPLATE')) {
+            valueNodes.push(templateNode as ASTNode & { kind: 'TEXT' | 'TEMPLATE' });
+          }
+        } else {
+          // Collect text content
+          const textStart = token.startOffset || 0;
+          let textEnd = (token.endOffset || 0) + 1;
+          let textContent = token.image;
+          
+          this.consumeToken();
+          
+          // Collect more text tokens (only the matching quote kind or '{{' ends the run)
+          while (this.position < this.tokens.length) {
+            const nextToken = this.currentToken();
+            if (!nextToken) {
+              break;
+            }
+            
+            if ((isDoubleQuote && nextToken.tokenType === DoubleQuote) ||
+                (!isDoubleQuote && nextToken.tokenType === SingleQuote) ||
+                nextToken.tokenType === TemplateOpen) {
+              break;
+            }
+            
+            textContent += nextToken.image;
+            textEnd = (nextToken.endOffset || 0) + 1;
+            this.consumeToken();
+          }
+
+          valueNodes.push({
+            id: this.generateId(),
+            kind: 'TEXT',
+            start: textStart,
+            end: textEnd,
+            content: textContent,
+            children: []
+          });
+        }
+      }
+
+      const valueRange: SourceRange = { start: valueStart, end: valueEnd };
+      const fullRange: SourceRange = { 
+        start: keyRange.start, 
+        end: (this.tokens[this.position - 1]?.endOffset || 0) + 1 // end of the last token consumed (closing quote if terminated)
+      };
+
+      attributes.push({
+        key: keyToken.image,
+        value: valueNodes,
+        keyRange,
+        valueRange,
+        fullRange
+      });
+    }
+
+    return attributes;
+  }
+
+  /**
+   * Apply the settings carried by a `<meta>` node to the parser context. Recognised attributes:
+   * `components`, `unknownComponents` and `minimalPomlVersion`; any other attribute is ignored.
+   * A node without attributes changes nothing.
+   */
+  private processMeta(metaNode: ASTNode): void {
+    for (const { key, value } of metaNode.attributes ?? []) {
+      // Single-valued settings read only the first value part.
+      const first = value[0]?.content;
+
+      if (key === 'components') {
+        this.processComponentsAttribute(value);
+      } else if (key === 'unknownComponents') {
+        // Keep the current behaviour when the value is not one of the three known words.
+        if (first === 'error' || first === 'warning' || first === 'ignore') {
+          this.context.unknownComponentBehavior = first;
+        }
+      } else if (key === 'minimalPomlVersion') {
+        this.context.minimalPomlVersion = first;
+      }
+      // Add other meta attributes as needed
+    }
+  }
+
+  // Apply a comma-separated list such as "+reference,-table": "+name" enables a component,
+  // "-name" disables it, and entries without either prefix are ignored. Only the first value
+  // part is read.
+  private processComponentsAttribute(value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[]): void {
+    const enabled = this.context.enabledComponents;
+    for (const rawEntry of (value[0]?.content || '').split(',')) {
+      const entry = rawEntry.trim();
+      const name = entry.slice(1);
+      if (entry.startsWith('+')) enabled.add(name);
+      else if (entry.startsWith('-')) enabled.delete(name);
+    }
+  }
+
+  // React to a tag name that is not an enabled component, as configured by
+  // context.unknownComponentBehavior: 'error' throws, 'warning' logs, 'ignore' does nothing.
+  private handleUnknownTag(tagName: string): void {
+    const behavior = this.context.unknownComponentBehavior;
+    if (behavior === 'error') {
+      throw new Error(`Unknown POML component: ${tagName}`);
+    }
+    if (behavior === 'warning') {
+      console.warn(`Unknown POML component: ${tagName}`);
+    }
+    // 'ignore': nothing to do
+  }
+}
+
+// Export function to create and use the parser
+export function parseExtendedPoml(text: string, context: Partial<PomlContext> = {}): ASTNode {
+  // Lay down defaults first so that any field supplied by the caller overrides them.
+  const defaults: PomlContext = {
+    variables: {},
+    stylesheet: {},
+    sourcePath: '',
+    enabledComponents: new Set(),
+    unknownComponentBehavior: 'warning'
+  };
+  const fullContext: PomlContext = { ...defaults, ...context };
+
+  return new CSTParser(fullContext).parse(text);
+}
\ No newline at end of file
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
new file mode 100644
index 00000000..1e9e5ad6
--- /dev/null
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -0,0 +1,129 @@
+import { describe, expect, test } from '@jest/globals';
+import { parseExtendedPoml, ASTNode } from 'poml/reader/cst';
+
+describe('Extended POML CST Parser', () => {
+  test('parses pure text content', () => {
+    const input = 'This is plain text content.';
+    const result = parseExtendedPoml(input);
+    
+    expect(result.kind).toBe('TEXT');
+    expect(result.content).toBe(input);
+    expect(result.children).toHaveLength(0);
+  });
+
+  test('parses simple POML element', () => {
+    const input = '<task>Analyze the data</task>';
+    const result = parseExtendedPoml(input);
+    
+    expect(result.kind).toBe('TEXT');
+    expect(result.children).toHaveLength(1);
+    
+    const taskNode = result.children[0];
+    expect(taskNode.kind).toBe('POML');
+    expect(taskNode.tagName).toBe('task');
+    expect(taskNode.children).toHaveLength(1);
+    expect(taskNode.children[0].content).toBe('Analyze the data');
+  });
+
+  test('parses mixed content', () => {
+    const input = `# My Document
+
+This is regular text.
+
+<task>
+  Process this data
+</task>
+
+More text here.`;
+    
+    const result = parseExtendedPoml(input);
+    
+    expect(result.kind).toBe('TEXT');
+    expect(result.children.length).toBeGreaterThan(1);
+    
+    // Should have text nodes and POML nodes
+    const pomlNodes = result.children.filter(child => child.kind === 'POML');
+    expect(pomlNodes).toHaveLength(1);
+    expect(pomlNodes[0].tagName).toBe('task');
+  });
+
+  test('parses self-closing elements', () => {
+    const input = '<meta components="+reference,-table" />';
+    const result = parseExtendedPoml(input);
+    
+    expect(result.children).toHaveLength(1);
+    const metaNode = result.children[0];
+    expect(metaNode.kind).toBe('META');
+    expect(metaNode.tagName).toBe('meta');
+    expect(metaNode.attributes).toHaveLength(1);
+    expect(metaNode.attributes![0].key).toBe('components');
+  });
+
+  test('parses template expressions', () => {
+    const input = 'Hello {{name}}!';
+    const result = parseExtendedPoml(input);
+    
+    expect(result.children.length).toBeGreaterThan(1);
+    const templateNode = result.children.find(child => child.kind === 'TEMPLATE');
+    expect(templateNode).toBeDefined();
+    expect(templateNode!.expression).toBe('name');
+  });
+
+  test('parses attributes with mixed content', () => {
+    const input = '<p class="header" id="{{elementId}}">Content</p>';
+    const result = parseExtendedPoml(input);
+    
+    const pNode = result.children.find(child => child.kind === 'POML');
+    expect(pNode).toBeDefined();
+    expect(pNode!.attributes).toHaveLength(2);
+    
+    const classAttr = pNode!.attributes!.find(attr => attr.key === 'class');
+    expect(classAttr).toBeDefined();
+    expect(classAttr!.value[0].content).toBe('header');
+    
+    const idAttr = pNode!.attributes!.find(attr => attr.key === 'id');
+    expect(idAttr).toBeDefined();
+    expect(idAttr!.value[0].kind).toBe('TEMPLATE');
+  });
+
+  test('handles text tag with nested POML', () => {
+    const input = `<text>
+This is **markdown** content.
+<cp caption="Nested">This is nested POML</cp>
+More markdown here.
+</text>`;
+    
+    const result = parseExtendedPoml(input);
+    const textNode = result.children.find(child => child.kind === 'POML' && child.tagName === 'text');
+    
+    expect(textNode).toBeDefined();
+    expect(textNode!.children.length).toBeGreaterThan(1);
+    
+    const cpNode = textNode!.children.find(child => child.kind === 'POML' && child.tagName === 'cp');
+    expect(cpNode).toBeDefined();
+  });
+
+  test('preserves source position information', () => {
+    const input = '<task>Test</task>';
+    const result = parseExtendedPoml(input);
+    
+    const taskNode = result.children[0];
+    expect(taskNode.start).toBe(0);
+    expect(taskNode.end).toBe(input.length);
+    expect(taskNode.openingTag).toBeDefined();
+    expect(taskNode.closingTag).toBeDefined();
+    expect(taskNode.openingTag!.nameRange.start).toBeGreaterThan(0);
+    expect(taskNode.openingTag!.nameRange.end).toBeGreaterThan(taskNode.openingTag!.nameRange.start);
+  });
+
+  test('handles unknown components gracefully', () => {
+    const input = '<unknown>This should be treated as text</unknown>';
+    
+    // Should not throw by default (warning behavior)
+    const result = parseExtendedPoml(input);
+    expect(result).toBeDefined();
+    
+    // Should treat unknown tag as text content
+    expect(result.children.length).toBeGreaterThan(0);
+  });
+});
\ No newline at end of file

From f57785d65017d788b5574f91948f1d999f66ed4f Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 26 Aug 2025 14:57:17 +0800
Subject: [PATCH 19/76] move to next

---
 packages/poml/{reader => next}/ast.ts       | 177 ++++++++++----------
 packages/poml/{reader => next}/base.tsx     |   0
 packages/poml/next/context.ts               |  17 ++
 packages/poml/{reader => next}/cst.ts       | 164 ++++++++++--------
 packages/poml/{reader => next}/index.tsx    |   0
 packages/poml/{reader => next}/lexer.ts     |  12 +-
 packages/poml/{reader => next}/meta.ts      |   0
 packages/poml/{reader => next}/poml.tsx     |   0
 packages/poml/{reader => next}/segment.ts   |  79 +++++----
 packages/poml/{reader => next}/text.tsx     |   0
 packages/poml/{reader => next}/tokenizer.ts |  58 +++----
 packages/poml/tests/reader/ast.test.ts      | 144 ++++++++--------
 packages/poml/tests/reader/cst.test.ts      |  52 +++---
 packages/poml/tests/reader/lexer.test.ts    | 114 +++++--------
 14 files changed, 419 insertions(+), 398 deletions(-)
 rename packages/poml/{reader => next}/ast.ts (79%)
 rename packages/poml/{reader => next}/base.tsx (100%)
 create mode 100644 packages/poml/next/context.ts
 rename packages/poml/{reader => next}/cst.ts (91%)
 rename packages/poml/{reader => next}/index.tsx (100%)
 rename packages/poml/{reader => next}/lexer.ts (92%)
 rename packages/poml/{reader => next}/meta.ts (100%)
 rename packages/poml/{reader => next}/poml.tsx (100%)
 rename packages/poml/{reader => next}/segment.ts (91%)
 rename packages/poml/{reader => next}/text.tsx (100%)
 rename packages/poml/{reader => next}/tokenizer.ts (82%)

diff --git a/packages/poml/reader/ast.ts b/packages/poml/next/ast.ts
similarity index 79%
rename from packages/poml/reader/ast.ts
rename to packages/poml/next/ast.ts
index 3b758e1f..c8db6b8a 100644
--- a/packages/poml/reader/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -9,49 +9,48 @@ export interface SourceRange {
 
 export interface AttributeInfo {
   key: string;
-  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[];  // Mixed content: array of text/template nodes
-  keyRange: SourceRange;      // Position of attribute name
-  valueRange: SourceRange;    // Position of attribute value (excluding quotes)
-  fullRange: SourceRange;     // Full attribute including key="value"
+  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[]; // Mixed content: array of text/template nodes
+  keyRange: SourceRange; // Position of attribute name
+  valueRange: SourceRange; // Position of attribute value (excluding quotes)
+  fullRange: SourceRange; // Full attribute including key="value"
 }
 
 // Main AST node interface
 export interface ASTNode {
-  id: string;                      // Unique ID for caching and React keys
+  id: string; // Unique ID for caching and React keys
   kind: 'META' | 'TEXT' | 'POML' | 'TEMPLATE';
-  start: number;                   // Source position start of entire node
-  end: number;                     // Source position end of entire node
-  content: string;                 // The raw string content
-  parent?: ASTNode;                // Reference to the parent node
-  children: ASTNode[];             // Child nodes
-  
+  start: number; // Source position start of entire node
+  end: number; // Source position end of entire node
+  content: string; // The raw string content
+  parent?: ASTNode; // Reference to the parent node
+  children: ASTNode[]; // Child nodes
+
   // For POML and META nodes
-  tagName?: string;                // Tag name (e.g., 'task', 'meta')
-  attributes?: AttributeInfo[];    // Detailed attribute information
-  
+  tagName?: string; // Tag name (e.g., 'task', 'meta')
+  attributes?: AttributeInfo[]; // Detailed attribute information
+
   // Detailed source positions
   openingTag?: {
-    start: number;                 // Position of '<'
-    end: number;                   // Position after '>'
-    nameRange: SourceRange;        // Position of tag name
+    start: number; // Position of '<'
+    end: number; // Position after '>'
+    nameRange: SourceRange; // Position of tag name
   };
-  
+
   closingTag?: {
-    start: number;                 // Position of '</'
-    end: number;                   // Position after '>'
-    nameRange: SourceRange;        // Position of tag name in closing tag
+    start: number; // Position of '</'
+    end: number; // Position after '>'
+    nameRange: SourceRange; // Position of tag name in closing tag
   };
-  
-  contentRange?: SourceRange;      // Position of content between tags (excluding nested tags)
-  
+
+  contentRange?: SourceRange; // Position of content between tags (excluding nested tags)
+
   // For TEXT nodes
-  textSegments?: SourceRange[];    // Multiple ranges for text content (excluding nested POML)
-  
+  textSegments?: SourceRange[]; // Multiple ranges for text content (excluding nested POML)
+
   // For TEMPLATE nodes
-  expression?: string;             // The full expression content between {{}}
+  expression?: string; // The full expression content between {{}}
 }
 
-
 // AST Parser class
 class ASTParser {
   private tokens: Token[];
@@ -68,20 +67,25 @@ class ASTParser {
 
   private buildValidTagsSet(): Set<string> {
     const validTags = new Set<string>();
-    
+
     for (const doc of componentDocs) {
       if (doc.name) {
         validTags.add(doc.name.toLowerCase());
         // Convert camelCase to kebab-case
-        validTags.add(doc.name.toLowerCase().replace(/([A-Z])/g, '-$1').toLowerCase());
+        validTags.add(
+          doc.name
+            .toLowerCase()
+            .replace(/([A-Z])/g, '-$1')
+            .toLowerCase(),
+        );
       }
     }
-    
+
     // Add special tags
     validTags.add('poml');
     validTags.add('text');
     validTags.add('meta');
-    
+
     return validTags;
   }
 
@@ -108,10 +112,10 @@ class ASTParser {
     // Parse attribute value for mixed text and template variables
     const result: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] = [];
     let currentPos = 0;
-    
+
     while (currentPos < value.length) {
       const templateStart = value.indexOf('{{', currentPos);
-      
+
       if (templateStart === -1) {
         // No more template variables, add remaining text
         if (currentPos < value.length) {
@@ -121,12 +125,12 @@ class ASTParser {
             start: currentPos,
             end: value.length,
             content: value.substring(currentPos),
-            children: []
+            children: [],
           });
         }
         break;
       }
-      
+
       // Add text before template variable
       if (templateStart > currentPos) {
         result.push({
@@ -135,10 +139,10 @@ class ASTParser {
           start: currentPos,
           end: templateStart,
           content: value.substring(currentPos, templateStart),
-          children: []
+          children: [],
         });
       }
-      
+
       // Find end of template variable
       const templateEnd = value.indexOf('}}', templateStart + 2);
       if (templateEnd === -1) {
@@ -149,11 +153,11 @@ class ASTParser {
           start: templateStart,
           end: value.length,
           content: value.substring(templateStart),
-          children: []
+          children: [],
         });
         break;
       }
-      
+
       // Add template variable
       const templateContent = value.substring(templateStart + 2, templateEnd);
       result.push({
@@ -163,78 +167,78 @@ class ASTParser {
         end: templateEnd + 2,
         content: value.substring(templateStart, templateEnd + 2),
         expression: templateContent.trim(),
-        children: []
+        children: [],
       });
-      
+
       currentPos = templateEnd + 2;
     }
-    
+
     return result;
   }
 
   private parseAttributes(tagContent: string): AttributeInfo[] {
     const attributes: AttributeInfo[] = [];
-    
+
     // Simple attribute parsing - can be enhanced later
     const attrRegex = /(\w+)=["']([^"']*?)["']/g;
     let match;
-    
+
     while ((match = attrRegex.exec(tagContent)) !== null) {
       const key = match[1];
       const value = match[2];
       const fullMatch = match[0];
       const matchStart = match.index;
-      
+
       attributes.push({
         key,
         value: this.parseAttributeValue(value),
         keyRange: { start: matchStart, end: matchStart + key.length },
         valueRange: { start: matchStart + key.length + 2, end: matchStart + key.length + 2 + value.length },
-        fullRange: { start: matchStart, end: matchStart + fullMatch.length }
+        fullRange: { start: matchStart, end: matchStart + fullMatch.length },
       });
     }
-    
+
     return attributes;
   }
 
   parse(): ASTNode {
     const children = this.parseNodes();
-    
+
     if (children.length === 1 && children[0].kind === 'POML') {
       return children[0];
     }
-    
+
     // Create root text node
     const rootNode: ASTNode = {
       id: this.generateId(),
       kind: 'TEXT',
       start: 0,
       end: this.tokens.length > 0 ? this.tokens[this.tokens.length - 1].end : 0,
-      content: this.tokens.map(t => t.value).join(''),
+      content: this.tokens.map((t) => t.value).join(''),
       children,
-      textSegments: []
+      textSegments: [],
     };
-    
+
     // Set parent references
-    children.forEach(child => {
+    children.forEach((child) => {
       child.parent = rootNode;
     });
-    
+
     return rootNode;
   }
 
   private parseNodes(): ASTNode[] {
     const nodes: ASTNode[] = [];
-    
+
     while (this.position < this.tokens.length) {
       const token = this.peek();
       if (!token) break;
-      
+
       if (token.type === 'TEMPLATE_VAR') {
         nodes.push(this.parseTemplateVariable());
       } else if (token.type === 'TAG_OPEN') {
         const tagName = this.extractTagName(token.value);
-        
+
         if (this.validPomlTags.has(tagName.toLowerCase())) {
           const node = this.parsePomlNode();
           if (node) {
@@ -251,14 +255,14 @@ class ASTParser {
         this.advance();
       }
     }
-    
+
     return nodes;
   }
 
   private parseTemplateVariable(): ASTNode {
     const token = this.advance()!;
     const expression = token.value.slice(2, -2).trim(); // Remove {{ and }}
-    
+
     return {
       id: this.generateId(),
       kind: 'TEMPLATE',
@@ -266,13 +270,13 @@ class ASTParser {
       end: token.end,
       content: token.value,
       expression,
-      children: []
+      children: [],
     };
   }
 
   private parseTextFromToken(): ASTNode {
     const token = this.advance()!;
-    
+
     return {
       id: this.generateId(),
       kind: 'TEXT',
@@ -280,20 +284,20 @@ class ASTParser {
       end: token.end,
       content: token.value,
       children: [],
-      textSegments: [{ start: token.start, end: token.end }]
+      textSegments: [{ start: token.start, end: token.end }],
     };
   }
 
   private parsePomlNode(): ASTNode | null {
     const openToken = this.advance()!;
     const tagName = this.extractTagName(openToken.value);
-    
+
     // Parse attributes
     const attributes = this.parseAttributes(openToken.value);
-    
+
     // Determine node kind
     const kind = tagName.toLowerCase() === 'meta' ? 'META' : 'POML';
-    
+
     const node: ASTNode = {
       id: this.generateId(),
       kind,
@@ -306,27 +310,27 @@ class ASTParser {
       openingTag: {
         start: openToken.start,
         end: openToken.end,
-        nameRange: { 
-          start: openToken.start + 1, 
-          end: openToken.start + 1 + tagName.length 
-        }
-      }
+        nameRange: {
+          start: openToken.start + 1,
+          end: openToken.start + 1 + tagName.length,
+        },
+      },
     };
-    
+
     // Parse children until we find the closing tag
     const children: ASTNode[] = [];
     let depth = 1;
-    
+
     while (this.position < this.tokens.length && depth > 0) {
       const token = this.peek();
       if (!token) break;
-      
+
       if (token.type === 'TAG_OPEN') {
         const childTagName = this.extractTagName(token.value);
         if (childTagName.toLowerCase() === tagName.toLowerCase()) {
           depth++;
         }
-        
+
         // Special handling for text tags - don't process template variables
         if (tagName.toLowerCase() === 'text') {
           children.push(this.parseTextFromToken());
@@ -352,8 +356,8 @@ class ASTParser {
               end: closeToken.end,
               nameRange: {
                 start: closeToken.start + 2,
-                end: closeToken.start + 2 + tagName.length
-              }
+                end: closeToken.start + 2 + tagName.length,
+              },
             };
             break;
           }
@@ -370,17 +374,20 @@ class ASTParser {
         children.push(textNode);
       }
     }
-    
+
     node.children = children;
-    
+
     // Update content to include full tag
     if (node.closingTag) {
-      node.content = this.tokens.slice(
-        this.tokens.findIndex(t => t.start === node.start),
-        this.tokens.findIndex(t => t.end === node.end) + 1
-      ).map(t => t.value).join('');
+      node.content = this.tokens
+        .slice(
+          this.tokens.findIndex((t) => t.start === node.start),
+          this.tokens.findIndex((t) => t.end === node.end) + 1,
+        )
+        .map((t) => t.value)
+        .join('');
     }
-    
+
     return node;
   }
 }
@@ -397,4 +404,4 @@ export class PomlAstParser {
   static parse(content: string): ASTNode {
     return parseAST(content);
   }
-}
\ No newline at end of file
+}
diff --git a/packages/poml/reader/base.tsx b/packages/poml/next/base.tsx
similarity index 100%
rename from packages/poml/reader/base.tsx
rename to packages/poml/next/base.tsx
diff --git a/packages/poml/next/context.ts b/packages/poml/next/context.ts
new file mode 100644
index 00000000..92939bda
--- /dev/null
+++ b/packages/poml/next/context.ts
@@ -0,0 +1,17 @@
+export class ContextEvaluator {
+  private contextStore: { [key: string]: any } = {};
+  private stack: Array<{ [key: string]: any }> = [];
+
+  public setGlobalVariable(key: string, value: any) {
+    this.contextStore[key] = value;
+  }
+
+  public setLocalVariable(key: string, value: any) {
+    if (this.stack.length === 0) {
+      throw new Error('No local stack available');
+    }
+    this.stack[this.stack.length - 1][key] = value;
+  }
+
+  public pushStack() {}
+}
diff --git a/packages/poml/reader/cst.ts b/packages/poml/next/cst.ts
similarity index 91%
rename from packages/poml/reader/cst.ts
rename to packages/poml/next/cst.ts
index e97d6d80..6ede8a39 100644
--- a/packages/poml/reader/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -1,9 +1,18 @@
 import { IToken } from 'chevrotain';
-import { 
-  extendedPomlLexer, 
-  TemplateOpen, TemplateClose, TagClosingOpen, TagSelfClose, 
-  TagOpen, TagClose, Equals, DoubleQuote, SingleQuote, 
-  Identifier, Whitespace, TextContent 
+import {
+  extendedPomlLexer,
+  TemplateOpen,
+  TemplateClose,
+  TagClosingOpen,
+  TagSelfClose,
+  TagOpen,
+  TagClose,
+  Equals,
+  DoubleQuote,
+  SingleQuote,
+  Identifier,
+  Whitespace,
+  TextContent,
 } from './lexer';
 
 import { listComponentAliases } from '../base';
@@ -31,29 +40,29 @@ export interface ASTNode {
   content: string;
   parent?: ASTNode;
   children: ASTNode[];
-  
+
   // For POML and META nodes
   tagName?: string;
   attributes?: AttributeInfo[];
-  
+
   // Detailed source positions
   openingTag?: {
     start: number;
     end: number;
     nameRange: SourceRange;
   };
-  
+
   closingTag?: {
     start: number;
     end: number;
     nameRange: SourceRange;
   };
-  
+
   contentRange?: SourceRange;
-  
+
   // For TEXT nodes
   textSegments?: SourceRange[];
-  
+
   // For TEMPLATE nodes
   expression?: string;
 }
@@ -126,7 +135,7 @@ export class CSTParser {
       end: text.length,
       content: text,
       children: [],
-      textSegments: []
+      textSegments: [],
     };
 
     this.parseDocument(rootNode);
@@ -144,7 +153,7 @@ export class CSTParser {
         const nextToken = this.peekToken();
         if (nextToken?.tokenType === Identifier) {
           const tagName = nextToken.image;
-          
+
           if (tagName === 'meta') {
             const metaNode = this.parseMetaTag();
             if (metaNode) {
@@ -188,10 +197,10 @@ export class CSTParser {
   private parseMetaTag(): ASTNode | null {
     const startPos = this.position;
     const openTagStart = this.currentToken()?.startOffset || 0;
-    
+
     this.consumeToken(); // consume '<'
     this.skipWhitespace();
-    
+
     const nameToken = this.consumeToken(); // consume 'meta'
     if (!nameToken || nameToken.image !== 'meta') {
       return null;
@@ -199,20 +208,20 @@ export class CSTParser {
 
     const nameRange: SourceRange = {
       start: nameToken.startOffset || 0,
-      end: (nameToken.endOffset || 0) + 1
+      end: (nameToken.endOffset || 0) + 1,
     };
 
     this.skipWhitespace();
-    
+
     const attributes = this.parseAttributes();
-    
+
     this.skipWhitespace();
-    
+
     // Check for self-closing or regular closing
     const closeToken = this.currentToken();
     let openTagEnd = 0;
     let hasContent = false;
-    
+
     if (closeToken?.tokenType === TagSelfClose) {
       this.consumeToken(); // consume '/>'
       openTagEnd = (closeToken.endOffset || 0) + 1;
@@ -234,8 +243,8 @@ export class CSTParser {
       openingTag: {
         start: openTagStart,
         end: openTagEnd,
-        nameRange
-      }
+        nameRange,
+      },
     };
 
     if (hasContent) {
@@ -250,7 +259,7 @@ export class CSTParser {
         }
         this.position++;
       }
-      
+
       // Parse closing tag
       if (this.currentToken()?.tokenType === TagClosingOpen) {
         const closingTagStart = this.currentToken()?.startOffset || 0;
@@ -258,15 +267,15 @@ export class CSTParser {
         const closingNameToken = this.consumeToken(); // consume 'meta'
         this.skipWhitespace();
         const finalClose = this.consumeToken(); // consume '>'
-        
+
         if (closingNameToken && finalClose) {
           metaNode.closingTag = {
             start: closingTagStart,
             end: (finalClose.endOffset || 0) + 1,
             nameRange: {
               start: closingNameToken.startOffset || 0,
-              end: (closingNameToken.endOffset || 0) + 1
-            }
+              end: (closingNameToken.endOffset || 0) + 1,
+            },
           };
           metaNode.end = (finalClose.endOffset || 0) + 1;
         }
@@ -279,10 +288,10 @@ export class CSTParser {
 
   private parsePomlElement(): ASTNode | null {
     const openTagStart = this.currentToken()?.startOffset || 0;
-    
+
     this.consumeToken(); // consume '<'
     this.skipWhitespace();
-    
+
     const nameToken = this.consumeToken();
     if (!nameToken) {
       return null;
@@ -291,20 +300,20 @@ export class CSTParser {
     const tagName = nameToken.image;
     const nameRange: SourceRange = {
       start: nameToken.startOffset || 0,
-      end: (nameToken.endOffset || 0) + 1
+      end: (nameToken.endOffset || 0) + 1,
     };
 
     this.skipWhitespace();
-    
+
     const attributes = this.parseAttributes();
-    
+
     this.skipWhitespace();
-    
+
     // Check for self-closing or regular closing
     const closeToken = this.currentToken();
     let openTagEnd = 0;
     let hasContent = false;
-    
+
     if (closeToken?.tokenType === TagSelfClose) {
       this.consumeToken(); // consume '/>'
       openTagEnd = (closeToken.endOffset || 0) + 1;
@@ -326,8 +335,8 @@ export class CSTParser {
       openingTag: {
         start: openTagStart,
         end: openTagEnd,
-        nameRange
-      }
+        nameRange,
+      },
     };
 
     if (hasContent) {
@@ -338,7 +347,7 @@ export class CSTParser {
         // Parse mixed content (POML and text)
         this.parseMixedContent(pomlNode);
       }
-      
+
       // Parse closing tag
       if (this.currentToken()?.tokenType === TagClosingOpen) {
         const closingTagStart = this.currentToken()?.startOffset || 0;
@@ -346,15 +355,15 @@ export class CSTParser {
         const closingNameToken = this.consumeToken();
         this.skipWhitespace();
         const finalClose = this.consumeToken(); // consume '>'
-        
+
         if (closingNameToken && finalClose) {
           pomlNode.closingTag = {
             start: closingTagStart,
             end: (finalClose.endOffset || 0) + 1,
             nameRange: {
               start: closingNameToken.startOffset || 0,
-              end: (closingNameToken.endOffset || 0) + 1
-            }
+              end: (closingNameToken.endOffset || 0) + 1,
+            },
           };
           pomlNode.end = (finalClose.endOffset || 0) + 1;
         }
@@ -469,7 +478,11 @@ export class CSTParser {
       if (token.tokenType === TextContent || token.tokenType === Whitespace) {
         endOffset = (token.endOffset || 0) + 1;
         this.position++;
-      } else if (token.tokenType === TagOpen || token.tokenType === TemplateOpen || token.tokenType === TagClosingOpen) {
+      } else if (
+        token.tokenType === TagOpen ||
+        token.tokenType === TemplateOpen ||
+        token.tokenType === TagClosingOpen
+      ) {
         break;
       } else {
         // Other tokens treated as text in this context
@@ -489,7 +502,7 @@ export class CSTParser {
       end: endOffset,
       content: this.text.slice(startOffset, endOffset),
       children: [],
-      textSegments: [{ start: startOffset, end: endOffset }]
+      textSegments: [{ start: startOffset, end: endOffset }],
     };
 
     return textNode;
@@ -532,7 +545,7 @@ export class CSTParser {
       end: endOffset,
       content: this.text.slice(startOffset, endOffset),
       children: [],
-      expression: expression.trim()
+      expression: expression.trim(),
     };
 
     return templateNode;
@@ -543,7 +556,7 @@ export class CSTParser {
 
     while (this.position < this.tokens.length) {
       this.skipWhitespace();
-      
+
       const token = this.currentToken();
       if (!token || token.tokenType !== Identifier) {
         break;
@@ -552,7 +565,7 @@ export class CSTParser {
       const keyToken = this.consumeToken()!;
       const keyRange: SourceRange = {
         start: keyToken.startOffset || 0,
-        end: (keyToken.endOffset || 0) + 1
+        end: (keyToken.endOffset || 0) + 1,
       };
 
       this.skipWhitespace();
@@ -561,17 +574,19 @@ export class CSTParser {
         // Boolean attribute
         attributes.push({
           key: keyToken.image,
-          value: [{
-            id: this.generateId(),
-            kind: 'TEXT',
-            start: keyRange.start,
-            end: keyRange.end,
-            content: 'true',
-            children: []
-          }],
+          value: [
+            {
+              id: this.generateId(),
+              kind: 'TEXT',
+              start: keyRange.start,
+              end: keyRange.end,
+              content: 'true',
+              children: [],
+            },
+          ],
           keyRange,
           valueRange: keyRange,
-          fullRange: keyRange
+          fullRange: keyRange,
         });
         continue;
       }
@@ -595,11 +610,10 @@ export class CSTParser {
       while (this.position < this.tokens.length) {
         const token = this.currentToken();
         if (!token) {
-        break;
-      }
+          break;
+        }
 
-        if ((isDoubleQuote && token.tokenType === DoubleQuote) || 
-            (!isDoubleQuote && token.tokenType === SingleQuote)) {
+        if ((isDoubleQuote && token.tokenType === DoubleQuote) || (!isDoubleQuote && token.tokenType === SingleQuote)) {
           valueEnd = token.startOffset || valueEnd;
           this.consumeToken(); // consume closing quote
           break;
@@ -613,22 +627,24 @@ export class CSTParser {
           const textStart = token.startOffset || 0;
           let textEnd = (token.endOffset || 0) + 1;
           let textContent = token.image;
-          
+
           this.consumeToken();
-          
+
           // Collect more text tokens
           while (this.position < this.tokens.length) {
             const nextToken = this.currentToken();
             if (!nextToken) {
               break;
             }
-            
-            if ((isDoubleQuote && nextToken.tokenType === DoubleQuote) ||
-                (!isDoubleQuote && nextToken.tokenType === SingleQuote) ||
-                nextToken.tokenType === TemplateOpen) {
+
+            if (
+              (isDoubleQuote && nextToken.tokenType === DoubleQuote) ||
+              (!isDoubleQuote && nextToken.tokenType === SingleQuote) ||
+              nextToken.tokenType === TemplateOpen
+            ) {
               break;
             }
-            
+
             textContent += nextToken.image;
             textEnd = (nextToken.endOffset || 0) + 1;
             this.consumeToken();
@@ -640,15 +656,15 @@ export class CSTParser {
             start: textStart,
             end: textEnd,
             content: textContent,
-            children: []
+            children: [],
           });
         }
       }
 
       const valueRange: SourceRange = { start: valueStart, end: valueEnd };
-      const fullRange: SourceRange = { 
-        start: keyRange.start, 
-        end: (this.tokens[this.position - 1]?.endOffset || 0) + 1 
+      const fullRange: SourceRange = {
+        start: keyRange.start,
+        end: (this.tokens[this.position - 1]?.endOffset || 0) + 1,
       };
 
       attributes.push({
@@ -656,7 +672,7 @@ export class CSTParser {
         value: valueNodes,
         keyRange,
         valueRange,
-        fullRange
+        fullRange,
       });
     }
 
@@ -674,7 +690,7 @@ export class CSTParser {
           this.processComponentsAttribute(attr.value);
           break;
         case 'unknownComponents':
-          const behavior = attr.value[0]?.content;
+          const behavior = attr.value[0]?.content; // eslint-disable-line
           if (behavior === 'error' || behavior === 'warning' || behavior === 'ignore') {
             this.context.unknownComponentBehavior = behavior;
           }
@@ -689,8 +705,8 @@ export class CSTParser {
 
   private processComponentsAttribute(value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[]): void {
     const components = value[0]?.content || '';
-    const parts = components.split(',').map(s => s.trim());
-    
+    const parts = components.split(',').map((s) => s.trim());
+
     for (const part of parts) {
       if (part.startsWith('+')) {
         this.context.enabledComponents.add(part.slice(1));
@@ -722,9 +738,9 @@ export function parseExtendedPoml(text: string, context: Partial<PomlContext> =
     sourcePath: '',
     enabledComponents: new Set(),
     unknownComponentBehavior: 'warning',
-    ...context
+    ...context,
   };
 
   const parser = new CSTParser(fullContext);
   return parser.parse(text);
-}
\ No newline at end of file
+}
diff --git a/packages/poml/reader/index.tsx b/packages/poml/next/index.tsx
similarity index 100%
rename from packages/poml/reader/index.tsx
rename to packages/poml/next/index.tsx
diff --git a/packages/poml/reader/lexer.ts b/packages/poml/next/lexer.ts
similarity index 92%
rename from packages/poml/reader/lexer.ts
rename to packages/poml/next/lexer.ts
index cfa886b7..a646defd 100644
--- a/packages/poml/reader/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -24,15 +24,16 @@ export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 */
 export const Identifier = createToken({
   name: 'Identifier',
-  pattern: /[a-zA-Z_][a-zA-Z0-9_-]*/
+  pattern: /[a-zA-Z_][a-zA-Z0-9_-]*/,
 });
 
 export const Whitespace = createToken({
   name: 'Whitespace',
   pattern: /[ \t\r\n]+/,
-  line_breaks: true
+  line_breaks: true,
 });
 
+/* eslint-disable no-irregular-whitespace */
 /* Catch-all for arbitrary text content
    - Match any char except:
        <          — starts a tag
@@ -43,8 +44,9 @@ export const Whitespace = createToken({
 export const TextContent = createToken({
   name: 'TextContent',
   pattern: /(?:[^<"'{}]|{(?!{)|}(?!}))+/,
-  line_breaks: true
+  line_breaks: true,
 });
+/* eslint-enable no-irregular-whitespace */
 
 // Define token order - more specific patterns first
 export const allTokens = [
@@ -61,7 +63,7 @@ export const allTokens = [
   Backslash,
   Identifier,
   Whitespace,
-  TextContent
+  TextContent,
 ];
 
 // Extended POML Lexer class
@@ -82,7 +84,7 @@ export class ExtendedPomlLexer {
     return {
       tokens: lexingResult.tokens,
       errors: lexingResult.errors,
-      groups: lexingResult.groups
+      groups: lexingResult.groups,
     };
   }
 }
diff --git a/packages/poml/reader/meta.ts b/packages/poml/next/meta.ts
similarity index 100%
rename from packages/poml/reader/meta.ts
rename to packages/poml/next/meta.ts
diff --git a/packages/poml/reader/poml.tsx b/packages/poml/next/poml.tsx
similarity index 100%
rename from packages/poml/reader/poml.tsx
rename to packages/poml/next/poml.tsx
diff --git a/packages/poml/reader/segment.ts b/packages/poml/next/segment.ts
similarity index 91%
rename from packages/poml/reader/segment.ts
rename to packages/poml/next/segment.ts
index 2218c086..440ea74a 100644
--- a/packages/poml/reader/segment.ts
+++ b/packages/poml/next/segment.ts
@@ -33,28 +33,33 @@ class Segmenter {
 
   private isValidPomlTag(tagName: string): boolean {
     const validTags = new Set<string>();
-    
+
     for (const doc of componentDocs) {
       if (doc.name) {
         validTags.add(doc.name.toLowerCase());
-        validTags.add(doc.name.toLowerCase().replace(/([A-Z])/g, '-$1').toLowerCase());
+        validTags.add(
+          doc.name
+            .toLowerCase()
+            .replace(/([A-Z])/g, '-$1')
+            .toLowerCase(),
+        );
       }
     }
-    
+
     validTags.add('poml');
     validTags.add('text');
     validTags.add('meta');
-    
+
     return validTags.has(tagName.toLowerCase());
   }
 
   private parseSegments(text: string, start: number = 0, parent?: Segment): Segment[] {
     const segments: Segment[] = [];
     let currentPos = start;
-    
+
     while (currentPos < text.length) {
       const nextOpenTag = text.indexOf('<', currentPos);
-      
+
       if (nextOpenTag === -1) {
         if (currentPos < text.length) {
           const textContent = text.substring(currentPos);
@@ -67,13 +72,13 @@ class Segmenter {
               content: textContent,
               path: this.sourcePath,
               parent,
-              children: []
+              children: [],
             });
           }
         }
         break;
       }
-      
+
       if (nextOpenTag > currentPos) {
         const textContent = text.substring(currentPos, nextOpenTag);
         if (textContent.trim()) {
@@ -85,46 +90,46 @@ class Segmenter {
             content: textContent,
             path: this.sourcePath,
             parent,
-            children: []
+            children: [],
           });
         }
       }
-      
+
       const tagEndPos = text.indexOf('>', nextOpenTag);
       if (tagEndPos === -1) {
         currentPos = nextOpenTag + 1;
         continue;
       }
-      
+
       const tagContent = text.substring(nextOpenTag + 1, tagEndPos);
       const tagName = tagContent.trim().split(/\s+/)[0];
-      
+
       if (tagName.startsWith('/')) {
         currentPos = tagEndPos + 1;
         continue;
       }
-      
+
       if (tagContent.endsWith('/')) {
         currentPos = tagEndPos + 1;
         continue;
       }
-      
+
       if (!this.isValidPomlTag(tagName)) {
         currentPos = tagEndPos + 1;
         continue;
       }
-      
+
       const closingTag = `</${tagName}>`;
       const closingTagPos = this.findClosingTag(text, tagName, tagEndPos + 1);
-      
+
       if (closingTagPos === -1) {
         currentPos = tagEndPos + 1;
         continue;
       }
-      
+
       const segmentContent = text.substring(nextOpenTag, closingTagPos + closingTag.length);
       const innerContent = text.substring(tagEndPos + 1, closingTagPos);
-      
+
       const segment: Segment = {
         id: this.generateId(),
         kind: tagName.toLowerCase() === 'meta' ? 'META' : 'POML',
@@ -134,60 +139,60 @@ class Segmenter {
         path: this.sourcePath,
         parent,
         children: [],
-        tagName: tagName.toLowerCase()
+        tagName: tagName.toLowerCase(),
       };
-      
+
       if (tagName.toLowerCase() === 'text') {
         segment.children = this.parseSegments(innerContent, tagEndPos + 1, segment);
       } else if (tagName.toLowerCase() !== 'meta') {
         const childSegments = this.parseSegments(innerContent, tagEndPos + 1, segment);
         segment.children = childSegments;
       }
-      
+
       segments.push(segment);
       currentPos = closingTagPos + closingTag.length;
     }
-    
+
     return segments;
   }
 
   private findClosingTag(text: string, tagName: string, startPos: number): number {
     let depth = 1;
     let pos = startPos;
-    
+
     while (pos < text.length && depth > 0) {
       const nextTag = text.indexOf('<', pos);
       if (nextTag === -1) {
         break;
       }
-      
+
       const tagEndPos = text.indexOf('>', nextTag);
       if (tagEndPos === -1) {
         break;
       }
-      
+
       const tagContent = text.substring(nextTag + 1, tagEndPos);
       const currentTagName = tagContent.trim().split(/\s+/)[0];
-      
+
       if (currentTagName === tagName) {
         depth++;
       } else if (currentTagName === `/${tagName}`) {
         depth--;
       }
-      
+
       pos = tagEndPos + 1;
     }
-    
-    return depth === 0 ? pos - (`</${tagName}>`.length) : -1;
+
+    return depth === 0 ? pos - `</${tagName}>`.length : -1;
   }
 
   public createSegments(content: string): Segment {
     const rootSegments = this.parseSegments(content);
-    
+
     if (rootSegments.length === 1 && rootSegments[0].kind === 'POML') {
       return rootSegments[0];
     }
-    
+
     if (rootSegments.length === 0) {
       return {
         id: this.generateId(),
@@ -197,10 +202,10 @@ class Segmenter {
         content: content,
         path: this.sourcePath,
         children: [],
-        parent: undefined
+        parent: undefined,
       };
     }
-    
+
     const rootSegment: Segment = {
       id: this.generateId(),
       kind: 'TEXT',
@@ -209,13 +214,13 @@ class Segmenter {
       content: content,
       path: this.sourcePath,
       children: rootSegments,
-      parent: undefined
+      parent: undefined,
     };
-    
-    rootSegments.forEach(segment => {
+
+    rootSegments.forEach((segment) => {
       segment.parent = rootSegment;
     });
-    
+
     return rootSegment;
   }
 }
diff --git a/packages/poml/reader/text.tsx b/packages/poml/next/text.tsx
similarity index 100%
rename from packages/poml/reader/text.tsx
rename to packages/poml/next/text.tsx
diff --git a/packages/poml/reader/tokenizer.ts b/packages/poml/next/tokenizer.ts
similarity index 82%
rename from packages/poml/reader/tokenizer.ts
rename to packages/poml/next/tokenizer.ts
index a8e166d1..ce1930b8 100644
--- a/packages/poml/reader/tokenizer.ts
+++ b/packages/poml/next/tokenizer.ts
@@ -16,14 +16,14 @@ export class Tokenizer {
 
   tokenize(): Token[] {
     const tokens: Token[] = [];
-    
+
     while (this.position < this.input.length) {
       // Check for template variables first
       if (this.peek() === '{' && this.peek(1) === '{') {
         tokens.push(this.readTemplateVariable());
         continue;
       }
-      
+
       // Check for XML tags
       if (this.peek() === '<') {
         const tagToken = this.readTag();
@@ -32,14 +32,14 @@ export class Tokenizer {
           continue;
         }
       }
-      
+
       // Read text content
       const textToken = this.readText();
       if (textToken.value.length > 0) {
         tokens.push(textToken);
       }
     }
-    
+
     return tokens;
   }
 
@@ -55,69 +55,71 @@ export class Tokenizer {
     const start = this.position;
     this.advance(); // {
     this.advance(); // {
-    
+
     while (this.position < this.input.length && !(this.peek() === '}' && this.peek(1) === '}')) {
       this.advance();
     }
-    
+
     if (this.peek() === '}' && this.peek(1) === '}') {
       this.advance(); // }
       this.advance(); // }
     }
-    
+
     return {
       type: 'TEMPLATE_VAR',
       value: this.input.substring(start, this.position),
       start,
-      end: this.position
+      end: this.position,
     };
   }
 
   private readTag(): Token | null {
     const start = this.position;
     this.advance(); // <
-    
+
     // Skip whitespace
     while (this.peek() === ' ' || this.peek() === '\t' || this.peek() === '\n') {
       this.advance();
     }
-    
+
     // Check for closing tag
     const isClosing = this.peek() === '/';
     if (isClosing) {
       this.advance();
     }
-    
+
     // Read tag name
     let tagName = '';
-    while (this.position < this.input.length && 
-           this.peek() !== '>' && 
-           this.peek() !== ' ' && 
-           this.peek() !== '\t' && 
-           this.peek() !== '\n') {
+    while (
+      this.position < this.input.length &&
+      this.peek() !== '>' &&
+      this.peek() !== ' ' &&
+      this.peek() !== '\t' &&
+      this.peek() !== '\n'
+    ) {
       tagName += this.advance();
     }
-    
+
     // Skip attributes for now (will be parsed separately)
     while (this.position < this.input.length && this.peek() !== '>') {
       this.advance();
     }
-    
+
     if (this.peek() === '>') {
       this.advance(); // >
-      
+
       // Check if self-closing
       const content = this.input.substring(start, this.position);
       const isSelfClosing = content.endsWith('/>');
-      
+
       return {
-        type: isSelfClosing ? 'TAG_SELF_CLOSE' : (isClosing ? 'TAG_CLOSE' : 'TAG_OPEN'),
+        type: isSelfClosing ? 'TAG_SELF_CLOSE' : isClosing ? 'TAG_CLOSE' : 'TAG_OPEN',
         value: content,
         start,
-        end: this.position
+        end: this.position,
       };
     }
-    
+
     // Invalid tag, backtrack
     this.position = start + 1;
     return null;
@@ -125,18 +127,16 @@ export class Tokenizer {
 
   private readText(): Token {
     const start = this.position;
-    
-    while (this.position < this.input.length && 
-           this.peek() !== '<' && 
-           !(this.peek() === '{' && this.peek(1) === '{')) {
+
+    while (this.position < this.input.length && this.peek() !== '<' && !(this.peek() === '{' && this.peek(1) === '{')) {
       this.advance();
     }
-    
+
     return {
       type: 'TEXT',
       value: this.input.substring(start, this.position),
       start,
-      end: this.position
+      end: this.position,
     };
   }
 }
diff --git a/packages/poml/tests/reader/ast.test.ts b/packages/poml/tests/reader/ast.test.ts
index 9921e210..4b5819db 100644
--- a/packages/poml/tests/reader/ast.test.ts
+++ b/packages/poml/tests/reader/ast.test.ts
@@ -1,11 +1,11 @@
 import { describe, expect, test } from '@jest/globals';
-import { parseAST, ASTNode } from 'poml/reader/ast';
+import { parseAST, ASTNode } from 'poml/next/ast';
 
 describe('parseAST', () => {
   test('pure text content', () => {
     const content = 'This is pure text content with no POML tags.';
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.content).toBe(content);
     expect(ast.start).toBe(0);
@@ -16,7 +16,7 @@ describe('parseAST', () => {
   test('single POML tag', () => {
     const content = '<task>Analyze the data</task>';
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('POML');
     expect(ast.tagName).toBe('task');
     expect(ast.content).toBe(content);
@@ -40,23 +40,23 @@ Here are some key points to consider:
 - Business impact`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(4);
-    
+
     const children = ast.children;
     expect(children[0].kind).toBe('TEXT');
     expect(children[0].content).toContain('# My Analysis Document');
-    
+
     expect(children[1].kind).toBe('POML');
     expect(children[1].tagName).toBe('task');
     expect(children[1].content).toBe(`<task>
   Analyze the following data and provide insights.
 </task>`);
-    
+
     expect(children[2].kind).toBe('TEXT');
     expect(children[2].content).toContain('Here are some key points');
-    
+
     expect(children[3].kind).toBe('TEXT');
     expect(children[3].content).toContain('- Data quality');
   });
@@ -70,7 +70,7 @@ Here are some key points to consider:
 </examples>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('POML');
     expect(ast.tagName).toBe('examples');
     expect(ast.children).toHaveLength(0);
@@ -88,7 +88,7 @@ Here are some key points to consider:
   test('text in text in POML', () => {
     const content = `<poml><text>This is a text<text> with nested text content.</text></text></poml>`;
     const ast = parseAST(content);
-    expect(ast.kind).toBe('POML');  
+    expect(ast.kind).toBe('POML');
     expect(ast.tagName).toBe('poml');
     expect(ast.children).toHaveLength(1);
     const textNode = ast.children[0];
@@ -122,18 +122,20 @@ Here are some key points to consider:
 </poml>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('POML');
     expect(ast.tagName).toBe('poml');
     expect(ast.children).toHaveLength(4);
-    
-    const textNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'text');
+
+    const textNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'text');
     expect(textNode).toBeDefined();
     expect(textNode!.children).toHaveLength(3);
-    
-    const nestedCpNode = textNode!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
+
+    const nestedCpNode = textNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'cp');
     expect(nestedCpNode).toBeDefined();
-    expect(nestedCpNode!.content).toBe('<cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>');
+    expect(nestedCpNode!.content).toBe(
+      '<cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>',
+    );
   });
 
   test('meta tags', () => {
@@ -146,16 +148,16 @@ Here are some key points to consider:
 <task>Complete the analysis</task>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(3);
-    
-    const metaNode = ast.children.find(c => c.kind === 'META');
+
+    const metaNode = ast.children.find((c) => c.kind === 'META');
     expect(metaNode).toBeDefined();
     expect(metaNode!.tagName).toBe('meta');
     expect(metaNode!.children).toHaveLength(0);
-    
-    const taskNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'task');
+
+    const taskNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'task');
     expect(taskNode).toBeDefined();
   });
 
@@ -165,15 +167,15 @@ Here are some key points to consider:
 <random>This should also be ignored</random>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(3);
-    
-    const taskNode = ast.children.find(c => c.kind === 'POML');
+
+    const taskNode = ast.children.find((c) => c.kind === 'POML');
     expect(taskNode).toBeDefined();
     expect(taskNode!.tagName).toBe('task');
-    
-    const textNodes = ast.children.filter(c => c.kind === 'TEXT');
+
+    const textNodes = ast.children.filter((c) => c.kind === 'TEXT');
     expect(textNodes).toHaveLength(2);
     expect(textNodes[0].content).toContain('<invalid-tag>This should be ignored</invalid-tag>');
     expect(textNodes[1].content).toContain('<random>This should also be ignored</random>');
@@ -186,11 +188,11 @@ Here are some key points to consider:
 <hint>Valid hint</hint>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(4);
-    
-    const pomlNodes = ast.children.filter(c => c.kind === 'POML');
+
+    const pomlNodes = ast.children.filter((c) => c.kind === 'POML');
     expect(pomlNodes).toHaveLength(3);
     expect(pomlNodes[0].tagName).toBe('task');
     expect(pomlNodes[2].tagName).toBe('hint');
@@ -202,15 +204,15 @@ Here are some key points to consider:
 <unclosed>This has no closing tag`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(3);
-    
-    const hintNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
+
+    const hintNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'hint');
     expect(hintNode).toBeDefined();
     expect(hintNode!.content).toBe('<hint>Complete hint</hint>');
-    
-    const textNodes = ast.children.filter(c => c.kind === 'TEXT');
+
+    const textNodes = ast.children.filter((c) => c.kind === 'TEXT');
     expect(textNodes).toHaveLength(2);
     expect(textNodes[0].content).toBe('<task>Incomplete tag\n');
     expect(textNodes[1].content).toBe('\n<unclosed>This has no closing tag');
@@ -219,7 +221,7 @@ Here are some key points to consider:
   test('malformed POML tags are ignored', () => {
     const content = `<task>Valid task`;
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(0);
   });
@@ -227,7 +229,7 @@ Here are some key points to consider:
   test('empty content', () => {
     const content = '';
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.content).toBe('');
     expect(ast.children).toHaveLength(0);
@@ -236,7 +238,7 @@ Here are some key points to consider:
   test('whitespace-only content', () => {
     const content = '   \n\n\t  \n  ';
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.content).toBe(content);
     expect(ast.children).toHaveLength(0);
@@ -248,11 +250,11 @@ Here are some key points to consider:
 <user-msg>User message</user-msg>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(4);
-    
-    const pomlNodes = ast.children.filter(c => c.kind === 'POML');
+
+    const pomlNodes = ast.children.filter((c) => c.kind === 'POML');
     expect(pomlNodes).toHaveLength(3);
     expect(pomlNodes[0].tagName).toBe('output-format');
     expect(pomlNodes[1].tagName).toBe('system-msg');
@@ -269,21 +271,21 @@ Here are some key points to consider:
 </task>`;
 
     const ast = parseAST(content);
-    
+
     const taskNode = ast;
     expect(taskNode.kind).toBe('POML');
     expect(taskNode.tagName).toBe('task');
     expect(taskNode.parent).toBeUndefined();
-    
-    const hintNode = taskNode.children.find(c => c.kind === 'POML' && c.tagName === 'hint');
+
+    const hintNode = taskNode.children.find((c) => c.kind === 'POML' && c.tagName === 'hint');
     expect(hintNode).toBeDefined();
     expect(hintNode!.parent).toBe(taskNode);
-    
-    const examplesNode = taskNode.children.find(c => c.kind === 'POML' && c.tagName === 'examples');
+
+    const examplesNode = taskNode.children.find((c) => c.kind === 'POML' && c.tagName === 'examples');
     expect(examplesNode).toBeDefined();
     expect(examplesNode!.parent).toBe(taskNode);
-    
-    const exampleNode = examplesNode!.children.find(c => c.kind === 'POML' && c.tagName === 'example');
+
+    const exampleNode = examplesNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'example');
     expect(exampleNode).toBeDefined();
     expect(exampleNode!.parent).toBe(examplesNode);
   });
@@ -296,19 +298,19 @@ Here are some key points to consider:
     const ast = parseAST(content);
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(5);
-    
+
     function collectAllNodes(node: ASTNode): ASTNode[] {
       const all = [node];
-      node.children.forEach(child => {
+      node.children.forEach((child) => {
         all.push(...collectAllNodes(child));
       });
       return all;
     }
-    
+
     const allNodes = collectAllNodes(ast);
-    const ids = allNodes.map(s => s.id);
+    const ids = allNodes.map((s) => s.id);
     const uniqueIds = new Set(ids);
-    
+
     expect(uniqueIds.size).toBe(ids.length);
   });
 
@@ -338,41 +340,41 @@ There can be some intervening text here as well.
 <p>POML elements do not necessarily reside in a poml element.</p>`;
 
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.children).toHaveLength(5);
-    
-    const firstPomlNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'poml');
+
+    const firstPomlNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'poml');
     expect(firstPomlNode).toBeDefined();
     expect(firstPomlNode!.children).toHaveLength(4);
-    
-    const textNode = firstPomlNode!.children.find(c => c.kind === 'POML' && c.tagName === 'text');
+
+    const textNode = firstPomlNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'text');
     expect(textNode).toBeDefined();
     expect(textNode!.children).toHaveLength(3);
-    
-    const cpNode = textNode!.children.find(c => c.kind === 'POML' && c.tagName === 'cp');
+
+    const cpNode = textNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'cp');
     expect(cpNode).toBeDefined();
-    
-    const secondPomlNode = ast.children.filter(c => c.kind === 'POML' && c.tagName === 'poml')[1];
+
+    const secondPomlNode = ast.children.filter((c) => c.kind === 'POML' && c.tagName === 'poml')[1];
     expect(secondPomlNode).toBeDefined();
 
     const lineBreakNode = ast.children[3];
     expect(lineBreakNode.kind).toBe('TEXT');
     expect(lineBreakNode.content).toBe('\n\n');
 
-    const pNode = ast.children.find(c => c.kind === 'POML' && c.tagName === 'p');
+    const pNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'p');
     expect(pNode).toBeDefined();
   });
 
   test('template variables in content', () => {
     const content = `<task>Process {{variable}} with {{another_variable}}</task>`;
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('POML');
     expect(ast.tagName).toBe('task');
     expect(ast.children).toHaveLength(4); // text, template, text, template
-    
-    const templateNodes = ast.children.filter(c => c.kind === 'TEMPLATE');
+
+    const templateNodes = ast.children.filter((c) => c.kind === 'TEMPLATE');
     expect(templateNodes).toHaveLength(2);
     expect(templateNodes[0].expression).toBe('variable');
     expect(templateNodes[1].expression).toBe('another_variable');
@@ -381,7 +383,7 @@ There can be some intervening text here as well.
   test('template variables in text nodes are treated as literal', () => {
     const content = `<text>Variables like {{this}} are shown as-is</text>`;
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('TEXT');
     expect(ast.content).toBe(content);
     expect(ast.children).toHaveLength(0);
@@ -390,11 +392,11 @@ There can be some intervening text here as well.
   test('template variables in attribute values', () => {
     const content = `<task caption="Process {{variable}}">Content</task>`;
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('POML');
     expect(ast.tagName).toBe('task');
     expect(ast.attributes).toHaveLength(1);
-    
+
     const attr = ast.attributes![0];
     expect(attr.key).toBe('caption');
     expect(attr.value).toHaveLength(2); // text + template
@@ -407,10 +409,10 @@ There can be some intervening text here as well.
   test('mixed template variables and text in attributes', () => {
     const content = `<task title="Hello {{name}}, process {{data}} please">Content</task>`;
     const ast = parseAST(content);
-    
+
     expect(ast.kind).toBe('POML');
     expect(ast.attributes).toHaveLength(1);
-    
+
     const attr = ast.attributes![0];
     expect(attr.value).toHaveLength(4); // text, template, text, template
     expect(attr.value[0].content).toBe('Hello ');
@@ -418,4 +420,4 @@ There can be some intervening text here as well.
     expect(attr.value[2].content).toBe(', process ');
     expect(attr.value[3].expression).toBe('data');
   });
-});
\ No newline at end of file
+});
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 1e9e5ad6..4406f53f 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -1,11 +1,11 @@
 import { describe, expect, test } from '@jest/globals';
-import { parseExtendedPoml, ASTNode } from 'poml/reader/cst';
+import { parseExtendedPoml, ASTNode } from 'poml/next/cst';
 
 describe('Extended POML CST Parser', () => {
   test('parses pure text content', () => {
     const input = 'This is plain text content.';
     const result = parseExtendedPoml(input);
-    
+
     expect(result.kind).toBe('TEXT');
     expect(result.content).toBe(input);
     expect(result.children).toHaveLength(0);
@@ -14,10 +14,10 @@ describe('Extended POML CST Parser', () => {
   test('parses simple POML element', () => {
     const input = '<task>Analyze the data</task>';
     const result = parseExtendedPoml(input);
-    
+
     expect(result.kind).toBe('TEXT');
     expect(result.children).toHaveLength(1);
-    
+
     const taskNode = result.children[0];
     expect(taskNode.kind).toBe('POML');
     expect(taskNode.tagName).toBe('task');
@@ -35,14 +35,14 @@ This is regular text.
 </task>
 
 More text here.`;
-    
+
     const result = parseExtendedPoml(input);
-    
+
     expect(result.kind).toBe('TEXT');
     expect(result.children.length).toBeGreaterThan(1);
-    
+
     // Should have text nodes and POML nodes
-    const pomlNodes = result.children.filter(child => child.kind === 'POML');
+    const pomlNodes = result.children.filter((child) => child.kind === 'POML');
     expect(pomlNodes).toHaveLength(1);
     expect(pomlNodes[0].tagName).toBe('task');
   });
@@ -50,7 +50,7 @@ More text here.`;
   test('parses self-closing elements', () => {
     const input = '<meta components="+reference,-table" />';
     const result = parseExtendedPoml(input);
-    
+
     expect(result.children).toHaveLength(1);
     const metaNode = result.children[0];
     expect(metaNode.kind).toBe('META');
@@ -62,9 +62,9 @@ More text here.`;
   test('parses template expressions', () => {
     const input = 'Hello {{name}}!';
     const result = parseExtendedPoml(input);
-    
+
     expect(result.children.length).toBeGreaterThan(1);
-    const templateNode = result.children.find(child => child.kind === 'TEMPLATE');
+    const templateNode = result.children.find((child) => child.kind === 'TEMPLATE');
     expect(templateNode).toBeDefined();
     expect(templateNode!.expression).toBe('name');
   });
@@ -72,16 +72,16 @@ More text here.`;
   test('parses attributes with mixed content', () => {
     const input = '<p class="header" id="{{elementId}}">Content</p>';
     const result = parseExtendedPoml(input);
-    
-    const pNode = result.children.find(child => child.kind === 'POML');
+
+    const pNode = result.children.find((child) => child.kind === 'POML');
     expect(pNode).toBeDefined();
     expect(pNode!.attributes).toHaveLength(2);
-    
-    const classAttr = pNode!.attributes!.find(attr => attr.key === 'class');
+
+    const classAttr = pNode!.attributes!.find((attr) => attr.key === 'class');
     expect(classAttr).toBeDefined();
     expect(classAttr!.value[0].content).toBe('header');
-    
-    const idAttr = pNode!.attributes!.find(attr => attr.key === 'id');
+
+    const idAttr = pNode!.attributes!.find((attr) => attr.key === 'id');
     expect(idAttr).toBeDefined();
     expect(idAttr!.value[0].kind).toBe('TEMPLATE');
   });
@@ -92,21 +92,21 @@ This is **markdown** content.
 <cp caption="Nested">This is nested POML</cp>
 More markdown here.
 </text>`;
-    
+
     const result = parseExtendedPoml(input);
-    const textNode = result.children.find(child => child.kind === 'POML' && child.tagName === 'text');
-    
+    const textNode = result.children.find((child) => child.kind === 'POML' && child.tagName === 'text');
+
     expect(textNode).toBeDefined();
     expect(textNode!.children.length).toBeGreaterThan(1);
-    
-    const cpNode = textNode!.children.find(child => child.kind === 'POML' && child.tagName === 'cp');
+
+    const cpNode = textNode!.children.find((child) => child.kind === 'POML' && child.tagName === 'cp');
     expect(cpNode).toBeDefined();
   });
 
   test('preserves source position information', () => {
     const input = '<task>Test</task>';
     const result = parseExtendedPoml(input);
-    
+
     const taskNode = result.children[0];
     expect(taskNode.start).toBe(0);
     expect(taskNode.end).toBe(input.length);
@@ -118,12 +118,12 @@ More markdown here.
 
   test('handles unknown components gracefully', () => {
     const input = '<unknown>This should be treated as text</unknown>';
-    
+
     // Should not throw by default (warning behavior)
     const result = parseExtendedPoml(input);
     expect(result).toBeDefined();
-    
+
     // Should treat unknown tag as text content
     expect(result.children.length).toBeGreaterThan(0);
   });
-});
\ No newline at end of file
+});
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index bbd8f976..432c06ee 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -14,19 +14,19 @@ import {
   Backslash,
   Identifier,
   Whitespace,
-  TextContent
-} from 'poml/reader/lexer';
+  TextContent,
+} from 'poml/next/lexer';
 
 // Helper function to extract token images
 function tokenImages(input: string): string[] {
   const result = extendedPomlLexer.tokenize(input);
-  return result.tokens.map(t => t.image);
+  return result.tokens.map((t) => t.image);
 }
 
 // Helper function to extract token types
 function tokenTypes(input: string): any[] {
   const result = extendedPomlLexer.tokenize(input);
-  return result.tokens.map(t => t.tokenType);
+  return result.tokens.map((t) => t.tokenType);
 }
 
 // Helper function to get full tokenization result
@@ -85,7 +85,7 @@ describe('Edge Cases', () => {
       'poml',
       '>',
       'ghi',
-      '"'
+      '"',
     ]);
   });
 
@@ -103,7 +103,7 @@ describe('Edge Cases', () => {
       'ghi',
       '</',
       'poml',
-      '>'
+      '>',
     ]);
   });
 
@@ -121,7 +121,7 @@ describe('Edge Cases', () => {
       '内容',
       '<',
       ' ',
-      '标签>'
+      '标签>',
     ]);
   });
 
@@ -143,7 +143,7 @@ describe('Edge Cases', () => {
       '"',
       'test',
       '"',
-      '>'
+      '>',
     ]);
   });
 
@@ -161,7 +161,7 @@ describe('Edge Cases', () => {
       '"',
       ' ',
       'quotes',
-      '"'
+      '"',
     ]);
   });
 
@@ -191,16 +191,16 @@ describe('Edge Cases', () => {
   <li class="item-{{@index}}">
     <span title="{{description}}">{{name}}</span>
   </li>
-{{/each}}`
+{{/each}}`,
     ];
 
-    realWorldTests.forEach(test => {
+    realWorldTests.forEach((test) => {
       const result = tokenize(test);
       expect(result.errors).toHaveLength(0);
       expect(result.tokens.length).toBeGreaterThan(0);
 
       // Verify position integrity
-      result.tokens.forEach(token => {
+      result.tokens.forEach((token) => {
         expect(token.startOffset).toBeGreaterThanOrEqual(0);
         expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
       });
@@ -216,14 +216,14 @@ describe('Edge Cases', () => {
       'first=one second=two',
       '=standalone',
       'text=content',
-      'a=b=c'
+      'a=b=c',
     ];
 
-    equalsTests.forEach(test => {
+    equalsTests.forEach((test) => {
       const result = tokenize(test);
       expect(result.errors).toHaveLength(0);
 
-      const equalsTokens = result.tokens.filter(t => t.tokenType.name === 'Equals');
+      const equalsTokens = result.tokens.filter((t) => t.tokenType.name === 'Equals');
       expect(equalsTokens.length).toBeGreaterThan(0);
     });
   });
@@ -231,7 +231,7 @@ describe('Edge Cases', () => {
   test('should handle edge cases with zero-length matches', () => {
     const edgeCases = ['', ' ', '\n', '\t', '\r', '{{}}', '<!---->', '<>', '""', "''", '\\'];
 
-    edgeCases.forEach(test => {
+    edgeCases.forEach((test) => {
       const result = tokenize(test);
       expect(result.errors).toHaveLength(0);
 
@@ -295,7 +295,7 @@ line2 <tag>
 line3`;
     const result = tokenize(input);
 
-    const tagToken = result.tokens.find(t => t.tokenType === TagOpen);
+    const tagToken = result.tokens.find((t) => t.tokenType === TagOpen);
     expect(tagToken).toBeDefined();
     expect(tagToken!.startLine).toBe(2);
     expect(tagToken!.startColumn).toBe(7); // After "line2 "
@@ -368,7 +368,7 @@ Analyze data
       'content',
       '</',
       'task',
-      '>'
+      '>',
     ]);
   });
 
@@ -385,7 +385,7 @@ Analyze data
       '}}',
       '/file.txt',
       '"',
-      '>'
+      '>',
     ]);
   });
 });
@@ -491,18 +491,7 @@ describe('Unicode and Special Characters', () => {
   test('should handle unicode', () => {
     expect(tokenImages('<こんにちは>')).toEqual(['<', 'こんにちは>']);
     expect(tokenImages('{{你好}}')).toEqual(['{{', '你好', '}}']);
-    expect(tokenImages('<tag attr="café">')).toEqual([
-      '<',
-      'tag',
-      ' ',
-      'attr',
-      '=',
-      '"',
-      'caf',
-      'é',
-      '"',
-      '>'
-    ]);
+    expect(tokenImages('<tag attr="café">')).toEqual(['<', 'tag', ' ', 'attr', '=', '"', 'caf', 'é', '"', '>']);
   });
 
   test('should maintain lexer stability with all edge cases', () => {
@@ -514,7 +503,7 @@ describe('Unicode and Special Characters', () => {
     expect(result.tokens.length).toBeGreaterThan(0);
 
     // Verify token integrity
-    result.tokens.forEach(token => {
+    result.tokens.forEach((token) => {
       expect(token.startOffset).toBeGreaterThanOrEqual(0);
       if (token.endOffset !== undefined) {
         expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset);
@@ -566,31 +555,14 @@ describe('Malformed Patterns', () => {
   test('should handle nested malformed patterns', () => {
     expect(tokenImages('<!-- <tag> -->')).toEqual(['<!-- <tag> -->']);
     expect(tokenImages('<!-- {{template}} -->')).toEqual(['<!-- {{template}} -->']);
-    expect(tokenImages('<tag><!-- comment</tag>')).toEqual([
-      '<',
-      'tag',
-      '>',
-      '<',
-      '!-- comment',
-      '</',
-      'tag',
-      '>'
-    ]);
+    expect(tokenImages('<tag><!-- comment</tag>')).toEqual(['<', 'tag', '>', '<', '!-- comment', '</', 'tag', '>']);
     expect(tokenImages('{{<tag>}}')).toEqual(['{{', '<', 'tag', '>', '}}']);
   });
 
   test('should handle quotes without proper pairing', () => {
     expect(tokenImages('"orphan quote')).toEqual(['"', 'orphan', ' ', 'quote']);
     expect(tokenImages("'another orphan")).toEqual(["'", 'another', ' ', 'orphan']);
-    expect(tokenImages('mixed "quote\' types')).toEqual([
-      'mixed',
-      ' ',
-      '"',
-      'quote',
-      "'",
-      ' ',
-      'types'
-    ]);
+    expect(tokenImages('mixed "quote\' types')).toEqual(['mixed', ' ', '"', 'quote', "'", ' ', 'types']);
     expect(tokenImages('escaped \\"quote\\" in text')).toEqual([
       'escaped',
       ' ',
@@ -602,7 +574,7 @@ describe('Malformed Patterns', () => {
       ' ',
       'in',
       ' ',
-      'text'
+      'text',
     ]);
   });
 
@@ -639,7 +611,7 @@ describe('Malformed Patterns', () => {
       '>',
       '{{',
       ' ',
-      'broken'
+      'broken',
     ]);
     expect(tokenImages('<!--comment--><tag>more{{ content')).toEqual([
       '<!--comment-->',
@@ -649,7 +621,7 @@ describe('Malformed Patterns', () => {
       'more',
       '{{',
       ' ',
-      'content'
+      'content',
     ]);
     expect(tokenImages("\"quoted text<tag attr='mixed'>end")).toEqual([
       '"',
@@ -665,7 +637,7 @@ describe('Malformed Patterns', () => {
       'mixed',
       "'",
       '>',
-      'end'
+      'end',
     ]);
   });
 
@@ -693,11 +665,11 @@ describe('Position Tracking Accuracy', () => {
 final line`;
     const result = tokenize(input);
 
-    const tagOpenToken = result.tokens.find(t => t.image === '<' && t.startLine === 2);
+    const tagOpenToken = result.tokens.find((t) => t.image === '<' && t.startLine === 2);
     expect(tagOpenToken).toBeDefined();
     expect(tagOpenToken!.startColumn).toBe(1);
 
-    const variableToken = result.tokens.find(t => t.image === 'variable');
+    const variableToken = result.tokens.find((t) => t.image === 'variable');
     expect(variableToken).toBeDefined();
     expect(variableToken!.startLine).toBe(3);
   });
@@ -707,7 +679,7 @@ final line`;
     const result = tokenize(input);
 
     expect(result.tokens.length).toBeGreaterThan(0);
-    result.tokens.forEach(token => {
+    result.tokens.forEach((token) => {
       expect(token.startOffset).toBeGreaterThanOrEqual(0);
       expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
       expect(token.startLine).toBeGreaterThanOrEqual(1);
@@ -720,7 +692,7 @@ final line`;
     const result = tokenize(input);
 
     // Verify all tokens have valid positions
-    result.tokens.forEach(token => {
+    result.tokens.forEach((token) => {
       expect(token.startOffset).toBeGreaterThanOrEqual(0);
       expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
       expect(token.startLine).toBeGreaterThanOrEqual(1);
@@ -733,8 +705,8 @@ final line`;
     const result = tokenize(input);
 
     // Find tokens and verify their positions make sense
-    const tagOpen = result.tokens.find(t => t.image === '<' && t.startLine === 1);
-    const innerOpen = result.tokens.find(t => t.image === '<' && t.startLine === 2);
+    const tagOpen = result.tokens.find((t) => t.image === '<' && t.startLine === 1);
+    const innerOpen = result.tokens.find((t) => t.image === '<' && t.startLine === 2);
 
     expect(tagOpen).toBeDefined();
     expect(innerOpen).toBeDefined();
@@ -751,7 +723,7 @@ final line`;
 
     // Verify complete coverage
     let expectedOffset = 0;
-    sortedTokens.forEach(token => {
+    sortedTokens.forEach((token) => {
       expect(token.startOffset).toBeGreaterThanOrEqual(expectedOffset);
       expectedOffset = token.endOffset! + 1;
     });
@@ -768,7 +740,7 @@ comment -->
 more text`;
 
     const result = tokenize(input);
-    const commentToken = result.tokens.find(t => t.tokenType.name === 'Comment');
+    const commentToken = result.tokens.find((t) => t.tokenType.name === 'Comment');
 
     expect(commentToken).toBeDefined();
     expect(commentToken!.startLine).toBe(2);
@@ -780,11 +752,11 @@ more text`;
     const result = tokenize(input);
 
     // Check that line numbers increase correctly
-    const lines = new Set(result.tokens.map(t => t.startLine));
+    const lines = new Set(result.tokens.map((t) => t.startLine));
     expect(lines.size).toBeGreaterThan(1);
 
     // Verify positions are sequential
-    result.tokens.forEach(token => {
+    result.tokens.forEach((token) => {
       expect(token.startOffset).toBeGreaterThanOrEqual(0);
       expect(token.endOffset).toBeGreaterThanOrEqual(token.startOffset!);
     });
@@ -862,10 +834,10 @@ describe('Performance and Stress Tests', () => {
       '<'.repeat(1000) + '>',
       '"'.repeat(2000),
       '<!--' + 'x'.repeat(10000) + '-->',
-      Array(1000).fill('{{}}').join('')
+      Array(1000).fill('{{}}').join(''),
     ];
 
-    backtrackingTests.forEach(test => {
+    backtrackingTests.forEach((test) => {
       const start = performance.now();
       const result = tokenize(test);
       const end = performance.now();
@@ -879,7 +851,7 @@ describe('Performance and Stress Tests', () => {
     const sizes = [1000, 5000, 10000, 20000];
     const times: number[] = [];
 
-    sizes.forEach(size => {
+    sizes.forEach((size) => {
       const content = 'x'.repeat(size);
       const start = performance.now();
       tokenize(content);
@@ -914,7 +886,7 @@ describe('Error Recovery', () => {
     expect(result.errors).toHaveLength(0);
     expect(result.tokens.length).toBeGreaterThan(0);
 
-    const types = result.tokens.map(t => t.tokenType);
+    const types = result.tokens.map((t) => t.tokenType);
     expect(types).toContain(Identifier);
     expect(types).toContain(TemplateOpen);
   });
@@ -930,7 +902,7 @@ describe('Error Recovery', () => {
     expect(result.tokens.length).toBeGreaterThan(0);
 
     // Should tokenize the valid parts
-    const images = result.tokens.map(t => t.image);
+    const images = result.tokens.map((t) => t.image);
     expect(images).toContain('<');
     expect(images).toContain('valid');
     expect(images).toContain('>');
@@ -941,7 +913,7 @@ describe('Error Recovery', () => {
     const input = 'text with @#$%^&*()[]{}|;:,.<>?/~`';
     const result = tokenize(input);
     expect(result.errors).toHaveLength(0);
-    const images = result.tokens.map(t => t.image);
+    const images = result.tokens.map((t) => t.image);
     expect(images).toEqual(['text', ' ', 'with', ' ', '@#$%^&*()[]{}|;:,.', '<', '>', '?/~`']);
   });
 });

From d8b272ebef2f8eab8174e075aa12d367f7716fa9 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 26 Aug 2025 15:44:19 +0800
Subject: [PATCH 20/76] error and source

---
 packages/poml/next/context.ts |  28 ++-
 packages/poml/next/error.ts   | 422 ++++++++++++++++++++++++++++++++++
 packages/poml/next/source.ts  | 115 +++++++++
 packages/poml/next/types.ts   |  48 ++++
 4 files changed, 607 insertions(+), 6 deletions(-)
 create mode 100644 packages/poml/next/error.ts
 create mode 100644 packages/poml/next/source.ts
 create mode 100644 packages/poml/next/types.ts

diff --git a/packages/poml/next/context.ts b/packages/poml/next/context.ts
index 92939bda..bee95045 100644
--- a/packages/poml/next/context.ts
+++ b/packages/poml/next/context.ts
@@ -1,17 +1,33 @@
-export class ContextEvaluator {
-  private contextStore: { [key: string]: any } = {};
-  private stack: Array<{ [key: string]: any }> = [];
+export type Context<T> = { [key: string]: T };
+/** One manager per POML compile (nested files do not count) */
+export class ContextManager<T> {
+  private contextStore: { [key: string]: T } = {};
+  private stack: Array<{ [key: string]: T }> = [];
 
-  public setGlobalVariable(key: string, value: any) {
+  public initialize(initialContext: { [key: string]: T }) {
+    this.contextStore = { ...initialContext };
+    this.stack = [];
+  }
+
+  public setGlobalVariable(key: string, value: T) {
     this.contextStore[key] = value;
   }
 
-  public setLocalVariable(key: string, value: any) {
+  public setLocalVariable(key: string, value: T) {
     if (this.stack.length === 0) {
       throw new Error('No local stack available');
     }
     this.stack[this.stack.length - 1][key] = value;
   }
 
-  public pushStack() {}
+  public pushStack(context: Context<T>) {
+    this.stack.push({ ...context });
+  }
+
+  public popStack() {
+    if (this.stack.length === 0) {
+      throw new Error('No local stack to pop');
+    }
+    this.stack.pop();
+  }
 }
diff --git a/packages/poml/next/error.ts b/packages/poml/next/error.ts
new file mode 100644
index 00000000..83928da6
--- /dev/null
+++ b/packages/poml/next/error.ts
@@ -0,0 +1,422 @@
+import * as path from 'path';
+import chalk from 'chalk';
+import { Diagnostic, Range, Severity } from './types';
+import sourceManager from './source';
+
+/**
+ * Global Error Collector.
+ *
+ * Goals:
+ *
+ * 1. Centralized singleton that collects errors from anywhere in the codebase
+ * 2. Support for error types (error/warning), source locations (file, line, column, index ranges), and contextual data
+ * 3. Handle errors from embedded languages (JSON, JS expressions) with source mapping back to original positions
+ * 4. Track errors across multiple source files without conflicts
+ * 5. Collect multiple errors without stopping execution
+ * 6. Clear errors between compilation runs or test cases
+ * 7. Generate human-readable, formatted error messages with source context
+ */
+export class ErrorCollector {
+  private diagnostics: Diagnostic[] = [];
+  private suppressedCodes = new Set<string>();
+  private maxErrors = 100;
+
+  /**
+   * Clear all collected diagnostics (suppressed codes are left in place)
+   */
+  public clear(): void {
+    this.diagnostics = [];
+  }
+
+  /**
+   * Post an error
+   */
+  public error(message: string, range?: Range, options: Partial<Diagnostic> = {}): void {
+    this.add({
+      ...options,
+      severity: Severity.ERROR,
+      message,
+      range,
+      sourceFile: options.sourceFile || sourceManager.getCurrentFile(),
+    });
+  }
+
+  /**
+   * Post a warning
+   */
+  public warning(message: string, range?: Range, options: Partial<Diagnostic> = {}): void {
+    this.add({
+      ...options,
+      severity: Severity.WARNING,
+      message,
+      range,
+      sourceFile: options.sourceFile || sourceManager.getCurrentFile(),
+    });
+  }
+
+  /**
+   * Post an info message
+   */
+  public info(message: string, range?: Range, options: Partial<Diagnostic> = {}): void {
+    this.add({
+      ...options,
+      severity: Severity.INFO,
+      message,
+      range,
+      sourceFile: options.sourceFile || sourceManager.getCurrentFile(),
+    });
+  }
+
+  /**
+   * Record a diagnostic unless its code is suppressed or the error limit has been reached
+   */
+  public add(diagnostic: Diagnostic): void {
+    // Suppressed codes are dropped first so they can never trigger the limit notice
+    if (diagnostic.code && this.suppressedCodes.has(diagnostic.code)) {
+      return;
+    }
+
+    // Once maxErrors entries are stored, append a single notice and drop everything after it
+    if (this.diagnostics.length >= this.maxErrors) {
+      if (this.diagnostics.length === this.maxErrors) {
+        this.diagnostics.push({
+          severity: Severity.ERROR,
+          message: `Error limit reached (${this.maxErrors}). Further errors suppressed.`,
+        });
+      }
+      return;
+    }
+
+    // Fall back to the file currently set on the source manager
+    if (!diagnostic.sourceFile && sourceManager.getCurrentFile()) {
+      diagnostic.sourceFile = sourceManager.getCurrentFile();
+    }
+
+    this.diagnostics.push(diagnostic);
+  }
+
+  /**
+   * Post a JSON parsing error with automatic position mapping
+   */
+  public jsonError(originalError: Error, jsonRange: Range): void {
+    // Extract position from JSON parse error if available
+    const posMatch = originalError.message.match(/position (\d+)/);
+    let range = jsonRange;
+
+    if (posMatch) {
+      const errorPos = parseInt(posMatch[1]);
+      // Map the JSON error position to the original source
+      range = {
+        start: jsonRange.start + errorPos,
+        end: jsonRange.start + errorPos + 1,
+      };
+    }
+
+    this.error(`JSON parsing error: ${originalError.message}`, range, {
+      code: 'JSON_PARSE_ERROR',
+      originalError,
+      hint: 'Check for trailing commas, unquoted keys, or undefined values',
+    });
+  }
+
+  /**
+   * Post a JavaScript expression evaluation error, narrowing the range via the stack trace when possible
+   */
+  public expressionError(originalError: Error, expressionRange: Range, evalHeaderLength: number = 0): void {
+    // Adjust range if there's a header (like "return " or "const result = ")
+    const adjustedRange =
+      evalHeaderLength > 0
+        ? {
+            start: expressionRange.start + evalHeaderLength,
+            end: expressionRange.end,
+          }
+        : expressionRange;
+
+    // Try to extract line/column from an "<anonymous>:line:col" frame in the error stack
+    const stackMatch = originalError.stack?.match(/<anonymous>:(\d+):(\d+)/);
+    let range = adjustedRange;
+
+    if (stackMatch) {
+      const errorLine = parseInt(stackMatch[1]);
+      const errorCol = parseInt(stackMatch[2]);
+
+      // NOTE(review): errorLine/errorCol are applied as-is below; evalHeaderLength is not subtracted — confirm
+      const currentFileContent = sourceManager.getCurrentFileContent();
+      if (currentFileContent) {
+        const exprContent = currentFileContent.substring(expressionRange.start, expressionRange.end);
+        const lines = exprContent.split('\n');
+
+        if (errorLine <= lines.length) {
+          let offset = expressionRange.start;
+          for (let i = 0; i < errorLine - 1; i++) {
+            offset += lines[i].length + 1; // +1 for newline
+          }
+          offset += Math.min(errorCol - 1, lines[errorLine - 1].length);
+
+          range = {
+            start: offset,
+            end: offset + 1,
+          };
+        }
+      }
+    }
+
+    this.error(`Expression evaluation failed: ${originalError.message}`, range, {
+      code: 'EXPRESSION_ERROR',
+      originalError,
+      hint: 'Check variable names and syntax in the expression',
+    });
+  }
+
+  /**
+   * Suppress errors with specific codes
+   */
+  public suppressCode(code: string): void {
+    this.suppressedCodes.add(code);
+  }
+
+  /**
+   * Format one diagnostic for CLI output: header, message, source excerpt with caret underline, and hint
+   */
+  private formatDiagnostic(diagnostic: Diagnostic): string {
+    const parts: string[] = [];
+
+    // Header: colored severity followed by the optional [code]
+    const severityColor = {
+      [Severity.ERROR]: chalk.red,
+      [Severity.WARNING]: chalk.yellow,
+      [Severity.INFO]: chalk.blue,
+    }[diagnostic.severity];
+
+    let header = severityColor(diagnostic.severity.toUpperCase());
+
+    if (diagnostic.code) {
+      header += chalk.gray(` [${diagnostic.code}]`);
+    }
+
+    // File location: file:line:column when the source and range are available, otherwise just the file
+    if (diagnostic.sourceFile) {
+      const source = sourceManager.loadSource(diagnostic.sourceFile);
+
+      if (source && diagnostic.range) {
+        const startPos = sourceManager.indexToPosition(source, diagnostic.range.start);
+        const location = `${diagnostic.sourceFile}:${startPos.line}:${startPos.column}`;
+        header += ` ${chalk.cyan(location)}`;
+      } else {
+        header += ` ${chalk.cyan(diagnostic.sourceFile)}`;
+      }
+    }
+
+    parts.push(header);
+
+    // Message
+    parts.push(`  ${diagnostic.message}`);
+
+    // Source context
+    if (diagnostic.sourceFile && diagnostic.range) {
+      const source = sourceManager.loadSource(diagnostic.sourceFile);
+
+      if (source) {
+        const startPos = sourceManager.indexToPosition(source, diagnostic.range.start);
+        const endPos = sourceManager.indexToPosition(source, diagnostic.range.end);
+
+        // Show up to two lines on either side of the error line (indices here are 0-based)
+        const contextLines = 2;
+        const startLine = Math.max(0, startPos.line - contextLines - 1);
+        const endLine = Math.min(source.lines.length - 1, startPos.line + contextLines - 1);
+
+        parts.push('');
+
+        for (let i = startLine; i <= endLine; i++) {
+          const lineNum = String(i + 1).padStart(4, ' ');
+          const isErrorLine = i === startPos.line - 1;
+          const pipe = isErrorLine ? '>' : '|';
+          const lineColor = isErrorLine ? chalk.white : chalk.gray;
+
+          parts.push(chalk.gray(`  ${lineNum} ${pipe}`) + ' ' + lineColor(source.lines[i]));
+
+          // Underline the error; the gutter above is 2 spaces + lineNum + ' ' + marker + ' ' wide
+          if (isErrorLine) {
+            const spacing = ' '.repeat(lineNum.length + 5 + startPos.column - 1);
+            let markerLength = 1;
+
+            if (startPos.line === endPos.line) {
+              markerLength = Math.max(1, endPos.column - startPos.column);
+            } else {
+              markerLength = Math.max(1, source.lines[i].length - startPos.column + 1);
+            }
+
+            const marker = '^'.repeat(Math.min(markerLength, 80));
+            parts.push(severityColor(spacing + marker));
+          }
+        }
+      }
+    }
+
+    // Hint
+    if (diagnostic.hint) {
+      parts.push('');
+      parts.push(chalk.green(`  💡 ${diagnostic.hint}`));
+    }
+
+    return parts.join('\n');
+  }
+
+  /**
+   * Get all errors
+   */
+  public getErrors(): Diagnostic[] {
+    return this.diagnostics.filter((d) => d.severity === Severity.ERROR);
+  }
+
+  /**
+   * Get all warnings
+   */
+  public getWarnings(): Diagnostic[] {
+    return this.diagnostics.filter((d) => d.severity === Severity.WARNING);
+  }
+
+  /**
+   * Check if there are any errors
+   */
+  public hasErrors(): boolean {
+    return this.getErrors().length > 0;
+  }
+
+  /**
+   * Get count by severity
+   */
+  public getCounts(): { errors: number; warnings: number; info: number } {
+    const counts = { errors: 0, warnings: 0, info: 0 };
+
+    for (const d of this.diagnostics) {
+      switch (d.severity) {
+        case Severity.ERROR:
+          counts.errors++;
+          break;
+        case Severity.WARNING:
+          counts.warnings++;
+          break;
+        case Severity.INFO:
+          counts.info++;
+          break;
+      }
+    }
+
+    return counts;
+  }
+
+  /**
+   * Format all diagnostics for CLI output
+   */
+  public format(
+    options: {
+      showWarnings?: boolean;
+      showInfo?: boolean;
+      groupByFile?: boolean;
+    } = {},
+  ): string {
+    const { showWarnings = true, showInfo = false, groupByFile = true } = options;
+
+    const filtered = this.diagnostics.filter((d) => {
+      if (d.severity === Severity.ERROR) return true;
+      if (d.severity === Severity.WARNING) return showWarnings;
+      if (d.severity === Severity.INFO) return showInfo;
+      return false;
+    });
+
+    if (filtered.length === 0) {
+      return chalk.green('✓ No issues found');
+    }
+
+    const output: string[] = [];
+
+    if (groupByFile) {
+      // Group by file
+      const byFile = new Map<string, Diagnostic[]>();
+      const noFile: Diagnostic[] = [];
+
+      for (const d of filtered) {
+        if (d.sourceFile) {
+          if (!byFile.has(d.sourceFile)) {
+            byFile.set(d.sourceFile, []);
+          }
+          byFile.get(d.sourceFile)!.push(d);
+        } else {
+          noFile.push(d);
+        }
+      }
+
+      // Sort files
+      const sortedFiles = Array.from(byFile.keys()).sort();
+
+      for (const file of sortedFiles) {
+        output.push(chalk.underline.bold(path.relative(process.cwd(), file)));
+        output.push('');
+
+        const diagnostics = byFile.get(file)!.sort((a, b) => {
+          if (!a.range || !b.range) return 0;
+          return a.range.start - b.range.start;
+        });
+
+        for (const d of diagnostics) {
+          output.push(this.formatDiagnostic(d));
+          output.push('');
+        }
+      }
+
+      // Add diagnostics without file
+      if (noFile.length > 0) {
+        output.push(chalk.underline.bold('General'));
+        output.push('');
+        for (const d of noFile) {
+          output.push(this.formatDiagnostic(d));
+          output.push('');
+        }
+      }
+    } else {
+      // Simple list
+      for (const d of filtered) {
+        output.push(this.formatDiagnostic(d));
+        output.push('');
+      }
+    }
+
+    // Summary
+    const counts = this.getCounts();
+    const summary: string[] = [];
+
+    if (counts.errors > 0) {
+      summary.push(chalk.red(`${counts.errors} error${counts.errors !== 1 ? 's' : ''}`));
+    }
+    if (counts.warnings > 0 && showWarnings) {
+      summary.push(chalk.yellow(`${counts.warnings} warning${counts.warnings !== 1 ? 's' : ''}`));
+    }
+    if (counts.info > 0 && showInfo) {
+      summary.push(chalk.blue(`${counts.info} info`));
+    }
+
+    output.push(chalk.bold(`Found ${summary.join(', ')}`));
+
+    return output.join('\n');
+  }
+
+  /**
+   * Print formatted errors to console
+   */
+  public print(options?: Parameters<typeof this.format>[0]): void {
+    console.log(this.format(options));
+  }
+
+  /**
+   * Get all diagnostics
+   */
+  public getDiagnostics(): ReadonlyArray<Diagnostic> {
+    return this.diagnostics;
+  }
+}
+
+// Create singleton instance
+const errorCollector = new ErrorCollector();
+
+export default errorCollector;
diff --git a/packages/poml/next/source.ts b/packages/poml/next/source.ts
new file mode 100644
index 00000000..50a09fb3
--- /dev/null
+++ b/packages/poml/next/source.ts
@@ -0,0 +1,115 @@
+import * as fs from 'fs';
+import { SourceFileCache, Position } from './types';
+
+export class SourceManager {
+  private sourceCache = new Map<string, SourceFileCache>();
+  private currentSourceFile?: string;
+  private currentSourceContent?: string;
+
+  /**
+   * Set the current source file context for subsequent errors; non-empty content is also cached
+   */
+  public setCurrentFile(sourceFile: string, content?: string): void {
+    this.currentSourceFile = sourceFile;
+    this.currentSourceContent = content;
+
+    if (content && sourceFile) {
+      this.cacheSource(sourceFile, content);
+    }
+  }
+
+  /**
+   * Clear current file context
+   */
+  public clearCurrentFile(): void {
+    this.currentSourceFile = undefined;
+    this.currentSourceContent = undefined;
+  }
+
+  public getCurrentFile(): string | undefined {
+    return this.currentSourceFile;
+  }
+
+  public getCurrentFileContent(): string | undefined {
+    return this.currentSourceContent;
+  }
+
+  /**
+   * Clear all
+   */
+  public clear(): void {
+    this.sourceCache.clear();
+    this.clearCurrentFile();
+  }
+
+  /**
+   * Cache source file content
+   */
+  private cacheSource(file: string, content: string): void {
+    const lines = content.split('\n');
+    const lineStarts: number[] = [0];
+
+    let pos = 0;
+    for (const line of lines) {
+      pos += line.length + 1; // +1 for newline
+      lineStarts.push(pos);
+    }
+
+    this.sourceCache.set(file, {
+      content,
+      lines,
+      lineStarts,
+    });
+  }
+
+  /**
+   * Return the cached source for a file, reading it from disk on a cache miss (null if the read fails)
+   */
+  public loadSource(file: string): SourceFileCache | null {
+    if (this.sourceCache.has(file)) {
+      return this.sourceCache.get(file)!;
+    }
+
+    try {
+      const content = fs.readFileSync(file, 'utf8');
+      this.cacheSource(file, content);
+      return this.sourceCache.get(file)!;
+    } catch (error) {
+      return null;
+    }
+  }
+
+  /**
+   * Convert a 0-based index into the source string to a 1-based line/column position
+   */
+  public indexToPosition(source: SourceFileCache, index: number): Position {
+    const { lineStarts } = source;
+
+    // Binary search for the last line whose start offset is <= index
+    let line = 0;
+    let left = 0;
+    let right = lineStarts.length - 1;
+
+    while (left < right) {
+      const mid = Math.floor((left + right + 1) / 2);
+      if (lineStarts[mid] <= index) {
+        left = mid;
+      } else {
+        right = mid - 1;
+      }
+    }
+
+    line = left;
+    const column = index - lineStarts[line];
+
+    return {
+      line: line + 1, // 1-based
+      column: column + 1, // 1-based
+      index,
+    };
+  }
+}
+
+// Create singleton instance
+const sourceManager = new SourceManager();
+export default sourceManager;
diff --git a/packages/poml/next/types.ts b/packages/poml/next/types.ts
new file mode 100644
index 00000000..ce6ac3b5
--- /dev/null
+++ b/packages/poml/next/types.ts
@@ -0,0 +1,48 @@
+/**
+ * Range in source file (start/end offsets into the source string)
+ */
+export interface Range {
+  start: number;
+  end: number;
+}
+
+/**
+ * Error severity levels
+ */
+export enum Severity {
+  ERROR = 'error',
+  WARNING = 'warning',
+  INFO = 'info',
+}
+
+/**
+ * Diagnostic interface
+ */
+export interface Diagnostic {
+  severity: Severity;
+  message: string;
+  sourceFile?: string;
+  range?: Range;
+  code?: string;
+  hint?: string;
+  originalError?: Error;
+}
+
+/**
+ * Position with line and column
+ */
+export interface Position {
+  line: number;
+  column: number;
+  index: number;
+}
+
+/**
+ * Source file cache entry
+ */
+export interface SourceFileCache {
+  filePath?: string;
+  content: string;
+  lines: string[];
+  lineStarts: number[];
+}

From 3726698e2a41f676d442e4921a1ad8b4d4b76ef2 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 26 Aug 2025 17:46:07 +0800
Subject: [PATCH 21/76] .

---
 packages/poml/next/ast.ts | 189 ++++++++++++++++++++++++++++----------
 1 file changed, 143 insertions(+), 46 deletions(-)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index c8db6b8a..8e54f437 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -1,61 +1,158 @@
 import { Tokenizer, Token } from './tokenizer';
 import componentDocs from '../assets/componentDocs.json';
+import { Range } from './types';
+
+export interface Node {
+  kind:
+    | 'META'
+    | 'EXPRESSION'
+    | 'VALUE'
+    | 'STRING'
+    | 'VALUE'
+    | 'FORLOOP'
+    | 'OPEN'
+    | 'CLOSE'
+    | 'SELFCLOSE'
+    | 'ELEMENT'
+    | 'TEXT'
+    | 'POML'
+    | 'ATTRIBUTE'
+    | 'TEMPLATE';
+  range: Range; // Range of the entire node in source
+}
+
+export interface ExpressionNode extends Node {
+  kind: 'EXPRESSION';
+  value: string;
+}
+
+/**
+ * A template node could be:
+ *
+ * 1. the value in an attribute like `if="i > 0"` -> `"i > 0"` with quotes
+ * 2. a standalone template variable like `{{ userName }}`
+ * 3.
+ */
+export interface TemplateNode extends Node {
+  kind: 'TEMPLATE';
+  value: ExpressionNode;
+}
+
+/**
+ * A string node represents a pure text, without any quotes or template variables.
+ *
+ * It's also sometimes reused to represent a key, an identifier, or a tag name.
+ */
+export interface StringNode extends Node {
+  kind: 'STRING';
+  value: string;
+}
+
+/**
+ * A value node could be:
+ *
+ * 1. a quoted attribute value: "some text" or 'some text'
+ * 2. text content between tags with white spaces: >  some text<nested-tag>
+ * 3. quoted or not quoted template values: {{ someVar }} or "{{ var }}"
+ * 4. mixture of text and template variables: "Hello, {{ userName }}!"
+ *
+ * The value node always include the full range, including quotes if any.
+ * But it's children only include the inner parts, excluding quotes.
+ */
+export interface ValueNode extends Node {
+  kind: 'VALUE';
+  children: (StringNode | TemplateNode)[];
+}
 
-// Source position and attribute interfaces
-export interface SourceRange {
-  start: number;
-  end: number;
+/**
+ * A for loop node could be like:
+ *
+ * ```
+ * <task for="item in items.everything">
+ * ```
+ *
+ * More advanced versions are not supported yet.
+ */
+export interface ForLoopNode extends Node {
+  kind: 'FORLOOP';
+  iterator: StringNode;
+  collection: ExpressionNode;
 }
 
-export interface AttributeInfo {
-  key: string;
-  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[]; // Mixed content: array of text/template nodes
-  keyRange: SourceRange; // Position of attribute name
-  valueRange: SourceRange; // Position of attribute value (excluding quotes)
-  fullRange: SourceRange; // Full attribute including key="value"
+export interface AttributeNode extends Node {
+  kind: 'ATTRIBUTE';
+  key: StringNode;
+  value: ValueNode;
 }
 
-// Main AST node interface
-export interface ASTNode {
-  id: string; // Unique ID for caching and React keys
-  kind: 'META' | 'TEXT' | 'POML' | 'TEMPLATE';
-  start: number; // Source position start of entire node
-  end: number; // Source position end of entire node
-  content: string; // The raw string content
-  parent?: ASTNode; // Reference to the parent node
-  children: ASTNode[]; // Child nodes
-
-  // For POML and META nodes
-  tagName?: string; // Tag name (e.g., 'task', 'meta')
-  attributes?: AttributeInfo[]; // Detailed attribute information
-
-  // Detailed source positions
-  openingTag?: {
-    start: number; // Position of '<'
-    end: number; // Position after '>'
-    nameRange: SourceRange; // Position of tag name
-  };
-
-  closingTag?: {
-    start: number; // Position of '</'
-    end: number; // Position after '>'
-    nameRange: SourceRange; // Position of tag name in closing tag
-  };
-
-  contentRange?: SourceRange; // Position of content between tags (excluding nested tags)
-
-  // For TEXT nodes
-  textSegments?: SourceRange[]; // Multiple ranges for text content (excluding nested POML)
-
-  // For TEMPLATE nodes
-  expression?: string; // The full expression content between {{}}
+export interface ForLoopAttributeNode extends Node {
+  kind: 'ATTRIBUTE';
+  key: StringNode; // Always "for"
+  value: ForLoopNode;
+}
+
+export interface OpenTagNode extends Node {
+  kind: 'OPEN';
+  value: StringNode;
+  attributes: (AttributeNode | ForLoopAttributeNode)[];
+}
+
+export interface CloseTagNode extends Node {
+  kind: 'CLOSE';
+  value: StringNode;
+}
+
+export interface SelfCloseTagNode extends Node {
+  kind: 'SELFCLOSE';
+  value: StringNode;
+  attributes: (AttributeNode | ForLoopAttributeNode)[];
+}
+
+export interface ElementNode extends Node {
+  kind: 'ELEMENT';
+  tagName: StringNode;
+  children: (ElementNode | ValueNode)[];
+}
+
+export interface TextNode extends Node {
+  kind: 'TEXT';
+  tagName: StringNode; // Always "text"
+  // We don't allow anything here yet.
+  attributes: AttributeNode[];
+  value: StringNode;
+}
+
+export interface MetaNode extends Node {
+  kind: 'META';
+  tagName: StringNode;
+  attributes: AttributeNode[];
 }
 
-// AST Parser class
 class ASTParser {
   private tokens: Token[];
   private position: number;
   private nextId: number;
+
+  // These are the tags that are always valid in POML.
+  // You can not disable them.
+  private alwaysValidTags = new Set<string>(['text', 'meta']);
+
+  // These semantics are handled right here.
+  private nonComponentTags = new Set<string>([
+    'let',
+    'include',
+    'template',
+    'context',
+    'stylesheet',
+    'output-schema',
+    'outputschema',
+    'tool',
+    'tool-def',
+    'tool-definition',
+    'tooldef',
+    'tooldefinition',
+  ]);
+
   private validPomlTags: Set<string>;
 
   constructor(tokens: Token[]) {
@@ -66,7 +163,7 @@ class ASTParser {
   }
 
   private buildValidTagsSet(): Set<string> {
-    const validTags = new Set<string>();
+    const validTags = new Set<string>(this.alwaysValidTags);
 
     for (const doc of componentDocs) {
       if (doc.name) {

From c092ed7f5bd04b4a53d08e0ccc7319803034bb2e Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 26 Aug 2025 18:15:25 +0800
Subject: [PATCH 22/76] add ast

---
 packages/poml/next/ast.ts  | 295 ++++++++++++++++++++++++++++++++++---
 packages/poml/next/node.ts |   0
 2 files changed, 271 insertions(+), 24 deletions(-)
 create mode 100644 packages/poml/next/node.ts

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index 8e54f437..f9943dcc 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -2,6 +2,22 @@ import { Tokenizer, Token } from './tokenizer';
 import componentDocs from '../assets/componentDocs.json';
 import { Range } from './types';
 
+/**
+ * Base interface for all AST nodes in the POML syntax tree.
+ *
+ * Every node in the AST must have a kind discriminator and a range indicating
+ * its position in the source text. The kind field enables TypeScript discriminated
+ * unions for type-safe node handling.
+ *
+ * Cases that apply:
+ * - All syntactic constructs in POML markup (elements, attributes, text, templates)
+ * - Meta-level constructs (root nodes, expression nodes)
+ *
+ * Cases that do not apply:
+ * - Lexical tokens (these are handled by the tokenizer)
+ * - Semantic information (component types, validation results)
+ * - Runtime values (evaluated expressions, resolved variables)
+ */
 export interface Node {
   kind:
     | 'META'
@@ -17,21 +33,52 @@ export interface Node {
     | 'TEXT'
     | 'POML'
     | 'ATTRIBUTE'
-    | 'TEMPLATE';
-  range: Range; // Range of the entire node in source
+    | 'TEMPLATE'
+    | 'ROOT';
+  range: Range;
 }
 
+/**
+ * Represents a JavaScript expression as a string.
+ *
+ * This node stores raw expression text that will be evaluated at runtime.
+ * It serves as a wrapper for expressions used in various contexts like
+ * conditions, loops, and template interpolations.
+ *
+ * Cases that apply:
+ * - Conditional expressions: `i > 0`, `user.name === "admin"`
+ * - Collection accessors: `items.everything`, `data[0].value`
+ * - Function calls: `formatDate(now)`, `items.filter(x => x.active)`
+ * - Property paths: `user.profile.settings.theme`
+ *
+ * Cases that do not apply:
+ * - Template syntax including braces: `{{ expression }}` (use TemplateNode)
+ * - String literals with quotes: `"hello"` (use StringNode or ValueNode)
+ * - POML markup: `<tag>` (use element nodes)
+ */
 export interface ExpressionNode extends Node {
   kind: 'EXPRESSION';
   value: string;
 }
 
 /**
- * A template node could be:
+ * Represents a template interpolation with double curly braces.
+ *
+ * Template nodes handle variable interpolation in POML, containing an
+ * expression that will be evaluated and substituted at runtime. The node
+ * preserves the template syntax for proper rendering and error reporting.
  *
- * 1. the value in an attribute like `if="i > 0"` -> `"i > 0"` with quotes
- * 2. a standalone template variable like `{{ userName }}`
- * 3.
+ * Cases that apply:
+ * - Standalone template variables: `{{ userName }}`, `{{ count + 1 }}`
+ * - Template expressions in text: part of "Hello {{ name }}!"
+ * - Complex expressions: `{{ users.map(u => u.name).join(", ") }}`
+ * - Conditional rendering: `{{ isVisible ? "Show" : "Hide" }}`
+ *
+ * Cases that do not apply:
+ * - Attribute expressions without braces: `if="x > 0"` (use ExpressionNode)
+ * - Plain text: `Hello World` (use StringNode)
+ * - POML elements: `<div>` (use element nodes)
+ * - Single braces: `{ not a template }` (treated as plain text)
  */
 export interface TemplateNode extends Node {
   kind: 'TEMPLATE';
@@ -39,9 +86,25 @@ export interface TemplateNode extends Node {
 }
 
 /**
- * A string node represents a pure text, without any quotes or template variables.
+ * Represents plain text content without any special syntax.
+ *
+ * String nodes are the most basic content nodes, containing literal text
+ * that requires no processing. They are used both for content and as
+ * components of other nodes (like attribute keys and tag names).
  *
- * It's also sometimes reused to represent a key, an identifier, or a tag name.
+ * Cases that apply:
+ * - Plain text content: `Hello World`, `This is a paragraph`
+ * - Long text blocks in `<text>` elements: `some long text <ignored-tag> continued`
+ * - Attribute keys: the `class` in `class="container"`
+ * - Tag names: the `div` in `<div>`
+ * - Identifiers: variable names like `item` in for loops
+ * - Whitespace and formatting text between elements
+ *
+ * Cases that do not apply:
+ * - Text containing templates: `Hello {{ name }}` (use ValueNode with children)
+ * - Quoted strings in attributes: `"value"` (use ValueNode)
+ * - Expressions: `x > 0` (use ExpressionNode)
+ * - Template variables: `{{ var }}` (use TemplateNode)
  */
 export interface StringNode extends Node {
   kind: 'STRING';
@@ -49,15 +112,26 @@ export interface StringNode extends Node {
 }
 
 /**
- * A value node could be:
+ * Represents a composite value that may contain text and/or templates.
+ *
+ * Value nodes are containers for mixed content, handling both pure text
+ * and interpolated templates. They preserve quote information when used
+ * as attribute values and support complex content composition.
  *
- * 1. a quoted attribute value: "some text" or 'some text'
- * 2. text content between tags with white spaces: >  some text<nested-tag>
- * 3. quoted or not quoted template values: {{ someVar }} or "{{ var }}"
- * 4. mixture of text and template variables: "Hello, {{ userName }}!"
+ * Cases that apply:
+ * - Quoted attribute values: `"some text"`, `'single quoted'`
+ * - Mixed content with templates: `"Hello, {{ userName }}!"`
+ * - Text content between tags: `>  some text  <` (including whitespace)
+ * - Unquoted template values in certain contexts
+ * - Multi-part content: `"Price: ${{ amount }} USD"`
  *
- * The value node always include the full range, including quotes if any.
- * But it's children only include the inner parts, excluding quotes.
+ * Cases that do not apply:
+ * - Attribute keys: `class=...` (the `class` part uses StringNode)
+ * - Pure expressions without quotes: `if=condition` (use ExpressionNode)
+ * - Tag names: `div` (use StringNode)
+ * - Standalone template variables not in a value context
+ *
+ * Note: The range includes quotes if present, but children exclude them.
  */
 export interface ValueNode extends Node {
   kind: 'VALUE';
@@ -65,13 +139,24 @@ export interface ValueNode extends Node {
 }
 
 /**
- * A for loop node could be like:
+ * Represents a for-loop iteration construct in POML.
+ *
+ * For loops enable iterative rendering of elements, following the pattern
+ * "iterator in collection". This node captures both the loop variable
+ * and the collection expression for runtime evaluation.
  *
- * ```
- * <task for="item in items.everything">
- * ```
+ * Cases that apply:
+ * - Simple iteration: `item in items`
+ * - Property access: `user in data.users`
+ * - Array literals: `num in [1, 2, 3]`
+ * - Method calls: `result in getResults()`
+ * - Nested property iteration: `task in project.tasks.active`
  *
- * More advanced versions are not supported yet.
+ * Cases that do not apply:
+ * - Advanced loop syntax (not yet supported): `(item, index) in items`
+ * - Destructuring patterns (not yet supported): `{name, age} in users`
+ * - Conditional loops: `if` attributes (use separate condition handling)
+ * - Template interpolation: `{{ items }}` (use TemplateNode)
  */
 export interface ForLoopNode extends Node {
   kind: 'FORLOOP';
@@ -79,55 +164,217 @@ export interface ForLoopNode extends Node {
   collection: ExpressionNode;
 }
 
+/**
+ * Represents a standard attribute on a POML element.
+ *
+ * Attributes provide metadata and configuration for elements. They consist
+ * of a key-value pair where the key is always a simple string and the value
+ * can be a complex composition of text and templates.
+ *
+ * Cases that apply:
+ * - Simple attributes: `class="container"`, `id='main'`
+ * - Boolean/presence attributes: `disabled`, `checked`
+ * - Template values: `title="{{ pageTitle }}"` or `title={{ pageTitle }}`
+ * - Mixed values: `placeholder="Enter {{ fieldName }}..."`
+ *
+ * Cases that do not apply:
+ * - For-loop attributes: `for="item in items"` (use ForLoopAttributeNode)
+ * - Spread attributes (not yet supported): `{...props}`
+ * - Dynamic attribute names (not supported): `[attrName]="value"`
+ */
 export interface AttributeNode extends Node {
   kind: 'ATTRIBUTE';
   key: StringNode;
   value: ValueNode;
 }
 
+/**
+ * Represents a special for-loop attribute on POML elements.
+ *
+ * This specialized attribute node handles the `for` attribute specifically,
+ * which contains loop iteration syntax rather than a simple value. It enables
+ * elements to be rendered multiple times based on a collection.
+ *
+ * Cases that apply:
+ * - For attributes only: `for="item in items"`
+ * - Nested iterations: `for="subitem in item.children"`
+ * - Computed collections: `for="i in [...Array(5).keys()]"`
+ *
+ * Cases that do not apply:
+ * - Any attribute with a key other than "for"
+ * - Standard attributes: `class="..."` (use AttributeNode)
+ * - Conditional attributes: `if="..."` (use AttributeNode)
+ */
 export interface ForLoopAttributeNode extends Node {
   kind: 'ATTRIBUTE';
-  key: StringNode; // Always "for"
+  key: StringNode;
   value: ForLoopNode;
 }
 
+/**
+ * Represents an opening tag in POML markup.
+ *
+ * Open tags mark the beginning of an element that expects a corresponding
+ * closing tag. They may contain attributes that configure the element's
+ * behavior and appearance.
+ *
+ * Cases that apply:
+ * - Standard opening tags: `<document>`, `<message role="user">`
+ * - Tags with attributes: `<div class="container" id="main">`
+ * - Tags with for-loops: `<task for="item in items">`
+ * - Nested structure beginnings: `<section>` before content
+ *
+ * Cases that do not apply:
+ * - Self-closing tags: `<image src="..." />` (use SelfCloseTagNode)
+ * - Closing tags: `</document>` (use CloseTagNode)
+ * - Complete elements: opening + content + closing (use ElementNode)
+ * - Invalid or malformed tags (treated as text)
+ */
 export interface OpenTagNode extends Node {
   kind: 'OPEN';
   value: StringNode;
   attributes: (AttributeNode | ForLoopAttributeNode)[];
 }
 
+/**
+ * Represents a closing tag in POML markup.
+ *
+ * Close tags mark the end of an element, matching a previously opened tag.
+ * They contain only the tag name and no attributes.
+ *
+ * Cases that apply:
+ * - Standard closing tags: `</document>`, `</message>`
+ * - Nested structure endings: `</section>`, `</div>`
+ * - Any valid POML element closure
+ *
+ * Cases that do not apply:
+ * - Opening tags: `<document>` (use OpenTagNode)
+ * - Self-closing tags: `<br/>` (use SelfCloseTagNode)
+ * - Tags with attributes (closing tags never have attributes)
+ * - Mismatched closing tags (parser error)
+ */
 export interface CloseTagNode extends Node {
   kind: 'CLOSE';
   value: StringNode;
 }
 
+/**
+ * Represents a self-closing tag in POML markup.
+ *
+ * Self-closing tags represent complete elements that have no children or
+ * content. They combine opening and closing in a single tag and may have
+ * attributes.
+ *
+ * Cases that apply:
+ * - Image elements: `<image src="photo.jpg" />`
+ * - Meta elements: `<meta name="author" content="John" />`
+ * - Data elements without content: `<data path="file.csv" />`
+ * - Any element explicitly self-closed: `<element attr="value" />`
+ *
+ * Cases that do not apply:
+ * - Elements with content: `<div>content</div>` (use ElementNode)
+ * - Separate open/close tags: `<div></div>` (use ElementNode)
+ * - Tags without the self-closing slash: `<img>` (use OpenTagNode)
+ * - Text content elements (these require open/close pairs)
+ */
 export interface SelfCloseTagNode extends Node {
   kind: 'SELFCLOSE';
   value: StringNode;
   attributes: (AttributeNode | ForLoopAttributeNode)[];
 }
 
+/**
+ * Represents a complete POML element with its content.
+ *
+ * Element nodes are high-level constructs that represent semantic POML
+ * components. They contain a tag name, optional attributes (inherited from
+ * open tag), and may have child content including other elements, text,
+ * or values.
+ *
+ * Cases that apply:
+ * - Document structures: `<document>...content...</document>`
+ * - Messages: `<message role="user">Hello</message>`
+ * - Nested elements: `<section><paragraph>Text</paragraph></section>`
+ * - Data components: `<table>...rows...</table>`
+ *
+ * Cases that do not apply:
+ * - Self-closing elements: `<image />` (use SelfCloseTagNode)
+ * - Raw text content: plain text outside elements (use TextNode)
+ * - Template variables: `{{ var }}` (use TemplateNode)
+ * - Meta elements: `<meta>` tags (use MetaNode)
+ */
 export interface ElementNode extends Node {
   kind: 'ELEMENT';
   tagName: StringNode;
-  children: (ElementNode | ValueNode)[];
+  children: (ElementNode | TextNode | MetaNode | ValueNode)[];
 }
 
+/**
+ * Represents a text element that preserves literal content.
+ *
+ * Text nodes are special POML elements that treat their content as literal
+ * text, preventing template variable interpolation. They ensure content is
+ * preserved exactly as written, useful for code samples or pre-formatted text.
+ *
+ * Cases that apply:
+ * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
+ *
+ * Cases that do not apply:
+ * - Regular text content with interpolation (use ValueNode)
+ * - Plain text outside elements (use ValueNode)
+ * - Elements allowing template processing (use ElementNode)
+ * - Text with attributes enabling processing (future feature)
+ *
+ * Note: The tagName is always "text" for these nodes, and attributes must be empty.
+ */
 export interface TextNode extends Node {
   kind: 'TEXT';
-  tagName: StringNode; // Always "text"
-  // We don't allow anything here yet.
+  tagName: StringNode;
   attributes: AttributeNode[];
   value: StringNode;
 }
 
+/**
+ * Represents metadata elements in POML.
+ *
+ * Meta nodes provide document-level metadata and configuration that doesn't
+ * render as visible content. They typically appear at the document start and
+ * configure processing behavior, document properties, or provide auxiliary
+ * information.
+ *
+ * Cases that apply:
+ * - Document metadata: `<meta minVersion="1.0">`
+ * - Configuration: `<meta enableComponents="+reference">`
+ *
+ * Cases that do not apply:
+ * - Any element that is not `<meta>` (use ElementNode)
+ */
 export interface MetaNode extends Node {
   kind: 'META';
   tagName: StringNode;
   attributes: AttributeNode[];
 }
 
+/**
+ * Represents the root node of a POML document tree.
+ *
+ * Root nodes serve as the top-level container for all document content when
+ * there isn't an explicit `<poml>` wrapper. They provide a consistent entry
+ * point for document traversal and processing.
+ *
+ * Cases that apply:
+ * - Documents without `<poml>` wrapper
+ * - Documents with multiple top-level elements
+ * - Documents whose `<poml>` wrapper is surrounded by whitespace or comments
+ *
+ * Cases that do not apply:
+ * - All nested elements
+ */
+export interface RootNode extends Node {
+  kind: 'ROOT';
+  children: (ElementNode | TextNode | MetaNode | ValueNode)[];
+}
+
 class ASTParser {
   private tokens: Token[];
   private position: number;
diff --git a/packages/poml/next/node.ts b/packages/poml/next/node.ts
new file mode 100644
index 00000000..e69de29b

From 82e028d72b70e29d17a084ddb4584d3f519a857a Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 26 Aug 2025 18:23:53 +0800
Subject: [PATCH 23/76] fix cst

---
 packages/poml/next/cst.ts               |  74 ++---
 packages/poml/next/node.ts              |   0
 packages/poml/next/{ast.ts => nodes.ts} | 377 ------------------------
 3 files changed, 23 insertions(+), 428 deletions(-)
 delete mode 100644 packages/poml/next/node.ts
 rename packages/poml/next/{ast.ts => nodes.ts} (55%)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 6ede8a39..ff13c4b0 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -16,62 +16,14 @@ import {
 } from './lexer';
 
 import { listComponentAliases } from '../base';
-
-// Source position interfaces
-export interface SourceRange {
-  start: number;
-  end: number;
-}
-
-export interface AttributeInfo {
-  key: string;
-  value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[];
-  keyRange: SourceRange;
-  valueRange: SourceRange;
-  fullRange: SourceRange;
-}
-
-// Core AST node interface
-export interface ASTNode {
-  id: string;
-  kind: 'META' | 'TEXT' | 'POML' | 'TEMPLATE';
-  start: number;
-  end: number;
-  content: string;
-  parent?: ASTNode;
-  children: ASTNode[];
-
-  // For POML and META nodes
-  tagName?: string;
-  attributes?: AttributeInfo[];
-
-  // Detailed source positions
-  openingTag?: {
-    start: number;
-    end: number;
-    nameRange: SourceRange;
-  };
-
-  closingTag?: {
-    start: number;
-    end: number;
-    nameRange: SourceRange;
-  };
-
-  contentRange?: SourceRange;
-
-  // For TEXT nodes
-  textSegments?: SourceRange[];
-
-  // For TEMPLATE nodes
-  expression?: string;
-}
+import * as Nodes from './nodes';
 
 // Context for parsing configuration
 export interface PomlContext {
   variables: { [key: string]: any };
   stylesheet: { [key: string]: string };
-  minimalPomlVersion?: string;
+  minPomlVersion?: string;
+  maxPomlVersion?: string;
   sourcePath: string;
   enabledComponents: Set<string>;
   unknownComponentBehavior: 'error' | 'warning' | 'ignore';
@@ -85,6 +37,26 @@ export class CSTParser {
   private context: PomlContext;
   private nodeIdCounter: number;
 
+  // These are the tags that are always valid in POML.
+  // You can not disable them.
+  private alwaysValidTags = new Set<string>(['text', 'meta']);
+
+  // These semantics are handled right here.
+  private nonComponentTags = new Set<string>([
+    'let',
+    'include',
+    'template',
+    'context',
+    'stylesheet',
+    'output-schema',
+    'outputschema',
+    'tool',
+    'tool-def',
+    'tool-definition',
+    'tooldef',
+    'tooldefinition',
+  ]);
+
   constructor(context: PomlContext) {
     this.tokens = [];
     this.position = 0;
diff --git a/packages/poml/next/node.ts b/packages/poml/next/node.ts
deleted file mode 100644
index e69de29b..00000000
diff --git a/packages/poml/next/ast.ts b/packages/poml/next/nodes.ts
similarity index 55%
rename from packages/poml/next/ast.ts
rename to packages/poml/next/nodes.ts
index f9943dcc..d4995984 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/nodes.ts
@@ -1,5 +1,3 @@
-import { Tokenizer, Token } from './tokenizer';
-import componentDocs from '../assets/componentDocs.json';
 import { Range } from './types';
 
 /**
@@ -374,378 +372,3 @@ export interface RootNode extends Node {
   kind: 'ROOT';
   children: (ElementNode | TextNode | MetaNode | ValueNode)[];
 }
-
-class ASTParser {
-  private tokens: Token[];
-  private position: number;
-  private nextId: number;
-
-  // These are the tags that are always valid in POML.
-  // You can not disable them.
-  private alwaysValidTags = new Set<string>(['text', 'meta']);
-
-  // These semantics are handled right here.
-  private nonComponentTags = new Set<string>([
-    'let',
-    'include',
-    'template',
-    'context',
-    'stylesheet',
-    'output-schema',
-    'outputschema',
-    'tool',
-    'tool-def',
-    'tool-definition',
-    'tooldef',
-    'tooldefinition',
-  ]);
-
-  private validPomlTags: Set<string>;
-
-  constructor(tokens: Token[]) {
-    this.tokens = tokens;
-    this.position = 0;
-    this.nextId = 0;
-    this.validPomlTags = this.buildValidTagsSet();
-  }
-
-  private buildValidTagsSet(): Set<string> {
-    const validTags = new Set<string>(this.alwaysValidTags);
-
-    for (const doc of componentDocs) {
-      if (doc.name) {
-        validTags.add(doc.name.toLowerCase());
-        // Convert camelCase to kebab-case
-        validTags.add(
-          doc.name
-            .toLowerCase()
-            .replace(/([A-Z])/g, '-$1')
-            .toLowerCase(),
-        );
-      }
-    }
-
-    // Add special tags
-    validTags.add('poml');
-    validTags.add('text');
-    validTags.add('meta');
-
-    return validTags;
-  }
-
-  private generateId(): string {
-    return `ast_${this.nextId++}`;
-  }
-
-  private peek(): Token | undefined {
-    return this.tokens[this.position];
-  }
-
-  private advance(): Token | undefined {
-    return this.tokens[this.position++];
-  }
-
-  private extractTagName(tagContent: string): string {
-    // Remove < and > and any attributes
-    const content = tagContent.slice(1, -1);
-    const match = content.match(/^\/?\s*([a-zA-Z][\w-]*)/);
-    return match ? match[1] : '';
-  }
-
-  private parseAttributeValue(value: string): (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] {
-    // Parse attribute value for mixed text and template variables
-    const result: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] = [];
-    let currentPos = 0;
-
-    while (currentPos < value.length) {
-      const templateStart = value.indexOf('{{', currentPos);
-
-      if (templateStart === -1) {
-        // No more template variables, add remaining text
-        if (currentPos < value.length) {
-          result.push({
-            id: this.generateId(),
-            kind: 'TEXT',
-            start: currentPos,
-            end: value.length,
-            content: value.substring(currentPos),
-            children: [],
-          });
-        }
-        break;
-      }
-
-      // Add text before template variable
-      if (templateStart > currentPos) {
-        result.push({
-          id: this.generateId(),
-          kind: 'TEXT',
-          start: currentPos,
-          end: templateStart,
-          content: value.substring(currentPos, templateStart),
-          children: [],
-        });
-      }
-
-      // Find end of template variable
-      const templateEnd = value.indexOf('}}', templateStart + 2);
-      if (templateEnd === -1) {
-        // Malformed template, treat as text
-        result.push({
-          id: this.generateId(),
-          kind: 'TEXT',
-          start: templateStart,
-          end: value.length,
-          content: value.substring(templateStart),
-          children: [],
-        });
-        break;
-      }
-
-      // Add template variable
-      const templateContent = value.substring(templateStart + 2, templateEnd);
-      result.push({
-        id: this.generateId(),
-        kind: 'TEMPLATE',
-        start: templateStart,
-        end: templateEnd + 2,
-        content: value.substring(templateStart, templateEnd + 2),
-        expression: templateContent.trim(),
-        children: [],
-      });
-
-      currentPos = templateEnd + 2;
-    }
-
-    return result;
-  }
-
-  private parseAttributes(tagContent: string): AttributeInfo[] {
-    const attributes: AttributeInfo[] = [];
-
-    // Simple attribute parsing - can be enhanced later
-    const attrRegex = /(\w+)=["']([^"']*?)["']/g;
-    let match;
-
-    while ((match = attrRegex.exec(tagContent)) !== null) {
-      const key = match[1];
-      const value = match[2];
-      const fullMatch = match[0];
-      const matchStart = match.index;
-
-      attributes.push({
-        key,
-        value: this.parseAttributeValue(value),
-        keyRange: { start: matchStart, end: matchStart + key.length },
-        valueRange: { start: matchStart + key.length + 2, end: matchStart + key.length + 2 + value.length },
-        fullRange: { start: matchStart, end: matchStart + fullMatch.length },
-      });
-    }
-
-    return attributes;
-  }
-
-  parse(): ASTNode {
-    const children = this.parseNodes();
-
-    if (children.length === 1 && children[0].kind === 'POML') {
-      return children[0];
-    }
-
-    // Create root text node
-    const rootNode: ASTNode = {
-      id: this.generateId(),
-      kind: 'TEXT',
-      start: 0,
-      end: this.tokens.length > 0 ? this.tokens[this.tokens.length - 1].end : 0,
-      content: this.tokens.map((t) => t.value).join(''),
-      children,
-      textSegments: [],
-    };
-
-    // Set parent references
-    children.forEach((child) => {
-      child.parent = rootNode;
-    });
-
-    return rootNode;
-  }
-
-  private parseNodes(): ASTNode[] {
-    const nodes: ASTNode[] = [];
-
-    while (this.position < this.tokens.length) {
-      const token = this.peek();
-      if (!token) break;
-
-      if (token.type === 'TEMPLATE_VAR') {
-        nodes.push(this.parseTemplateVariable());
-      } else if (token.type === 'TAG_OPEN') {
-        const tagName = this.extractTagName(token.value);
-
-        if (this.validPomlTags.has(tagName.toLowerCase())) {
-          const node = this.parsePomlNode();
-          if (node) {
-            nodes.push(node);
-          }
-        } else {
-          // Invalid tag, treat as text
-          nodes.push(this.parseTextFromToken());
-        }
-      } else if (token.type === 'TEXT') {
-        nodes.push(this.parseTextFromToken());
-      } else {
-        // Skip other token types for now
-        this.advance();
-      }
-    }
-
-    return nodes;
-  }
-
-  private parseTemplateVariable(): ASTNode {
-    const token = this.advance()!;
-    const expression = token.value.slice(2, -2).trim(); // Remove {{ and }}
-
-    return {
-      id: this.generateId(),
-      kind: 'TEMPLATE',
-      start: token.start,
-      end: token.end,
-      content: token.value,
-      expression,
-      children: [],
-    };
-  }
-
-  private parseTextFromToken(): ASTNode {
-    const token = this.advance()!;
-
-    return {
-      id: this.generateId(),
-      kind: 'TEXT',
-      start: token.start,
-      end: token.end,
-      content: token.value,
-      children: [],
-      textSegments: [{ start: token.start, end: token.end }],
-    };
-  }
-
-  private parsePomlNode(): ASTNode | null {
-    const openToken = this.advance()!;
-    const tagName = this.extractTagName(openToken.value);
-
-    // Parse attributes
-    const attributes = this.parseAttributes(openToken.value);
-
-    // Determine node kind
-    const kind = tagName.toLowerCase() === 'meta' ? 'META' : 'POML';
-
-    const node: ASTNode = {
-      id: this.generateId(),
-      kind,
-      start: openToken.start,
-      end: openToken.end, // Will be updated when we find closing tag
-      content: openToken.value, // Will be updated
-      tagName: tagName.toLowerCase(),
-      attributes,
-      children: [],
-      openingTag: {
-        start: openToken.start,
-        end: openToken.end,
-        nameRange: {
-          start: openToken.start + 1,
-          end: openToken.start + 1 + tagName.length,
-        },
-      },
-    };
-
-    // Parse children until we find the closing tag
-    const children: ASTNode[] = [];
-    let depth = 1;
-
-    while (this.position < this.tokens.length && depth > 0) {
-      const token = this.peek();
-      if (!token) break;
-
-      if (token.type === 'TAG_OPEN') {
-        const childTagName = this.extractTagName(token.value);
-        if (childTagName.toLowerCase() === tagName.toLowerCase()) {
-          depth++;
-        }
-
-        // Special handling for text tags - don't process template variables
-        if (tagName.toLowerCase() === 'text') {
-          children.push(this.parseTextFromToken());
-        } else if (this.validPomlTags.has(childTagName.toLowerCase())) {
-          const childNode = this.parsePomlNode();
-          if (childNode) {
-            childNode.parent = node;
-            children.push(childNode);
-          }
-        } else {
-          children.push(this.parseTextFromToken());
-        }
-      } else if (token.type === 'TAG_CLOSE') {
-        const closeTagName = this.extractTagName(token.value);
-        if (closeTagName.toLowerCase() === tagName.toLowerCase()) {
-          depth--;
-          if (depth === 0) {
-            // Found our closing tag
-            const closeToken = this.advance()!;
-            node.end = closeToken.end;
-            node.closingTag = {
-              start: closeToken.start,
-              end: closeToken.end,
-              nameRange: {
-                start: closeToken.start + 2,
-                end: closeToken.start + 2 + tagName.length,
-              },
-            };
-            break;
-          }
-        }
-        this.advance();
-      } else if (token.type === 'TEMPLATE_VAR' && tagName.toLowerCase() !== 'text') {
-        // Only parse template variables outside of text tags
-        const templateNode = this.parseTemplateVariable();
-        templateNode.parent = node;
-        children.push(templateNode);
-      } else {
-        const textNode = this.parseTextFromToken();
-        textNode.parent = node;
-        children.push(textNode);
-      }
-    }
-
-    node.children = children;
-
-    // Update content to include full tag
-    if (node.closingTag) {
-      node.content = this.tokens
-        .slice(
-          this.tokens.findIndex((t) => t.start === node.start),
-          this.tokens.findIndex((t) => t.end === node.end) + 1,
-        )
-        .map((t) => t.value)
-        .join('');
-    }
-
-    return node;
-  }
-}
-
-// Main parsing function
-export function parseAST(content: string): ASTNode {
-  const tokenizer = new Tokenizer(content);
-  const tokens = tokenizer.tokenize();
-  const parser = new ASTParser(tokens);
-  return parser.parse();
-}
-
-export class PomlAstParser {
-  static parse(content: string): ASTNode {
-    return parseAST(content);
-  }
-}

From 297a50c0decb81ff13d4031b8151a0c8f6b5761c Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 29 Aug 2025 14:34:09 +0800
Subject: [PATCH 24/76] update nodes

---
 packages/poml/next/lexer.ts |  21 +++--
 packages/poml/next/nodes.ts | 169 ++++++++++++++++++++----------------
 2 files changed, 107 insertions(+), 83 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index a646defd..1074146b 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -16,15 +16,18 @@ export const SingleQuote = createToken({ name: 'SingleQuote', pattern: /'/ });
 export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 
 /* Identifier is one of the following:
-   - XML tag names
-   - XML attribute names
-   - TextContent incorrectly parsed as identifiers
-
-   Case 3 is handled later by CST parser.
-*/
+ * - XML tag names
+ * - XML attribute names
+ * - TextContent incorrectly parsed as identifiers
+ *
+ * Notes:
+ * 1. In case 1, tag names may also contain `:` (namespaces) and `.` (extensions),
+ *    which this pattern does not match; the CST parser handles them later.
+ * 2. In case 3, the CST parser reclassifies the token as TextContent if needed.
+ */
 export const Identifier = createToken({
   name: 'Identifier',
-  pattern: /[a-zA-Z_][a-zA-Z0-9_-]*/,
+  pattern: /[a-zA-Z_][a-zA-Z0-9_\-]*/,
 });
 
 export const Whitespace = createToken({
@@ -33,7 +36,7 @@ export const Whitespace = createToken({
   line_breaks: true,
 });
 
-/* eslint-disable no-irregular-whitespace */
+
 /* Catch-all for arbitrary text content
    - Match any char except:
        <          — starts a tag
@@ -46,7 +49,7 @@ export const TextContent = createToken({
   pattern: /(?:[^<"'{}]|{(?!{)|}(?!}))+/,
   line_breaks: true,
 });
-/* eslint-enable no-irregular-whitespace */
+
 
 // Define token order - more specific patterns first
 export const allTokens = [
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index d4995984..ba6cb1ae 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -1,41 +1,5 @@
 import { Range } from './types';
 
-/**
- * Base interface for all AST nodes in the POML syntax tree.
- *
- * Every node in the AST must have a kind discriminator and a range indicating
- * its position in the source text. The kind field enables TypeScript discriminated
- * unions for type-safe node handling.
- *
- * Cases that apply:
- * - All syntactic constructs in POML markup (elements, attributes, text, templates)
- * - Meta-level constructs (root nodes, expression nodes)
- *
- * Cases that do not apply:
- * - Lexical tokens (these are handled by the tokenizer)
- * - Semantic information (component types, validation results)
- * - Runtime values (evaluated expressions, resolved variables)
- */
-export interface Node {
-  kind:
-    | 'META'
-    | 'EXPRESSION'
-    | 'VALUE'
-    | 'STRING'
-    | 'VALUE'
-    | 'FORLOOP'
-    | 'OPEN'
-    | 'CLOSE'
-    | 'SELFCLOSE'
-    | 'ELEMENT'
-    | 'TEXT'
-    | 'POML'
-    | 'ATTRIBUTE'
-    | 'TEMPLATE'
-    | 'ROOT';
-  range: Range;
-}
-
 /**
  * Represents a JavaScript expression as a string.
  *
@@ -54,13 +18,15 @@ export interface Node {
  * - String literals with quotes: `"hello"` (use StringNode or ValueNode)
  * - POML markup: `<tag>` (use element nodes)
  */
-export interface ExpressionNode extends Node {
+export interface ExpressionNode {
   kind: 'EXPRESSION';
+  range: Range;
   value: string;
 }
 
 /**
- * Represents a template interpolation with double curly braces.
+ * Represents a template interpolation with double curly braces,
+ * or sometimes without braces in specific attributes.
  *
  * Template nodes handle variable interpolation in POML, containing an
  * expression that will be evaluated and substituted at runtime. The node
@@ -71,15 +37,18 @@ export interface ExpressionNode extends Node {
  * - Template expressions in text: part of "Hello {{ name }}!"
  * - Complex expressions: `{{ users.map(u => u.name).join(", ") }}`
  * - Conditional rendering: `{{ isVisible ? "Show" : "Hide" }}`
+ * - Template usage in if attributes: `condition` in `if="condition"`
  *
  * Cases that do not apply:
- * - Attribute expressions without braces: `if="x > 0"` (use ExpressionNode)
+ * - Full attribute expressions: `if="x > 0"` (use ExpressionNode)
  * - Plain text: `Hello World` (use StringNode)
- * - POML elements: `<div>` (use element nodes)
  * - Single braces: `{ not a template }` (treated as plain text)
+ * - Template elements: `<template>{{ this is a jinja template }}</template>` (use TextNode)
+ * - With quotes: `"{{ var }}"` (use ValueNode)
  */
-export interface TemplateNode extends Node {
+export interface TemplateNode {
   kind: 'TEMPLATE';
+  range: Range;
   value: ExpressionNode;
 }
 
@@ -104,8 +73,9 @@ export interface TemplateNode extends Node {
  * - Expressions: `x > 0` (use ExpressionNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  */
-export interface StringNode extends Node {
+export interface StringNode {
   kind: 'STRING';
+  range: Range;
   value: string;
 }
 
@@ -121,7 +91,7 @@ export interface StringNode extends Node {
  * - Mixed content with templates: `"Hello, {{ userName }}!"`
  * - Text content between tags: `>  some text  <` (including whitespace)
  * - Unquoted template values in certain contexts
- * - Multi-part content: `"Price: ${{ amount }} USD"`
+ * - Multi-part content: `"Price: ${{amount}} USD"`
  *
  * Cases that do not apply:
  * - Attribute keys: `class=...` (the `class` part uses StringNode)
@@ -131,8 +101,9 @@ export interface StringNode extends Node {
  *
  * Note: The range includes quotes if present, but children exclude them.
  */
-export interface ValueNode extends Node {
+export interface ValueNode {
   kind: 'VALUE';
+  range: Range;
   children: (StringNode | TemplateNode)[];
 }
 
@@ -156,8 +127,9 @@ export interface ValueNode extends Node {
  * - Conditional loops: `if` attributes (use separate condition handling)
  * - Template interpolation: `{{ items }}` (use TemplateNode)
  */
-export interface ForLoopNode extends Node {
-  kind: 'FORLOOP';
+export interface ForIteratorNode {
+  kind: 'FORITERATOR';
+  range: Range;
   iterator: StringNode;
   collection: ExpressionNode;
 }
@@ -171,17 +143,18 @@ export interface ForLoopNode extends Node {
  *
  * Cases that apply:
  * - Simple attributes: `class="container"`, `id='main'`
- * - Boolean/presence attributes: `disabled`, `checked`
  * - Template values: `title="{{ pageTitle }}"` or `title={{ pageTitle }}`
  * - Mixed values: `placeholder="Enter {{ fieldName }}..."`
  *
  * Cases that do not apply:
+ * - Boolean/presence attributes: `disabled`, `checked` (not yet supported)
  * - For-loop attributes: `for="item in items"` (use ForLoopAttributeNode)
  * - Spread attributes (not yet supported): `{...props}`
  * - Dynamic attribute names (not supported): `[attrName]="value"`
  */
-export interface AttributeNode extends Node {
+export interface AttributeNode {
   kind: 'ATTRIBUTE';
+  range: Range;
   key: StringNode;
   value: ValueNode;
 }
@@ -203,8 +176,9 @@ export interface AttributeNode extends Node {
  * - Standard attributes: `class="..."` (use AttributeNode)
  * - Conditional attributes: `if="..."` (use AttributeNode)
  */
-export interface ForLoopAttributeNode extends Node {
-  kind: 'ATTRIBUTE';
+export interface ForLoopAttributeNode {
+  kind: 'FORATTRIBUTE';
+  range: Range;
   key: StringNode;
   value: ForLoopNode;
 }
@@ -228,8 +202,9 @@ export interface ForLoopAttributeNode extends Node {
  * - Complete elements: opening + content + closing (use ElementNode)
  * - Invalid or malformed tags (treated as text)
  */
-export interface OpenTagNode extends Node {
+export interface OpenTagNode {
   kind: 'OPEN';
+  range: Range;
   value: StringNode;
   attributes: (AttributeNode | ForLoopAttributeNode)[];
 }
@@ -249,34 +224,34 @@ export interface OpenTagNode extends Node {
  * - Opening tags: `<document>` (use OpenTagNode)
  * - Self-closing tags: `<br/>` (use SelfCloseTagNode)
  * - Tags with attributes (closing tags never have attributes)
- * - Mismatched closing tags (parser error)
  */
-export interface CloseTagNode extends Node {
+export interface CloseTagNode {
   kind: 'CLOSE';
+  range: Range;
   value: StringNode;
 }
 
 /**
  * Represents a self-closing tag in POML markup.
  *
- * Self-closing tags represent complete elements that have no children or
+ * Self-closing elements represent complete elements that have no children or
  * content. They combine opening and closing in a single tag and may have
  * attributes.
  *
  * Cases that apply:
  * - Image elements: `<image src="photo.jpg" />`
- * - Meta elements: `<meta name="author" content="John" />`
- * - Data elements without content: `<data path="file.csv" />`
- * - Any element explicitly self-closed: `<element attr="value" />`
+ * - Runtime configurations: `<runtime model="gpt-5" temperature="0.7" />`
  *
  * Cases that do not apply:
+ * - Self-closed meta elements: `<meta name="author" content="John" />` (use MetaNode)
  * - Elements with content: `<div>content</div>` (use ElementNode)
  * - Separate open/close tags: `<div></div>` (use ElementNode)
  * - Tags without the self-closing slash: `<img>` (use OpenTagNode)
- * - Text content elements (these require open/close pairs)
+ * - Meta elements: `<meta>` tags (use MetaNode)
  */
-export interface SelfCloseTagNode extends Node {
+export interface SelfCloseElementNode {
   kind: 'SELFCLOSE';
+  range: Range;
   value: StringNode;
   attributes: (AttributeNode | ForLoopAttributeNode)[];
 }
@@ -290,25 +265,26 @@ export interface SelfCloseTagNode extends Node {
  * or values.
  *
  * Cases that apply:
- * - Document structures: `<document>...content...</document>`
- * - Messages: `<message role="user">Hello</message>`
+ * - Any elements: `<document parser="txt">...content...</document>`
+ * - Output schemas with templates: `<output-schema>{{ schemaDefinition }}</output-schema>`
  * - Nested elements: `<section><paragraph>Text</paragraph></section>`
- * - Data components: `<table>...rows...</table>`
  *
  * Cases that do not apply:
  * - Self-closing elements: `<image />` (use SelfCloseTagNode)
- * - Raw text content: plain text outside elements (use TextNode)
+ * - Literal text content: plain text (use TextNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  * - Meta elements: `<meta>` tags (use MetaNode)
  */
-export interface ElementNode extends Node {
+export interface ElementNode {
   kind: 'ELEMENT';
-  tagName: StringNode;
+  range: Range;
+  open: OpenTagNode;
+  close: CloseTagNode;
   children: (ElementNode | TextNode | MetaNode | ValueNode)[];
 }
 
 /**
- * Represents a text element that preserves literal content.
+ * Represents an element that preserves literal content.
  *
  * Text nodes are special POML elements that treat their content as literal
  * text, preventing template variable interpolation. They ensure content is
@@ -316,6 +292,7 @@ export interface ElementNode extends Node {
  *
  * Cases that apply:
  * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
+ * - External templates: `<template>{{ this is a jinja template }}</template>`
  *
  * Cases that do not apply:
  * - Regular text content with interpolation (use ValueNode)
@@ -323,23 +300,27 @@ export interface ElementNode extends Node {
  * - Elements allowing template processing (use ElementNode)
  * - Text with attributes enabling processing (future feature)
  *
- * Note: The tagName is always "text" for these nodes, and attributes must be empty.
+ * Note: The tagName (value) can only be "text" or "template" in this version.
  */
-export interface TextNode extends Node {
+export interface TextNode {
   kind: 'TEXT';
-  tagName: StringNode;
+  range: Range;
+  open: OpenTagNode;
+  close: CloseTagNode;
   attributes: AttributeNode[];
-  value: StringNode;
+  children: StringNode;
 }
 
 /**
- * Represents metadata elements in POML.
+ * Represents metadata elements in POML. Meta elements must be self-closed.
  *
  * Meta nodes provide document-level metadata and configuration that doesn't
  * render as visible content. They typically appear at the document start and
  * configure processing behavior, document properties, or provide auxiliary
  * information.
  *
+ * Value must be "meta" (case-insensitive).
+ *
  * Cases that apply:
  * - Document metadata: `<meta minVersion="1.0">`
  * - Configuration: `<meta enableComponents="+reference">`
@@ -347,9 +328,10 @@ export interface TextNode extends Node {
  * Cases that do not apply:
  * - Any element that is not `<meta>` (use ElementNode)
  */
-export interface MetaNode extends Node {
+export interface MetaNode {
   kind: 'META';
-  tagName: StringNode;
+  range: Range;
+  value: StringNode;
   attributes: AttributeNode[];
 }
 
@@ -368,7 +350,46 @@ export interface MetaNode extends Node {
  * Cases that do not apply:
  * - All nested elements
  */
-export interface RootNode extends Node {
+export interface RootNode {
   kind: 'ROOT';
+  range: Range;
   children: (ElementNode | TextNode | MetaNode | ValueNode)[];
 }
+
+// Keep these keys required; everything else becomes recursively optional
+type DeepPartialExcept<T, K extends keyof T> =
+  // arrays
+  T extends (infer U)[]
+    ? DeepPartialExcept<U, never>[]
+    : // functions (leave as-is)
+      T extends (...args: any) => any
+      ? T
+      : // objects
+        T extends object
+        ? { [P in keyof T as P extends K ? P : never]-?: T[P] } & {
+            [P in keyof T as P extends K ? never : P]?: DeepPartialExcept<T[P], never> | undefined;
+          }
+        : T;
+
+// Keep "kind" required on the top-level node only; all other fields, including nested "kind", become optional.
+type Draft<T extends { kind: string }> = DeepPartialExcept<T, 'kind'>;
+
+// Union of your strict nodes
+export type StrictNode =
+  | ExpressionNode
+  | TemplateNode
+  | StringNode
+  | ValueNode
+  | ForLoopNode
+  | AttributeNode
+  | ForLoopAttributeNode
+  | OpenTagNode
+  | CloseTagNode
+  | SelfCloseElementNode
+  | ElementNode
+  | TextNode
+  | MetaNode
+  | RootNode;
+
+// The "loose" counterpart you can safely produce during parsing.
+export type DraftNode = Draft<StrictNode>;

From af9c69cfdd5afcd48f4f7b0463869ee098c2340d Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 12:16:45 +0800
Subject: [PATCH 25/76] Split Comment token into CommentOpen/CommentClose; add Pragma token

---
 packages/poml/next/lexer.ts | 4 +++-
 packages/poml/next/nodes.ts | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 1074146b..9d58174b 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -1,7 +1,9 @@
 import { createToken, Lexer } from 'chevrotain';
 
 // Define token types for extended POML
-export const Comment = createToken({ name: 'Comment', pattern: /<!--[\s\S]*?-->/ });
+export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!--/ });
+export const CommentClose = createToken({ name: 'CommentClose', pattern: /-->/ });
+export const Pragma = createToken({ name: 'Pragma', pattern: /@pragma\b/i }); // no leading \b: it never matches between a space and '@'
 export const TemplateOpen = createToken({ name: 'TemplateOpen', pattern: /{{/ });
 export const TemplateClose = createToken({ name: 'TemplateClose', pattern: /}}/ });
 export const TagClosingOpen = createToken({ name: 'TagClosingOpen', pattern: /<\// });
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index ba6cb1ae..bf734333 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -289,6 +289,8 @@ export interface ElementNode {
  * Text nodes are special POML elements that treat their content as literal
  * text, preventing template variable interpolation. They ensure content is
  * preserved exactly as written, useful for code samples or pre-formatted text.
+ * When `<text>` is used, the parser eats everything including tags and comments,
+ * including new `<text>` tags, until a matching `</text>` is found.
  *
  * Cases that apply:
  * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`

From 97e8ba233b64541f29f5e0293cf3777c9ffbf308 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 12:52:43 +0800
Subject: [PATCH 26/76] Rename TextNode to LiteralNode; document pragma-style metadata

---
 packages/poml/next/nodes.ts | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index bf734333..0d682204 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -43,7 +43,7 @@ export interface ExpressionNode {
  * - Full attribute expressions: `if="x > 0"` (use ExpressionNode)
  * - Plain text: `Hello World` (use StringNode)
  * - Single braces: `{ not a template }` (treated as plain text)
- * - Template elements: <template>{{ this is a jinja template }}</template> (use TextNode)
+ * - Template elements: <template>{{ this is a jinja template }}</template> (use LiteralNode)
  * - With quotes: `"{{ var }}"` (use ValueNode)
  */
 export interface TemplateNode {
@@ -271,7 +271,7 @@ export interface SelfCloseElementNode {
  *
  * Cases that do not apply:
  * - Self-closing elements: `<image />` (use SelfCloseTagNode)
- * - Literal text content: plain text (use TextNode)
+ * - Literal text content: plain text (use LiteralNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  * - Meta elements: `<meta>` tags (use MetaNode)
  */
@@ -280,13 +280,13 @@ export interface ElementNode {
   range: Range;
   open: OpenTagNode;
   close: CloseTagNode;
-  children: (ElementNode | TextNode | MetaNode | ValueNode)[];
+  children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
 /**
  * Represents an element that preserves literal content.
  *
- * Text nodes are special POML elements that treat their content as literal
+ * Literal nodes are special POML elements that treat their content as literal
  * text, preventing template variable interpolation. They ensure content is
  * preserved exactly as written, useful for code samples or pre-formatted text.
  * When `<text>` is used, the parser eats everything including tags and comments,
@@ -294,7 +294,6 @@ export interface ElementNode {
  *
  * Cases that apply:
  * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
- * - External templates: `<template>{{ this is a jinja template }}</template>`
  *
  * Cases that do not apply:
  * - Regular text content with interpolation (use ValueNode)
@@ -302,9 +301,11 @@ export interface ElementNode {
  * - Elements allowing template processing (use ElementNode)
  * - Text with attributes enabling processing (future feature)
  *
- * Note: The tagName (value) can only be "text" or "template" in this version.
+ * Note: The tagName (value) can only be "text" in this version.
+ * Unlike a regular element, a literal node has no child nodes; its body is kept as raw text.
+ * Literal nodes are handled at the CST parsing stage.
  */
-export interface TextNode {
+export interface LiteralNode {
   kind: 'TEXT';
   range: Range;
   open: OpenTagNode;
@@ -321,14 +322,9 @@ export interface TextNode {
  * configure processing behavior, document properties, or provide auxiliary
  * information.
  *
- * Value must be "meta" (case-insensitive).
- *
  * Cases that apply:
- * - Document metadata: `<meta minVersion="1.0">`
- * - Configuration: `<meta enableComponents="+reference">`
- *
- * Cases that do not apply:
- * - Any element that is not `<meta>` (use ElementNode)
+ * - Document metadata: `<!-- @pragma minVersion 1.0 -->`
+ * - Configuration: `<!-- @pragma components +reference -table -->`
  */
 export interface MetaNode {
   kind: 'META';
@@ -355,7 +351,7 @@ export interface MetaNode {
 export interface RootNode {
   kind: 'ROOT';
   range: Range;
-  children: (ElementNode | TextNode | MetaNode | ValueNode)[];
+  children: (ElementNode | LiteralNode | MetaNode | ValueNode)[];
 }
 
 // Keep these keys required; everything else becomes recursively optional
@@ -382,14 +378,14 @@ export type StrictNode =
   | TemplateNode
   | StringNode
   | ValueNode
-  | ForLoopNode
+  | ForIteratorNode
   | AttributeNode
   | ForLoopAttributeNode
   | OpenTagNode
   | CloseTagNode
   | SelfCloseElementNode
   | ElementNode
-  | TextNode
+  | LiteralNode
   | MetaNode
   | RootNode;
 

From ddf120ca2d2080fb5508a3e77c97c174bb2b8b00 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 13:49:57 +0800
Subject: [PATCH 27/76] Rewrite cst.ts around a rule-based PomlCstParser

---
 packages/poml/next/cst.ts   | 978 ++++++++++--------------------------
 packages/poml/next/nodes.ts |  65 ++-
 2 files changed, 315 insertions(+), 728 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index ff13c4b0..80417f22 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -1,718 +1,286 @@
-import { IToken } from 'chevrotain';
-import {
-  extendedPomlLexer,
-  TemplateOpen,
-  TemplateClose,
-  TagClosingOpen,
-  TagSelfClose,
-  TagOpen,
-  TagClose,
-  Equals,
-  DoubleQuote,
-  SingleQuote,
-  Identifier,
-  Whitespace,
-  TextContent,
-} from './lexer';
-
-import { listComponentAliases } from '../base';
-import * as Nodes from './nodes';
-
-// Context for parsing configuration
-export interface PomlContext {
-  variables: { [key: string]: any };
-  stylesheet: { [key: string]: string };
-  minPomlVersion?: string;
-  maxPomlVersion?: string;
-  sourcePath: string;
-  enabledComponents: Set<string>;
-  unknownComponentBehavior: 'error' | 'warning' | 'ignore';
-}
-
-// CST Parser class
-export class CSTParser {
-  private tokens: IToken[];
-  private position: number;
-  private text: string;
-  private context: PomlContext;
-  private nodeIdCounter: number;
-
-  // These are the tags that are always valid in POML.
-  // You can not disable them.
-  private alwaysValidTags = new Set<string>(['text', 'meta']);
-
-  // These semantics are handled right here.
-  private nonComponentTags = new Set<string>([
-    'let',
-    'include',
-    'template',
-    'context',
-    'stylesheet',
-    'output-schema',
-    'outputschema',
-    'tool',
-    'tool-def',
-    'tool-definition',
-    'tooldef',
-    'tooldefinition',
-  ]);
-
-  constructor(context: PomlContext) {
-    this.tokens = [];
-    this.position = 0;
-    this.text = '';
-    this.context = context;
-    this.nodeIdCounter = 0;
-
-    // Initialize default enabled components (can be extended/disabled via meta tags)
-    this.context.enabledComponents = new Set(listComponentAliases());
-    this.context.unknownComponentBehavior = 'warning';
-  }
-
-  private generateId(): string {
-    return `node_${++this.nodeIdCounter}`;
-  }
-
-  private currentToken(): IToken | undefined {
-    return this.tokens[this.position];
-  }
-
-  private peekToken(offset: number = 1): IToken | undefined {
-    return this.tokens[this.position + offset];
+export class PomlCstParser extends CstParser {
+  // Define rules as public methods
+  public document!: () => DocumentCstNode;
+  public content!: () => ContentCstNode;
+  public element!: () => ElementCstNode;
+  public literalElement!: () => LiteralElementCstNode;
+  public selfCloseElement!: () => SelfCloseElementCstNode;
+  public openTag!: () => OpenTagCstNode;
+  public closeTag!: () => CloseTagCstNode;
+  public attributes!: () => AttributesCstNode;
+  public attribute!: () => AttributeCstNode;
+  public attributeValue!: () => AttributeValueCstNode;
+  public quotedValue!: () => QuotedValueCstNode;
+  public unquotedValue!: () => UnquotedValueCstNode;
+  public valueContent!: () => ValueContentCstNode;
+  public escapedChar!: () => EscapedCharCstNode;
+  public forIterator!: () => ForIteratorCstNode;
+  public template!: () => TemplateCstNode;
+  public value!: () => ValueCstNode;
+  public valueElement!: () => ValueElementCstNode;
+  public comment!: () => CommentCstNode;
+  public pragma!: () => PragmaCstNode;
+
+  constructor() {
+    super(allTokens, {
+      recoveryEnabled: true,
+      nodeLocationTracking: 'full',
+    });
+
+    this.performSelfAnalysis();
   }
 
-  private consumeToken(): IToken | undefined {
-    if (this.position < this.tokens.length) {
-      return this.tokens[this.position++];
-    }
-    return undefined;
-  }
-
-  private skipWhitespace(): void {
-    while (this.currentToken()?.tokenType === Whitespace) {
-      this.position++;
-    }
-  }
-
-  public parse(text: string): ASTNode {
-    this.text = text;
-    const lexResult = extendedPomlLexer.tokenize(text);
-    this.tokens = lexResult.tokens;
-    this.position = 0;
-
-    const rootNode: ASTNode = {
-      id: this.generateId(),
-      kind: 'TEXT',
-      start: 0,
-      end: text.length,
-      content: text,
-      children: [],
-      textSegments: [],
-    };
-
-    this.parseDocument(rootNode);
-    return rootNode;
-  }
-
-  private parseDocument(rootNode: ASTNode): void {
-    while (this.position < this.tokens.length) {
-      const token = this.currentToken();
-      if (!token) {
-        break;
-      }
-
-      if (token.tokenType === TagOpen) {
-        const nextToken = this.peekToken();
-        if (nextToken?.tokenType === Identifier) {
-          const tagName = nextToken.image;
-
-          if (tagName === 'meta') {
-            const metaNode = this.parseMetaTag();
-            if (metaNode) {
-              rootNode.children.push(metaNode);
-              metaNode.parent = rootNode;
-              this.processMeta(metaNode);
-            }
-          } else if (this.context.enabledComponents.has(tagName)) {
-            const pomlNode = this.parsePomlElement();
-            if (pomlNode) {
-              rootNode.children.push(pomlNode);
-              pomlNode.parent = rootNode;
-            }
-          } else {
-            // Unknown tag - treat as text
-            this.handleUnknownTag(tagName);
-            const textNode = this.parseTextContent();
-            if (textNode) {
-              rootNode.children.push(textNode);
-              textNode.parent = rootNode;
-            }
-          }
-        } else {
-          // Malformed tag - treat as text
-          const textNode = this.parseTextContent();
-          if (textNode) {
-            rootNode.children.push(textNode);
-            textNode.parent = rootNode;
-          }
-        }
-      } else {
-        const textNode = this.parseTextContent();
-        if (textNode) {
-          rootNode.children.push(textNode);
-          textNode.parent = rootNode;
-        }
-      }
-    }
-  }
-
-  private parseMetaTag(): ASTNode | null {
-    const startPos = this.position;
-    const openTagStart = this.currentToken()?.startOffset || 0;
-
-    this.consumeToken(); // consume '<'
-    this.skipWhitespace();
-
-    const nameToken = this.consumeToken(); // consume 'meta'
-    if (!nameToken || nameToken.image !== 'meta') {
-      return null;
-    }
-
-    const nameRange: SourceRange = {
-      start: nameToken.startOffset || 0,
-      end: (nameToken.endOffset || 0) + 1,
-    };
-
-    this.skipWhitespace();
-
-    const attributes = this.parseAttributes();
-
-    this.skipWhitespace();
-
-    // Check for self-closing or regular closing
-    const closeToken = this.currentToken();
-    let openTagEnd = 0;
-    let hasContent = false;
-
-    if (closeToken?.tokenType === TagSelfClose) {
-      this.consumeToken(); // consume '/>'
-      openTagEnd = (closeToken.endOffset || 0) + 1;
-    } else if (closeToken?.tokenType === TagClose) {
-      this.consumeToken(); // consume '>'
-      openTagEnd = (closeToken.endOffset || 0) + 1;
-      hasContent = true;
-    }
-
-    const metaNode: ASTNode = {
-      id: this.generateId(),
-      kind: 'META',
-      start: openTagStart,
-      end: openTagEnd, // Will be updated if there's content
-      content: '',
-      children: [],
-      tagName: 'meta',
-      attributes,
-      openingTag: {
-        start: openTagStart,
-        end: openTagEnd,
-        nameRange,
+  // Document is the root rule
+  private documentRule = this.RULE('document', () => {
+    this.MANY(() => {
+      this.OR([{ ALT: () => this.CONSUME(Whitespace) }, { ALT: () => this.SUBRULE(this.content) }]);
+    });
+  });
+
+  // Content can be elements, comments, pragmas, or values
+  private contentRule = this.RULE('content', () => {
+    this.OR([
+      { ALT: () => this.SUBRULE(this.pragma) },
+      { ALT: () => this.SUBRULE(this.comment) },
+      { ALT: () => this.SUBRULE(this.element) },
+      { ALT: () => this.SUBRULE(this.literalElement) },
+      { ALT: () => this.SUBRULE(this.selfCloseElement) },
+      { ALT: () => this.SUBRULE(this.value) },
+    ]);
+  });
+
+  // Regular element with open/close tags
+  private elementRule = this.RULE('element', () => {
+    const openTag = this.SUBRULE(this.openTag);
+    this.MANY(() => {
+      this.OR([{ ALT: () => this.CONSUME(Whitespace) }, { ALT: () => this.SUBRULE(this.content) }]);
+    });
+    this.SUBRULE(this.closeTag);
+  });
+
+  // Literal element (like <text>) that preserves content
+  private literalElementRule = this.RULE('literalElement', () => {
+    this.SUBRULE(this.openTag);
+    // Consume everything until matching close tag
+    this.MANY(() => {
+      this.OR([
+        // Look ahead for closing tag
+        {
+          GATE: () => !this.isClosingTag(),
+          ALT: () => this.consumeAny(),
+        },
+      ]);
+    });
+    this.SUBRULE(this.closeTag);
+  });
+
+  // Self-closing element
+  private selfCloseElementRule = this.RULE('selfCloseElement', () => {
+    this.CONSUME(TagOpen);
+    this.CONSUME(Identifier, { LABEL: 'tagName' });
+    this.OPTION(() => {
+      this.CONSUME(Whitespace);
+      this.OPTION2(() => this.SUBRULE(this.attributes));
+    });
+    this.CONSUME(TagSelfClose);
+  });
+
+  // Opening tag
+  private openTagRule = this.RULE('openTag', () => {
+    this.CONSUME(TagOpen);
+    this.CONSUME(Identifier, { LABEL: 'tagName' });
+    this.OPTION(() => {
+      this.CONSUME(Whitespace);
+      this.OPTION2(() => this.SUBRULE(this.attributes));
+    });
+    this.CONSUME(TagClose);
+  });
+
+  // Closing tag
+  private closeTagRule = this.RULE('closeTag', () => {
+    this.CONSUME(TagClosingOpen);
+    this.CONSUME(Identifier, { LABEL: 'tagName' });
+    this.OPTION(() => this.CONSUME(Whitespace));
+    this.CONSUME(TagClose);
+  });
+
+  // Attributes
+  private attributesRule = this.RULE('attributes', () => {
+    this.MANY_SEP({
+      SEP: Whitespace,
+      DEF: () => this.SUBRULE(this.attribute),
+    });
+  });
+
+  // Single attribute
+  private attributeRule = this.RULE('attribute', () => {
+    this.CONSUME(Identifier, { LABEL: 'key' });
+    this.CONSUME(Equals);
+    this.SUBRULE(this.attributeValue);
+  });
+
+  // Attribute value (quoted, unquoted, or for iterator)
+  private attributeValueRule = this.RULE('attributeValue', () => {
+    this.OR([
+      { ALT: () => this.SUBRULE(this.quotedValue) },
+      { ALT: () => this.SUBRULE(this.unquotedValue) },
+      // Special case for for="item in items"
+      {
+        GATE: () => this.isForAttribute(),
+        ALT: () => this.SUBRULE(this.forIterator),
       },
-    };
-
-    if (hasContent) {
-      // Parse content until closing tag
-      while (this.position < this.tokens.length) {
-        const token = this.currentToken();
-        if (token?.tokenType === TagClosingOpen) {
-          const nextToken = this.peekToken();
-          if (nextToken?.tokenType === Identifier && nextToken.image === 'meta') {
-            break;
-          }
-        }
-        this.position++;
-      }
-
-      // Parse closing tag
-      if (this.currentToken()?.tokenType === TagClosingOpen) {
-        const closingTagStart = this.currentToken()?.startOffset || 0;
-        this.consumeToken(); // consume '</'
-        const closingNameToken = this.consumeToken(); // consume 'meta'
-        this.skipWhitespace();
-        const finalClose = this.consumeToken(); // consume '>'
-
-        if (closingNameToken && finalClose) {
-          metaNode.closingTag = {
-            start: closingTagStart,
-            end: (finalClose.endOffset || 0) + 1,
-            nameRange: {
-              start: closingNameToken.startOffset || 0,
-              end: (closingNameToken.endOffset || 0) + 1,
-            },
-          };
-          metaNode.end = (finalClose.endOffset || 0) + 1;
-        }
-      }
-    }
-
-    metaNode.content = this.text.slice(metaNode.start, metaNode.end);
-    return metaNode;
-  }
-
-  private parsePomlElement(): ASTNode | null {
-    const openTagStart = this.currentToken()?.startOffset || 0;
-
-    this.consumeToken(); // consume '<'
-    this.skipWhitespace();
-
-    const nameToken = this.consumeToken();
-    if (!nameToken) {
-      return null;
-    }
-
-    const tagName = nameToken.image;
-    const nameRange: SourceRange = {
-      start: nameToken.startOffset || 0,
-      end: (nameToken.endOffset || 0) + 1,
-    };
-
-    this.skipWhitespace();
-
-    const attributes = this.parseAttributes();
-
-    this.skipWhitespace();
-
-    // Check for self-closing or regular closing
-    const closeToken = this.currentToken();
-    let openTagEnd = 0;
-    let hasContent = false;
-
-    if (closeToken?.tokenType === TagSelfClose) {
-      this.consumeToken(); // consume '/>'
-      openTagEnd = (closeToken.endOffset || 0) + 1;
-    } else if (closeToken?.tokenType === TagClose) {
-      this.consumeToken(); // consume '>'
-      openTagEnd = (closeToken.endOffset || 0) + 1;
-      hasContent = true;
-    }
-
-    const pomlNode: ASTNode = {
-      id: this.generateId(),
-      kind: 'POML',
-      start: openTagStart,
-      end: openTagEnd, // Will be updated if there's content
-      content: '',
-      children: [],
-      tagName,
-      attributes,
-      openingTag: {
-        start: openTagStart,
-        end: openTagEnd,
-        nameRange,
+    ]);
+  });
+
+  // Quoted value
+  private quotedValueRule = this.RULE('quotedValue', () => {
+    this.OR([
+      {
+        ALT: () => {
+          this.CONSUME(DoubleQuote, { LABEL: 'openQuote' });
+          this.MANY(() => {
+            this.SUBRULE(this.valueContent);
+          });
+          this.CONSUME2(DoubleQuote, { LABEL: 'closeQuote' });
+        },
       },
-    };
-
-    if (hasContent) {
-      if (tagName === 'text') {
-        // Special handling for <text> tags - parse content as pure text
-        this.parseTextContentForTextTag(pomlNode);
-      } else {
-        // Parse mixed content (POML and text)
-        this.parseMixedContent(pomlNode);
-      }
-
-      // Parse closing tag
-      if (this.currentToken()?.tokenType === TagClosingOpen) {
-        const closingTagStart = this.currentToken()?.startOffset || 0;
-        this.consumeToken(); // consume '</'
-        const closingNameToken = this.consumeToken();
-        this.skipWhitespace();
-        const finalClose = this.consumeToken(); // consume '>'
-
-        if (closingNameToken && finalClose) {
-          pomlNode.closingTag = {
-            start: closingTagStart,
-            end: (finalClose.endOffset || 0) + 1,
-            nameRange: {
-              start: closingNameToken.startOffset || 0,
-              end: (closingNameToken.endOffset || 0) + 1,
-            },
-          };
-          pomlNode.end = (finalClose.endOffset || 0) + 1;
-        }
-      }
-    }
-
-    pomlNode.content = this.text.slice(pomlNode.start, pomlNode.end);
-    return pomlNode;
-  }
-
-  private parseTextContentForTextTag(parentNode: ASTNode): void {
-    // In <text> tags, we parse content as pure text but still need to handle nested POML
-    while (this.position < this.tokens.length) {
-      const token = this.currentToken();
-      if (!token) {
-        break;
-      }
-
-      if (token.tokenType === TagClosingOpen) {
-        const nextToken = this.peekToken();
-        if (nextToken?.tokenType === Identifier && nextToken.image === parentNode.tagName) {
-          break; // Found closing tag
-        }
-      }
-
-      if (token.tokenType === TagOpen) {
-        const nextToken = this.peekToken();
-        if (nextToken?.tokenType === Identifier && this.context.enabledComponents.has(nextToken.image)) {
-          // Found nested POML element
-          const nestedNode = this.parsePomlElement();
-          if (nestedNode) {
-            parentNode.children.push(nestedNode);
-            nestedNode.parent = parentNode;
-          }
-        } else {
-          // Treat as text
-          const textNode = this.parseTextContent();
-          if (textNode) {
-            parentNode.children.push(textNode);
-            textNode.parent = parentNode;
-          }
-        }
-      } else {
-        const textNode = this.parseTextContent();
-        if (textNode) {
-          parentNode.children.push(textNode);
-          textNode.parent = parentNode;
-        }
-      }
-    }
-  }
-
-  private parseMixedContent(parentNode: ASTNode): void {
-    while (this.position < this.tokens.length) {
-      const token = this.currentToken();
-      if (!token) {
-        break;
-      }
-
-      if (token.tokenType === TagClosingOpen) {
-        const nextToken = this.peekToken();
-        if (nextToken?.tokenType === Identifier && nextToken.image === parentNode.tagName) {
-          break; // Found closing tag
-        }
-      }
-
-      if (token.tokenType === TagOpen) {
-        const nextToken = this.peekToken();
-        if (nextToken?.tokenType === Identifier && this.context.enabledComponents.has(nextToken.image)) {
-          // Found nested POML element
-          const nestedNode = this.parsePomlElement();
-          if (nestedNode) {
-            parentNode.children.push(nestedNode);
-            nestedNode.parent = parentNode;
-          }
-        } else {
-          // Unknown tag or malformed - treat as text
-          const textNode = this.parseTextContent();
-          if (textNode) {
-            parentNode.children.push(textNode);
-            textNode.parent = parentNode;
-          }
-        }
-      } else if (token.tokenType === TemplateOpen) {
-        // Parse template expression
-        const templateNode = this.parseTemplate();
-        if (templateNode) {
-          parentNode.children.push(templateNode);
-          templateNode.parent = parentNode;
-        }
-      } else {
-        const textNode = this.parseTextContent();
-        if (textNode) {
-          parentNode.children.push(textNode);
-          textNode.parent = parentNode;
-        }
-      }
-    }
-  }
-
-  private parseTextContent(): ASTNode | null {
-    const startOffset = this.currentToken()?.startOffset || 0;
-    let endOffset = startOffset;
-
-    // Collect consecutive text tokens
-    while (this.position < this.tokens.length) {
-      const token = this.currentToken();
-      if (!token) {
-        break;
-      }
-
-      if (token.tokenType === TextContent || token.tokenType === Whitespace) {
-        endOffset = (token.endOffset || 0) + 1;
-        this.position++;
-      } else if (
-        token.tokenType === TagOpen ||
-        token.tokenType === TemplateOpen ||
-        token.tokenType === TagClosingOpen
-      ) {
-        break;
-      } else {
-        // Other tokens treated as text in this context
-        endOffset = (token.endOffset || 0) + 1;
-        this.position++;
-      }
-    }
-
-    if (endOffset === startOffset) {
-      return null;
-    }
-
-    const textNode: ASTNode = {
-      id: this.generateId(),
-      kind: 'TEXT',
-      start: startOffset,
-      end: endOffset,
-      content: this.text.slice(startOffset, endOffset),
-      children: [],
-      textSegments: [{ start: startOffset, end: endOffset }],
-    };
-
-    return textNode;
-  }
-
-  private parseTemplate(): ASTNode | null {
-    const startToken = this.currentToken();
-    if (!startToken || startToken.tokenType !== TemplateOpen) {
-      return null;
-    }
-
-    const startOffset = startToken.startOffset || 0;
-    this.consumeToken(); // consume '{{'
-
-    let expression = '';
-    let endOffset = startOffset + 2;
-
-    // Collect content until TemplateClose
-    while (this.position < this.tokens.length) {
-      const token = this.currentToken();
-      if (!token) {
-        break;
-      }
-
-      if (token.tokenType === TemplateClose) {
-        endOffset = (token.endOffset || 0) + 1;
-        this.consumeToken();
-        break;
-      } else {
-        expression += token.image;
-        endOffset = (token.endOffset || 0) + 1;
-        this.consumeToken();
-      }
-    }
-
-    const templateNode: ASTNode = {
-      id: this.generateId(),
-      kind: 'TEMPLATE',
-      start: startOffset,
-      end: endOffset,
-      content: this.text.slice(startOffset, endOffset),
-      children: [],
-      expression: expression.trim(),
-    };
-
-    return templateNode;
-  }
-
-  private parseAttributes(): AttributeInfo[] {
-    const attributes: AttributeInfo[] = [];
-
-    while (this.position < this.tokens.length) {
-      this.skipWhitespace();
-
-      const token = this.currentToken();
-      if (!token || token.tokenType !== Identifier) {
-        break;
-      }
-
-      const keyToken = this.consumeToken()!;
-      const keyRange: SourceRange = {
-        start: keyToken.startOffset || 0,
-        end: (keyToken.endOffset || 0) + 1,
-      };
-
-      this.skipWhitespace();
-
-      if (this.currentToken()?.tokenType !== Equals) {
-        // Boolean attribute
-        attributes.push({
-          key: keyToken.image,
-          value: [
-            {
-              id: this.generateId(),
-              kind: 'TEXT',
-              start: keyRange.start,
-              end: keyRange.end,
-              content: 'true',
-              children: [],
-            },
-          ],
-          keyRange,
-          valueRange: keyRange,
-          fullRange: keyRange,
-        });
-        continue;
-      }
-
-      this.consumeToken(); // consume '='
-      this.skipWhitespace();
-
-      const quoteToken = this.currentToken();
-      if (!quoteToken || (quoteToken.tokenType !== DoubleQuote && quoteToken.tokenType !== SingleQuote)) {
-        break; // Invalid attribute
-      }
-
-      const isDoubleQuote = quoteToken.tokenType === DoubleQuote;
-      const valueStart = (quoteToken.endOffset || 0) + 1;
-      this.consumeToken(); // consume opening quote
-
-      const valueNodes: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[] = [];
-      let valueEnd = valueStart;
-
-      // Parse attribute value content
-      while (this.position < this.tokens.length) {
-        const token = this.currentToken();
-        if (!token) {
-          break;
-        }
-
-        if ((isDoubleQuote && token.tokenType === DoubleQuote) || (!isDoubleQuote && token.tokenType === SingleQuote)) {
-          valueEnd = token.startOffset || valueEnd;
-          this.consumeToken(); // consume closing quote
-          break;
-        } else if (token.tokenType === TemplateOpen) {
-          const templateNode = this.parseTemplate();
-          if (templateNode && (templateNode.kind === 'TEXT' || templateNode.kind === 'TEMPLATE')) {
-            valueNodes.push(templateNode as ASTNode & { kind: 'TEXT' | 'TEMPLATE' });
-          }
-        } else {
-          // Collect text content
-          const textStart = token.startOffset || 0;
-          let textEnd = (token.endOffset || 0) + 1;
-          let textContent = token.image;
-
-          this.consumeToken();
-
-          // Collect more text tokens
-          while (this.position < this.tokens.length) {
-            const nextToken = this.currentToken();
-            if (!nextToken) {
-              break;
-            }
-
-            if (
-              (isDoubleQuote && nextToken.tokenType === DoubleQuote) ||
-              (!isDoubleQuote && nextToken.tokenType === SingleQuote) ||
-              nextToken.tokenType === TemplateOpen
-            ) {
-              break;
-            }
-
-            textContent += nextToken.image;
-            textEnd = (nextToken.endOffset || 0) + 1;
-            this.consumeToken();
-          }
-
-          valueNodes.push({
-            id: this.generateId(),
-            kind: 'TEXT',
-            start: textStart,
-            end: textEnd,
-            content: textContent,
-            children: [],
+      {
+        ALT: () => {
+          this.CONSUME(SingleQuote, { LABEL: 'openQuote' });
+          this.MANY2(() => {
+            this.SUBRULE2(this.valueContent);
           });
-        }
-      }
-
-      const valueRange: SourceRange = { start: valueStart, end: valueEnd };
-      const fullRange: SourceRange = {
-        start: keyRange.start,
-        end: (this.tokens[this.position - 1]?.endOffset || 0) + 1,
-      };
-
-      attributes.push({
-        key: keyToken.image,
-        value: valueNodes,
-        keyRange,
-        valueRange,
-        fullRange,
-      });
-    }
-
-    return attributes;
+          this.CONSUME2(SingleQuote, { LABEL: 'closeQuote' });
+        },
+      },
+    ]);
+  });
+
+  // Unquoted value (template or expression)
+  private unquotedValueRule = this.RULE('unquotedValue', () => {
+    this.OR([
+      { ALT: () => this.SUBRULE(this.template) },
+      { ALT: () => this.CONSUME(Identifier, { LABEL: 'expression' }) },
+      { ALT: () => this.CONSUME(TextContent, { LABEL: 'expression' }) },
+    ]);
+  });
+
+  // Value content inside quotes
+  private valueContentRule = this.RULE('valueContent', () => {
+    this.OR([
+      { ALT: () => this.SUBRULE(this.template) },
+      { ALT: () => this.SUBRULE(this.escapedChar) },
+      { ALT: () => this.CONSUME(TextContent, { LABEL: 'text' }) },
+      { ALT: () => this.CONSUME(Identifier, { LABEL: 'text' }) },
+      { ALT: () => this.CONSUME(Whitespace, { LABEL: 'text' }) },
+    ]);
+  });
+
+  // Escaped character
+  private escapedCharRule = this.RULE('escapedChar', () => {
+    this.CONSUME(Backslash);
+    this.OR([
+      { ALT: () => this.CONSUME(DoubleQuote, { LABEL: 'char' }) },
+      { ALT: () => this.CONSUME(SingleQuote, { LABEL: 'char' }) },
+      { ALT: () => this.CONSUME(Backslash, { LABEL: 'char' }) },
+      { ALT: () => this.CONSUME(Identifier, { LABEL: 'char' }) },
+    ]);
+  });
+
+  // For iterator (item in items)
+  private forIteratorRule = this.RULE('forIterator', () => {
+    this.CONSUME(Identifier, { LABEL: 'iterator' });
+    this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'Whitespace1' }));
+    this.CONSUME2(Identifier, { LABEL: 'in' }); // "in" keyword
+    this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'Whitespace2' }));
+    // Collection can be complex expression
+    this.AT_LEAST_ONE(() => {
+      this.OR([
+        { ALT: () => this.CONSUME3(Identifier, { LABEL: 'collection' }) },
+        { ALT: () => this.CONSUME(TextContent, { LABEL: 'collection' }) },
+      ]);
+    });
+  });
+
+  // Template {{ expression }}
+  private templateRule = this.RULE('template', () => {
+    this.CONSUME(TemplateOpen);
+    this.MANY(() => {
+      this.OR([
+        { ALT: () => this.CONSUME(Whitespace, { LABEL: 'expression' }) },
+        { ALT: () => this.CONSUME(Identifier, { LABEL: 'expression' }) },
+        { ALT: () => this.CONSUME(TextContent, { LABEL: 'expression' }) },
+      ]);
+    });
+    this.CONSUME(TemplateClose);
+  });
+
+  // Value (text and/or templates)
+  private valueRule = this.RULE('value', () => {
+    this.AT_LEAST_ONE(() => {
+      this.SUBRULE(this.valueElement);
+    });
+  });
+
+  // Value element (text or template)
+  private valueElementRule = this.RULE('valueElement', () => {
+    this.OR([
+      { ALT: () => this.SUBRULE(this.template) },
+      { ALT: () => this.CONSUME(TextContent, { LABEL: 'text' }) },
+      { ALT: () => this.CONSUME(Identifier, { LABEL: 'text' }) },
+      { ALT: () => this.CONSUME(Whitespace, { LABEL: 'text' }) },
+    ]);
+  });
+
+  // Comment
+  private commentRule = this.RULE('comment', () => {
+    this.CONSUME(CommentOpen);
+    this.MANY(() => {
+      this.OR([
+        {
+          GATE: () => !this.isCommentClose(),
+          ALT: () => this.consumeAny({ LABEL: 'commentContent' }),
+        },
+      ]);
+    });
+    this.CONSUME(CommentClose);
+  });
+
+  // Pragma
+  private pragmaRule = this.RULE('pragma', () => {
+    this.CONSUME(CommentOpen);
+    this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'Whitespace1' }));
+    this.CONSUME(Pragma);
+    this.MANY(() => {
+      this.OR([
+        {
+          GATE: () => !this.isCommentClose(),
+          ALT: () => this.consumeAny({ LABEL: 'pragmaContent' }),
+        },
+      ]);
+    });
+    this.CONSUME(CommentClose);
+  });
+
+  // Helper methods
+  private isClosingTag(): boolean {
+    return this.LA(1).tokenType === TagClosingOpen;
   }
 
-  private processMeta(metaNode: ASTNode): void {
-    if (!metaNode.attributes) {
-      return;
-    }
-
-    for (const attr of metaNode.attributes) {
-      switch (attr.key) {
-        case 'components':
-          this.processComponentsAttribute(attr.value);
-          break;
-        case 'unknownComponents':
-          const behavior = attr.value[0]?.content; // eslint-disable-line
-          if (behavior === 'error' || behavior === 'warning' || behavior === 'ignore') {
-            this.context.unknownComponentBehavior = behavior;
-          }
-          break;
-        case 'minimalPomlVersion':
-          this.context.minimalPomlVersion = attr.value[0]?.content;
-          break;
-        // Add other meta attributes as needed
-      }
-    }
+  private isCommentClose(): boolean {
+    return this.LA(1).tokenType === CommentClose;
   }
 
-  private processComponentsAttribute(value: (ASTNode & { kind: 'TEXT' | 'TEMPLATE' })[]): void {
-    const components = value[0]?.content || '';
-    const parts = components.split(',').map((s) => s.trim());
-
-    for (const part of parts) {
-      if (part.startsWith('+')) {
-        this.context.enabledComponents.add(part.slice(1));
-      } else if (part.startsWith('-')) {
-        this.context.enabledComponents.delete(part.slice(1));
-      }
-    }
+  private isForAttribute(): boolean {
+    // Check if previous token was "for" as attribute key
+    const prevTokens = this.input.slice(Math.max(0, this.currIdx - 3), this.currIdx);
+    return prevTokens.some((t) => t.image.toLowerCase() === 'for');
   }
 
-  private handleUnknownTag(tagName: string): void {
-    switch (this.context.unknownComponentBehavior) {
-      case 'error':
-        throw new Error(`Unknown POML component: ${tagName}`);
-      case 'warning':
-        console.warn(`Unknown POML component: ${tagName}`);
-        break;
-      case 'ignore':
-        // Do nothing
-        break;
-    }
+  private consumeAny(options?: { LABEL?: string }): IToken {
+    // Consume any token
+    const token = this.LA(1);
+    this.input[this.currIdx++];
+    return token;
   }
 }
-
-// Export function to create and use the parser
-export function parseExtendedPoml(text: string, context: Partial<PomlContext> = {}): ASTNode {
-  const fullContext: PomlContext = {
-    variables: {},
-    stylesheet: {},
-    sourcePath: '',
-    enabledComponents: new Set(),
-    unknownComponentBehavior: 'warning',
-    ...context,
-  };
-
-  const parser = new CSTParser(fullContext);
-  return parser.parse(text);
-}
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 0d682204..567bdc9d 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -1,4 +1,5 @@
 import { Range } from './types';
+import { IToken } from 'chevrotain';
 
 /**
  * Represents a JavaScript expression as a string.
@@ -24,6 +25,8 @@ export interface ExpressionNode {
   value: string;
 }
 
+export interface ExpressionCstNode {}
+
 /**
  * Represents a template interpolation with double curly braces,
  * or sometimes without braces in specific attributes.
@@ -180,7 +183,7 @@ export interface ForLoopAttributeNode {
   kind: 'FORATTRIBUTE';
   range: Range;
   key: StringNode;
-  value: ForLoopNode;
+  value: ForIteratorNode;
 }
 
 /**
@@ -247,7 +250,6 @@ export interface CloseTagNode {
  * - Elements with content: `<div>content</div>` (use ElementNode)
  * - Separate open/close tags: `<div></div>` (use ElementNode)
  * - Tags without the self-closing slash: `<img>` (use OpenTagNode)
- * - Meta elements: `<meta>` tags (use MetaNode)
  */
 export interface SelfCloseElementNode {
   kind: 'SELFCLOSE';
@@ -283,6 +285,41 @@ export interface ElementNode {
   children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
+/**
+ * Represents an HTML-like line/block comment in POML.
+ *
+ * Comment nodes preserve authoring notes or disabled content that should not
+ * affect rendering. The `value` holds the comment text without the `<!--`/`-->`
+ * delimiters.
+ *
+ * Examples:
+ * - `<!-- this is a comment -->`
+ */
+export interface CommentNode {
+  kind: 'COMMENT';
+  range: Range;
+  value: StringNode;
+}
+
+/**
+ * Represents a pragma directive carried inside a comment.
+ *
+ * Pragmas are special instructions for parser/compiler. They usually appear
+ * inside comments and start with `@pragma`. For now we keep this node simple
+ * with a single `value` that contains the full directive text after
+ * `@pragma` (e.g. `components +reference -table`).
+ *
+ * Examples:
+ * - Specify version: `<!-- @pragma version >=1.0.0 <2.3.0 -->`
+ * - Turn tags on/off: `<!-- @pragma components +reference -table -->`
+ * - Turn speaker roles on/off: `<!-- @pragma speaker multi -->` or `single`
+ */
+export interface PragmaNode {
+  kind: 'PRAGMA';
+  range: Range;
+  value: StringNode;
+}
+
 /**
  * Represents an element that preserves literal content.
  *
@@ -314,25 +351,6 @@ export interface LiteralNode {
   children: StringNode;
 }
 
-/**
- * Represents metadata elements in POML. Meta elements must be self-closed.
- *
- * Meta nodes provide document-level metadata and configuration that doesn't
- * render as visible content. They typically appear at the document start and
- * configure processing behavior, document properties, or provide auxiliary
- * information.
- *
- * Cases that apply:
- * - Document metadata: `<!-- @pragma minVersion 1.0 -->`
- * - Configuration: `<!-- @pragma components +reference -table -->`
- */
-export interface MetaNode {
-  kind: 'META';
-  range: Range;
-  value: StringNode;
-  attributes: AttributeNode[];
-}
-
 /**
  * Represents the root node of a POML document tree.
  *
@@ -351,7 +369,7 @@ export interface MetaNode {
 export interface RootNode {
   kind: 'ROOT';
   range: Range;
-  children: (ElementNode | LiteralNode | MetaNode | ValueNode)[];
+  children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
 // Keep these keys required; everything else becomes recursively optional
@@ -386,7 +404,8 @@ export type StrictNode =
   | SelfCloseElementNode
   | ElementNode
   | LiteralNode
-  | MetaNode
+  | CommentNode
+  | PragmaNode
   | RootNode;
 
 // The "loose" counterpart you can safely produce during parsing.

From 7e2d3f0e9d226cb056f9c14c71cc72654a60a293 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 18:44:49 +0800
Subject: [PATCH 28/76] .

---
 packages/poml/next/lexer.ts              | 48 ++++++++++-------
 packages/poml/next/nodes.ts              | 47 +++++++++++++++--
 packages/poml/tests/reader/lexer.test.ts | 66 +++++++++++++++++-------
 3 files changed, 122 insertions(+), 39 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 9d58174b..f712ef93 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -1,8 +1,8 @@
 import { createToken, Lexer } from 'chevrotain';
 
 // Define token types for extended POML
-export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!--/ });
-export const CommentClose = createToken({ name: 'CommentClose', pattern: /-->/ });
+export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!-{2,}/ });
+export const CommentClose = createToken({ name: 'CommentClose', pattern: /-{2,}>/ });
 export const Pragma = createToken({ name: 'Pragma', pattern: /\b@pragma\b/i });
 export const TemplateOpen = createToken({ name: 'TemplateOpen', pattern: /{{/ });
 export const TemplateClose = createToken({ name: 'TemplateClose', pattern: /}}/ });
@@ -20,16 +20,17 @@ export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 /* Identifier is one of the following:
  * - XML tag names
  * - XML attribute names
- * - TextContent incorrectly parsed as identifiers
+ * - Arbitrary text content incorrectly parsed as identifiers
  *
  * Notes:
  * 1. In case 1, tags can contain : (namespaces) and . (extensions).
  *    These are handled later by CST parser.
  * 2. In case 3, CST parser will reclassify as TextContent if needed.
+ * 3. We are going to disallow "." and ":" to appear in XML tags.
  */
 export const Identifier = createToken({
   name: 'Identifier',
-  pattern: /[a-zA-Z_][a-zA-Z0-9_\-]*/,
+  pattern: /[a-zA-Z_]([a-zA-Z0-9_]|(-(?!\-+>)))*/,
 });
 
 export const Whitespace = createToken({
@@ -38,24 +39,35 @@ export const Whitespace = createToken({
   line_breaks: true,
 });
 
-
-/* Catch-all for arbitrary text content
-   - Match any char except:
-       <          — starts a tag
-       {{  or }}  — template delimiters
-       " or '     — start/end of string literals
-   - Single { or } are OK because they are not followed by another brace
-*/
-export const TextContent = createToken({
-  name: 'TextContent',
-  pattern: /(?:[^<"'{}]|{(?!{)|}(?!}))+/,
+/* Catch-all for arbitrary text content.
+ * Match any char except the patterns from other tokens:
+ * - starts or ends a tag: <, >, </, />
+ * - starts or ends a comment: <!--, -->
+ * - starts or ends a template: {{, }}
+ * - starts or ends a string literal: " or '
+ * - whitespace (handled separately)
+ * - equal sign (=)
+ * - backslash \ (allowed for escaping in strings)
+ *
+ * Allowed:
+ * - Single { or } are OK if they are not followed by another brace
+ * - Incomplete tag delimiters such as / (/< is an exception, because < is a start of tag)
+ * - Incomplete comment delimiters such as !-- or -- are OK
+ * - Incorrect @pragma directive such as @pragm or @pragmaX will be matched
+ */
+export const Arbitrary = createToken({
+  name: 'Arbitrary',
+  // Anything except: <, >, quotes, braces (double-brace protected), whitespace, =, backslash.
+  // Also allow a lone '/' but *not* when it starts '/>' (so TagSelfClose can win).
+  pattern: /(?:[^<>"'{}\s=\\\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>))+/,
   line_breaks: true,
 });
 
-
 // Define token order - more specific patterns first
 export const allTokens = [
-  Comment,
+  CommentOpen,
+  CommentClose,
+  Pragma,
   TemplateOpen,
   TemplateClose,
   TagClosingOpen, // Must come before TagOpen
@@ -68,7 +80,7 @@ export const allTokens = [
   Backslash,
   Identifier,
   Whitespace,
-  TextContent,
+  Arbitrary,
 ];
 
 // Extended POML Lexer class
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 567bdc9d..bd9c6ecd 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -1,5 +1,5 @@
 import { Range } from './types';
-import { IToken } from 'chevrotain';
+import { CstNode, IToken } from 'chevrotain';
 
 /**
  * Represents a JavaScript expression as a string.
@@ -10,9 +10,7 @@ import { IToken } from 'chevrotain';
  *
  * Cases that apply:
  * - Conditional expressions: `i > 0`, `user.name === "admin"`
- * - Collection accessors: `items.everything`, `data[0].value`
  * - Function calls: `formatDate(now)`, `items.filter(x => x.active)`
- * - Property paths: `user.profile.settings.theme`
  *
  * Cases that do not apply:
  * - Template syntax including braces: `{{ expression }}` (use TemplateNode)
@@ -301,6 +299,8 @@ export interface CommentNode {
   value: StringNode;
 }
 
+// export interface Comment
+
 /**
  * Represents a pragma directive carried inside a comment.
  *
@@ -320,6 +320,14 @@ export interface PragmaNode {
   value: StringNode;
 }
 
+export interface PragmaCstNode extends CstNode {
+  children: {
+    CommentOpenTag?: IToken[];
+    CommentCloseTag?: IToken[];
+    PragmaSymbol?: IToken[];
+  };
+}
+
 /**
  * Represents an element that preserves literal content.
  *
@@ -351,6 +359,19 @@ export interface LiteralNode {
   children: StringNode;
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface LiteralElementCstNode extends CstNode {
+  children: {
+    OpenTag?: OpenTagCstNode[];
+    CloseTag?: CloseTagCstNode[];
+    // All content between open and close tags is treated as literal text
+    // including other tags, comments, pragmas, etc.
+    TextContent?: IToken[];
+  };
+}
+
 /**
  * Represents the root node of a POML document tree.
  *
@@ -372,6 +393,26 @@ export interface RootNode {
   children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface RootCstNode extends CstNode {
+  children: {
+    Content?: ElementContentCstNode[];
+  };
+}
+
+export interface ElementContentCstNode extends CstNode {
+  children: {
+    Element?: ElementCstNode;
+    LiteralElement?: LiteralElementCstNode;
+    SelfCloseElement?: SelfCloseElementCstNode;
+    Comment?: CommentCstNode;
+    Pragma?: PragmaCstNode;
+    Value?: ElementValueCstNode;
+  };
+}
+
 // Keep these keys required; everything else becomes recursively optional
 type DeepPartialExcept<T, K extends keyof T> =
   // arrays
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 432c06ee..1c73bde0 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -1,7 +1,8 @@
 import { describe, expect, test } from '@jest/globals';
 import {
   extendedPomlLexer,
-  Comment,
+  CommentOpen,
+  CommentClose,
   TemplateOpen,
   TemplateClose,
   TagOpen,
@@ -14,7 +15,7 @@ import {
   Backslash,
   Identifier,
   Whitespace,
-  TextContent,
+  Arbitrary,
 } from 'poml/next/lexer';
 
 // Helper function to extract token images
@@ -262,12 +263,37 @@ describe('Token Types', () => {
   });
 
   test('should identify comments', () => {
-    expect(tokenTypes('<!-- comment -->')).toEqual([Comment]);
+    expect(tokenTypes('<!-- comment -->')).toEqual([CommentOpen, Whitespace, Identifier, Whitespace, CommentClose]);
   });
 
   test('should identify whitespace', () => {
     expect(tokenTypes('  \t\n  ')).toEqual([Whitespace]);
   });
+
+  test('should identify attributes', () => {
+    expect(tokenTypes('<markup.paragraph id="intro" data-value="123\\n"456\'>')).toEqual([
+      TagOpen,
+      Identifier,
+      Arbitrary,
+      Whitespace,
+      Identifier,
+      Equals,
+      DoubleQuote,
+      Identifier,
+      DoubleQuote,
+      Whitespace,
+      Identifier,
+      Equals,
+      DoubleQuote,
+      Arbitrary,
+      Backslash,
+      Identifier,
+      DoubleQuote,
+      Arbitrary,
+      SingleQuote,
+      TagClose,
+    ]);
+  });
 });
 
 describe('Source Position and Error Tests', () => {
@@ -436,7 +462,7 @@ describe('Boundary Conditions', () => {
     const longComment = `<!--${'x'.repeat(100000)}-->`;
     const commentResult = tokenize(longComment);
     expect(commentResult.errors).toHaveLength(0);
-    expect(commentResult.tokens).toHaveLength(1);
+    expect(commentResult.tokens).toHaveLength(3);
 
     const longIdentifier = 'a' + 'b'.repeat(10000);
     const identifierResult = tokenize(longIdentifier);
@@ -548,14 +574,14 @@ describe('Malformed Patterns', () => {
   test('should handle broken template syntax', () => {
     expect(tokenImages('}')).toEqual(['}']);
     expect(tokenImages('}}')).toEqual(['}}']);
-    expect(tokenImages('{ single brace }')).toEqual(['{ single brace }']);
-    expect(tokenImages('{not a template}')).toEqual(['{not a template}']);
+    expect(tokenImages('{ single brace }')).toEqual(['{', ' ', 'single', ' ', 'brace', ' ', '}']);
+    expect(tokenImages('{not a template}')).toEqual(['{not', ' ', 'a', ' ', 'template', '}']);
   });
 
   test('should handle nested malformed patterns', () => {
-    expect(tokenImages('<!-- <tag> -->')).toEqual(['<!-- <tag> -->']);
-    expect(tokenImages('<!-- {{template}} -->')).toEqual(['<!-- {{template}} -->']);
-    expect(tokenImages('<tag><!-- comment</tag>')).toEqual(['<', 'tag', '>', '<', '!-- comment', '</', 'tag', '>']);
+    expect(tokenImages('<!--   <tag>\n-->')).toEqual(['<!--', '   ', '<', 'tag', '>', '\n', '-->']);
+    expect(tokenImages('<!-- {{template}} -->')).toEqual(['<!--', ' ', '{{', 'template', '}}', ' ', '-->']);
+    expect(tokenImages('<tag><!-- comment</tag>')).toEqual(['<', 'tag', '>', '<!--', ' ', 'comment', '</', 'tag', '>']);
     expect(tokenImages('{{<tag>}}')).toEqual(['{{', '<', 'tag', '>', '}}']);
   });
 
@@ -588,7 +614,7 @@ describe('Malformed Patterns', () => {
   test('should handle whitespace in unexpected places', () => {
     expect(tokenImages('< tag >')).toEqual(['<', ' ', 'tag', ' ', '>']);
     expect(tokenImages('</ tag >')).toEqual(['</', ' ', 'tag', ' ', '>']);
-    expect(tokenImages('{ { template } }')).toEqual(['{ { template } }']);
+    expect(tokenImages('{ { template } }')).toEqual(['{', ' ', '{', ' ', 'template', ' ', '}', ' ', '}']);
     expect(tokenImages('attr = "value"')).toEqual(['attr', ' ', '=', ' ', '"', 'value', '"']);
   });
 
@@ -613,8 +639,12 @@ describe('Malformed Patterns', () => {
       ' ',
       'broken',
     ]);
-    expect(tokenImages('<!--comment--><tag>more{{ content')).toEqual([
-      '<!--comment-->',
+    expect(tokenImages('<!-- comment --><tag>more{{ content')).toEqual([
+      '<!--',
+      ' ',
+      'comment',
+      ' ',
+      '-->',
       '<',
       'tag',
       '>',
@@ -647,13 +677,13 @@ describe('Malformed Patterns', () => {
     expect(tokenImages('text{more}text')).toEqual(['text', '{more}text']);
     expect(tokenImages('before}after')).toEqual(['before', '}after']);
     expect(tokenImages('before{after')).toEqual(['before', '{after']);
-    expect(tokenImages('text } { more')).toEqual(['text', ' ', '} { more']);
+    expect(tokenImages('text } { more')).toEqual(['text', ' ', '}', ' ', '{', ' ', 'more']);
   });
 
   test('should handle greedy vs non-greedy matching', () => {
-    expect(tokenImages('<!--first--><!--second-->')).toEqual(['<!--first-->', '<!--second-->']);
+    expect(tokenImages('<!--first--><!--second-->')).toEqual(['<!--', 'first', '-->', '<!--', 'second', '-->']);
     expect(tokenImages('{{first}}{{second}}')).toEqual(['{{', 'first', '}}', '{{', 'second', '}}']);
-    expect(tokenImages('text<!--comment-->more')).toEqual(['text', '<!--comment-->', 'more']);
+    expect(tokenImages('text<!-----comment----->more')).toEqual(['text', '<!-----', 'comment', '----->', 'more']);
   });
 });
 
@@ -740,11 +770,11 @@ comment -->
 more text`;
 
     const result = tokenize(input);
-    const commentToken = result.tokens.find((t) => t.tokenType.name === 'Comment');
+    const commentToken = result.tokens.find((t) => t.tokenType.name === 'CommentOpen');
 
     expect(commentToken).toBeDefined();
     expect(commentToken!.startLine).toBe(2);
-    expect(commentToken!.endLine).toBe(4);
+    expect(commentToken!.endLine).toBe(2);
   });
 
   test('should handle position tracking with carriage returns', () => {
@@ -783,7 +813,7 @@ describe('Performance and Stress Tests', () => {
     const end = performance.now();
 
     expect(result.errors).toHaveLength(0);
-    expect(result.tokens).toHaveLength(1);
+    expect(result.tokens).toHaveLength(3);
     expect(end - start).toBeLessThan(500); // Should be fast
   });
 

From b070c727d4c1c5137e6d74c1ce58fd85b4ca63b4 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 18:54:19 +0800
Subject: [PATCH 29/76] .

---
 packages/poml/next/lexer.ts | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index f712ef93..4f362a6a 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -33,9 +33,10 @@ export const Identifier = createToken({
   pattern: /[a-zA-Z_]([a-zA-Z0-9_]|(-(?!\-+>)))*/,
 });
 
+// Include all Unicode whitespace characters and control characters
 export const Whitespace = createToken({
   name: 'Whitespace',
-  pattern: /[ \t\r\n]+/,
+  pattern: /[\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF]+/,
   line_breaks: true,
 });
 
@@ -45,22 +46,23 @@ export const Whitespace = createToken({
  * - starts or ends a comment: <!--, -->
  * - starts or ends a template: {{, }}
  * - starts or ends a string literal: " or '
- * - whitespace (handled separately)
+ * - whitespace (handled separately - includes Unicode whitespace and control chars)
  * - equal sign (=)
- * - backslash \ (allowed for escaping in strings)
+ * - backslash \ (handled separately for escaping)
  *
  * Allowed:
  * - Single { or } are OK if they are not followed by another brace
  * - Incomplete tag delimiters such as / (/< is an exception, because < is a start of tag)
  * - Incomplete comment delimiters such as !-- or -- are OK
  * - Incorrect @pragma directive such as @pragm or @pragmaX will be matched
+ * - All other Unicode characters including emojis, CJK, etc.
  */
 export const Arbitrary = createToken({
   name: 'Arbitrary',
-  // Anything except: <, >, quotes, braces (double-brace protected), whitespace, =, backslash.
-  // Also allow a lone '/' but *not* when it starts '/>' (so TagSelfClose can win).
-  pattern: /(?:[^<>"'{}\s=\\\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>))+/,
-  line_breaks: true,
+  // Match anything except: <, >, quotes, =, backslash, whitespace (including Unicode), control chars
+  // Allow single braces and slashes with lookahead constraints
+  pattern: /(?:[^<>"'{}=\\\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>))+/,
+  line_breaks: false,
 });
 
 // Define token order - more specific patterns first

From f03315de8ef41005a7e90e33f02a43179e67ce2b Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 19:09:53 +0800
Subject: [PATCH 30/76] .

---
 packages/poml/next/lexer.ts              |  2 +-
 packages/poml/tests/reader/lexer.test.ts | 33 +++++++++++++++---------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 4f362a6a..216f6e9e 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -1,7 +1,7 @@
 import { createToken, Lexer } from 'chevrotain';
 
 // Define token types for extended POML
-export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!-{2,}/ });
+export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!--(\-(?!\-+>))*/ });
 export const CommentClose = createToken({ name: 'CommentClose', pattern: /-{2,}>/ });
 export const Pragma = createToken({ name: 'Pragma', pattern: /\b@pragma\b/i });
 export const TemplateOpen = createToken({ name: 'TemplateOpen', pattern: /{{/ });
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 1c73bde0..ff2e940d 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -37,7 +37,7 @@ function tokenize(input: string) {
 
 describe('Basic Token Images', () => {
   test('should tokenize HTML comments', () => {
-    expect(tokenImages('<!-- comment -->')).toEqual(['<!-- comment -->']);
+    expect(tokenImages('<!-- comment -->')).toEqual(['<!--', ' ', 'comment', ' ', '-->']);
   });
 
   test('should tokenize template variables', () => {
@@ -114,15 +114,18 @@ describe('Edge Cases', () => {
 
   test('chinese characters', () => {
     expect(tokenImages('中文 {{ 文本 }}内容< 标签>')).toEqual([
-      '中文 ',
+      '中文',
+      ' ',
       '{{',
       ' ',
-      '文本 ',
+      '文本',
+      ' ',
       '}}',
       '内容',
       '<',
       ' ',
-      '标签>',
+      '标签',
+      '>',
     ]);
   });
 
@@ -376,7 +379,9 @@ Analyze data
 {{variable}}`;
 
     const images = tokenImages(input);
-    expect(images).toContain('# My Analysis\n\n');
+    expect(images).toContain('#');
+    expect(images).toContain('My');
+    expect(images).toContain('Analysis');
     expect(images).toContain('<');
     expect(images).toContain('task');
     expect(images).toContain('>');
@@ -387,7 +392,11 @@ Analyze data
 
   test('should handle comments with mixed content', () => {
     expect(tokenImages('<!-- comment --><task>content</task>')).toEqual([
-      '<!-- comment -->',
+      '<!--',
+      ' ',
+      'comment',
+      ' ',
+      '-->',
       '<',
       'task',
       '>',
@@ -446,7 +455,7 @@ describe('Boundary Conditions', () => {
   });
 
   test('should handle minimum valid patterns', () => {
-    expect(tokenImages('<!---->')).toEqual(['<!---->']);
+    expect(tokenImages('<!---->')).toEqual(['<!--', '-->']);
     expect(tokenImages('<a>')).toEqual(['<', 'a', '>']);
     expect(tokenImages('</a>')).toEqual(['</', 'a', '>']);
     expect(tokenImages('<a/>')).toEqual(['<', 'a', '/>']);
@@ -509,13 +518,13 @@ describe('Unicode and Special Characters', () => {
   });
 
   test('should handle emoji and symbols', () => {
-    expect(tokenImages('Hello 👋 World 🌍')).toEqual(['Hello', ' ', '👋 World 🌍']);
-    expect(tokenImages('Math: ∑∞π≠∅')).toEqual(['Math', ': ∑∞π≠∅']);
-    expect(tokenImages('Arrows: ←→↑↓')).toEqual(['Arrows', ': ←→↑↓']);
+    expect(tokenImages('Hello 👋 World 🌍')).toEqual(['Hello', ' ', '👋', ' ', 'World', ' ', '🌍']);
+    expect(tokenImages('Math: ∑∞π≠∅')).toEqual(['Math', ':', ' ', '∑∞π≠∅']);
+    expect(tokenImages('Arrows: ←→↑↓')).toEqual(['Arrows', ':', ' ', '←→↑↓']);
   });
 
   test('should handle unicode', () => {
-    expect(tokenImages('<こんにちは>')).toEqual(['<', 'こんにちは>']);
+    expect(tokenImages('<こんにちは>')).toEqual(['<', 'こんにちは', '>']);
     expect(tokenImages('{{你好}}')).toEqual(['{{', '你好', '}}']);
     expect(tokenImages('<tag attr="café">')).toEqual(['<', 'tag', ' ', 'attr', '=', '"', 'caf', 'é', '"', '>']);
   });
@@ -550,7 +559,7 @@ describe('Malformed Patterns', () => {
   test('should handle incomplete template variables', () => {
     expect(tokenImages('text {{')).toEqual(['text', ' ', '{{']);
     expect(tokenImages('text {{variable')).toEqual(['text', ' ', '{{', 'variable']);
-    expect(tokenImages('{{ var }{ not closed')).toEqual(['{{', ' ', 'var', ' ', '}{ not closed']);
+    expect(tokenImages('{{ var }{ not closed')).toEqual(['{{', ' ', 'var', ' ', '}{', ' ', 'not', ' ', 'closed']);
     expect(tokenImages('{{nested {{inside')).toEqual(['{{', 'nested', ' ', '{{', 'inside']);
   });
 

From 675ae8928b8a1740220caba6b5d032a2b8eceabd Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sat, 30 Aug 2025 19:18:10 +0800
Subject: [PATCH 31/76] add lexer tests for single braces, partial delimiters, @pragma

---
 packages/poml/tests/reader/lexer.test.ts | 62 ++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index ff2e940d..93c60818 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -694,6 +694,68 @@ describe('Malformed Patterns', () => {
     expect(tokenImages('{{first}}{{second}}')).toEqual(['{{', 'first', '}}', '{{', 'second', '}}']);
     expect(tokenImages('text<!-----comment----->more')).toEqual(['text', '<!-----', 'comment', '----->', 'more']);
   });
+
+  test('should handle single braces correctly', () => {
+    // Single { or } are OK if not followed by another brace
+    expect(tokenImages('text { more text')).toEqual(['text', ' ', '{', ' ', 'more', ' ', 'text']);
+    expect(tokenImages('text } more text')).toEqual(['text', ' ', '}', ' ', 'more', ' ', 'text']);
+    expect(tokenImages('a{b}c')).toEqual(['a', '{b}c']);
+    expect(tokenImages('path{index}')).toEqual(['path', '{index}']);
+    expect(tokenImages('array[{key}]')).toEqual(['array', '[{key}]']);
+    expect(tokenImages('{ not {{ double')).toEqual(['{', ' ', 'not', ' ', '{{', ' ', 'double']);
+    expect(tokenImages('} not }} double')).toEqual(['}', ' ', 'not', ' ', '}}', ' ', 'double']);
+    expect(tokenImages('{}empty{}')).toEqual(['{}empty{}']);
+    expect(tokenImages('}{reversed}{')).toEqual(['}{reversed}{']);
+  });
+
+  test('should handle incomplete tag delimiters', () => {
+    // Incomplete tag delimiters such as / (except /< and />)
+    expect(tokenImages('path/to/file')).toEqual(['path', '/to/file']);
+    expect(tokenImages('a/b/c')).toEqual(['a', '/b/c']);
+    expect(tokenImages('text / more')).toEqual(['text', ' ', '/', ' ', 'more']);
+    expect(tokenImages('http://example.com')).toEqual(['http', '://example.com']);
+    expect(tokenImages('5/3=1.67')).toEqual(['5/3', '=', '1.67']);
+    // These should NOT match as incomplete delimiters
+    expect(tokenImages('/<tag>')).toEqual(['/', '<', 'tag', '>']);
+    expect(tokenImages('/>')).toEqual(['/>']);
+    expect(tokenImages('</tag>')).toEqual(['</', 'tag', '>']);
+  });
+
+  test('should handle incomplete comment delimiters', () => {
+    // Incomplete comment delimiters such as !-- or -- are OK
+    expect(tokenImages('text !-- not comment')).toEqual(['text', ' ', '!--', ' ', 'not', ' ', 'comment']);
+    expect(tokenImages('text -- also not')).toEqual(['text', ' ', '--', ' ', 'also', ' ', 'not']);
+    expect(tokenImages('a--b')).toEqual(['a--b']);
+    expect(tokenImages('!--incomplete')).toEqual(['!--incomplete']);
+    expect(tokenImages('--dashes--')).toEqual(['--dashes--']);
+    expect(tokenImages('<!-- this is comment -->')).toEqual([
+      '<!--',
+      ' ',
+      'this',
+      ' ',
+      'is',
+      ' ',
+      'comment',
+      ' ',
+      '-->',
+    ]);
+    expect(tokenImages('not<!-- comment -->')).toEqual(['not', '<!--', ' ', 'comment', ' ', '-->']);
+    expect(tokenImages('---triple-dash')).toEqual(['---triple-dash']);
+    expect(tokenImages('text --- more')).toEqual(['text', ' ', '---', ' ', 'more']);
+  });
+
+  test('should handle incorrect @pragma directives', () => {
+    // Incorrect @pragma directive such as @pragm or @pragmaX will be matched as Arbitrary
+    expect(tokenImages('@pragma')).toEqual(['@pragma']);
+    expect(tokenImages('@pragm')).toEqual(['@pragm']);
+    expect(tokenImages('@pragmaX')).toEqual(['@pragmaX']);
+    expect(tokenImages('@pragma-extended')).toEqual(['@pragma-extended']);
+    expect(tokenImages('@@pragma')).toEqual(['@@pragma']);
+    expect(tokenImages('not@pragma')).toEqual(['not', '@pragma']);
+    expect(tokenImages('@PRAGMA')).toEqual(['@PRAGMA']);
+    expect(tokenImages('@Pragma')).toEqual(['@Pragma']);
+    expect(tokenImages('@pragma key=value')).toEqual(['@pragma', ' ', 'key', '=', 'value']);
+  });
 });
 
 describe('Position Tracking Accuracy', () => {

From f02c73570f73f798135d5e2dd9ae425fc3a891f3 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sun, 31 Aug 2025 00:34:12 +0800
Subject: [PATCH 32/76] add CST node interfaces for tags/elements; test lexing of </>

---
 packages/poml/next/nodes.ts              | 119 +++++++++++++++++++----
 packages/poml/tests/reader/lexer.test.ts |   7 ++
 2 files changed, 109 insertions(+), 17 deletions(-)

diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index bd9c6ecd..389bf2dd 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -184,6 +184,23 @@ export interface ForLoopAttributeNode {
   value: ForIteratorNode;
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface ForLoopAttributeCstNode extends CstNode {
+  children: {
+    AttributeKey?: IToken[];
+    WsAfterKey?: IToken[];
+    Equals?: IToken[];
+    WsAfterEquals?: IToken[];
+    OpenQuote?: IToken[];
+    WsAfterOpenQuote?: IToken[];
+    ForIterator?: ForIteratorCstNode[];
+    WsBeforeCloseQuote?: IToken[];
+    CloseQuote?: IToken[];
+  };
+}
+
 /**
  * Represents an opening tag in POML markup.
  *
@@ -206,10 +223,25 @@ export interface ForLoopAttributeNode {
 export interface OpenTagNode {
   kind: 'OPEN';
   range: Range;
-  value: StringNode;
+  value: StringNode; // tag name
   attributes: (AttributeNode | ForLoopAttributeNode)[];
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface OpenTagCstNode extends CstNode {
+  children: {
+    OpenBracket?: IToken[];
+    WsAfterBracket?: IToken[];
+    TagName?: IToken[];
+    WsAfterName?: IToken[];
+    Attribute?: AttributeCstNode[];
+    WsAfterAttribute?: IToken[];
+    CloseBracket?: IToken[];
+  };
+}
+
 /**
  * Represents a closing tag in POML markup.
  *
@@ -229,7 +261,19 @@ export interface OpenTagNode {
 export interface CloseTagNode {
   kind: 'CLOSE';
   range: Range;
-  value: StringNode;
+  value: StringNode; // tag name
+}
+
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface CloseTagCstNode extends CstNode {
+  children: {
+    ClosingOpenBracket?: IToken[];
+    WsAfterBracket?: IToken[];
+    TagName?: IToken[];
+    CloseBracket?: IToken[];
+  };
 }
 
 /**
@@ -252,10 +296,25 @@ export interface CloseTagNode {
 export interface SelfCloseElementNode {
   kind: 'SELFCLOSE';
   range: Range;
-  value: StringNode;
+  value: StringNode; // tag name
   attributes: (AttributeNode | ForLoopAttributeNode)[];
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface SelfCloseElementCstNode extends CstNode {
+  children: {
+    OpenBracket?: IToken[];
+    WsAfterBracket?: IToken[];
+    TagName?: IToken[];
+    WsAfterName?: IToken[];
+    Attribute?: AttributeCstNode[];
+    WsAfterAttribute?: IToken[];
+    SelfCloseBracket?: IToken[];
+  };
+}
+
 /**
  * Represents a complete POML element with its content.
  *
@@ -283,6 +342,28 @@ export interface ElementNode {
   children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface ElementCstNode extends CstNode {
+  children: {
+    OpenTag?: OpenTagCstNode[];
+    CloseTag?: CloseTagCstNode[];
+    Content?: IToken[];
+  };
+}
+
+export interface ElementContentCstNode extends CstNode {
+  children: {
+    Element?: ElementCstNode;
+    LiteralElement?: LiteralElementCstNode;
+    SelfCloseElement?: SelfCloseElementCstNode;
+    Comment?: CommentCstNode;
+    Pragma?: PragmaCstNode;
+    Value?: ElementValueCstNode;
+  };
+}
+
 /**
  * Represents an HTML-like line/block comment in POML.
  *
@@ -299,7 +380,16 @@ export interface CommentNode {
   value: StringNode;
 }
 
-// export interface Comment
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface CommentCstNode extends CstNode {
+  children: {
+    CommentOpenTag?: IToken[];
+    CommentContent?: IToken[];
+    CommentCloseTag?: IToken[];
+  };
+}
 
 /**
  * Represents a pragma directive carried inside a comment.
@@ -320,11 +410,18 @@ export interface PragmaNode {
   value: StringNode;
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
 export interface PragmaCstNode extends CstNode {
   children: {
     CommentOpenTag?: IToken[];
+    WsAfterOpen?: IToken[];
+    PragmaKeyword?: IToken[];
+    WsAfterPragma?: IToken[];
+    CommentContent?: IToken[];
+    WsAfterContent?: IToken[];
     CommentCloseTag?: IToken[];
-    PragmaSymbol?: IToken[];
   };
 }
 
@@ -355,7 +452,6 @@ export interface LiteralNode {
   range: Range;
   open: OpenTagNode;
   close: CloseTagNode;
-  attributes: AttributeNode[];
   children: StringNode;
 }
 
@@ -402,17 +498,6 @@ export interface RootCstNode extends CstNode {
   };
 }
 
-export interface ElementContentCstNode extends CstNode {
-  children: {
-    Element?: ElementCstNode;
-    LiteralElement?: LiteralElementCstNode;
-    SelfCloseElement?: SelfCloseElementCstNode;
-    Comment?: CommentCstNode;
-    Pragma?: PragmaCstNode;
-    Value?: ElementValueCstNode;
-  };
-}
-
 // Keep these keys required; everything else becomes recursively optional
 type DeepPartialExcept<T, K extends keyof T> =
   // arrays
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 93c60818..11d39d78 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -756,6 +756,13 @@ describe('Malformed Patterns', () => {
     expect(tokenImages('@Pragma')).toEqual(['@Pragma']);
     expect(tokenImages('@pragma key=value')).toEqual(['@pragma', ' ', 'key', '=', 'value']);
   });
+
+  test('should handle </>', () => {
+    expect(tokenImages('</>')).toEqual(['</', '>']);
+    expect(tokenImages('< />')).toEqual(['<', ' ', '/>']);
+    expect(tokenImages('< / >')).toEqual(['<', ' ', '/', ' ', '>']);
+    expect(tokenImages('<//>')).toEqual(['</', '/>']);
+  });
 });
 
 describe('Position Tracking Accuracy', () => {

From 9ff1f7602ccf9186e1bfd2199d7b81efd32f888b Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sun, 31 Aug 2025 09:42:59 +0800
Subject: [PATCH 33/76] finish cst nodes

---
 packages/poml/next/lexer.ts |   5 +
 packages/poml/next/nodes.ts | 212 ++++++++++++++++++++++--------------
 2 files changed, 133 insertions(+), 84 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 216f6e9e..0591d786 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -16,6 +16,11 @@ export const Equals = createToken({ name: 'Equals', pattern: /=/ });
 export const DoubleQuote = createToken({ name: 'DoubleQuote', pattern: /"/ });
 export const SingleQuote = createToken({ name: 'SingleQuote', pattern: /'/ });
 export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
+export const BackslashEscape = createToken({
+  name: 'BackslashEscape',
+  pattern: /\\(n|r|t|'|"|{{|}}|\\|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
+});
+export const CharacterEntity = createToken({ name: 'CharacterEntity', pattern: /&#[0-9]+;|&[a-zA-Z][a-zA-Z0-9]+;/ });
 
 /* Identifier is one of the following:
  * - XML tag names
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 389bf2dd..b2c586b9 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -14,7 +14,7 @@ import { CstNode, IToken } from 'chevrotain';
  *
  * Cases that do not apply:
  * - Template syntax including braces: `{{ expression }}` (use TemplateNode)
- * - String literals with quotes: `"hello"` (use StringNode or ValueNode)
+ * - String literals with quotes: `"hello"` (use LiteralNode or ValueNode)
  * - POML markup: `<tag>` (use element nodes)
  */
 export interface ExpressionNode {
@@ -23,8 +23,6 @@ export interface ExpressionNode {
   value: string;
 }
 
-export interface ExpressionCstNode {}
-
 /**
  * Represents a template interpolation with double curly braces,
  * or sometimes without braces in specific attributes.
@@ -42,9 +40,9 @@ export interface ExpressionCstNode {}
  *
  * Cases that do not apply:
  * - Full attribute expressions: `if="x > 0"` (use ExpressionNode)
- * - Plain text: `Hello World` (use StringNode)
+ * - Plain text: `Hello World` (use LiteralNode)
  * - Single braces: `{ not a template }` (treated as plain text)
- * - Template elements: <template>{{ this is a jinja template }}</template> (use LiteralNode)
+ * - Template elements: <template>{{ this is a jinja template }}</template> (use ElementNode)
  * - With quotes: `"{{ var }}"` (use ValueNode)
  */
 export interface TemplateNode {
@@ -53,10 +51,28 @@ export interface TemplateNode {
   value: ExpressionNode;
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+
+export interface CstTemplateNode extends CstNode {
+  children: {
+    OpenTemplate?: IToken[];
+    WsAfterOpen?: IToken[];
+    // Content inside {{ and }} is treated as a single expression token.
+    // Eats everything until the next }} (or the whitespace before it).
+    // Handles \{{ and \}} escapes. We won't escape other chars here.
+    Content?: IToken[];
+    // If it's close to the ending }}, try to eat whitespace before it.
+    WsAfterContent?: IToken[];
+    CloseTemplate?: IToken[];
+  };
+}
+
 /**
  * Represents plain text content without any special syntax.
  *
- * String nodes are the most basic content nodes, containing literal text
+ * Literal nodes are the most basic content nodes, containing literal text
  * that requires no processing. They are used both for content and as
  * components of other nodes (like attribute keys and tag names).
  *
@@ -74,7 +90,7 @@ export interface TemplateNode {
  * - Expressions: `x > 0` (use ExpressionNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  */
-export interface StringNode {
+export interface LiteralNode {
   kind: 'STRING';
   range: Range;
   value: string;
@@ -95,9 +111,9 @@ export interface StringNode {
  * - Multi-part content: `"Price: ${{amount}} USD"`
  *
  * Cases that do not apply:
- * - Attribute keys: `class=...` (the `class` part uses StringNode)
+ * - Attribute keys: `class=...` (the `class` part uses LiteralNode)
  * - Pure expressions without quotes: `if=condition` (use ExpressionNode)
- * - Tag names: `div` (use StringNode)
+ * - Tag names: `div` (use LiteralNode)
  * - Standalone template variables not in a value context
  *
  * Note: The range includes quotes if present, but children exclude them.
@@ -105,7 +121,29 @@ export interface StringNode {
 export interface ValueNode {
   kind: 'VALUE';
   range: Range;
-  children: (StringNode | TemplateNode)[];
+  children: (LiteralNode | TemplateNode)[];
+}
+
+/**
+ * Related CST node interfaces for parsing stage.
+ * The following two interfaces are for quoted strings and will be transformed into ValueNode.
+ */
+export interface CstQuotedNode extends CstNode {
+  children: {
+    OpenQuote?: IToken[];
+    // This is a normal quoted string without templates inside.
+    Content?: IToken[];
+    CloseQuote?: IToken[];
+  };
+}
+
+export interface CstQuotedTemplateNode extends CstNode {
+  children: {
+    OpenQuote?: IToken[];
+    // Allows "Hello {{ friend["abc"] }}!" - mix of text and templates (with quotes).
+    Content?: (IToken | CstTemplateNode)[];
+    CloseQuote?: IToken[];
+  };
 }
 
 /**
@@ -116,13 +154,14 @@ export interface ValueNode {
  * and the collection expression for runtime evaluation.
  *
  * Cases that apply:
- * - Simple iteration: `item in items`
- * - Property access: `user in data.users`
- * - Array literals: `num in [1, 2, 3]`
- * - Method calls: `result in getResults()`
- * - Nested property iteration: `task in project.tasks.active`
+ * - Simple iteration: `"item in items"`
+ * - Property access: `"user in data.users"`
+ * - Array literals: `"num in [1, 2, 3]"`
+ * - Method calls in single quotes: `'result in getResults()'`
+ * - Nested property iteration: `'task in project.tasks.active'`
  *
  * Cases that do not apply (not yet supported):
+ * - Without quotes: `item in items` (must be in quotes for now)
  * - Advanced loop syntax (not yet supported): `(item, index) in items`
  * - Destructuring patterns (not yet supported): `{name, age} in users`
  * - Conditional loops: `if` attributes (use separate condition handling)
@@ -131,10 +170,31 @@ export interface ValueNode {
 export interface ForIteratorNode {
   kind: 'FORITERATOR';
   range: Range;
-  iterator: StringNode;
+  iterator: LiteralNode;
   collection: ExpressionNode;
 }
 
+/**
+ * Related CST node interfaces for parsing stage.
+ */
+export interface CstForIteratorNode extends CstNode {
+  children: {
+    OpenQuote?: IToken[];
+    WsAfterOpen?: IToken[];
+    Iterator?: IToken[];
+    WsAfterIterator?: IToken[];
+    InKeyword?: IToken[];
+    WsAfterIn?: IToken[];
+    // Follows the same parsing rules as template expression.
+    // But as we are in a quoted string, we need to handle
+    // backslash escapes like \" and \'.
+    // Greedily match until the next unescaped quote or ws before it.
+    Collection?: IToken[];
+    WsAfterCollection?: IToken[];
+    CloseQuote?: IToken[];
+  };
+}
+
 /**
  * Represents a standard attribute on a POML element.
  *
@@ -142,62 +202,42 @@ export interface ForIteratorNode {
  * of a key-value pair where the key is always a simple string and the value
  * can be a complex composition of text and templates.
  *
+ * It also supports for-loop attributes via ForIterator, which contains
+ * loop iteration syntax rather than a simple value. It enables
+ * elements to be rendered multiple times based on a collection.
+ *
  * Cases that apply:
  * - Simple attributes: `class="container"`, `id='main'`
  * - Template values: `title="{{ pageTitle }}"` or `title={{ pageTitle }}`
  * - Mixed values: `placeholder="Enter {{ fieldName }}..."`
+ * - For attributes: `for="item in items"` (key is "for", value is ForIteratorNode)
+ * - Computed collections: `for='i in [...Array(5).keys()]'`
  *
  * Cases that do not apply:
  * - Boolean/presence attributes: `disabled`, `checked` (not yet supported)
- * - For-loop attributes: `for="item in items"` (use ForLoopAttributeNode)
  * - Spread attributes (not yet supported): `{...props}`
  * - Dynamic attribute names (not supported): `[attrName]="value"`
  */
 export interface AttributeNode {
   kind: 'ATTRIBUTE';
   range: Range;
-  key: StringNode;
-  value: ValueNode;
-}
-
-/**
- * Represents a special for-loop attribute on POML elements.
- *
- * This specialized attribute node handles the `for` attribute specifically,
- * which contains loop iteration syntax rather than a simple value. It enables
- * elements to be rendered multiple times based on a collection.
- *
- * Cases that apply:
- * - For attributes only: `for="item in items"`
- * - Nested iterations: `for="subitem in item.children"`
- * - Computed collections: `for="i in [...Array(5).keys()]"`
- *
- * Cases that do not apply:
- * - Any attribute with a key other than "for"
- * - Standard attributes: `class="..."` (use AttributeNode)
- * - Conditional attributes: `if="..."` (use AttributeNode)
- */
-export interface ForLoopAttributeNode {
-  kind: 'FORATTRIBUTE';
-  range: Range;
-  key: StringNode;
-  value: ForIteratorNode;
+  key: LiteralNode;
+  value: ValueNode | ForIteratorNode;
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface ForLoopAttributeCstNode extends CstNode {
+export interface CstAttributeNode extends CstNode {
   children: {
     AttributeKey?: IToken[];
     WsAfterKey?: IToken[];
     Equals?: IToken[];
     WsAfterEquals?: IToken[];
-    OpenQuote?: IToken[];
-    WsAfterOpenQuote?: IToken[];
-    ForIterator?: ForIteratorCstNode[];
-    WsBeforeCloseQuote?: IToken[];
-    CloseQuote?: IToken[];
+    // Choose between one: john="doe", john='doe', john={{ template }}, for="i in items"
+    quotedValue?: CstQuotedTemplateNode[];
+    templatedValue?: CstTemplateNode[];
+    forIteratorValue?: CstForIteratorNode[];
   };
 }
 
@@ -223,8 +263,8 @@ export interface ForLoopAttributeCstNode extends CstNode {
 export interface OpenTagNode {
   kind: 'OPEN';
   range: Range;
-  value: StringNode; // tag name
-  attributes: (AttributeNode | ForLoopAttributeNode)[];
+  value: LiteralNode; // tag name
+  attributes: AttributeNode[];
 }
 
 /**
@@ -236,7 +276,7 @@ export interface OpenTagCstNode extends CstNode {
     WsAfterBracket?: IToken[];
     TagName?: IToken[];
     WsAfterName?: IToken[];
-    Attribute?: AttributeCstNode[];
+    Attribute?: CstAttributeNode[];
     WsAfterAttribute?: IToken[];
     CloseBracket?: IToken[];
   };
@@ -261,7 +301,7 @@ export interface OpenTagCstNode extends CstNode {
 export interface CloseTagNode {
   kind: 'CLOSE';
   range: Range;
-  value: StringNode; // tag name
+  value: LiteralNode; // tag name
 }
 
 /**
@@ -296,20 +336,20 @@ export interface CloseTagCstNode extends CstNode {
 export interface SelfCloseElementNode {
   kind: 'SELFCLOSE';
   range: Range;
-  value: StringNode; // tag name
-  attributes: (AttributeNode | ForLoopAttributeNode)[];
+  value: LiteralNode; // tag name
+  attributes: AttributeNode[];
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface SelfCloseElementCstNode extends CstNode {
+export interface CstSelfCloseElementNode extends CstNode {
   children: {
     OpenBracket?: IToken[];
     WsAfterBracket?: IToken[];
     TagName?: IToken[];
     WsAfterName?: IToken[];
-    Attribute?: AttributeCstNode[];
+    Attribute?: CstAttributeNode[];
     WsAfterAttribute?: IToken[];
     SelfCloseBracket?: IToken[];
   };
@@ -339,13 +379,13 @@ export interface ElementNode {
   range: Range;
   open: OpenTagNode;
   close: CloseTagNode;
-  children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
+  children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface ElementCstNode extends CstNode {
+export interface CstElementNode extends CstNode {
   children: {
     OpenTag?: OpenTagCstNode[];
     CloseTag?: CloseTagCstNode[];
@@ -353,14 +393,15 @@ export interface ElementCstNode extends CstNode {
   };
 }
 
-export interface ElementContentCstNode extends CstNode {
+export interface CstElementContentNode extends CstNode {
   children: {
-    Element?: ElementCstNode;
-    LiteralElement?: LiteralElementCstNode;
-    SelfCloseElement?: SelfCloseElementCstNode;
-    Comment?: CommentCstNode;
-    Pragma?: PragmaCstNode;
-    Value?: ElementValueCstNode;
+    Element?: CstElementNode[];
+    LiteralElement?: CstLiteralElementNode[];
+    SelfCloseElement?: CstSelfCloseElementNode[];
+    Comment?: CstCommentNode[];
+    Pragma?: CstPragmaNode[];
+    Template?: CstTemplateNode[];
+    TextContent?: IToken[];
   };
 }
 
@@ -377,13 +418,13 @@ export interface ElementContentCstNode extends CstNode {
 export interface CommentNode {
   kind: 'COMMENT';
   range: Range;
-  value: StringNode;
+  value: LiteralNode;
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface CommentCstNode extends CstNode {
+export interface CstCommentNode extends CstNode {
   children: {
     CommentOpenTag?: IToken[];
     CommentContent?: IToken[];
@@ -407,19 +448,22 @@ export interface CommentCstNode extends CstNode {
 export interface PragmaNode {
   kind: 'PRAGMA';
   range: Range;
-  value: StringNode;
+  identifier: LiteralNode;
+  options: LiteralNode[];
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface PragmaCstNode extends CstNode {
+export interface CstPragmaNode extends CstNode {
   children: {
     CommentOpenTag?: IToken[];
     WsAfterOpen?: IToken[];
     PragmaKeyword?: IToken[];
     WsAfterPragma?: IToken[];
-    CommentContent?: IToken[];
+    PragmaIdentifier?: IToken[];
+    WsAfterIdentifier?: IToken[];
+    PragmaOption?: (IToken | CstQuotedNode)[];
     WsAfterContent?: IToken[];
     CommentCloseTag?: IToken[];
   };
@@ -428,7 +472,7 @@ export interface PragmaCstNode extends CstNode {
 /**
  * Represents an element that preserves literal content.
  *
- * Literal nodes are special POML elements that treat their content as literal
+ * Literal element nodes are special POML elements that treat their content as literal
  * text, preventing template variable interpolation. They ensure content is
  * preserved exactly as written, useful for code samples or pre-formatted text.
  * When `<text>` is used, the parser eats everything including tags and comments,
@@ -444,27 +488,28 @@ export interface PragmaCstNode extends CstNode {
  * - Text with attributes enabling processing (future feature)
  *
  * Note: The tagName (value) can only be "text" in this version.
- * Literal node is different from elements which do not support children.
- * Literal node is handled on the CST parsing stage.
+ * Literal element node is different from elements which do not support nested tags,
+ * e.g., <let> or <template>.
+ * Literal element node is handled on the CST parsing stage.
  */
-export interface LiteralNode {
+export interface LiteralElementNode {
   kind: 'TEXT';
   range: Range;
   open: OpenTagNode;
   close: CloseTagNode;
-  children: StringNode;
+  children: LiteralNode;
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface LiteralElementCstNode extends CstNode {
+export interface CstLiteralElementNode extends CstNode {
   children: {
     OpenTag?: OpenTagCstNode[];
-    CloseTag?: CloseTagCstNode[];
     // All content between open and close tags is treated as literal text
     // including other tags, comments, pragmas, etc.
     TextContent?: IToken[];
+    CloseTag?: CloseTagCstNode[];
   };
 }
 
@@ -486,15 +531,15 @@ export interface LiteralElementCstNode extends CstNode {
 export interface RootNode {
   kind: 'ROOT';
   range: Range;
-  children: (ElementNode | LiteralNode | CommentNode | PragmaNode | ValueNode)[];
+  children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface RootCstNode extends CstNode {
+export interface CstRootNode extends CstNode {
   children: {
-    Content?: ElementContentCstNode[];
+    Content?: CstElementContentNode[];
   };
 }
 
@@ -520,16 +565,15 @@ type Draft<T extends { kind: string }> = DeepPartialExcept<T, 'kind'>;
 export type StrictNode =
   | ExpressionNode
   | TemplateNode
-  | StringNode
+  | LiteralNode
   | ValueNode
   | ForIteratorNode
   | AttributeNode
-  | ForLoopAttributeNode
   | OpenTagNode
   | CloseTagNode
   | SelfCloseElementNode
   | ElementNode
-  | LiteralNode
+  | LiteralElementNode
   | CommentNode
   | PragmaNode
   | RootNode;

From 563715b4fc38e2f4ec7a0e3d9c531db346d85163 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sun, 31 Aug 2025 10:35:42 +0800
Subject: [PATCH 34/76] update lexer

---
 packages/poml/next/lexer.ts | 78 +++++++++++++++++++++++++++----------
 packages/poml/next/nodes.ts | 31 ++++++++-------
 2 files changed, 75 insertions(+), 34 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 0591d786..8e430729 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -3,39 +3,39 @@ import { createToken, Lexer } from 'chevrotain';
 // Define token types for extended POML
 export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!--(\-(?!\-+>))*/ });
 export const CommentClose = createToken({ name: 'CommentClose', pattern: /-{2,}>/ });
-export const Pragma = createToken({ name: 'Pragma', pattern: /\b@pragma\b/i });
+export const PragmaKeyword = createToken({ name: 'PragmaKeyword', pattern: /\b@pragma\b/i });
 export const TemplateOpen = createToken({ name: 'TemplateOpen', pattern: /{{/ });
 export const TemplateClose = createToken({ name: 'TemplateClose', pattern: /}}/ });
-export const TagClosingOpen = createToken({ name: 'TagClosingOpen', pattern: /<\// });
-export const TagSelfClose = createToken({ name: 'TagSelfClose', pattern: /\/>/ });
-export const TagOpen = createToken({ name: 'TagOpen', pattern: /</ });
-export const TagClose = createToken({ name: 'TagClose', pattern: />/ });
+export const ClosingOpenBracket = createToken({ name: 'ClosingOpenBracket', pattern: /<\// });
+export const SelfCloseBracket = createToken({ name: 'SelfCloseBracket', pattern: /\/>/ });
+export const OpenBracket = createToken({ name: 'OpenBracket', pattern: /</ });
+export const CloseBracket = createToken({ name: 'CloseBracket', pattern: />/ });
 export const Equals = createToken({ name: 'Equals', pattern: /=/ });
 
 // Individual character tokens for quotes and backslash - CST parser will handle semantics
 export const DoubleQuote = createToken({ name: 'DoubleQuote', pattern: /"/ });
 export const SingleQuote = createToken({ name: 'SingleQuote', pattern: /'/ });
-export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 export const BackslashEscape = createToken({
   name: 'BackslashEscape',
   pattern: /\\(n|r|t|'|"|{{|}}|\\|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
 });
 export const CharacterEntity = createToken({ name: 'CharacterEntity', pattern: /&#[0-9]+;|&[a-zA-Z][a-zA-Z0-9]+;/ });
+// Backslash not followed by a valid escape sequence
+export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 
-/* Identifier is one of the following:
+/* Identifier is used in one of the following:
  * - XML tag names
  * - XML attribute names
  * - Arbitrary text content incorrectly parsed as identifiers
  *
  * Notes:
- * 1. In case 1, tags can contain : (namespaces) and . (extensions).
- *    These are handled later by CST parser.
- * 2. In case 3, CST parser will reclassify as TextContent if needed.
- * 3. We are going to disallow "." and ":" to appear in XML tags.
+ * 1. In case 1, we are going to allow "." and ":" to appear in XML tags and attributes.
+ * 2. Similar for case 2.
+ * 3. In case 3, CST parser will reclassify as TextContent if needed.
  */
 export const Identifier = createToken({
   name: 'Identifier',
-  pattern: /[a-zA-Z_]([a-zA-Z0-9_]|(-(?!\-+>)))*/,
+  pattern: /[a-zA-Z_]([a-zA-Z0-9_\.:]|(-(?!\-+>)))*/,
 });
 
 // Include all Unicode whitespace characters and control characters
@@ -54,6 +54,8 @@ export const Whitespace = createToken({
  * - whitespace (handled separately - includes Unicode whitespace and control chars)
  * - equal sign (=)
  * - backslash \ (handled separately for escaping)
+ * - valid backslash escape sequences such as \n, \t, \", \', \\, \xHH, \uHHHH, \UHHHHHHHH, \{{, \}}
+ * - character entities such as &#123; or &name;
  *
  * Allowed:
  * - Single { or } are OK if they are not followed by another brace
@@ -66,36 +68,72 @@ export const Arbitrary = createToken({
   name: 'Arbitrary',
   // Match anything except: <, >, quotes, =, backslash, whitespace (including Unicode), control chars
   // Allow single braces and slashes with lookahead constraints
-  pattern: /(?:[^<>"'{}=\\\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>))+/,
+  pattern:
+    /(?:[^<>"'{}=\\&\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|[a-zA-Z][a-zA-Z0-9]+;))+/,
   line_breaks: false,
 });
 
 // Define token order - more specific patterns first
-export const allTokens = [
+export const AllTokens = [
   CommentOpen,
   CommentClose,
-  Pragma,
+  PragmaKeyword,
   TemplateOpen,
   TemplateClose,
-  TagClosingOpen, // Must come before TagOpen
-  TagSelfClose, // Must come before TagClose
-  TagOpen,
-  TagClose,
+  ClosingOpenBracket, // Must come before OpenBracket
+  SelfCloseBracket, // Must come before CloseBracket
+  OpenBracket,
+  CloseBracket,
   Equals,
   DoubleQuote,
   SingleQuote,
+  BackslashEscape,
   Backslash,
+  CharacterEntity,
   Identifier,
   Whitespace,
   Arbitrary,
 ];
 
+export const XmlBracketTokens = [
+  CommentOpen,
+  CommentClose,
+  ClosingOpenBracket,
+  SelfCloseBracket,
+  OpenBracket,
+  CloseBracket,
+];
+
+export const TextTokens = [Identifier, Whitespace, Arbitrary];
+
+// Tokens used in expressions (inside {{ and }}), excluding the closing braces.
+// Opening braces should work, but they should be also properly escaped inside to avoid confusion.
+export const TokensExpression = AllTokens.filter(
+  (tokenType) => tokenType !== TemplateOpen && tokenType !== TemplateClose,
+);
+
+// Tokens used in quotes. The quoted strings do not allow template expressions inside.
+// Quoted strings can contain backslash escapes. Character entities will be however shown as is.
+export const TokensDoubleQuoted = AllTokens.filter((tokenType) => tokenType !== DoubleQuote);
+export const TokensSingleQuoted = AllTokens.filter((tokenType) => tokenType !== SingleQuote);
+
+// Tokens used in quotes, but within quotes, it can contain other expressions ({{ and }}).
+export const TokensDoubleQuotedExpression = TokensExpression.filter((tokenType) => tokenType !== DoubleQuote);
+export const TokensSingleQuotedExpression = TokensExpression.filter((tokenType) => tokenType !== SingleQuote);
+
+// Text contents inside XML elements.
+// Like XML/HTML, the contents here can have `&` XML entities to escape special characters.
+// Escaped characters via backslash will be shown as is without escape handling.
+export const TokensTextContent = AllTokens.filter(
+  (tokenType) => !XmlBracketTokens.includes(tokenType) && tokenType !== TemplateOpen && tokenType !== TemplateClose,
+);
+
 // Extended POML Lexer class
 export class ExtendedPomlLexer {
   private lexer: Lexer;
 
   constructor() {
-    this.lexer = new Lexer(allTokens);
+    this.lexer = new Lexer(AllTokens);
   }
 
   public tokenize(text: string) {
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index b2c586b9..6d9e87ec 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -57,7 +57,7 @@ export interface TemplateNode {
 
 export interface CstTemplateNode extends CstNode {
   children: {
-    OpenTemplate?: IToken[];
+    TemplateOpen?: IToken[];
     WsAfterOpen?: IToken[];
     // Content inside {{ and }} is treated as a single expression token.
     // Eats everything until the next }} (or the whitespace before it).
@@ -65,7 +65,7 @@ export interface CstTemplateNode extends CstNode {
     Content?: IToken[];
     // If it's close to the ending }}, try to eat whitespace before it.
     WsAfterContent?: IToken[];
-    CloseTemplate?: IToken[];
+    TemplateClose?: IToken[];
   };
 }
 
@@ -389,7 +389,7 @@ export interface CstElementNode extends CstNode {
   children: {
     OpenTag?: OpenTagCstNode[];
     CloseTag?: CloseTagCstNode[];
-    Content?: IToken[];
+    Content?: CstElementContentNode[];
   };
 }
 
@@ -426,9 +426,9 @@ export interface CommentNode {
  */
 export interface CstCommentNode extends CstNode {
   children: {
-    CommentOpenTag?: IToken[];
-    CommentContent?: IToken[];
-    CommentCloseTag?: IToken[];
+    CommentOpen?: IToken[];
+    Content?: IToken[];
+    CommentClose?: IToken[];
   };
 }
 
@@ -457,7 +457,7 @@ export interface PragmaNode {
  */
 export interface CstPragmaNode extends CstNode {
   children: {
-    CommentOpenTag?: IToken[];
+    CommentOpen?: IToken[];
     WsAfterOpen?: IToken[];
     PragmaKeyword?: IToken[];
     WsAfterPragma?: IToken[];
@@ -465,7 +465,7 @@ export interface CstPragmaNode extends CstNode {
     WsAfterIdentifier?: IToken[];
     PragmaOption?: (IToken | CstQuotedNode)[];
     WsAfterContent?: IToken[];
-    CommentCloseTag?: IToken[];
+    CommentClose?: IToken[];
   };
 }
 
@@ -476,7 +476,8 @@ export interface CstPragmaNode extends CstNode {
  * text, preventing template variable interpolation. They ensure content is
  * preserved exactly as written, useful for code samples or pre-formatted text.
  * When `<text>` is used, the parser eats everything including tags and comments,
- * including new `<text>` tags, until a matching `</text>` is found.
+ * except `<text>` itself and `</text>`, treating it all as literal text,
+ * until a matching `</text>` is found.
  *
  * Cases that apply:
  * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
@@ -487,10 +488,12 @@ export interface CstPragmaNode extends CstNode {
  * - Elements allowing template processing (use ElementNode)
  * - Text with attributes enabling processing (future feature)
  *
- * Note: The tagName (value) can only be "text" in this version.
- * Literal element node is different from elements which do not support nested tags,
- * e.g., <let> or <template>.
- * Literal element node is handled on the CST parsing stage.
+ * Note:
+ * 1. The tagName (value) can only be "text" in this version.
+ * 2. Literal element node is different from elements which do not support nested tags,
+ *    e.g., <let> or <template>. Literal element node is handled on the CST parsing stage.
+ * 3. If you really need `<text>` in your POML. Recommended to use `&lt;text&gt;`
+ *    outside of literal element.
  */
 export interface LiteralElementNode {
   kind: 'TEXT';
@@ -507,7 +510,7 @@ export interface CstLiteralElementNode extends CstNode {
   children: {
     OpenTag?: OpenTagCstNode[];
     // All content between open and close tags is treated as literal text
-    // including other tags, comments, pragmas, etc.
+    // including other tags, comments, pragmas, etc. except for `</text>`.
     TextContent?: IToken[];
     CloseTag?: CloseTagCstNode[];
   };

From bdc91b7266ed14c612e412da7f30db71276f525e Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sun, 31 Aug 2025 11:33:48 +0800
Subject: [PATCH 35/76] .

---
 packages/poml/next/lexer.ts | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 8e430729..4d7499d9 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -19,7 +19,10 @@ export const BackslashEscape = createToken({
   name: 'BackslashEscape',
   pattern: /\\(n|r|t|'|"|{{|}}|\\|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
 });
-export const CharacterEntity = createToken({ name: 'CharacterEntity', pattern: /&#[0-9]+;|&[a-zA-Z][a-zA-Z0-9]+;/ });
+export const CharacterEntity = createToken({
+  name: 'CharacterEntity',
+  pattern: /&#x[0-9A-Fa-f]+;|&#[0-9]+;|&[a-zA-Z][a-zA-Z0-9]+;/,
+});
 // Backslash not followed by a valid escape sequence
 export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
 
@@ -69,7 +72,7 @@ export const Arbitrary = createToken({
   // Match anything except: <, >, quotes, =, backslash, whitespace (including Unicode), control chars
   // Allow single braces and slashes with lookahead constraints
   pattern:
-    /(?:[^<>"'{}=\\&\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|[a-zA-Z][a-zA-Z0-9]+;))+/,
+    /(?:[^<>"'{}=\\&\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|x[0-9A-Fa-f]+;|[a-zA-Z][a-zA-Z0-9]+;))+/,
   line_breaks: false,
 });
 
@@ -104,7 +107,7 @@ export const XmlBracketTokens = [
   CloseBracket,
 ];
 
-export const TextTokens = [Identifier, Whitespace, Arbitrary];
+export const TokensComment = AllTokens.filter((tokenType) => tokenType !== CommentClose);
 
 // Tokens used in expressions (inside {{ and }}), excluding the closing braces.
 // Opening braces should work, but they should be also properly escaped inside to avoid confusion.
@@ -113,6 +116,7 @@ export const TokensExpression = AllTokens.filter(
 );
 
 // Tokens used in quotes. The quoted strings do not allow template expressions inside.
+// The only application currently is in @pragma directive options.
 // Quoted strings can contain backslash escapes. Character entities will be however shown as is.
 export const TokensDoubleQuoted = AllTokens.filter((tokenType) => tokenType !== DoubleQuote);
 export const TokensSingleQuoted = AllTokens.filter((tokenType) => tokenType !== SingleQuote);

From 41866ab041669987c4579a1563b47da3ee41bd10 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Sun, 31 Aug 2025 13:36:59 +0800
Subject: [PATCH 36/76] .

---
 packages/poml/next/lexer.ts | 19 +++++++++++--------
 packages/poml/next/nodes.ts | 13 +++++++------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 4d7499d9..bcdf0333 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -65,6 +65,7 @@ export const Whitespace = createToken({
  * - Incomplete tag delimiters such as / (/< is an exception, because < is a start of tag)
  * - Incomplete comment delimiters such as !-- or -- are OK
  * - Incorrect @pragma directive such as @pragm or @pragmaX will be matched
+ * - Invalid character entities such as &abc (without semicolon) or & (by itself) or &;, &z; (invalid)
  * - All other Unicode characters including emojis, CJK, etc.
  */
 export const Arbitrary = createToken({
@@ -110,10 +111,8 @@ export const XmlBracketTokens = [
 export const TokensComment = AllTokens.filter((tokenType) => tokenType !== CommentClose);
 
 // Tokens used in expressions (inside {{ and }}), excluding the closing braces.
-// Opening braces should work, but they should be also properly escaped inside to avoid confusion.
-export const TokensExpression = AllTokens.filter(
-  (tokenType) => tokenType !== TemplateOpen && tokenType !== TemplateClose,
-);
+// A nested opening {{ is still accepted, but it should generally be escaped inside an expression to avoid confusion.
+export const TokensExpression = AllTokens.filter((tokenType) => tokenType !== TemplateClose);
 
 // Tokens used in quotes. The quoted strings do not allow template expressions inside.
 // The only application currently is in @pragma directive options.
@@ -121,15 +120,19 @@ export const TokensExpression = AllTokens.filter(
 export const TokensDoubleQuoted = AllTokens.filter((tokenType) => tokenType !== DoubleQuote);
 export const TokensSingleQuoted = AllTokens.filter((tokenType) => tokenType !== SingleQuote);
 
-// Tokens used in quotes, but within quotes, it can contain other expressions ({{ and }}).
-export const TokensDoubleQuotedExpression = TokensExpression.filter((tokenType) => tokenType !== DoubleQuote);
-export const TokensSingleQuotedExpression = TokensExpression.filter((tokenType) => tokenType !== SingleQuote);
+// Tokens used as plain text inside quotes that may also hold expressions: excludes the quote itself and {{ (which opens an expression).
+export const TokensDoubleQuotedExpression = AllTokens.filter(
+  (tokenType) => tokenType !== DoubleQuote && tokenType !== TemplateOpen,
+);
+export const TokensSingleQuotedExpression = AllTokens.filter(
+  (tokenType) => tokenType !== SingleQuote && tokenType !== TemplateOpen,
+);
 
 // Text contents inside XML elements.
 // Like XML/HTML, the contents here can have `&` XML entities to escape special characters.
 // Escaped characters via backslash will be shown as is without escape handling.
 export const TokensTextContent = AllTokens.filter(
-  (tokenType) => !XmlBracketTokens.includes(tokenType) && tokenType !== TemplateOpen && tokenType !== TemplateClose,
+  (tokenType) => !XmlBracketTokens.includes(tokenType) && tokenType !== TemplateOpen,
 );
 
 // Extended POML Lexer class
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 6d9e87ec..0b7fbc9e 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -42,7 +42,7 @@ export interface ExpressionNode {
  * - Full attribute expressions: `if="x > 0"` (use ExpressionNode)
  * - Plain text: `Hello World` (use LiteralNode)
  * - Single braces: `{ not a template }` (treated as plain text)
- * - Template elements: <template>{{ this is a jinja template }}</template> (use ElementNode)
+ * - Template elements: <template>{{ this is a jinja template }}</template> (use LiteralNode)
  * - With quotes: `"{{ var }}"` (use ValueNode)
  */
 export interface TemplateNode {
@@ -444,6 +444,7 @@ export interface CstCommentNode extends CstNode {
  * - Specify version: `<!-- @pragma version >=1.0.0 <2.3.0 -->`
  * - Turn tags on/off: `<!-- @pragma components +reference -table -->`
  * - Turn speaker roles on/off: `<!-- @pragma speaker multi -->` or `single`
+ * - White space policy: `<!-- @pragma whitespace pre -->` or `trim` or `collapse`
  */
 export interface PragmaNode {
   kind: 'PRAGMA';
@@ -475,9 +476,8 @@ export interface CstPragmaNode extends CstNode {
  * Literal element nodes are special POML elements that treat their content as literal
  * text, preventing template variable interpolation. They ensure content is
  * preserved exactly as written, useful for code samples or pre-formatted text.
- * When `<text>` is used, the parser eats everything including tags and comments,
- * except `<text>` itself and `</text>`, treating it all as literal text,
- * until a matching `</text>` is found.
+ * For example, when `<text>` is used, the parser eats everything including tags and comments,
+ * including `<text>` itself, until a matching `</text>` is found.
  *
  * Cases that apply:
  * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
@@ -489,9 +489,10 @@ export interface CstPragmaNode extends CstNode {
  * - Text with attributes enabling processing (future feature)
  *
  * Note:
- * 1. The tagName (value) can only be "text" in this version.
+ * 1. The tagName (value) can only be "text" or "template", as far as I can think of.
+ *    There should be a dynamic list of components that should be parsed as literal elements.
  * 2. Literal element node is different from elements which do not support nested tags,
- *    e.g., <let> or <template>. Literal element node is handled on the CST parsing stage.
+ *    e.g., <let>. Literal element node is handled on the CST parsing stage.
  * 3. If you really need `<text>` in your POML. Recommended to use `&lt;text&gt;`
  *    outside of literal element.
  */

From e929b5f70af2c46f97f3b251aec8d7f4be14561c Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 08:41:20 +0800
Subject: [PATCH 37/76] update lexer tests

---
 packages/poml/next/nodes.ts              | 13 ++++++-
 packages/poml/tests/reader/lexer.test.ts | 45 +++++++++++-------------
 2 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 0b7fbc9e..6f793fc2 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -444,7 +444,18 @@ export interface CstCommentNode extends CstNode {
  * - Specify version: `<!-- @pragma version >=1.0.0 <2.3.0 -->`
  * - Turn tags on/off: `<!-- @pragma components +reference -table -->`
  * - Turn speaker roles on/off: `<!-- @pragma speaker multi -->` or `single`
- * - White space policy: `<!-- @pragma whitespace pre -->` or `trim` or `collapse`
+ * - White space policy: `<!-- @pragma whitespace pre -->` or `trim`, `collapse` or `remove`
+ *
+ * Notes on white space policy:
+ * - `pre`: preserve all whitespace as-is
+ * - `trim`: trim leading/trailing whitespace in each element
+ * - `collapse`: trim + collapse consecutive whitespace into a single space
+ * - `remove`: collapse + remove all whitespace between two nested elements
+ *
+ * Each element type will have its own default whitespace policy.
+ * For example, `<text>` defaults to `pre`, while `<paragraph>` defaults to `collapse`.
+ * However, when a pragma is set, it overrides the default for subsequent elements.
+ * It affects the AST construction stage as well as the props sent to components.
  */
 export interface PragmaNode {
   kind: 'PRAGMA';
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 11d39d78..231561cf 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -5,10 +5,10 @@ import {
   CommentClose,
   TemplateOpen,
   TemplateClose,
-  TagOpen,
-  TagClose,
-  TagClosingOpen,
-  TagSelfClose,
+  OpenBracket,
+  ClosingOpenBracket,
+  SelfCloseBracket,
+  CloseBracket,
   Equals,
   DoubleQuote,
   SingleQuote,
@@ -16,6 +16,7 @@ import {
   Identifier,
   Whitespace,
   Arbitrary,
+  BackslashEscape,
 } from 'poml/next/lexer';
 
 // Helper function to extract token images
@@ -158,11 +159,9 @@ describe('Edge Cases', () => {
       '"',
       'with',
       ' ',
-      '\\',
-      '"',
+      '\\"',
       'escaped',
-      '\\',
-      '"',
+      '\\"',
       ' ',
       'quotes',
       '"',
@@ -250,9 +249,9 @@ describe('Edge Cases', () => {
 
 describe('Token Types', () => {
   test('should identify correct token types for basic elements', () => {
-    expect(tokenTypes('<task>')).toEqual([TagOpen, Identifier, TagClose]);
-    expect(tokenTypes('</task>')).toEqual([TagClosingOpen, Identifier, TagClose]);
-    expect(tokenTypes('<meta />')).toEqual([TagOpen, Identifier, Whitespace, TagSelfClose]);
+    expect(tokenTypes('<task>')).toEqual([OpenBracket, Identifier, CloseBracket]);
+    expect(tokenTypes('</task>')).toEqual([ClosingOpenBracket, Identifier, CloseBracket]);
+    expect(tokenTypes('<meta />')).toEqual([OpenBracket, Identifier, Whitespace, SelfCloseBracket]);
   });
 
   test('should identify quotes and backslashes', () => {
@@ -275,9 +274,8 @@ describe('Token Types', () => {
 
   test('should identify attributes', () => {
     expect(tokenTypes('<markup.paragraph id="intro" data-value="123\\n"456\'>')).toEqual([
-      TagOpen,
+      OpenBracket,
       Identifier,
-      Arbitrary,
       Whitespace,
       Identifier,
       Equals,
@@ -289,12 +287,11 @@ describe('Token Types', () => {
       Equals,
       DoubleQuote,
       Arbitrary,
-      Backslash,
-      Identifier,
+      BackslashEscape,
       DoubleQuote,
       Arbitrary,
       SingleQuote,
-      TagClose,
+      CloseBracket,
     ]);
   });
 });
@@ -324,7 +321,7 @@ line2 <tag>
 line3`;
     const result = tokenize(input);
 
-    const tagToken = result.tokens.find((t) => t.tokenType === TagOpen);
+    const tagToken = result.tokens.find((t) => t.tokenType === OpenBracket);
     expect(tagToken).toBeDefined();
     expect(tagToken!.startLine).toBe(2);
     expect(tagToken!.startColumn).toBe(7); // After "line2 "
@@ -519,8 +516,8 @@ describe('Unicode and Special Characters', () => {
 
   test('should handle emoji and symbols', () => {
     expect(tokenImages('Hello 👋 World 🌍')).toEqual(['Hello', ' ', '👋', ' ', 'World', ' ', '🌍']);
-    expect(tokenImages('Math: ∑∞π≠∅')).toEqual(['Math', ':', ' ', '∑∞π≠∅']);
-    expect(tokenImages('Arrows: ←→↑↓')).toEqual(['Arrows', ':', ' ', '←→↑↓']);
+    expect(tokenImages('Math: ∑∞π≠∅')).toEqual(['Math:', ' ', '∑∞π≠∅']);
+    expect(tokenImages('Arrows: ←→↑↓')).toEqual(['Arrows:', ' ', '←→↑↓']);
   });
 
   test('should handle unicode', () => {
@@ -601,11 +598,9 @@ describe('Malformed Patterns', () => {
     expect(tokenImages('escaped \\"quote\\" in text')).toEqual([
       'escaped',
       ' ',
-      '\\',
-      '"',
+      '\\"',
       'quote',
-      '\\',
-      '"',
+      '\\"',
       ' ',
       'in',
       ' ',
@@ -631,7 +626,7 @@ describe('Malformed Patterns', () => {
     expect(tokenImages('<<>>')).toEqual(['<', '<', '>', '>']);
     expect(tokenImages('"""')).toEqual(['"', '"', '"']);
     expect(tokenImages("'''")).toEqual(["'", "'", "'"]);
-    expect(tokenImages('\\\\\\')).toEqual(['\\', '\\', '\\']);
+    expect(tokenImages('\\\\\\')).toEqual(['\\\\', '\\']);
     expect(tokenImages('===')).toEqual(['=', '=', '=']);
   });
 
@@ -713,7 +708,7 @@ describe('Malformed Patterns', () => {
     expect(tokenImages('path/to/file')).toEqual(['path', '/to/file']);
     expect(tokenImages('a/b/c')).toEqual(['a', '/b/c']);
     expect(tokenImages('text / more')).toEqual(['text', ' ', '/', ' ', 'more']);
-    expect(tokenImages('http://example.com')).toEqual(['http', '://example.com']);
+    expect(tokenImages('http://example.com')).toEqual(['http:', '//example.com']);
     expect(tokenImages('5/3=1.67')).toEqual(['5/3', '=', '1.67']);
     // These should NOT match as incomplete delimiters
     expect(tokenImages('/<tag>')).toEqual(['/', '<', 'tag', '>']);

From b4fde29472d96735f090837df2edec36016873c2 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 08:49:47 +0800
Subject: [PATCH 38/76] .

---
 packages/poml/next/lexer.ts | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index bcdf0333..1757ca1f 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -41,10 +41,10 @@ export const Identifier = createToken({
   pattern: /[a-zA-Z_]([a-zA-Z0-9_\.:]|(-(?!\-+>)))*/,
 });
 
-// Include all Unicode whitespace characters and control characters
+// Match ASCII whitespace only (space, tab, CR, LF, VT, FF); Unicode whitespace is not included
 export const Whitespace = createToken({
   name: 'Whitespace',
-  pattern: /[\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF]+/,
+  pattern: /[ \t\r\n\v\f]+/,
   line_breaks: true,
 });
 
@@ -54,7 +54,7 @@ export const Whitespace = createToken({
  * - starts or ends a comment: <!--, -->
  * - starts or ends a template: {{, }}
  * - starts or ends a string literal: " or '
- * - whitespace (handled separately - includes Unicode whitespace and control chars)
+ * - whitespace (handled separately - includes control chars)
  * - equal sign (=)
  * - backslash \ (handled separately for escaping)
  * - valid backslash escape sequences such as \n, \t, \", \', \\, \xHH, \uHHHH, \UHHHHHHHH, \{{, \}}
@@ -70,10 +70,10 @@ export const Whitespace = createToken({
  */
 export const Arbitrary = createToken({
   name: 'Arbitrary',
-  // Match anything except: <, >, quotes, =, backslash, whitespace (including Unicode), control chars
+  // Match anything except: <, >, quotes, =, backslash, whitespace, control chars
   // Allow single braces and slashes with lookahead constraints
   pattern:
-    /(?:[^<>"'{}=\\&\s\u0000-\u001F\u007F-\u009F\u2000-\u200B\uFEFF\/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|x[0-9A-Fa-f]+;|[a-zA-Z][a-zA-Z0-9]+;))+/,
+    /(?:[^<>"'{}=\\& \t\r\n\v\f/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|x[0-9A-Fa-f]+;|[a-zA-Z][a-zA-Z0-9]+;))+/,
   line_breaks: false,
 });
 

From 393ce43ccb9e130827df74c90d1e43d576444b7d Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 09:02:32 +0800
Subject: [PATCH 39/76] .

---
 packages/poml/next/lexer.ts              |   6 +-
 packages/poml/tests/reader/lexer.test.ts | 146 +++++++++++++++++++++++
 2 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index 1757ca1f..e0574948 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -21,7 +21,7 @@ export const BackslashEscape = createToken({
 });
 export const CharacterEntity = createToken({
   name: 'CharacterEntity',
-  pattern: /&#x[0-9A-Fa-f]+;|&#[0-9]+;|&[a-zA-Z][a-zA-Z0-9]+;/,
+  pattern: /&#x[0-9A-Fa-f]+;|&#[0-9]+;|&[a-zA-Z][a-zA-Z0-9]*;|&;/,
 });
 // Backslash not followed by a valid escape sequence
 export const Backslash = createToken({ name: 'Backslash', pattern: /\\/ });
@@ -65,7 +65,7 @@ export const Whitespace = createToken({
  * - Incomplete tag delimiters such as / (/< is an exception, because < is a start of tag)
  * - Incomplete comment delimiters such as !-- or -- are OK
  * - Incorrect @pragma directive such as @pragm or @pragmaX will be matched
- * - Invalid character entities such as &abc (without semicolon) or & (by itself) or &;, &z; (invalid)
+ * - Invalid character entities such as &abc (without semicolon) or & (by itself)
  * - All other Unicode characters including emojis, CJK, etc.
  */
 export const Arbitrary = createToken({
@@ -73,7 +73,7 @@ export const Arbitrary = createToken({
   // Match anything except: <, >, quotes, =, backslash, whitespace, control chars
   // Allow single braces and slashes with lookahead constraints
   pattern:
-    /(?:[^<>"'{}=\\& \t\r\n\v\f/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|x[0-9A-Fa-f]+;|[a-zA-Z][a-zA-Z0-9]+;))+/,
+    /(?:[^<>"'{}=\\& \t\r\n\v\f/-]|{(?!{)|}(?!})|\/(?!>)|\-(?!\-+>)|&(?!#\d+;|x[0-9A-Fa-f]+;|[a-zA-Z][a-zA-Z0-9]*;|;))+/,
   line_breaks: false,
 });
 
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 231561cf..92d5e05f 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -17,6 +17,7 @@ import {
   Whitespace,
   Arbitrary,
   BackslashEscape,
+  CharacterEntity,
 } from 'poml/next/lexer';
 
 // Helper function to extract token images
@@ -294,6 +295,151 @@ describe('Token Types', () => {
       CloseBracket,
     ]);
   });
+
+  test('recognizes simple escapes', () => {
+    expect(tokenTypes('"a\\nb"')).toEqual([DoubleQuote, Identifier, BackslashEscape, Identifier, DoubleQuote]);
+
+    expect(tokenTypes("'a\\tb'")).toEqual([SingleQuote, Identifier, BackslashEscape, Identifier, SingleQuote]);
+
+    // Escaped quotes and backslash
+    expect(tokenTypes('"\\\" \\\\"')).toEqual([DoubleQuote, BackslashEscape, Whitespace, BackslashEscape, DoubleQuote]);
+  });
+
+  test('recognizes unicode and hex escapes', () => {
+    expect(tokenTypes('"A: \\x41"')).toEqual([
+      DoubleQuote,
+      Identifier, // A:
+      Whitespace,
+      BackslashEscape, // \x41
+      DoubleQuote,
+    ]);
+
+    expect(tokenTypes('"U: \\u0041"')).toEqual([
+      DoubleQuote,
+      Identifier, // U:
+      Whitespace,
+      BackslashEscape, // \u0041
+      DoubleQuote,
+    ]);
+
+    expect(tokenTypes('"emoji: \\U0001F600"')).toEqual([
+      DoubleQuote,
+      Identifier, // emoji:
+      Whitespace,
+      BackslashEscape, // \U0001F600
+      DoubleQuote,
+    ]);
+  });
+
+  test('recognizes escaped braces for templates', () => {
+    expect(tokenImages('pre \\{{ mid \\}} post')).toEqual(['pre', ' ', '\\{{', ' ', 'mid', ' ', '\\}}', ' ', 'post']);
+    expect(tokenTypes('pre \\{{ mid \\}} post')).toEqual([
+      Identifier,
+      Whitespace,
+      BackslashEscape,
+      Whitespace,
+      Identifier,
+      Whitespace,
+      BackslashEscape,
+      Whitespace,
+      Identifier,
+    ]);
+  });
+
+  test('invalid escapes fall back to Backslash + text', () => {
+    expect(tokenImages('"\\q"')).toEqual(['"', '\\', 'q', '"']);
+    expect(tokenTypes('"\\q"')).toEqual([DoubleQuote, Backslash, Identifier, DoubleQuote]);
+
+    // Incomplete hex/unicode
+    expect(tokenImages('"\\x4"')).toEqual(['"', '\\', 'x4', '"']);
+    expect(tokenTypes('"\\x4"')).toEqual([DoubleQuote, Backslash, Identifier, DoubleQuote]);
+
+    expect(tokenImages('"\\u123"')).toEqual(['"', '\\', 'u123', '"']);
+    expect(tokenTypes('"\\u123"')).toEqual([DoubleQuote, Backslash, Identifier, DoubleQuote]);
+  });
+
+  test('recognizes decimal, hex, and named entities', () => {
+    expect(tokenImages('Fish &amp; Chips')).toEqual(['Fish', ' ', '&amp;', ' ', 'Chips']);
+    expect(tokenTypes('Fish &amp; Chips')).toEqual([Identifier, Whitespace, CharacterEntity, Whitespace, Identifier]);
+
+    expect(tokenImages('Hex: &#x41; Dec: &#65;')).toEqual(['Hex:', ' ', '&#x41;', ' ', 'Dec:', ' ', '&#65;']);
+    const types = tokenTypes('Hex: &#x41; Dec: &#65;');
+    expect(types).toContain(CharacterEntity);
+  });
+
+  test('does not match invalid or incomplete entities', () => {
+    // Missing semicolon or bare ampersand should not be CharacterEntity
+    expect(tokenImages('A & B')).toEqual(['A', ' ', '&', ' ', 'B']);
+    const types = tokenTypes('A & B');
+    expect(types).not.toContain(CharacterEntity);
+
+    expect(tokenImages('Bad: &abc more')).toEqual(['Bad:', ' ', '&abc', ' ', 'more']);
+    expect(tokenTypes('Bad: &abc more')).not.toContain(CharacterEntity);
+  });
+
+  test('allows dot, colon, and hyphen', () => {
+    expect(tokenImages('<xml:tag.name data-value="x">')).toEqual([
+      '<',
+      'xml:tag.name',
+      ' ',
+      'data-value',
+      '=',
+      '"',
+      'x',
+      '"',
+      '>',
+    ]);
+    const types = tokenTypes('<xml:tag.name data-value="x">');
+    expect(types[1]).toBe(Identifier);
+    expect(types[3]).toBe(Identifier);
+  });
+
+  test('stops before comment close sequence', () => {
+    // Identifier should not consume the leading '-' that starts a comment close
+    expect(tokenImages('name--->')).toEqual(['name', '--->']);
+    expect(tokenTypes('name--->')).toEqual([Identifier, CommentClose]);
+  });
+
+  test('ASCII whitespace groups into Whitespace token', () => {
+    const ws = ' \t\n\r\v\f  ';
+    expect(tokenTypes(ws)).toEqual([Whitespace]);
+    expect(tokenImages(ws)).toEqual([ws]);
+  });
+
+  test('Unicode whitespace is not Whitespace', () => {
+    const nbsp = '\u00A0';
+    const emsp = '\u2003';
+    const ideographic = '\u3000';
+
+    // Single unicode spaces should be Arbitrary tokens
+    expect(tokenTypes(nbsp)).toEqual([Arbitrary]);
+    expect(tokenImages(nbsp)).toEqual(['\u00A0']);
+
+    expect(tokenTypes(emsp)).toEqual([Arbitrary]);
+    expect(tokenImages(emsp)).toEqual(['\u2003']);
+
+    expect(tokenTypes(ideographic)).toEqual([Arbitrary]);
+    expect(tokenImages(ideographic)).toEqual(['\u3000']);
+
+    // Mixed ASCII + Unicode whitespace keeps boundaries
+    expect(tokenImages('a ' + '\u2003' + ' b')).toEqual(['a', ' ', '\u2003', ' ', 'b']);
+    expect(tokenTypes('a ' + '\u2003' + ' b')).toEqual([Identifier, Whitespace, Arbitrary, Whitespace, Identifier]);
+  });
+
+  test('single braces and invalid ampersands are Arbitrary', () => {
+    expect(tokenTypes('{')).toEqual([Arbitrary]);
+    expect(tokenTypes('}')).toEqual([Arbitrary]);
+    expect(tokenTypes('&')).toEqual([Arbitrary]);
+    expect(tokenImages('&;')).toEqual(['&;']);
+    expect(tokenTypes('&;')).toEqual([CharacterEntity]);
+    expect(tokenImages('&z;')).toEqual(['&z;']); // still a CharacterEntity-like name by pattern
+    expect(tokenTypes('&z;')).toEqual([CharacterEntity]);
+  });
+
+  test('slash not followed by > stays in Arbitrary', () => {
+    expect(tokenImages('a/b')).toEqual(['a', '/b']);
+    expect(tokenTypes('a/b')).toEqual([Identifier, Arbitrary]);
+  });
 });
 
 describe('Source Position and Error Tests', () => {

From 377d0c19723f5f2d125c24ca759b9a170aa34cde Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 09:19:26 +0800
Subject: [PATCH 40/76] minor fix

---
 packages/poml/next/lexer.ts              |   2 +-
 packages/poml/tests/reader/lexer.test.ts | 159 ++++++++++++++++++++++-
 2 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index e0574948..a48dba62 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -3,7 +3,7 @@ import { createToken, Lexer } from 'chevrotain';
 // Define token types for extended POML
 export const CommentOpen = createToken({ name: 'CommentOpen', pattern: /<!--(\-(?!\-+>))*/ });
 export const CommentClose = createToken({ name: 'CommentClose', pattern: /-{2,}>/ });
-export const PragmaKeyword = createToken({ name: 'PragmaKeyword', pattern: /\b@pragma\b/i });
+export const PragmaKeyword = createToken({ name: 'PragmaKeyword', pattern: /@pragma/i });
 export const TemplateOpen = createToken({ name: 'TemplateOpen', pattern: /{{/ });
 export const TemplateClose = createToken({ name: 'TemplateClose', pattern: /}}/ });
 export const ClosingOpenBracket = createToken({ name: 'ClosingOpenBracket', pattern: /<\// });
diff --git a/packages/poml/tests/reader/lexer.test.ts b/packages/poml/tests/reader/lexer.test.ts
index 92d5e05f..49d98d2c 100644
--- a/packages/poml/tests/reader/lexer.test.ts
+++ b/packages/poml/tests/reader/lexer.test.ts
@@ -18,6 +18,7 @@ import {
   Arbitrary,
   BackslashEscape,
   CharacterEntity,
+  PragmaKeyword,
 } from 'poml/next/lexer';
 
 // Helper function to extract token images
@@ -246,6 +247,160 @@ describe('Edge Cases', () => {
       }
     });
   });
+
+  // Added by claude
+  test('should handle comment-like sequences in different contexts', () => {
+    // The pattern <!--(-(?!-+>))* could potentially misparse these
+    expect(tokenImages('a<!--b')).toEqual(['a', '<!--', 'b']);
+    expect(tokenImages('<!--->text')).toEqual(['<!---', '>', 'text']); // Single dash before >
+    expect(tokenImages('<!---text')).toEqual(['<!---', 'text']); // Triple dash without close
+    expect(tokenImages('text<!----text')).toEqual(['text', '<!----', 'text']); // Four dashes
+    expect(tokenImages('<!--a-b-c-->')).toEqual(['<!--', 'a-b-c', '-->']); // Dashes in content
+
+    // Edge case: comment opener followed immediately by closer
+    expect(tokenImages('<!---->')).toEqual(['<!--', '-->']);
+    expect(tokenImages('<!------>')).toEqual(['<!--', '---->']); // Four dashes then close
+  });
+
+  test('should handle backslash escapes at token boundaries correctly', () => {
+    // BackslashEscape pattern could conflict with regular Backslash
+    expect(tokenImages('\\n')).toEqual(['\\n']); // Valid escape
+    expect(tokenTypes('\\n')).toEqual([BackslashEscape]);
+
+    expect(tokenImages('\\q')).toEqual(['\\', 'q']); // Invalid escape
+    expect(tokenTypes('\\q')).toEqual([Backslash, Identifier]);
+
+    // Hex escapes at boundaries
+    expect(tokenImages('\\x4')).toEqual(['\\', 'x4']); // Incomplete hex (needs 2 digits)
+    expect(tokenImages('\\x4G')).toEqual(['\\', 'x4G']); // Invalid hex char
+    expect(tokenImages('\\xGG')).toEqual(['\\', 'xGG']); // No valid hex digits
+
+    // Unicode escapes with wrong digit count
+    expect(tokenImages('\\u123')).toEqual(['\\', 'u123']); // Too few digits (needs 4)
+    expect(tokenImages('\\u12345')).toEqual(['\\u1234', '5']); // Too many for \u
+    expect(tokenImages('\\U1234567')).toEqual(['\\', 'U1234567']); // Too few for \U (needs 8)
+    expect(tokenImages('\\U123456789')).toEqual(['\\U12345678', '9']); // Too many for \U
+
+    // Template brace escapes
+    expect(tokenImages('\\{{')).toEqual(['\\{{']); // Valid escape
+    expect(tokenImages('\\}}')).toEqual(['\\}}']); // Valid escape
+    expect(tokenImages('\\{')).toEqual(['\\', '{']); // Invalid - single brace
+    expect(tokenImages('\\}')).toEqual(['\\', '}']); // Invalid - single brace
+  });
+
+  test('should handle Arbitrary token lookahead constraints correctly', () => {
+    // The Arbitrary pattern has complex lookahead constraints for braces and slashes
+
+    // Single braces should be part of Arbitrary when not followed by same brace
+    expect(tokenImages('{a')).toEqual(['{a']);
+    expect(tokenTypes('{a')).toEqual([Arbitrary]);
+
+    expect(tokenImages('}b')).toEqual(['}b']);
+    expect(tokenTypes('}b')).toEqual([Arbitrary]);
+
+    // But double braces should be template markers
+    expect(tokenImages('{{a')).toEqual(['{{', 'a']);
+    expect(tokenTypes('{{a')).toEqual([TemplateOpen, Identifier]);
+
+    // Mixed scenarios
+    expect(tokenImages('a{b}c')).toEqual(['a', '{b}c']);
+    expect(tokenTypes('a{b}c')).toEqual([Identifier, Arbitrary]);
+
+    // Slash constraints
+    expect(tokenImages('a/b')).toEqual(['a', '/b']);
+    expect(tokenTypes('a/b')).toEqual([Identifier, Arbitrary]);
+
+    expect(tokenImages('a/>b')).toEqual(['a', '/>', 'b']);
+    expect(tokenTypes('a/>b')).toEqual([Identifier, SelfCloseBracket, Identifier]);
+
+    // Dash constraints (should not consume dashes that could start comment close)
+    expect(tokenImages('text--')).toEqual(['text--']);
+    expect(tokenImages('text---')).toEqual(['text---']);
+    expect(tokenImages('text-->')).toEqual(['text', '-->']);
+    expect(tokenImages('text--->')).toEqual(['text', '--->']);
+  });
+
+  test('should handle all character entity edge cases', () => {
+    // Valid entities
+    expect(tokenImages('&amp;')).toEqual(['&amp;']);
+    expect(tokenTypes('&amp;')).toEqual([CharacterEntity]);
+
+    expect(tokenImages('&#123;')).toEqual(['&#123;']);
+    expect(tokenTypes('&#123;')).toEqual([CharacterEntity]);
+
+    expect(tokenImages('&#xABCD;')).toEqual(['&#xABCD;']);
+    expect(tokenTypes('&#xABCD;')).toEqual([CharacterEntity]);
+
+    // Edge case: empty entity &;
+    expect(tokenImages('&;')).toEqual(['&;']);
+    expect(tokenTypes('&;')).toEqual([CharacterEntity]); // Pattern includes &;
+
+    // Invalid entities should NOT match
+    expect(tokenImages('&')).toEqual(['&']);
+    expect(tokenTypes('&')).toEqual([Arbitrary]);
+
+    expect(tokenImages('&abc')).toEqual(['&abc']); // Missing semicolon
+    expect(tokenTypes('&abc')).toEqual([Arbitrary]);
+
+    expect(tokenImages('&#')).toEqual(['&#']); // Incomplete numeric
+    expect(tokenTypes('&#')).toEqual([Arbitrary]);
+
+    expect(tokenImages('&#x')).toEqual(['&#x']); // Incomplete hex
+    expect(tokenTypes('&#x')).toEqual([Arbitrary]);
+
+    // Entities in context
+    expect(tokenImages('a&amp;b')).toEqual(['a', '&amp;', 'b']);
+    expect(tokenImages('&amp;&lt;&gt;')).toEqual(['&amp;', '&lt;', '&gt;']);
+  });
+
+  // Token precedence and ordering conflicts
+  test('should respect token precedence in ambiguous cases', () => {
+    // ClosingOpenBracket must come before OpenBracket
+    expect(tokenImages('</')).toEqual(['</']);
+    expect(tokenTypes('</')).toEqual([ClosingOpenBracket]);
+
+    expect(tokenImages('<')).toEqual(['<']);
+    expect(tokenTypes('<')).toEqual([OpenBracket]);
+
+    // SelfCloseBracket must come before CloseBracket
+    expect(tokenImages('/>')).toEqual(['/>']);
+    expect(tokenTypes('/>')).toEqual([SelfCloseBracket]);
+
+    expect(tokenImages('>')).toEqual(['>']);
+    expect(tokenTypes('>')).toEqual([CloseBracket]);
+
+    // BackslashEscape must come before Backslash
+    expect(tokenImages('\\n')).toEqual(['\\n']);
+    expect(tokenTypes('\\n')).toEqual([BackslashEscape]);
+
+    expect(tokenImages('\\z')).toEqual(['\\', 'z']);
+    expect(tokenTypes('\\z')).toEqual([Backslash, Identifier]);
+
+    // Identifier pattern with special chars
+    expect(tokenImages('a-b')).toEqual(['a-b']); // Dash allowed in identifier
+    expect(tokenImages('a--b')).toEqual(['a--b']); // Double dash allowed
+    expect(tokenImages('a---b')).toEqual(['a---b']); // Triple dash allowed
+    expect(tokenImages('a-->')).toEqual(['a', '-->']); // But not before >
+    expect(tokenImages('a--->')).toEqual(['a', '--->']); // Comment close takes precedence
+
+    // Identifier with dots and colons
+    expect(tokenImages('ns:tag.name')).toEqual(['ns:tag.name']);
+    expect(tokenTypes('ns:tag.name')).toEqual([Identifier]);
+
+    // PragmaKeyword tests
+    expect(tokenImages('@pragma')).toEqual(['@pragma']);
+    expect(tokenTypes('@pragma')).toEqual([PragmaKeyword]);
+    expect(tokenImages('-- @pragma')).toEqual(['--', ' ', '@pragma']);
+    expect(tokenTypes('-- @pragma')).toEqual([Arbitrary, Whitespace, PragmaKeyword]);
+    expect(tokenTypes('--@pragma')).toEqual([Arbitrary]);
+    expect(tokenImages('<!--@pragma')).toEqual(['<!--', '@pragma']);
+
+    expect(tokenImages('@PRAGMA')).toEqual(['@PRAGMA']); // Case insensitive
+    expect(tokenTypes('@PRAGMA')).toEqual([PragmaKeyword]);
+
+    expect(tokenImages('@pragmaa')).toEqual(['@pragma', 'a']); // Prefix still lexes as the keyword
+    expect(tokenTypes('@pragmaa')).toEqual([PragmaKeyword, Identifier]);
+  });
 });
 
 describe('Token Types', () => {
@@ -889,8 +1044,8 @@ describe('Malformed Patterns', () => {
     // Incorrect @pragma directive such as @pragm or @pragmaX will be matched as Arbitrary
     expect(tokenImages('@pragma')).toEqual(['@pragma']);
     expect(tokenImages('@pragm')).toEqual(['@pragm']);
-    expect(tokenImages('@pragmaX')).toEqual(['@pragmaX']);
-    expect(tokenImages('@pragma-extended')).toEqual(['@pragma-extended']);
+    expect(tokenImages('@pragmaX')).toEqual(['@pragma', 'X']);
+    expect(tokenImages('@pragma-extended')).toEqual(['@pragma', '-extended']);
     expect(tokenImages('@@pragma')).toEqual(['@@pragma']);
     expect(tokenImages('not@pragma')).toEqual(['not', '@pragma']);
     expect(tokenImages('@PRAGMA')).toEqual(['@PRAGMA']);

From c938ed34be728e0ebeb5436b02dad03cc4b01661 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 14:56:37 +0800
Subject: [PATCH 41/76] .

---
 packages/poml/next/cst.ts | 667 ++++++++++++++++++++++++--------------
 1 file changed, 421 insertions(+), 246 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 80417f22..0d233be7 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -1,286 +1,461 @@
-export class PomlCstParser extends CstParser {
-  // Define rules as public methods
-  public document!: () => DocumentCstNode;
-  public content!: () => ContentCstNode;
-  public element!: () => ElementCstNode;
-  public literalElement!: () => LiteralElementCstNode;
-  public selfCloseElement!: () => SelfCloseElementCstNode;
-  public openTag!: () => OpenTagCstNode;
-  public closeTag!: () => CloseTagCstNode;
-  public attributes!: () => AttributesCstNode;
-  public attribute!: () => AttributeCstNode;
-  public attributeValue!: () => AttributeValueCstNode;
-  public quotedValue!: () => QuotedValueCstNode;
-  public unquotedValue!: () => UnquotedValueCstNode;
-  public valueContent!: () => ValueContentCstNode;
-  public escapedChar!: () => EscapedCharCstNode;
-  public forIterator!: () => ForIteratorCstNode;
-  public template!: () => TemplateCstNode;
-  public value!: () => ValueCstNode;
-  public valueElement!: () => ValueElementCstNode;
-  public comment!: () => CommentCstNode;
-  public pragma!: () => PragmaCstNode;
+// cstParser.ts
+import { CstParser, IToken, TokenType, CstNode } from 'chevrotain';
 
+import {
+  // tokens & sets
+  AllTokens,
+  TokensComment,
+  TokensExpression,
+  TokensDoubleQuoted,
+  TokensSingleQuoted,
+  TokensDoubleQuotedExpression,
+  TokensSingleQuotedExpression,
+  TokensTextContent,
+  // individual tokens
+  CommentOpen,
+  CommentClose,
+  PragmaKeyword,
+  TemplateOpen,
+  TemplateClose,
+  ClosingOpenBracket,
+  SelfCloseBracket,
+  OpenBracket,
+  CloseBracket,
+  Equals,
+  DoubleQuote,
+  SingleQuote,
+  Whitespace,
+  Identifier,
+  // lexer instance
+  extendedPomlLexer,
+} from './lexer';
+
+/**
+ * Extended POML CST Parser
+ *
+ * This implements the CST shapes specified in nodes.ts for:
+ * - Root, Elements, LiteralElements, SelfCloseElements
+ * - Open/Close tags, Attributes (quoted, templated, for-iterator)
+ * - Templates ({{ ... }}), Comments, Pragmas
+ * - Text content (tokens that are not start of tags/templates)
+ *
+ * NOTE:
+ *  - Semantic checks (e.g., ensuring "in" in for-iterator, tag name match for literal elements)
+ *    are intentionally loose at CST stage. Enforce these during AST transform if needed.
+ */
+export class ExtendedPomlParser extends CstParser {
   constructor() {
-    super(allTokens, {
-      recoveryEnabled: true,
-      nodeLocationTracking: 'full',
+    super(AllTokens, {
+      recoveryEnabled: true, // be generous during CST stage
+      outputCst: true,
     });
 
-    this.performSelfAnalysis();
-  }
+    // ---------------------------
+    // Helper producers (must be used inside RULE bodies so that `this` is bound)
+    // ---------------------------
 
-  // Document is the root rule
-  private documentRule = this.RULE('document', () => {
-    this.MANY(() => {
-      this.OR([{ ALT: () => this.CONSUME(Whitespace) }, { ALT: () => this.SUBRULE(this.content) }]);
-    });
-  });
-
-  // Content can be elements, comments, pragmas, or values
-  private contentRule = this.RULE('content', () => {
-    this.OR([
-      { ALT: () => this.SUBRULE(this.pragma) },
-      { ALT: () => this.SUBRULE(this.comment) },
-      { ALT: () => this.SUBRULE(this.element) },
-      { ALT: () => this.SUBRULE(this.literalElement) },
-      { ALT: () => this.SUBRULE(this.selfCloseElement) },
-      { ALT: () => this.SUBRULE(this.value) },
-    ]);
-  });
-
-  // Regular element with open/close tags
-  private elementRule = this.RULE('element', () => {
-    const openTag = this.SUBRULE(this.openTag);
-    this.MANY(() => {
-      this.OR([{ ALT: () => this.CONSUME(Whitespace) }, { ALT: () => this.SUBRULE(this.content) }]);
+    // Produce an OR() alternatives array that consumes any one of the given tokenTypes,
+    // labeling each consumed token under `label` (so all collected under the same key).
+    const anyOf = (tokenTypes: TokenType[], label?: string) =>
+      tokenTypes.map((tt) => ({
+        ALT: () => (label ? this.CONSUME(tt, { LABEL: label }) : this.CONSUME(tt)),
+      }));
+
+    // Lookahead helpers
+    const isNextPragma = () => {
+      // Peek after <!-- and optional whitespace: expect @pragma
+      if (this.LA(1).tokenType !== CommentOpen) {
+return false;
+}
+      let k = 2;
+      while (this.LA(k).tokenType === Whitespace) {
+k++;
+}
+      return this.LA(k).tokenType === PragmaKeyword;
+    };
+
+    const isNextLiteralOpenTag = () => {
+      // Detect: < [ws]* Identifier("text" | "template")
+      if (this.LA(1).tokenType !== OpenBracket) {
+return false;
+}
+      let k = 2;
+      // optional whitespace after "<"
+      while (this.LA(k).tokenType === Whitespace) {
+k++;
+}
+      const tName = this.LA(k);
+      if (tName.tokenType !== Identifier) {
+return false;
+}
+      const name = (tName.image || '').toLowerCase();
+      return name === 'text' || name === 'template';
+    };
+
+    // ---------------------------
+    // Grammar Rules
+    // ---------------------------
+
+    this.RULE('root', () => {
+      // CstRootNode: { Content?: CstElementContentNode[] }
+      this.MANY(() => {
+        this.SUBRULE(this.elementContentNode, { LABEL: 'Content' });
+      });
     });
-    this.SUBRULE(this.closeTag);
-  });
-
-  // Literal element (like <text>) that preserves content
-  private literalElementRule = this.RULE('literalElement', () => {
-    this.SUBRULE(this.openTag);
-    // Consume everything until matching close tag
-    this.MANY(() => {
+
+    // Content inside elements/root (everything except a matching CloseTag)
+    this.RULE('elementContentNode', () => {
       this.OR([
-        // Look ahead for closing tag
+        // pragma must be before comment
+        {
+          GATE: isNextPragma,
+          ALT: () => this.SUBRULE(this.pragma, { LABEL: 'Pragma' }),
+        },
+        { ALT: () => this.SUBRULE(this.comment, { LABEL: 'Comment' }) },
+
+        // templates
+        {
+          GATE: () => this.LA(1).tokenType === TemplateOpen,
+          ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Template' }),
+        },
+
+        // self-close elements (<tag .../>)
+        {
+          // use backtracking to disambiguate quickly
+          GATE: this.BACKTRACK(this.selfCloseElement),
+          ALT: () => this.SUBRULE(this.selfCloseElement, { LABEL: 'SelfCloseElement' }),
+        },
+
+        // literal elements <text>...</text> or <template>...</template>
+        {
+          GATE: isNextLiteralOpenTag,
+          ALT: () => this.SUBRULE(this.literalElement, { LABEL: 'LiteralElement' }),
+        },
+
+        // normal <tag> ... </tag>
         {
-          GATE: () => !this.isClosingTag(),
-          ALT: () => this.consumeAny(),
+          GATE: () => this.LA(1).tokenType === OpenBracket,
+          ALT: () => this.SUBRULE(this.element, { LABEL: 'Element' }),
+        },
+
+        // fallback: raw text content
+        {
+          ALT: () => {
+            this.AT_LEAST_ONE(() => {
+              this.OR(anyOf(TokensTextContent, 'TextContent'));
+            });
+          },
         },
       ]);
     });
-    this.SUBRULE(this.closeTag);
-  });
-
-  // Self-closing element
-  private selfCloseElementRule = this.RULE('selfCloseElement', () => {
-    this.CONSUME(TagOpen);
-    this.CONSUME(Identifier, { LABEL: 'tagName' });
-    this.OPTION(() => {
-      this.CONSUME(Whitespace);
-      this.OPTION2(() => this.SUBRULE(this.attributes));
+
+    // {{ ... }}
+    this.RULE('templateNode', () => {
+      this.CONSUME(TemplateOpen, { LABEL: 'TemplateOpen' });
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
+
+      this.AT_LEAST_ONE(() => {
+        // Everything except TemplateClose (already enforced in TokensExpression)
+        this.OR(anyOf(TokensExpression, 'Content'));
+      });
+
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterContent' }));
+      this.CONSUME(TemplateClose, { LABEL: 'TemplateClose' });
     });
-    this.CONSUME(TagSelfClose);
-  });
-
-  // Opening tag
-  private openTagRule = this.RULE('openTag', () => {
-    this.CONSUME(TagOpen);
-    this.CONSUME(Identifier, { LABEL: 'tagName' });
-    this.OPTION(() => {
-      this.CONSUME(Whitespace);
-      this.OPTION2(() => this.SUBRULE(this.attributes));
+
+    // <!-- ... -->
+    this.RULE('comment', () => {
+      this.CONSUME(CommentOpen, { LABEL: 'CommentOpen' });
+      this.MANY(() => {
+        // Anything until CommentClose
+        this.OR(anyOf(TokensComment, 'Content'));
+      });
+      this.CONSUME(CommentClose, { LABEL: 'CommentClose' });
     });
-    this.CONSUME(TagClose);
-  });
-
-  // Closing tag
-  private closeTagRule = this.RULE('closeTag', () => {
-    this.CONSUME(TagClosingOpen);
-    this.CONSUME(Identifier, { LABEL: 'tagName' });
-    this.OPTION(() => this.CONSUME(Whitespace));
-    this.CONSUME(TagClose);
-  });
-
-  // Attributes
-  private attributesRule = this.RULE('attributes', () => {
-    this.MANY_SEP({
-      SEP: Whitespace,
-      DEF: () => this.SUBRULE(this.attribute),
+
+    // <!-- @pragma ... -->
+    this.RULE('pragma', () => {
+      this.CONSUME(CommentOpen, { LABEL: 'CommentOpen' });
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
+      this.CONSUME(PragmaKeyword, { LABEL: 'PragmaKeyword' });
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterPragma' }));
+
+      // identifier after @pragma
+      this.CONSUME(Identifier, { LABEL: 'PragmaIdentifier' });
+      this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIdentifier' }));
+
+      // Options: unquoted tokens or quoted strings (no templates inside these)
+      this.MANY(() => {
+        this.OR([
+          {
+            ALT: () => this.SUBRULE(this.quotedNoTemplate, { LABEL: 'PragmaOption' }),
+          },
+          {
+            ALT: () => {
+              // unquoted: anything non-whitespace & not closing
+              this.OR(
+                anyOf(
+                  AllTokens.filter(
+                    (t) => t !== CommentClose && t !== Whitespace && t !== DoubleQuote && t !== SingleQuote,
+                  ),
+                  'PragmaOption',
+                ),
+              );
+            },
+          },
+        ]);
+        this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterContent' }));
+      });
+
+      this.CONSUME(CommentClose, { LABEL: 'CommentClose' });
     });
-  });
-
-  // Single attribute
-  private attributeRule = this.RULE('attribute', () => {
-    this.CONSUME(Identifier, { LABEL: 'key' });
-    this.CONSUME(Equals);
-    this.SUBRULE(this.attributeValue);
-  });
-
-  // Attribute value (quoted, unquoted, or for iterator)
-  private attributeValueRule = this.RULE('attributeValue', () => {
-    this.OR([
-      { ALT: () => this.SUBRULE(this.quotedValue) },
-      { ALT: () => this.SUBRULE(this.unquotedValue) },
-      // Special case for for="item in items"
-      {
-        GATE: () => this.isForAttribute(),
-        ALT: () => this.SUBRULE(this.forIterator),
-      },
-    ]);
-  });
-
-  // Quoted value
-  private quotedValueRule = this.RULE('quotedValue', () => {
-    this.OR([
-      {
-        ALT: () => {
-          this.CONSUME(DoubleQuote, { LABEL: 'openQuote' });
-          this.MANY(() => {
-            this.SUBRULE(this.valueContent);
-          });
-          this.CONSUME2(DoubleQuote, { LABEL: 'closeQuote' });
+
+    // "..." or '...' — used only in pragma options (no templates allowed)
+    this.RULE('quotedNoTemplate', () => {
+      this.OR([
+        {
+          ALT: () => {
+            this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
+            this.MANY(() => {
+              this.OR(anyOf(TokensDoubleQuoted, 'Content'));
+            });
+            this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
+          },
         },
-      },
-      {
-        ALT: () => {
-          this.CONSUME(SingleQuote, { LABEL: 'openQuote' });
-          this.MANY2(() => {
-            this.SUBRULE2(this.valueContent);
-          });
-          this.CONSUME2(SingleQuote, { LABEL: 'closeQuote' });
+        {
+          ALT: () => {
+            this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
+            this.MANY(() => {
+              this.OR(anyOf(TokensSingleQuoted, 'Content'));
+            });
+            this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
+          },
         },
-      },
-    ]);
-  });
-
-  // Unquoted value (template or expression)
-  private unquotedValueRule = this.RULE('unquotedValue', () => {
-    this.OR([
-      { ALT: () => this.SUBRULE(this.template) },
-      { ALT: () => this.CONSUME(Identifier, { LABEL: 'expression' }) },
-      { ALT: () => this.CONSUME(TextContent, { LABEL: 'expression' }) },
-    ]);
-  });
-
-  // Value content inside quotes
-  private valueContentRule = this.RULE('valueContent', () => {
-    this.OR([
-      { ALT: () => this.SUBRULE(this.template) },
-      { ALT: () => this.SUBRULE(this.escapedChar) },
-      { ALT: () => this.CONSUME(TextContent, { LABEL: 'text' }) },
-      { ALT: () => this.CONSUME(Identifier, { LABEL: 'text' }) },
-      { ALT: () => this.CONSUME(Whitespace, { LABEL: 'text' }) },
-    ]);
-  });
-
-  // Escaped character
-  private escapedCharRule = this.RULE('escapedChar', () => {
-    this.CONSUME(Backslash);
-    this.OR([
-      { ALT: () => this.CONSUME(DoubleQuote, { LABEL: 'char' }) },
-      { ALT: () => this.CONSUME(SingleQuote, { LABEL: 'char' }) },
-      { ALT: () => this.CONSUME(Backslash, { LABEL: 'char' }) },
-      { ALT: () => this.CONSUME(Identifier, { LABEL: 'char' }) },
-    ]);
-  });
-
-  // For iterator (item in items)
-  private forIteratorRule = this.RULE('forIterator', () => {
-    this.CONSUME(Identifier, { LABEL: 'iterator' });
-    this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'Whitespace1' }));
-    this.CONSUME2(Identifier, { LABEL: 'in' }); // "in" keyword
-    this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'Whitespace2' }));
-    // Collection can be complex expression
-    this.AT_LEAST_ONE(() => {
-      this.OR([
-        { ALT: () => this.CONSUME3(Identifier, { LABEL: 'collection' }) },
-        { ALT: () => this.CONSUME(TextContent, { LABEL: 'collection' }) },
       ]);
     });
-  });
 
-  // Template {{ expression }}
-  private templateRule = this.RULE('template', () => {
-    this.CONSUME(TemplateOpen);
-    this.MANY(() => {
+    // Attribute value: quoted text that MAY contain templates
+    this.RULE('quotedTemplate', () => {
       this.OR([
-        { ALT: () => this.CONSUME(Whitespace, { LABEL: 'expression' }) },
-        { ALT: () => this.CONSUME(Identifier, { LABEL: 'expression' }) },
-        { ALT: () => this.CONSUME(TextContent, { LABEL: 'expression' }) },
+        {
+          ALT: () => {
+            this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
+            this.MANY(() => {
+              this.OR([
+                { ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Content' }) },
+                { ALT: () => this.OR(anyOf(TokensDoubleQuotedExpression, 'Content')) },
+              ]);
+            });
+            this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
+          },
+        },
+        {
+          ALT: () => {
+            this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
+            this.MANY(() => {
+              this.OR([
+                { ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Content' }) },
+                { ALT: () => this.OR(anyOf(TokensSingleQuotedExpression, 'Content')) },
+              ]);
+            });
+            this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
+          },
+        },
       ]);
     });
-    this.CONSUME(TemplateClose);
-  });
 
-  // Value (text and/or templates)
-  private valueRule = this.RULE('value', () => {
-    this.AT_LEAST_ONE(() => {
-      this.SUBRULE(this.valueElement);
-    });
-  });
-
-  // Value element (text or template)
-  private valueElementRule = this.RULE('valueElement', () => {
-    this.OR([
-      { ALT: () => this.SUBRULE(this.template) },
-      { ALT: () => this.CONSUME(TextContent, { LABEL: 'text' }) },
-      { ALT: () => this.CONSUME(Identifier, { LABEL: 'text' }) },
-      { ALT: () => this.CONSUME(Whitespace, { LABEL: 'text' }) },
-    ]);
-  });
-
-  // Comment
-  private commentRule = this.RULE('comment', () => {
-    this.CONSUME(CommentOpen);
-    this.MANY(() => {
+    // for="iterator in collection" (quoted; inside quotes, treat like expression until closing quote)
+    this.RULE('forIteratorValue', () => {
       this.OR([
         {
-          GATE: () => !this.isCommentClose(),
-          ALT: () => this.consumeAny({ LABEL: 'commentContent' }),
+          ALT: () => {
+            this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
+            this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
+
+            // iterator
+            this.CONSUME(Identifier, { LABEL: 'Iterator' });
+            this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' }));
+
+            // "in" keyword (lexed as Identifier). Semantic check deferred to AST.
+            this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
+            this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
+
+            // collection expression (like inside template), stop before optional ws + closing quote
+            this.AT_LEAST_ONE(() => {
+              this.OR(anyOf(TokensDoubleQuotedExpression, 'Collection'));
+            });
+
+            this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
+            this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
+          },
+        },
+        {
+          ALT: () => {
+            this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
+            this.OPTION5(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterOpen' }));
+
+            this.CONSUME3(Identifier, { LABEL: 'Iterator' });
+            this.OPTION6(() => this.CONSUME6(Whitespace, { LABEL: 'WsAfterIterator' }));
+
+            this.CONSUME4(Identifier, { LABEL: 'InKeyword' });
+            this.OPTION7(() => this.CONSUME7(Whitespace, { LABEL: 'WsAfterIn' }));
+
+            this.AT_LEAST_ONE2(() => {
+              this.OR(anyOf(TokensSingleQuotedExpression, 'Collection'));
+            });
+
+            this.OPTION8(() => this.CONSUME8(Whitespace, { LABEL: 'WsAfterCollection' }));
+            this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
+          },
         },
       ]);
     });
-    this.CONSUME(CommentClose);
-  });
-
-  // Pragma
-  private pragmaRule = this.RULE('pragma', () => {
-    this.CONSUME(CommentOpen);
-    this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'Whitespace1' }));
-    this.CONSUME(Pragma);
-    this.MANY(() => {
+
+    // Attribute: key = (quoted value | templated value | for-iterator)
+    this.RULE('attribute', () => {
+      const keyTok = this.CONSUME(Identifier, { LABEL: 'AttributeKey' });
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterKey' }));
+      this.CONSUME(Equals, { LABEL: 'Equals' });
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterEquals' }));
+
       this.OR([
+        // for="..."
         {
-          GATE: () => !this.isCommentClose(),
-          ALT: () => this.consumeAny({ LABEL: 'pragmaContent' }),
+          GATE: () =>
+            keyTok.image?.toLowerCase() === 'for' &&
+            (this.LA(1).tokenType === DoubleQuote || this.LA(1).tokenType === SingleQuote),
+          ALT: () => this.SUBRULE(this.forIteratorValue, { LABEL: 'forIteratorValue' }),
         },
+
+        // value={{ ... }} (unquoted template)
+        {
+          GATE: () => this.LA(1).tokenType === TemplateOpen,
+          ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'templatedValue' }),
+        },
+
+        // "..." / '...' (may contain templates)
+        { ALT: () => this.SUBRULE(this.quotedTemplate, { LABEL: 'quotedValue' }) },
       ]);
     });
-    this.CONSUME(CommentClose);
-  });
 
-  // Helper methods
-  private isClosingTag(): boolean {
-    return this.LA(1).tokenType === TagClosingOpen;
-  }
+    // <tag ...>
+    this.RULE('openTag', () => {
+      this.CONSUME(OpenBracket, { LABEL: 'OpenBracket' });
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
 
-  private isCommentClose(): boolean {
-    return this.LA(1).tokenType === CommentClose;
-  }
+      this.CONSUME(Identifier, { LABEL: 'TagName' });
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
+
+      this.MANY(() => {
+        this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
+        this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
+      });
 
-  private isForAttribute(): boolean {
-    // Check if previous token was "for" as attribute key
-    const prevTokens = this.input.slice(Math.max(0, this.currIdx - 3), this.currIdx);
-    return prevTokens.some((t) => t.image.toLowerCase() === 'for');
+      this.CONSUME(CloseBracket, { LABEL: 'CloseBracket' });
+    });
+
+    // </tag>
+    this.RULE('closeTag', () => {
+      this.CONSUME(ClosingOpenBracket, { LABEL: 'ClosingOpenBracket' });
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
+      this.CONSUME(Identifier, { LABEL: 'TagName' });
+      this.CONSUME(CloseBracket, { LABEL: 'CloseBracket' });
+    });
+
+    // <tag .../> (complete element, no content)
+    this.RULE('selfCloseElement', () => {
+      this.CONSUME(OpenBracket, { LABEL: 'OpenBracket' });
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
+      this.CONSUME(Identifier, { LABEL: 'TagName' });
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
+
+      this.MANY(() => {
+        this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
+        this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
+      });
+
+      this.CONSUME(SelfCloseBracket, { LABEL: 'SelfCloseBracket' });
+    });
+
+    // <tag> ... </tag>
+    this.RULE('element', () => {
+      this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
+      this.MANY(() => {
+        // stop on a close tag
+        this.SUBRULE(this.elementContentNode, { LABEL: 'Content' });
+      });
+      this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
+    });
+
+    // <text> ...literal (no templates/tags parsed)... </text>
+    // or <template> ...literal... </template>
+    this.RULE('literalElement', () => {
+      this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
+
+      // Eat *everything* until a ClosingOpenBracket + (optional ws) + Identifier('text'|'template') + '>'
+      this.AT_LEAST_ONE(() => {
+        this.OR([
+          // Continue consuming anything that is not the start of the matching close.
+          {
+            GATE: () => {
+              if (this.LA(1).tokenType !== ClosingOpenBracket) {
+return true;
+}
+              // look ahead to see if it's </text> or </template>
+              let k = 2;
+              while (this.LA(k).tokenType === Whitespace) {
+k++;
+}
+              const t = this.LA(k);
+              if (t.tokenType !== Identifier) {
+return true;
+}
+              const name = (t.image || '').toLowerCase();
+              return !(name === 'text' || name === 'template');
+            },
+            ALT: () => {
+              // Treat all as raw text content
+              this.OR(
+                anyOf(
+                  AllTokens.filter((t) => t !== ClosingOpenBracket), // minimal guard
+                  'TextContent',
+                ),
+              );
+            },
+          },
+        ]);
+      });
+
+      this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
+    });
+
+    this.performSelfAnalysis();
   }
 
-  private consumeAny(options?: { LABEL?: string }): IToken {
-    // Consume any token
-    const token = this.LA(1);
-    this.input[this.currIdx++];
-    return token;
+  // Expose entry for external callers (TypeScript-friendly)
+  public parseRoot(): CstNode {
+    // @ts-expect-error Chevrotain types: RULE name maps to a function
+    return this.root();
   }
 }
+
+// Singleton parser instance
+export const extendedPomlParser = new ExtendedPomlParser();
+
+/**
+ * Convenience: tokenize + parse in one call.
+ */
+export function parsePomlToCst(input: string): {
+  cst: CstNode | undefined;
+  lexErrors: ReturnType<typeof extendedPomlLexer.tokenize>['errors'];
+  parseErrors: ReturnType<ExtendedPomlParser['getErrors']>;
+} {
+  const lex = extendedPomlLexer.tokenize(input);
+  extendedPomlParser.input = lex.tokens;
+  const cst = extendedPomlParser.parseRoot();
+  return {
+    cst,
+    lexErrors: lex.errors,
+    parseErrors: extendedPomlParser.errors,
+  };
+}

From 300bffa4704c3fa2fba09cc0d6b646d469ab8f6f Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 14:56:44 +0800
Subject: [PATCH 42/76] .

---
 packages/poml/next/cst.ts | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 0d233be7..96955ae2 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -65,29 +65,29 @@ export class ExtendedPomlParser extends CstParser {
     const isNextPragma = () => {
       // Peek after <!-- and optional whitespace: expect @pragma
       if (this.LA(1).tokenType !== CommentOpen) {
-return false;
-}
+        return false;
+      }
       let k = 2;
       while (this.LA(k).tokenType === Whitespace) {
-k++;
-}
+        k++;
+      }
       return this.LA(k).tokenType === PragmaKeyword;
     };
 
     const isNextLiteralOpenTag = () => {
       // Detect: < [ws]* Identifier("text" | "template")
       if (this.LA(1).tokenType !== OpenBracket) {
-return false;
-}
+        return false;
+      }
       let k = 2;
       // optional whitespace after "<"
       while (this.LA(k).tokenType === Whitespace) {
-k++;
-}
+        k++;
+      }
       const tName = this.LA(k);
       if (tName.tokenType !== Identifier) {
-return false;
-}
+        return false;
+      }
       const name = (tName.image || '').toLowerCase();
       return name === 'text' || name === 'template';
     };
@@ -399,17 +399,17 @@ return false;
           {
             GATE: () => {
               if (this.LA(1).tokenType !== ClosingOpenBracket) {
-return true;
-}
+                return true;
+              }
               // look ahead to see if it's </text> or </template>
               let k = 2;
               while (this.LA(k).tokenType === Whitespace) {
-k++;
-}
+                k++;
+              }
               const t = this.LA(k);
               if (t.tokenType !== Identifier) {
-return true;
-}
+                return true;
+              }
               const name = (t.image || '').toLowerCase();
               return !(name === 'text' || name === 'template');
             },

From 4b54185e7ae0dda1e2d92b12503324193844da4c Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 15:35:24 +0800
Subject: [PATCH 43/76] .

---
 packages/poml/next/cst.ts | 310 ++++++++++++++++----------------------
 1 file changed, 130 insertions(+), 180 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 96955ae2..7737ec6e 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -1,8 +1,6 @@
 // cstParser.ts
-import { CstParser, IToken, TokenType, CstNode } from 'chevrotain';
-
+import { CstParser, CstNode, IToken, TokenType } from 'chevrotain';
 import {
-  // tokens & sets
   AllTokens,
   TokensComment,
   TokensExpression,
@@ -11,7 +9,6 @@ import {
   TokensDoubleQuotedExpression,
   TokensSingleQuotedExpression,
   TokensTextContent,
-  // individual tokens
   CommentOpen,
   CommentClose,
   PragmaKeyword,
@@ -26,158 +23,171 @@ import {
   SingleQuote,
   Whitespace,
   Identifier,
-  // lexer instance
   extendedPomlLexer,
 } from './lexer';
 
 /**
  * Extended POML CST Parser
  *
- * This implements the CST shapes specified in nodes.ts for:
- * - Root, Elements, LiteralElements, SelfCloseElements
- * - Open/Close tags, Attributes (quoted, templated, for-iterator)
- * - Templates ({{ ... }}), Comments, Pragmas
- * - Text content (tokens that are not start of tags/templates)
- *
- * NOTE:
- *  - Semantic checks (e.g., ensuring "in" in for-iterator, tag name match for literal elements)
- *    are intentionally loose at CST stage. Enforce these during AST transform if needed.
+ * Matches the CST shapes declared in nodes.ts.
+ * Rules are declared as class properties so TypeScript "sees" them.
+ * Labels are used **only** where the CST interfaces require custom names
+ * different from token/rule names (e.g., TagName, WsAfter*, TextContent, etc.).
  */
 export class ExtendedPomlParser extends CstParser {
+  // ---- Rule property declarations (so TS knows they exist) ----
+  public root!: (idxInOriginalText?: number) => CstNode;
+  public elementContentNode!: (idxInOriginalText?: number) => CstNode;
+  public templateNode!: (idxInOriginalText?: number) => CstNode;
+  public comment!: (idxInOriginalText?: number) => CstNode;
+  public pragma!: (idxInOriginalText?: number) => CstNode;
+  public quotedNoTemplate!: (idxInOriginalText?: number) => CstNode;
+  public quotedTemplate!: (idxInOriginalText?: number) => CstNode;
+  public forIteratorValue!: (idxInOriginalText?: number) => CstNode;
+  public attribute!: (idxInOriginalText?: number) => CstNode;
+  public openTag!: (idxInOriginalText?: number) => CstNode;
+  public closeTag!: (idxInOriginalText?: number) => CstNode;
+  public selfCloseElement!: (idxInOriginalText?: number) => CstNode;
+  public element!: (idxInOriginalText?: number) => CstNode;
+  public literalElement!: (idxInOriginalText?: number) => CstNode;
+
+  // ---- Small helpers ----
+  private anyOf = (tokenTypes: TokenType[], label?: string) =>
+    tokenTypes.map((tt) => ({
+      ALT: () => (label ? this.CONSUME(tt, { LABEL: label }) : this.CONSUME(tt)),
+    }));
+
+  private isNextPragma = () => {
+    if (this.LA(1).tokenType !== CommentOpen) {
+return false;
+}
+    let k = 2;
+    while (this.LA(k).tokenType === Whitespace) {
+k++;
+}
+    return this.LA(k).tokenType === PragmaKeyword;
+  };
+
+  private isNextLiteralOpenTag = () => {
+    if (this.LA(1).tokenType !== OpenBracket) {
+return false;
+}
+    let k = 2;
+    while (this.LA(k).tokenType === Whitespace) {
+k++;
+}
+    const tName = this.LA(k);
+    if (tName.tokenType !== Identifier) {
+return false;
+}
+    const name = (tName.image || '').toLowerCase();
+    return name === 'text' || name === 'template';
+  };
+
+  private isAtLiteralClose = () => {
+    if (this.LA(1).tokenType !== ClosingOpenBracket) {
+return false;
+}
+    let k = 2;
+    while (this.LA(k).tokenType === Whitespace) {
+k++;
+}
+    const t = this.LA(k);
+    if (t.tokenType !== Identifier) {
+return false;
+}
+    const name = (t.image || '').toLowerCase();
+    return name === 'text' || name === 'template';
+  };
+
   constructor() {
     super(AllTokens, {
-      recoveryEnabled: true, // be generous during CST stage
       outputCst: true,
+      recoveryEnabled: true,
     });
 
     // ---------------------------
-    // Helper producers (must be used inside RULE bodies so that `this` is bound)
-    // ---------------------------
-
-    // Produce an OR() alternatives array that consumes any one of the given tokenTypes,
-    // labeling each consumed token under `label` (so all collected under the same key).
-    const anyOf = (tokenTypes: TokenType[], label?: string) =>
-      tokenTypes.map((tt) => ({
-        ALT: () => (label ? this.CONSUME(tt, { LABEL: label }) : this.CONSUME(tt)),
-      }));
-
-    // Lookahead helpers
-    const isNextPragma = () => {
-      // Peek after <!-- and optional whitespace: expect @pragma
-      if (this.LA(1).tokenType !== CommentOpen) {
-        return false;
-      }
-      let k = 2;
-      while (this.LA(k).tokenType === Whitespace) {
-        k++;
-      }
-      return this.LA(k).tokenType === PragmaKeyword;
-    };
-
-    const isNextLiteralOpenTag = () => {
-      // Detect: < [ws]* Identifier("text" | "template")
-      if (this.LA(1).tokenType !== OpenBracket) {
-        return false;
-      }
-      let k = 2;
-      // optional whitespace after "<"
-      while (this.LA(k).tokenType === Whitespace) {
-        k++;
-      }
-      const tName = this.LA(k);
-      if (tName.tokenType !== Identifier) {
-        return false;
-      }
-      const name = (tName.image || '').toLowerCase();
-      return name === 'text' || name === 'template';
-    };
-
-    // ---------------------------
-    // Grammar Rules
+    // RULE DEFINITIONS (as properties)
     // ---------------------------
 
-    this.RULE('root', () => {
+    this.root = this.RULE('root', () => {
       // CstRootNode: { Content?: CstElementContentNode[] }
       this.MANY(() => {
         this.SUBRULE(this.elementContentNode, { LABEL: 'Content' });
       });
     });
 
-    // Content inside elements/root (everything except a matching CloseTag)
-    this.RULE('elementContentNode', () => {
+    this.elementContentNode = this.RULE('elementContentNode', () => {
       this.OR([
-        // pragma must be before comment
+        // pragma (must come before raw comment)
         {
-          GATE: isNextPragma,
+          GATE: this.isNextPragma,
           ALT: () => this.SUBRULE(this.pragma, { LABEL: 'Pragma' }),
         },
+        // regular comment
         { ALT: () => this.SUBRULE(this.comment, { LABEL: 'Comment' }) },
 
-        // templates
+        // template
         {
           GATE: () => this.LA(1).tokenType === TemplateOpen,
           ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Template' }),
         },
 
-        // self-close elements (<tag .../>)
+        // self-close element
         {
-          // use backtracking to disambiguate quickly
           GATE: this.BACKTRACK(this.selfCloseElement),
           ALT: () => this.SUBRULE(this.selfCloseElement, { LABEL: 'SelfCloseElement' }),
         },
 
-        // literal elements <text>...</text> or <template>...</template>
+        // literal element: <text> or <template> acting as literal
         {
-          GATE: isNextLiteralOpenTag,
+          GATE: this.isNextLiteralOpenTag,
           ALT: () => this.SUBRULE(this.literalElement, { LABEL: 'LiteralElement' }),
         },
 
-        // normal <tag> ... </tag>
+        // normal element
         {
           GATE: () => this.LA(1).tokenType === OpenBracket,
           ALT: () => this.SUBRULE(this.element, { LABEL: 'Element' }),
         },
 
-        // fallback: raw text content
+        // raw text content
         {
           ALT: () => {
             this.AT_LEAST_ONE(() => {
-              this.OR(anyOf(TokensTextContent, 'TextContent'));
+              this.OR(this.anyOf(TokensTextContent, 'TextContent'));
             });
           },
         },
       ]);
     });
 
-    // {{ ... }}
-    this.RULE('templateNode', () => {
-      this.CONSUME(TemplateOpen, { LABEL: 'TemplateOpen' });
+    this.templateNode = this.RULE('templateNode', () => {
+      this.CONSUME(TemplateOpen);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
 
       this.AT_LEAST_ONE(() => {
-        // Everything except TemplateClose (already enforced in TokensExpression)
-        this.OR(anyOf(TokensExpression, 'Content'));
+        // anything except TemplateClose
+        this.OR(this.anyOf(TokensExpression, 'Content'));
       });
 
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterContent' }));
-      this.CONSUME(TemplateClose, { LABEL: 'TemplateClose' });
+      this.CONSUME(TemplateClose);
     });
 
-    // <!-- ... -->
-    this.RULE('comment', () => {
-      this.CONSUME(CommentOpen, { LABEL: 'CommentOpen' });
+    this.comment = this.RULE('comment', () => {
+      this.CONSUME(CommentOpen);
       this.MANY(() => {
-        // Anything until CommentClose
-        this.OR(anyOf(TokensComment, 'Content'));
+        // anything until -->
+        this.OR(this.anyOf(TokensComment, 'Content'));
       });
-      this.CONSUME(CommentClose, { LABEL: 'CommentClose' });
+      this.CONSUME(CommentClose);
     });
 
-    // <!-- @pragma ... -->
-    this.RULE('pragma', () => {
-      this.CONSUME(CommentOpen, { LABEL: 'CommentOpen' });
+    this.pragma = this.RULE('pragma', () => {
+      this.CONSUME(CommentOpen);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
-      this.CONSUME(PragmaKeyword, { LABEL: 'PragmaKeyword' });
+      this.CONSUME(PragmaKeyword);
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterPragma' }));
 
       // identifier after @pragma
@@ -187,14 +197,11 @@ export class ExtendedPomlParser extends CstParser {
       // Options: unquoted tokens or quoted strings (no templates inside these)
       this.MANY(() => {
         this.OR([
-          {
-            ALT: () => this.SUBRULE(this.quotedNoTemplate, { LABEL: 'PragmaOption' }),
-          },
+          { ALT: () => this.SUBRULE(this.quotedNoTemplate, { LABEL: 'PragmaOption' }) },
           {
             ALT: () => {
-              // unquoted: anything non-whitespace & not closing
               this.OR(
-                anyOf(
+                this.anyOf(
                   AllTokens.filter(
                     (t) => t !== CommentClose && t !== Whitespace && t !== DoubleQuote && t !== SingleQuote,
                   ),
@@ -207,17 +214,16 @@ export class ExtendedPomlParser extends CstParser {
         this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterContent' }));
       });
 
-      this.CONSUME(CommentClose, { LABEL: 'CommentClose' });
+      this.CONSUME(CommentClose);
     });
 
-    // "..." or '...' — used only in pragma options (no templates allowed)
-    this.RULE('quotedNoTemplate', () => {
+    this.quotedNoTemplate = this.RULE('quotedNoTemplate', () => {
       this.OR([
         {
           ALT: () => {
             this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
             this.MANY(() => {
-              this.OR(anyOf(TokensDoubleQuoted, 'Content'));
+              this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
             });
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
@@ -226,7 +232,7 @@ export class ExtendedPomlParser extends CstParser {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
             this.MANY(() => {
-              this.OR(anyOf(TokensSingleQuoted, 'Content'));
+              this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
             });
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
@@ -234,8 +240,7 @@ export class ExtendedPomlParser extends CstParser {
       ]);
     });
 
-    // Attribute value: quoted text that MAY contain templates
-    this.RULE('quotedTemplate', () => {
+    this.quotedTemplate = this.RULE('quotedTemplate', () => {
       this.OR([
         {
           ALT: () => {
@@ -243,7 +248,7 @@ export class ExtendedPomlParser extends CstParser {
             this.MANY(() => {
               this.OR([
                 { ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Content' }) },
-                { ALT: () => this.OR(anyOf(TokensDoubleQuotedExpression, 'Content')) },
+                { ALT: () => this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Content')) },
               ]);
             });
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
@@ -255,7 +260,7 @@ export class ExtendedPomlParser extends CstParser {
             this.MANY(() => {
               this.OR([
                 { ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Content' }) },
-                { ALT: () => this.OR(anyOf(TokensSingleQuotedExpression, 'Content')) },
+                { ALT: () => this.OR(this.anyOf(TokensSingleQuotedExpression, 'Content')) },
               ]);
             });
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
@@ -264,27 +269,19 @@ export class ExtendedPomlParser extends CstParser {
       ]);
     });
 
-    // for="iterator in collection" (quoted; inside quotes, treat like expression until closing quote)
-    this.RULE('forIteratorValue', () => {
+    this.forIteratorValue = this.RULE('forIteratorValue', () => {
       this.OR([
         {
           ALT: () => {
             this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
             this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
-
-            // iterator
             this.CONSUME(Identifier, { LABEL: 'Iterator' });
             this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' }));
-
-            // "in" keyword (lexed as Identifier). Semantic check deferred to AST.
             this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
             this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
-
-            // collection expression (like inside template), stop before optional ws + closing quote
             this.AT_LEAST_ONE(() => {
-              this.OR(anyOf(TokensDoubleQuotedExpression, 'Collection'));
+              this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Collection'));
             });
-
             this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
@@ -293,17 +290,13 @@ export class ExtendedPomlParser extends CstParser {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
             this.OPTION5(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterOpen' }));
-
             this.CONSUME3(Identifier, { LABEL: 'Iterator' });
             this.OPTION6(() => this.CONSUME6(Whitespace, { LABEL: 'WsAfterIterator' }));
-
             this.CONSUME4(Identifier, { LABEL: 'InKeyword' });
             this.OPTION7(() => this.CONSUME7(Whitespace, { LABEL: 'WsAfterIn' }));
-
             this.AT_LEAST_ONE2(() => {
-              this.OR(anyOf(TokensSingleQuotedExpression, 'Collection'));
+              this.OR(this.anyOf(TokensSingleQuotedExpression, 'Collection'));
             });
-
             this.OPTION8(() => this.CONSUME8(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
@@ -311,11 +304,10 @@ export class ExtendedPomlParser extends CstParser {
       ]);
     });
 
-    // Attribute: key = (quoted value | templated value | for-iterator)
-    this.RULE('attribute', () => {
+    this.attribute = this.RULE('attribute', () => {
       const keyTok = this.CONSUME(Identifier, { LABEL: 'AttributeKey' });
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterKey' }));
-      this.CONSUME(Equals, { LABEL: 'Equals' });
+      this.CONSUME(Equals); // label not needed; token name matches
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterEquals' }));
 
       this.OR([
@@ -326,102 +318,64 @@ export class ExtendedPomlParser extends CstParser {
             (this.LA(1).tokenType === DoubleQuote || this.LA(1).tokenType === SingleQuote),
           ALT: () => this.SUBRULE(this.forIteratorValue, { LABEL: 'forIteratorValue' }),
         },
-
-        // value={{ ... }} (unquoted template)
+        // templatedValue: {{ ... }}
         {
           GATE: () => this.LA(1).tokenType === TemplateOpen,
           ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'templatedValue' }),
         },
-
-        // "..." / '...' (may contain templates)
+        // quotedValue: "..."/'...' (may contain templates)
         { ALT: () => this.SUBRULE(this.quotedTemplate, { LABEL: 'quotedValue' }) },
       ]);
     });
 
-    // <tag ...>
-    this.RULE('openTag', () => {
-      this.CONSUME(OpenBracket, { LABEL: 'OpenBracket' });
+    this.openTag = this.RULE('openTag', () => {
+      this.CONSUME(OpenBracket);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
-
       this.CONSUME(Identifier, { LABEL: 'TagName' });
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
-
       this.MANY(() => {
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
         this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
       });
-
-      this.CONSUME(CloseBracket, { LABEL: 'CloseBracket' });
+      this.CONSUME(CloseBracket);
     });
 
-    // </tag>
-    this.RULE('closeTag', () => {
-      this.CONSUME(ClosingOpenBracket, { LABEL: 'ClosingOpenBracket' });
+    this.closeTag = this.RULE('closeTag', () => {
+      this.CONSUME(ClosingOpenBracket);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
       this.CONSUME(Identifier, { LABEL: 'TagName' });
-      this.CONSUME(CloseBracket, { LABEL: 'CloseBracket' });
+      this.CONSUME(CloseBracket);
     });
 
-    // <tag .../> (complete element, no content)
-    this.RULE('selfCloseElement', () => {
-      this.CONSUME(OpenBracket, { LABEL: 'OpenBracket' });
+    this.selfCloseElement = this.RULE('selfCloseElement', () => {
+      this.CONSUME(OpenBracket);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
       this.CONSUME(Identifier, { LABEL: 'TagName' });
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
-
       this.MANY(() => {
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
         this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
       });
-
-      this.CONSUME(SelfCloseBracket, { LABEL: 'SelfCloseBracket' });
+      this.CONSUME(SelfCloseBracket);
     });
 
-    // <tag> ... </tag>
-    this.RULE('element', () => {
+    this.element = this.RULE('element', () => {
       this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
       this.MANY(() => {
-        // stop on a close tag
         this.SUBRULE(this.elementContentNode, { LABEL: 'Content' });
       });
       this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
     });
 
-    // <text> ...literal (no templates/tags parsed)... </text>
-    // or <template> ...literal... </template> (per your notes)
-    this.RULE('literalElement', () => {
+    this.literalElement = this.RULE('literalElement', () => {
       this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
 
-      // Eat *everything* until a ClosingOpenBracket + (optional ws) + Identifier('text'|'template') + '>'
-      this.AT_LEAST_ONE(() => {
+      // Everything until the matching </text> or </template> is treated as raw text
+      this.MANY(() => {
         this.OR([
-          // Continue consuming anything that is not the start of the matching close.
           {
-            GATE: () => {
-              if (this.LA(1).tokenType !== ClosingOpenBracket) {
-                return true;
-              }
-              // look ahead to see if it's </text> or </template>
-              let k = 2;
-              while (this.LA(k).tokenType === Whitespace) {
-                k++;
-              }
-              const t = this.LA(k);
-              if (t.tokenType !== Identifier) {
-                return true;
-              }
-              const name = (t.image || '').toLowerCase();
-              return !(name === 'text' || name === 'template');
-            },
-            ALT: () => {
-              // Treat all as raw text content
-              this.OR(
-                anyOf(
-                  AllTokens.filter((t) => t !== ClosingOpenBracket), // minimal guard
-                  'TextContent',
-                ),
-              );
-            },
+            GATE: () => !this.isAtLiteralClose(),
+            ALT: () => this.OR(this.anyOf(AllTokens, 'TextContent')),
           },
         ]);
       });
@@ -432,19 +386,15 @@ export class ExtendedPomlParser extends CstParser {
     this.performSelfAnalysis();
   }
 
-  // Expose entry for external callers (TypeScript-friendly)
   public parseRoot(): CstNode {
-    // @ts-expect-error Chevrotain types: RULE name maps to a function
+    // Invoke the entry rule (property is a function)
     return this.root();
   }
 }
 
-// Singleton parser instance
+// Singleton parser
 export const extendedPomlParser = new ExtendedPomlParser();
 
-/**
- * Convenience: tokenize + parse in one call.
- */
 export function parsePomlToCst(input: string): {
   cst: CstNode | undefined;
   lexErrors: ReturnType<typeof extendedPomlLexer.tokenize>['errors'];

From 038b6833cefb527c35fc7b0dc336ed78edc46db1 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 1 Sep 2025 19:17:47 +0800
Subject: [PATCH 44/76] .

---
 packages/poml/next/cst.ts | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 7737ec6e..5c40bedc 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -59,43 +59,43 @@ export class ExtendedPomlParser extends CstParser {
 
   private isNextPragma = () => {
     if (this.LA(1).tokenType !== CommentOpen) {
-return false;
-}
+      return false;
+    }
     let k = 2;
     while (this.LA(k).tokenType === Whitespace) {
-k++;
-}
+      k++;
+    }
     return this.LA(k).tokenType === PragmaKeyword;
   };
 
   private isNextLiteralOpenTag = () => {
     if (this.LA(1).tokenType !== OpenBracket) {
-return false;
-}
+      return false;
+    }
     let k = 2;
     while (this.LA(k).tokenType === Whitespace) {
-k++;
-}
+      k++;
+    }
     const tName = this.LA(k);
     if (tName.tokenType !== Identifier) {
-return false;
-}
+      return false;
+    }
     const name = (tName.image || '').toLowerCase();
     return name === 'text' || name === 'template';
   };
 
   private isAtLiteralClose = () => {
     if (this.LA(1).tokenType !== ClosingOpenBracket) {
-return false;
-}
+      return false;
+    }
     let k = 2;
     while (this.LA(k).tokenType === Whitespace) {
-k++;
-}
+      k++;
+    }
     const t = this.LA(k);
     if (t.tokenType !== Identifier) {
-return false;
-}
+      return false;
+    }
     const name = (t.image || '').toLowerCase();
     return name === 'text' || name === 'template';
   };

From 22827c995823218345efdc373d5d61cd472a204c Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 2 Sep 2025 00:01:08 +0800
Subject: [PATCH 45/76] .

---
 packages/poml/base.tsx      |  5 +++-
 packages/poml/next/nodes.ts | 57 +++++++++++++++++--------------------
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/packages/poml/base.tsx b/packages/poml/base.tsx
index 4e8c875c..c894c625 100644
--- a/packages/poml/base.tsx
+++ b/packages/poml/base.tsx
@@ -137,7 +137,10 @@ export interface PropsBase {
 
   // Experimental
   writerOptions?: object;
-  whiteSpace?: 'pre' | 'filter' | 'trim';
+  whiteSpace?: 'pre' | 'filter' | 'trim' | 'collapse';
+
+  // Enforce inline on every element.
+  inline?: boolean;
 
   /** Soft character limit before truncation is applied. */
   charLimit?: number;
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 6f793fc2..9884324f 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -1,6 +1,10 @@
 import { Range } from './types';
 import { CstNode, IToken } from 'chevrotain';
 
+export interface AstNode {
+  range: Range; // start and end offsets in the source text
+}
+
 /**
  * Represents a JavaScript expression as a string.
  *
@@ -17,9 +21,8 @@ import { CstNode, IToken } from 'chevrotain';
  * - String literals with quotes: `"hello"` (use LiteralNode or ValueNode)
  * - POML markup: `<tag>` (use element nodes)
  */
-export interface ExpressionNode {
+export interface ExpressionNode extends AstNode {
   kind: 'EXPRESSION';
-  range: Range;
   value: string;
 }
 
@@ -45,9 +48,8 @@ export interface ExpressionNode {
  * - Template elements: <template>{{ this is a jinja template }}</template> (use LiteralNode)
  * - With quotes: `"{{ var }}"` (use ValueNode)
  */
-export interface TemplateNode {
+export interface TemplateNode extends AstNode {
   kind: 'TEMPLATE';
-  range: Range;
   value: ExpressionNode;
 }
 
@@ -90,9 +92,8 @@ export interface CstTemplateNode extends CstNode {
  * - Expressions: `x > 0` (use ExpressionNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  */
-export interface LiteralNode {
+export interface LiteralNode extends AstNode {
   kind: 'STRING';
-  range: Range;
   value: string;
 }
 
@@ -118,12 +119,16 @@ export interface LiteralNode {
  *
  * Note: The range includes quotes if present, but children exclude them.
  */
-export interface ValueNode {
+export interface ValueNode extends AstNode {
   kind: 'VALUE';
-  range: Range;
   children: (LiteralNode | TemplateNode)[];
 }
 
+export interface TextElementNode extends AstNode {
+  kind: 'TEXT';
+  value: string;
+}
+
 /**
  * Related CST node interfaces for parsing stage.
  * The following two interfaces are for quoted strings and will be transformed into ValueNode.
@@ -167,9 +172,8 @@ export interface CstQuotedTemplateNode extends CstNode {
  * - Conditional loops: `if` attributes (use separate condition handling)
  * - Template interpolation: `{{ items }}` (use TemplateNode)
  */
-export interface ForIteratorNode {
+export interface ForIteratorNode extends AstNode {
   kind: 'FORITERATOR';
-  range: Range;
   iterator: LiteralNode;
   collection: ExpressionNode;
 }
@@ -218,9 +222,8 @@ export interface CstForIteratorNode extends CstNode {
  * - Spread attributes (not yet supported): `{...props}`
  * - Dynamic attribute names (not supported): `[attrName]="value"`
  */
-export interface AttributeNode {
+export interface AttributeNode extends AstNode {
   kind: 'ATTRIBUTE';
-  range: Range;
   key: LiteralNode;
   value: ValueNode | ForIteratorNode;
 }
@@ -260,9 +263,8 @@ export interface CstAttributeNode extends CstNode {
  * - Complete elements: opening + content + closing (use ElementNode)
  * - Invalid or malformed tags (treated as text)
  */
-export interface OpenTagNode {
+export interface OpenTagNode extends AstNode {
   kind: 'OPEN';
-  range: Range;
   value: LiteralNode; // tag name
   attributes: AttributeNode[];
 }
@@ -298,9 +300,8 @@ export interface OpenTagCstNode extends CstNode {
  * - Self-closing tags: `<br/>` (use SelfCloseTagNode)
  * - Tags with attributes (closing tags never have attributes)
  */
-export interface CloseTagNode {
+export interface CloseTagNode extends AstNode {
   kind: 'CLOSE';
-  range: Range;
   value: LiteralNode; // tag name
 }
 
@@ -333,9 +334,8 @@ export interface CloseTagCstNode extends CstNode {
  * - Separate open/close tags: `<div></div>` (use ElementNode)
  * - Tags without the self-closing slash: `<img>` (use OpenTagNode)
  */
-export interface SelfCloseElementNode {
+export interface SelfCloseElementNode extends AstNode {
   kind: 'SELFCLOSE';
-  range: Range;
   value: LiteralNode; // tag name
   attributes: AttributeNode[];
 }
@@ -374,9 +374,8 @@ export interface CstSelfCloseElementNode extends CstNode {
  * - Template variables: `{{ var }}` (use TemplateNode)
  * - Meta elements: `<meta>` tags (use MetaNode)
  */
-export interface ElementNode {
+export interface ElementNode extends AstNode {
   kind: 'ELEMENT';
-  range: Range;
   open: OpenTagNode;
   close: CloseTagNode;
   children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | ValueNode)[];
@@ -415,9 +414,8 @@ export interface CstElementContentNode extends CstNode {
  * Examples:
  * - `<!-- this is a comment -->`
  */
-export interface CommentNode {
+export interface CommentNode extends AstNode {
   kind: 'COMMENT';
-  range: Range;
   value: LiteralNode;
 }
 
@@ -444,22 +442,21 @@ export interface CstCommentNode extends CstNode {
  * - Specify version: `<!-- @pragma version >=1.0.0 <2.3.0 -->`
  * - Turn tags on/off: `<!-- @pragma components +reference -table -->`
  * - Turn speaker roles on/off: `<!-- @pragma speaker multi -->` or `single`
- * - White space policy: `<!-- @pragma whitespace pre -->` or `trim`, `collapse` or `remove`
+ * - White space policy: `<!-- @pragma whitespace pre -->` or `trim`, `collapse`
  *
  * Notes on white space policy:
  * - `pre`: preserve all whitespace as-is
  * - `trim`: trim leading/trailing whitespace in each element
  * - `collapse`: trim + collapse consecutive whitespace into a single space
- * - `remove`: collapse remove all whitespaces between two nested elements
+ *   If there are two inline="false" elements next to each other, space between them will be deleted.
  *
  * Each element type will have its own default whitespace policy.
  * For example, `<text>` defaults to `pre`, while `<paragraph>` defaults to `collapse`.
  * However, when a pragma is set, it overrides the default for subsequent elements.
  * It will affect the AST constructing stages, and also affecting the props sent to components.
  */
-export interface PragmaNode {
+export interface PragmaNode extends AstNode {
   kind: 'PRAGMA';
-  range: Range;
   identifier: LiteralNode;
   options: LiteralNode[];
 }
@@ -507,9 +504,8 @@ export interface CstPragmaNode extends CstNode {
  * 3. If you really need `<text>` in your POML. Recommended to use `&lt;text&gt;`
  *    outside of literal element.
  */
-export interface LiteralElementNode {
-  kind: 'TEXT';
-  range: Range;
+export interface LiteralElementNode extends AstNode {
+  kind: 'LITERAL';
   open: OpenTagNode;
   close: CloseTagNode;
   children: LiteralNode;
@@ -543,9 +539,8 @@ export interface CstLiteralElementNode extends CstNode {
  * Cases that do not apply:
  * - All nested elements
  */
-export interface RootNode {
+export interface RootNode extends AstNode {
   kind: 'ROOT';
-  range: Range;
   children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | ValueNode)[];
 }
 

From 0beb690124fcdf6c86dd42deac47eb3568d6b0c2 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 2 Sep 2025 08:17:40 +0800
Subject: [PATCH 46/76] cst update

---
 packages/poml/next/cst.ts   | 51 ++++++++++++++++++++++++-------------
 packages/poml/next/nodes.ts | 50 +++++++++++++++++++++---------------
 2 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 5c40bedc..dd8b990b 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -1,4 +1,3 @@
-// cstParser.ts
 import { CstParser, CstNode, IToken, TokenType } from 'chevrotain';
 import {
   AllTokens,
@@ -26,6 +25,23 @@ import {
   extendedPomlLexer,
 } from './lexer';
 
+import {
+  CstTemplateNode,
+  CstQuotedNode,
+  CstQuotedTemplateNode,
+  CstForIteratorNode,
+  CstAttributeNode,
+  CstOpenTagNode,
+  CstCloseTagNode,
+  CstSelfCloseElementNode,
+  CstElementNode,
+  CstElementContentNode,
+  CstCommentNode,
+  CstPragmaNode,
+  CstLiteralElementNode,
+  CstRootNode,
+} from './nodes';
+
 /**
  * Extended POML CST Parser
  *
@@ -36,20 +52,20 @@ import {
  */
 export class ExtendedPomlParser extends CstParser {
   // ---- Rule property declarations (so TS knows they exist) ----
-  public root!: (idxInOriginalText?: number) => CstNode;
-  public elementContentNode!: (idxInOriginalText?: number) => CstNode;
-  public templateNode!: (idxInOriginalText?: number) => CstNode;
-  public comment!: (idxInOriginalText?: number) => CstNode;
-  public pragma!: (idxInOriginalText?: number) => CstNode;
-  public quotedNoTemplate!: (idxInOriginalText?: number) => CstNode;
-  public quotedTemplate!: (idxInOriginalText?: number) => CstNode;
-  public forIteratorValue!: (idxInOriginalText?: number) => CstNode;
-  public attribute!: (idxInOriginalText?: number) => CstNode;
-  public openTag!: (idxInOriginalText?: number) => CstNode;
-  public closeTag!: (idxInOriginalText?: number) => CstNode;
-  public selfCloseElement!: (idxInOriginalText?: number) => CstNode;
-  public element!: (idxInOriginalText?: number) => CstNode;
-  public literalElement!: (idxInOriginalText?: number) => CstNode;
+  public root!: (idxInOriginalText?: number) => CstRootNode;
+  public elementContentNode!: (idxInOriginalText?: number) => CstElementContentNode;
+  public templateNode!: (idxInOriginalText?: number) => CstTemplateNode;
+  public comment!: (idxInOriginalText?: number) => CstCommentNode;
+  public pragma!: (idxInOriginalText?: number) => CstPragmaNode;
+  public quoted!: (idxInOriginalText?: number) => CstQuotedNode;
+  public quotedTemplate!: (idxInOriginalText?: number) => CstQuotedTemplateNode;
+  public forIteratorValue!: (idxInOriginalText?: number) => CstForIteratorNode;
+  public attribute!: (idxInOriginalText?: number) => CstAttributeNode;
+  public openTag!: (idxInOriginalText?: number) => CstOpenTagNode;
+  public closeTag!: (idxInOriginalText?: number) => CstCloseTagNode;
+  public selfCloseElement!: (idxInOriginalText?: number) => CstSelfCloseElementNode;
+  public element!: (idxInOriginalText?: number) => CstElementNode;
+  public literalElement!: (idxInOriginalText?: number) => CstLiteralElementNode;
 
   // ---- Small helpers ----
   private anyOf = (tokenTypes: TokenType[], label?: string) =>
@@ -197,7 +213,7 @@ export class ExtendedPomlParser extends CstParser {
       // Options: unquoted tokens or quoted strings (no templates inside these)
       this.MANY(() => {
         this.OR([
-          { ALT: () => this.SUBRULE(this.quotedNoTemplate, { LABEL: 'PragmaOption' }) },
+          { ALT: () => this.SUBRULE(this.quoted, { LABEL: 'PragmaOption' }) },
           {
             ALT: () => {
               this.OR(
@@ -217,7 +233,7 @@ export class ExtendedPomlParser extends CstParser {
       this.CONSUME(CommentClose);
     });
 
-    this.quotedNoTemplate = this.RULE('quotedNoTemplate', () => {
+    this.quoted = this.RULE('quoted', () => {
       this.OR([
         {
           ALT: () => {
@@ -370,6 +386,7 @@ export class ExtendedPomlParser extends CstParser {
     this.literalElement = this.RULE('literalElement', () => {
       this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
 
+      // TODO: the ending tag should match the starting tag name (text/template)
       // Everything until the matching </text> or </template> is treated as raw text
       this.MANY(() => {
         this.OR([
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 9884324f..7eb4a849 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -18,7 +18,7 @@ export interface AstNode {
  *
  * Cases that do not apply:
  * - Template syntax including braces: `{{ expression }}` (use TemplateNode)
- * - String literals with quotes: `"hello"` (use LiteralNode or ValueNode)
+ * - String literals with quotes: `"hello"` (use ValueNode)
  * - POML markup: `<tag>` (use element nodes)
  */
 export interface ExpressionNode extends AstNode {
@@ -98,7 +98,8 @@ export interface LiteralNode extends AstNode {
 }
 
 /**
- * Represents a composite value that may contain text and/or templates.
+ * Represents a composite value that may contain text.
+ * Used specifically for the "quotes" in attribute values.
  *
  * Value nodes are containers for mixed content, handling both pure text
  * and interpolated templates. They preserve quote information when used
@@ -107,15 +108,12 @@ export interface LiteralNode extends AstNode {
  * Cases that apply:
  * - Quoted attribute values: `"some text"`, `'single quoted'`
  * - Mixed content with templates: `"Hello, {{ userName }}!"`
- * - Text content between tags: `>  some text  <` (including whitespace)
- * - Unquoted template values in certain contexts
+ * - Unquoted template values in certain attribute contexts
  * - Multi-part content: `"Price: ${{amount}} USD"`
  *
  * Cases that do not apply:
  * - Attribute keys: `class=...` (the `class` part uses LiteralNode)
- * - Pure expressions without quotes: `if=condition` (use ExpressionNode)
- * - Tag names: `div` (use LiteralNode)
- * - Standalone template variables not in a value context
+ * - Pure expressions without quotes: `if=condition` (illegal)
  *
  * Note: The range includes quotes if present, but children exclude them.
  */
@@ -124,11 +122,6 @@ export interface ValueNode extends AstNode {
   children: (LiteralNode | TemplateNode)[];
 }
 
-export interface TextElementNode extends AstNode {
-  kind: 'TEXT';
-  value: string;
-}
-
 /**
  * Related CST node interfaces for parsing stage.
  * The following two interfaces are for quoted strings and will be transformed into ValueNode.
@@ -272,7 +265,7 @@ export interface OpenTagNode extends AstNode {
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface OpenTagCstNode extends CstNode {
+export interface CstOpenTagNode extends CstNode {
   children: {
     OpenBracket?: IToken[];
     WsAfterBracket?: IToken[];
@@ -308,7 +301,7 @@ export interface CloseTagNode extends AstNode {
 /**
  * Related CST node interfaces for parsing stage.
  */
-export interface CloseTagCstNode extends CstNode {
+export interface CstCloseTagNode extends CstNode {
   children: {
     ClosingOpenBracket?: IToken[];
     WsAfterBracket?: IToken[];
@@ -378,7 +371,21 @@ export interface ElementNode extends AstNode {
   kind: 'ELEMENT';
   open: OpenTagNode;
   close: CloseTagNode;
-  children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | ValueNode)[];
+  children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | TextElementNode)[];
+}
+
+/**
+ * Very similar to ValueNode, but specifically for text content between tags.
+ *
+ * Cases that apply:
+ * - Text content between tags: `>  some text  <` (including whitespace)
+ *
+ * Cases that do not apply:
+ * - Text inside <text> or other literal elements (use LiteralElementNode)
+ */
+export interface TextElementNode extends AstNode {
+  kind: 'TEXT';
+  value: string;
 }
 
 /**
@@ -386,8 +393,8 @@ export interface ElementNode extends AstNode {
  */
 export interface CstElementNode extends CstNode {
   children: {
-    OpenTag?: OpenTagCstNode[];
-    CloseTag?: CloseTagCstNode[];
+    OpenTag?: CstOpenTagNode[];
+    CloseTag?: CstCloseTagNode[];
     Content?: CstElementContentNode[];
   };
 }
@@ -491,8 +498,8 @@ export interface CstPragmaNode extends CstNode {
  * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
  *
  * Cases that do not apply:
- * - Regular text content with interpolation (use ValueNode)
- * - Plain text outside elements (use ValueNode)
+ * - Regular text content with interpolation (use TextElementNode or ValueNode)
+ * - Plain text outside elements (use TextElementNode)
  * - Elements allowing template processing (use ElementNode)
  * - Text with attributes enabling processing (future feature)
  *
@@ -516,11 +523,11 @@ export interface LiteralElementNode extends AstNode {
  */
 export interface CstLiteralElementNode extends CstNode {
   children: {
-    OpenTag?: OpenTagCstNode[];
+    OpenTag?: CstOpenTagNode[];
     // All content between open and close tags is treated as literal text
     // including other tags, comments, pragmas, etc. except for `</text>`.
     TextContent?: IToken[];
-    CloseTag?: CloseTagCstNode[];
+    CloseTag?: CstCloseTagNode[];
   };
 }
 
@@ -584,6 +591,7 @@ export type StrictNode =
   | SelfCloseElementNode
   | ElementNode
   | LiteralElementNode
+  | TextElementNode
   | CommentNode
   | PragmaNode
   | RootNode;

From bb7ca37389aaa06f3b51e67d572c311f5b7e46ed Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 2 Sep 2025 09:23:52 +0800
Subject: [PATCH 47/76] update nodes

---
 packages/poml/next/cst.ts   | 31 +++++++++++++++++++++++++++++++
 packages/poml/next/nodes.ts | 33 ++++++++++++---------------------
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index dd8b990b..e93d54fb 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -41,6 +41,7 @@ import {
   CstLiteralElementNode,
   CstRootNode,
 } from './nodes';
+import { listComponentAliases } from 'poml/base';
 
 /**
  * Extended POML CST Parser
@@ -67,6 +68,25 @@ export class ExtendedPomlParser extends CstParser {
   public element!: (idxInOriginalText?: number) => CstElementNode;
   public literalElement!: (idxInOriginalText?: number) => CstLiteralElementNode;
 
+  // ---- Tag names for rules (for CST nodes) ----
+  private validComponentNames: Set<string>;
+
+  // These directives are currently handled in file.tsx.
+  // They are expected to move gradually into the component registry in the future.
+  private validDirectives: Set<string> = new Set([
+    'include',
+    'let',
+    'output-schema',
+    'outputschema',
+    'tool-definition',
+    'tool-def',
+    'tooldef',
+    'tool',
+    'template',
+  ]);
+  // This list affects the CST parser stage only.
+  private literalTagNames: Set<string> = new Set(['text', 'template']);
+
   // ---- Small helpers ----
   private anyOf = (tokenTypes: TokenType[], label?: string) =>
     tokenTypes.map((tt) => ({
@@ -113,14 +133,25 @@ export class ExtendedPomlParser extends CstParser {
       return false;
     }
     const name = (t.image || '').toLowerCase();
+
+    // TODO: should match the opening tag name
     return name === 'text' || name === 'template';
   };
 
+  private isValidOpenTag = (tagName: string) => {
+    // When pragma strict is enabled, only known component names are allowed as tags.
+    // Other component names will show as errors in the semantic analysis stage.
+    // When pragma strict is not enabled, tag names that are not known components
+    // will be treated as plain text.
+    return this.validComponentNames.has(tagName.toLowerCase());
+  };
+
   constructor() {
     super(AllTokens, {
       outputCst: true,
       recoveryEnabled: true,
     });
+    this.validComponentNames = new Set(listComponentAliases());
 
     // ---------------------------
     // RULE DEFINITIONS (as properties)
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 7eb4a849..0ccbc3d5 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -264,8 +264,11 @@ export interface OpenTagNode extends AstNode {
 
 /**
  * Related CST node interfaces for parsing stage.
+ *
+ * Opening tag without the ending close bracket.
+ * Allows prefix sharing with self-closing elements (SelfCloseElementNode).
  */
-export interface CstOpenTagNode extends CstNode {
+export interface CstOpenTagPartialNode extends CstNode {
   children: {
     OpenBracket?: IToken[];
     WsAfterBracket?: IToken[];
@@ -273,7 +276,6 @@ export interface CstOpenTagNode extends CstNode {
     WsAfterName?: IToken[];
     Attribute?: CstAttributeNode[];
     WsAfterAttribute?: IToken[];
-    CloseBracket?: IToken[];
   };
 }
 
@@ -333,21 +335,6 @@ export interface SelfCloseElementNode extends AstNode {
   attributes: AttributeNode[];
 }
 
-/**
- * Related CST node interfaces for parsing stage.
- */
-export interface CstSelfCloseElementNode extends CstNode {
-  children: {
-    OpenBracket?: IToken[];
-    WsAfterBracket?: IToken[];
-    TagName?: IToken[];
-    WsAfterName?: IToken[];
-    Attribute?: CstAttributeNode[];
-    WsAfterAttribute?: IToken[];
-    SelfCloseBracket?: IToken[];
-  };
-}
-
 /**
  * Represents a complete POML element with its content.
  *
@@ -393,9 +380,12 @@ export interface TextElementNode extends AstNode {
  */
 export interface CstElementNode extends CstNode {
   children: {
-    OpenTag?: CstOpenTagNode[];
-    CloseTag?: CstCloseTagNode[];
+    OpenTagPartial?: CstOpenTagPartialNode[];
+    OpenTagCloseBracket?: IToken[];
     Content?: CstElementContentNode[];
+    CloseTag?: CstCloseTagNode[];
+    // Alternatively, the element can be self-closing (no content or close tag).
+    SelfCloseBracket?: IToken[];
   };
 }
 
@@ -403,7 +393,6 @@ export interface CstElementContentNode extends CstNode {
   children: {
     Element?: CstElementNode[];
     LiteralElement?: CstLiteralElementNode[];
-    SelfCloseElement?: CstSelfCloseElementNode[];
     Comment?: CstCommentNode[];
     Pragma?: CstPragmaNode[];
     Template?: CstTemplateNode[];
@@ -523,11 +512,13 @@ export interface LiteralElementNode extends AstNode {
  */
 export interface CstLiteralElementNode extends CstNode {
   children: {
-    OpenTag?: CstOpenTagNode[];
+    OpenTagPartial?: CstOpenTagPartialNode[];
+    OpenTagCloseBracket?: IToken[];
     // All content between open and close tags is treated as literal text
     // including other tags, comments, pragmas, etc. except for `</text>`.
     TextContent?: IToken[];
     CloseTag?: CstCloseTagNode[];
+    // Literal element cannot be self-closing.
   };
 }
 

From 5aa67f75b881ba2da28f277ee14bb75881ee504e Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 2 Sep 2025 10:58:39 +0800
Subject: [PATCH 48/76] .

---
 packages/poml/next/cst.ts   | 109 ++++++++++++++++++------------------
 packages/poml/next/nodes.ts |   8 +--
 2 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index e93d54fb..7eac6f9c 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -31,9 +31,8 @@ import {
   CstQuotedTemplateNode,
   CstForIteratorNode,
   CstAttributeNode,
-  CstOpenTagNode,
+  CstOpenTagPartialNode,
   CstCloseTagNode,
-  CstSelfCloseElementNode,
   CstElementNode,
   CstElementContentNode,
   CstCommentNode,
@@ -54,17 +53,16 @@ import { listComponentAliases } from 'poml/base';
 export class ExtendedPomlParser extends CstParser {
   // ---- Rule property declarations (so TS knows they exist) ----
   public root!: (idxInOriginalText?: number) => CstRootNode;
-  public elementContentNode!: (idxInOriginalText?: number) => CstElementContentNode;
-  public templateNode!: (idxInOriginalText?: number) => CstTemplateNode;
+  public elementContent!: (idxInOriginalText?: number) => CstElementContentNode;
+  public template!: (idxInOriginalText?: number) => CstTemplateNode;
   public comment!: (idxInOriginalText?: number) => CstCommentNode;
   public pragma!: (idxInOriginalText?: number) => CstPragmaNode;
   public quoted!: (idxInOriginalText?: number) => CstQuotedNode;
   public quotedTemplate!: (idxInOriginalText?: number) => CstQuotedTemplateNode;
   public forIteratorValue!: (idxInOriginalText?: number) => CstForIteratorNode;
   public attribute!: (idxInOriginalText?: number) => CstAttributeNode;
-  public openTag!: (idxInOriginalText?: number) => CstOpenTagNode;
+  public openTagPartial!: (idxInOriginalText?: number) => CstOpenTagPartialNode;
   public closeTag!: (idxInOriginalText?: number) => CstCloseTagNode;
-  public selfCloseElement!: (idxInOriginalText?: number) => CstSelfCloseElementNode;
   public element!: (idxInOriginalText?: number) => CstElementNode;
   public literalElement!: (idxInOriginalText?: number) => CstLiteralElementNode;
 
@@ -160,11 +158,11 @@ export class ExtendedPomlParser extends CstParser {
     this.root = this.RULE('root', () => {
       // CstRootNode: { Content?: CstElementContentNode[] }
       this.MANY(() => {
-        this.SUBRULE(this.elementContentNode, { LABEL: 'Content' });
+        this.SUBRULE(this.elementContent, { LABEL: 'Content' });
       });
     });
 
-    this.elementContentNode = this.RULE('elementContentNode', () => {
+    this.elementContent = this.RULE('elementContent', () => {
       this.OR([
         // pragma (must come before raw comment)
         {
@@ -172,18 +170,14 @@ export class ExtendedPomlParser extends CstParser {
           ALT: () => this.SUBRULE(this.pragma, { LABEL: 'Pragma' }),
         },
         // regular comment
-        { ALT: () => this.SUBRULE(this.comment, { LABEL: 'Comment' }) },
-
-        // template
         {
-          GATE: () => this.LA(1).tokenType === TemplateOpen,
-          ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Template' }),
+          ALT: () => this.SUBRULE(this.comment, { LABEL: 'Comment' }),
         },
 
-        // self-close element
+        // template
         {
-          GATE: this.BACKTRACK(this.selfCloseElement),
-          ALT: () => this.SUBRULE(this.selfCloseElement, { LABEL: 'SelfCloseElement' }),
+          GATE: () => this.LA(1).tokenType === TemplateOpen,
+          ALT: () => this.SUBRULE(this.template, { LABEL: 'Template' }),
         },
 
         // literal element: <text> or <template> acting as literal
@@ -194,7 +188,6 @@ export class ExtendedPomlParser extends CstParser {
 
         // normal element
         {
-          GATE: () => this.LA(1).tokenType === OpenBracket,
           ALT: () => this.SUBRULE(this.element, { LABEL: 'Element' }),
         },
 
@@ -209,13 +202,23 @@ export class ExtendedPomlParser extends CstParser {
       ]);
     });
 
-    this.templateNode = this.RULE('templateNode', () => {
+    this.template = this.RULE('template', () => {
       this.CONSUME(TemplateOpen);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
 
       this.AT_LEAST_ONE(() => {
-        // anything except TemplateClose
-        this.OR(this.anyOf(TokensExpression, 'Content'));
+        this.OR([
+          // mid-content whitespace: only if NOT followed by TemplateClose
+          {
+            GATE: () => this.LA(1).tokenType === Whitespace && this.LA(2).tokenType !== TemplateClose,
+            ALT: () => this.CONSUME1(Whitespace, { LABEL: 'Content' }),
+          },
+          // everything else in TokensExpression except Whitespace (handled above)
+          ...this.anyOf(
+            TokensExpression.filter((t) => t !== Whitespace),
+            'Content',
+          ),
+        ]);
       });
 
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterContent' }));
@@ -239,28 +242,22 @@ export class ExtendedPomlParser extends CstParser {
 
       // identifier after @pragma
       this.CONSUME(Identifier, { LABEL: 'PragmaIdentifier' });
-      this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIdentifier' }));
 
       // Options: unquoted tokens or quoted strings (no templates inside these)
       this.MANY(() => {
+        this.CONSUME3(Whitespace, { LABEL: 'WsBeforeEachOption' });
         this.OR([
-          { ALT: () => this.SUBRULE(this.quoted, { LABEL: 'PragmaOption' }) },
           {
-            ALT: () => {
-              this.OR(
-                this.anyOf(
-                  AllTokens.filter(
-                    (t) => t !== CommentClose && t !== Whitespace && t !== DoubleQuote && t !== SingleQuote,
-                  ),
-                  'PragmaOption',
-                ),
-              );
-            },
+            ALT: () => this.SUBRULE(this.quoted, { LABEL: 'PragmaOption' }),
+          },
+          {
+            ALT: () => this.CONSUME2(Identifier, { LABEL: 'PragmaOption' }),
           },
         ]);
-        this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterContent' }));
       });
 
+      this.OPTION3(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterAll' }));
+
       this.CONSUME(CommentClose);
     });
 
@@ -294,7 +291,7 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
             this.MANY(() => {
               this.OR([
-                { ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Content' }) },
+                { ALT: () => this.SUBRULE(this.template, { LABEL: 'Content' }) },
                 { ALT: () => this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Content')) },
               ]);
             });
@@ -306,7 +303,7 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
             this.MANY(() => {
               this.OR([
-                { ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'Content' }) },
+                { ALT: () => this.SUBRULE(this.template, { LABEL: 'Content' }) },
                 { ALT: () => this.OR(this.anyOf(TokensSingleQuotedExpression, 'Content')) },
               ]);
             });
@@ -368,14 +365,14 @@ export class ExtendedPomlParser extends CstParser {
         // templatedValue: {{ ... }}
         {
           GATE: () => this.LA(1).tokenType === TemplateOpen,
-          ALT: () => this.SUBRULE(this.templateNode, { LABEL: 'templatedValue' }),
+          ALT: () => this.SUBRULE(this.template, { LABEL: 'templatedValue' }),
         },
         // quotedValue: "..."/'...' (may contain templates)
         { ALT: () => this.SUBRULE(this.quotedTemplate, { LABEL: 'quotedValue' }) },
       ]);
     });
 
-    this.openTag = this.RULE('openTag', () => {
+    this.openTagPartial = this.RULE('openTagPartial', () => {
       this.CONSUME(OpenBracket);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
       this.CONSUME(Identifier, { LABEL: 'TagName' });
@@ -384,7 +381,6 @@ export class ExtendedPomlParser extends CstParser {
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
         this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
       });
-      this.CONSUME(CloseBracket);
     });
 
     this.closeTag = this.RULE('closeTag', () => {
@@ -394,28 +390,29 @@ export class ExtendedPomlParser extends CstParser {
       this.CONSUME(CloseBracket);
     });
 
-    this.selfCloseElement = this.RULE('selfCloseElement', () => {
-      this.CONSUME(OpenBracket);
-      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
-      this.CONSUME(Identifier, { LABEL: 'TagName' });
-      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
-      this.MANY(() => {
-        this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
-        this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
-      });
-      this.CONSUME(SelfCloseBracket);
-    });
-
     this.element = this.RULE('element', () => {
-      this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
-      this.MANY(() => {
-        this.SUBRULE(this.elementContentNode, { LABEL: 'Content' });
-      });
-      this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
+      this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
+      this.OR([
+        {
+          ALT: () => {
+            this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
+            this.MANY(() => {
+              this.SUBRULE(this.elementContent, { LABEL: 'Content' });
+            });
+            this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
+          },
+        },
+        {
+          ALT: () => {
+            this.CONSUME(SelfCloseBracket, { LABEL: 'SelfCloseBracket' });
+          },
+        },
+      ]);
     });
 
     this.literalElement = this.RULE('literalElement', () => {
-      this.SUBRULE(this.openTag, { LABEL: 'OpenTag' });
+      this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
+      this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
 
       // TODO: the ending tag should match the starting tag name (text/template)
       // Everything until the matching </text> or </template> is treated as raw text
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 0ccbc3d5..0ddfa331 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -273,9 +273,9 @@ export interface CstOpenTagPartialNode extends CstNode {
     OpenBracket?: IToken[];
     WsAfterBracket?: IToken[];
     TagName?: IToken[];
-    WsAfterName?: IToken[];
+    WsBeforeEachAttribute?: IToken[];
     Attribute?: CstAttributeNode[];
-    WsAfterAttribute?: IToken[];
+    WsAfterAll?: IToken[];
   };
 }
 
@@ -467,9 +467,9 @@ export interface CstPragmaNode extends CstNode {
     PragmaKeyword?: IToken[];
     WsAfterPragma?: IToken[];
     PragmaIdentifier?: IToken[];
-    WsAfterIdentifier?: IToken[];
+    WsBeforeEachOption?: IToken[];
     PragmaOption?: (IToken | CstQuotedNode)[];
-    WsAfterContent?: IToken[];
+    WsAfterAll?: IToken[];
     CommentClose?: IToken[];
   };
 }

From 0a7b888bf4e2e42a0f4ced96d2122be5601dc517 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 4 Sep 2025 14:05:54 +0800
Subject: [PATCH 49/76] review to element

---
 packages/poml/next/cst.ts   | 67 ++++++++++++++++++++++++++++---------
 packages/poml/next/nodes.ts |  5 +--
 2 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 7eac6f9c..4c73b7fa 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -91,6 +91,18 @@ export class ExtendedPomlParser extends CstParser {
       ALT: () => (label ? this.CONSUME(tt, { LABEL: label }) : this.CONSUME(tt)),
     }));
 
+  // Lookahead helper: Check if next is whitespace but next non-whitespace token is not of given type
+  private isSafeWhitespace = (tokenType: TokenType) => {
+    if (this.LA(1).tokenType !== Whitespace) {
+      return false;
+    }
+    let k = 2;
+    while (this.LA(k).tokenType === Whitespace) {
+      k++;
+    }
+    return this.LA(k).tokenType !== tokenType;
+  };
+
   private isNextPragma = () => {
     if (this.LA(1).tokenType !== CommentOpen) {
       return false;
@@ -208,9 +220,9 @@ export class ExtendedPomlParser extends CstParser {
 
       this.AT_LEAST_ONE(() => {
         this.OR([
-          // mid-content whitespace: only if NOT followed by TemplateClose
           {
-            GATE: () => this.LA(1).tokenType === Whitespace && this.LA(2).tokenType !== TemplateClose,
+            // mid-content whitespace: only if NOT followed by TemplateClose
+            GATE: () => this.isSafeWhitespace(TemplateClose),
             ALT: () => this.CONSUME1(Whitespace, { LABEL: 'Content' }),
           },
           // everything else in TokensExpression except Whitespace (handled above)
@@ -323,25 +335,46 @@ export class ExtendedPomlParser extends CstParser {
             this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' }));
             this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
             this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
-            this.AT_LEAST_ONE(() => {
-              this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Collection'));
-            });
-            this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
+            // It's written as a double quoted expression without {{ }} here
+            // but it will be treated as an expression in the semantic analysis stage.
+            (this.AT_LEAST_ONE(() => {
+              this.OR([
+                {
+                  GATE: () => this.isSafeWhitespace(DoubleQuote),
+                  ALT: () => this.CONSUME4(Whitespace, { LABEL: 'Collection' }),
+                },
+                ...this.anyOf(
+                  TokensDoubleQuoted.filter((t) => t !== Whitespace),
+                  'Collection',
+                ),
+              ]);
+            }),
+              this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' })));
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
         },
         {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
-            this.OPTION5(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterOpen' }));
+            this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
             this.CONSUME3(Identifier, { LABEL: 'Iterator' });
-            this.OPTION6(() => this.CONSUME6(Whitespace, { LABEL: 'WsAfterIterator' }));
+            this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' }));
             this.CONSUME4(Identifier, { LABEL: 'InKeyword' });
-            this.OPTION7(() => this.CONSUME7(Whitespace, { LABEL: 'WsAfterIn' }));
-            this.AT_LEAST_ONE2(() => {
-              this.OR(this.anyOf(TokensSingleQuotedExpression, 'Collection'));
-            });
-            this.OPTION8(() => this.CONSUME8(Whitespace, { LABEL: 'WsAfterCollection' }));
+            this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
+            // Similar for single quoted expression
+            (this.AT_LEAST_ONE(() => {
+              this.OR([
+                {
+                  GATE: () => this.isSafeWhitespace(SingleQuote),
+                  ALT: () => this.CONSUME4(Whitespace, { LABEL: 'Collection' }),
+                },
+                ...this.anyOf(
+                  TokensSingleQuoted.filter((t) => t !== Whitespace),
+                  'Collection',
+                ),
+              ]);
+            }),
+              this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' })));
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -374,19 +407,21 @@ export class ExtendedPomlParser extends CstParser {
 
     this.openTagPartial = this.RULE('openTagPartial', () => {
       this.CONSUME(OpenBracket);
-      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
       this.CONSUME(Identifier, { LABEL: 'TagName' });
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
       this.MANY(() => {
+        this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsBeforeEachAttribute' }));
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
-        this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAttribute' }));
       });
+      this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterAll' }));
     });
 
     this.closeTag = this.RULE('closeTag', () => {
       this.CONSUME(ClosingOpenBracket);
-      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterBracket' }));
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
       this.CONSUME(Identifier, { LABEL: 'TagName' });
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsBeforeClose' }));
       this.CONSUME(CloseBracket);
     });
 
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 0ddfa331..163c622e 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -271,7 +271,7 @@ export interface OpenTagNode extends AstNode {
 export interface CstOpenTagPartialNode extends CstNode {
   children: {
     OpenBracket?: IToken[];
-    WsAfterBracket?: IToken[];
+    WsAfterOpen?: IToken[];
     TagName?: IToken[];
     WsBeforeEachAttribute?: IToken[];
     Attribute?: CstAttributeNode[];
@@ -306,8 +306,9 @@ export interface CloseTagNode extends AstNode {
 export interface CstCloseTagNode extends CstNode {
   children: {
     ClosingOpenBracket?: IToken[];
-    WsAfterBracket?: IToken[];
+    WsAfterOpen?: IToken[];
     TagName?: IToken[];
+    WsBeforeClose?: IToken[];
     CloseBracket?: IToken[];
   };
 }

From 1ae48193bada6086c2f7de82ff4c54650baa415d Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 4 Sep 2025 18:04:50 +0800
Subject: [PATCH 50/76] merge literalElement rule into element; replace isSafeWhitespace with isAlmostClose

---
 packages/poml/next/cst.ts   | 153 +++++++++++++-----------------------
 packages/poml/next/nodes.ts |  73 +++++------------
 2 files changed, 75 insertions(+), 151 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 4c73b7fa..d268cc08 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -37,7 +37,6 @@ import {
   CstElementContentNode,
   CstCommentNode,
   CstPragmaNode,
-  CstLiteralElementNode,
   CstRootNode,
 } from './nodes';
 import { listComponentAliases } from 'poml/base';
@@ -64,7 +63,6 @@ export class ExtendedPomlParser extends CstParser {
   public openTagPartial!: (idxInOriginalText?: number) => CstOpenTagPartialNode;
   public closeTag!: (idxInOriginalText?: number) => CstCloseTagNode;
   public element!: (idxInOriginalText?: number) => CstElementNode;
-  public literalElement!: (idxInOriginalText?: number) => CstLiteralElementNode;
 
   // ---- Tag names for rules (for CST nodes) ----
   private validComponentNames: Set<string>;
@@ -92,15 +90,12 @@ export class ExtendedPomlParser extends CstParser {
     }));
 
   // Lookahead helper: Check if next is whitespace but next non-whitespace token is not of given type
-  private isSafeWhitespace = (tokenType: TokenType) => {
-    if (this.LA(1).tokenType !== Whitespace) {
-      return false;
-    }
-    let k = 2;
-    while (this.LA(k).tokenType === Whitespace) {
+  private isAlmostClose = (tokenType: TokenType) => {
+    let k = 1;
+    if (this.LA(k).tokenType === Whitespace) {
       k++;
     }
-    return this.LA(k).tokenType !== tokenType;
+    return this.LA(k).tokenType === tokenType;
   };
 
   private isNextPragma = () => {
@@ -114,23 +109,7 @@ export class ExtendedPomlParser extends CstParser {
     return this.LA(k).tokenType === PragmaKeyword;
   };
 
-  private isNextLiteralOpenTag = () => {
-    if (this.LA(1).tokenType !== OpenBracket) {
-      return false;
-    }
-    let k = 2;
-    while (this.LA(k).tokenType === Whitespace) {
-      k++;
-    }
-    const tName = this.LA(k);
-    if (tName.tokenType !== Identifier) {
-      return false;
-    }
-    const name = (tName.image || '').toLowerCase();
-    return name === 'text' || name === 'template';
-  };
-
-  private isAtLiteralClose = () => {
+  private isAtLiteralClose = (expectedTagName: string) => {
     if (this.LA(1).tokenType !== ClosingOpenBracket) {
       return false;
     }
@@ -144,8 +123,7 @@ export class ExtendedPomlParser extends CstParser {
     }
     const name = (t.image || '').toLowerCase();
 
-    // TODO: should match the opening tag name
-    return name === 'text' || name === 'template';
+    return name === expectedTagName.toLowerCase();
   };
 
   private isValidOpenTag = (tagName: string) => {
@@ -192,12 +170,6 @@ export class ExtendedPomlParser extends CstParser {
           ALT: () => this.SUBRULE(this.template, { LABEL: 'Template' }),
         },
 
-        // literal element: <text> or <template> acting as literal
-        {
-          GATE: this.isNextLiteralOpenTag,
-          ALT: () => this.SUBRULE(this.literalElement, { LABEL: 'LiteralElement' }),
-        },
-
         // normal element
         {
           ALT: () => this.SUBRULE(this.element, { LABEL: 'Element' }),
@@ -218,23 +190,17 @@ export class ExtendedPomlParser extends CstParser {
       this.CONSUME(TemplateOpen);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
 
-      this.AT_LEAST_ONE(() => {
-        this.OR([
-          {
-            // mid-content whitespace: only if NOT followed by TemplateClose
-            GATE: () => this.isSafeWhitespace(TemplateClose),
-            ALT: () => this.CONSUME1(Whitespace, { LABEL: 'Content' }),
-          },
-          // everything else in TokensExpression except Whitespace (handled above)
-          ...this.anyOf(
-            TokensExpression.filter((t) => t !== Whitespace),
-            'Content',
-          ),
-        ]);
+      this.AT_LEAST_ONE({
+        GATE: () => !this.isAlmostClose(TemplateClose),
+        DEF: () => {
+          this.OR(
+            this.anyOf(
+              TokensExpression.filter((t) => t !== Whitespace),
+              'Content',
+            ),
+          );
+        },
       });
-
-      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterContent' }));
-      this.CONSUME(TemplateClose);
     });
 
     this.comment = this.RULE('comment', () => {
@@ -337,19 +303,13 @@ export class ExtendedPomlParser extends CstParser {
             this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
             // It's written as a double quoted expression without {{ }} here
             // but it will be treated as an expression in the semantic analysis stage.
-            (this.AT_LEAST_ONE(() => {
-              this.OR([
-                {
-                  GATE: () => this.isSafeWhitespace(DoubleQuote),
-                  ALT: () => this.CONSUME4(Whitespace, { LABEL: 'Collection' }),
-                },
-                ...this.anyOf(
-                  TokensDoubleQuoted.filter((t) => t !== Whitespace),
-                  'Collection',
-                ),
-              ]);
-            }),
-              this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' })));
+            this.AT_LEAST_ONE({
+              GATE: () => !this.isAlmostClose(DoubleQuote),
+              DEF: () => {
+                this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
+              },
+            });
+            this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -362,19 +322,13 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME4(Identifier, { LABEL: 'InKeyword' });
             this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
             // Similar for single quoted expression
-            (this.AT_LEAST_ONE(() => {
-              this.OR([
-                {
-                  GATE: () => this.isSafeWhitespace(SingleQuote),
-                  ALT: () => this.CONSUME4(Whitespace, { LABEL: 'Collection' }),
-                },
-                ...this.anyOf(
-                  TokensSingleQuoted.filter((t) => t !== Whitespace),
-                  'Collection',
-                ),
-              ]);
-            }),
-              this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' })));
+            this.AT_LEAST_ONE({
+              GATE: () => !this.isAlmostClose(DoubleQuote),
+              DEF: () => {
+                this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
+              },
+            });
+            this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -408,13 +362,19 @@ export class ExtendedPomlParser extends CstParser {
     this.openTagPartial = this.RULE('openTagPartial', () => {
       this.CONSUME(OpenBracket);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
-      this.CONSUME(Identifier, { LABEL: 'TagName' });
+      const tagTok = this.CONSUME(Identifier, { LABEL: 'TagName' });
       this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
       this.MANY(() => {
         this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsBeforeEachAttribute' }));
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
       });
       this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterAll' }));
+
+      // Compute & return semantic info (to discriminate literal tags and text tags)
+      return this.ACTION(() => ({
+        tagName: tagTok.image,
+        isLiteral: this.literalTagNames.has(tagTok.image.toLowerCase()),
+      }));
     });
 
     this.closeTag = this.RULE('closeTag', () => {
@@ -426,43 +386,40 @@ export class ExtendedPomlParser extends CstParser {
     });
 
     this.element = this.RULE('element', () => {
-      this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
+      const { tagName, isLiteral } = this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
+
       this.OR([
         {
+          GATE: this.ACTION(() => isLiteral),
           ALT: () => {
+            // Literal element logic - must have closing tag, no self-close
             this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
+
+            // Everything until the matching close tag is treated as raw text
             this.MANY(() => {
-              this.SUBRULE(this.elementContent, { LABEL: 'Content' });
+              this.OR([
+                {
+                  GATE: this.ACTION(() => !this.isAtLiteralClose(tagName)),
+                  DEF: () => this.OR(this.anyOf(AllTokens, 'TextContent')),
+                },
+              ]);
             });
+
             this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
           },
         },
         {
           ALT: () => {
-            this.CONSUME(SelfCloseBracket, { LABEL: 'SelfCloseBracket' });
+            this.OPTION(() => this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' }));
+            this.MANY(() => {
+              this.SUBRULE(this.elementContent, { LABEL: 'Content' });
+            });
+            this.SUBRULE2(this.closeTag);
           },
         },
       ]);
     });
 
-    this.literalElement = this.RULE('literalElement', () => {
-      this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
-      this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
-
-      // TODO: the ending tag should match the starting tag name (text/template)
-      // Everything until the matching </text> or </template> is treated as raw text
-      this.MANY(() => {
-        this.OR([
-          {
-            GATE: () => !this.isAtLiteralClose(),
-            ALT: () => this.OR(this.anyOf(AllTokens, 'TextContent')),
-          },
-        ]);
-      });
-
-      this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
-    });
-
     this.performSelfAnalysis();
   }
 
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 163c622e..6c77e3ea 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -344,22 +344,38 @@ export interface SelfCloseElementNode extends AstNode {
  * open tag), and may have child content including other elements, text,
  * or values.
  *
+ * It should also support literal elements, which are:
+ *
+ * - Special POML elements that treat their content as literal text
+ * - Prevents template variable interpolation
+ * - Content is preserved exactly as written, useful for code samples or pre-formatted text
+ * - When `<text>` is used, the parser eats everything including tags and comments,
+ *   including nested `<text>` itself, until a matching `</text>` is found
+ * - The tagName can only be "text" and "template" for literal elements
+ * - If you need `<text>` in your POML content, use `&lt;text&gt;` outside of literal elements
+ *
  * Cases that apply:
  * - Any elements: `<document parser="txt">...content...</document>`
  * - Output schemas with templates: `<output-schema>{{ schemaDefinition }}</output-schema>`
  * - Nested elements: `<section><paragraph>Text</paragraph></section>`
+ * - Literal text elements: `<text>Literal {{ not_interpolated }}</text>` (literal elements)
  *
  * Cases that do not apply:
  * - Self-closing elements: `<image />` (use SelfCloseTagNode)
  * - Literal text content: plain text (use LiteralNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  * - Meta elements: `<meta>` tags (use MetaNode)
+ *
+ * Note:
+ * - Literal element node is different from elements which do not support nested tags
+ *   (e.g., <let>). Literal element node is handled on the CST parsing stage.
  */
 export interface ElementNode extends AstNode {
   kind: 'ELEMENT';
   open: OpenTagNode;
   close: CloseTagNode;
-  children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | TextElementNode)[];
+  children: (ElementNode | CommentNode | PragmaNode | TextElementNode)[];
+  // isLiteral?: boolean; // True for <text> and <template> tags
 }
 
 /**
@@ -369,7 +385,7 @@ export interface ElementNode extends AstNode {
  * - Text content between tags: `>  some text  <` (including whitespace)
  *
  * Cases that do not apply:
- * - Text inside <text> or other literal elements (use LiteralElementNode)
+ * - Text inside <text> or other literal elements (use ElementNode with literal)
  */
 export interface TextElementNode extends AstNode {
   kind: 'TEXT';
@@ -384,6 +400,7 @@ export interface CstElementNode extends CstNode {
     OpenTagPartial?: CstOpenTagPartialNode[];
     OpenTagCloseBracket?: IToken[];
     Content?: CstElementContentNode[];
+    TextContent?: IToken[]; // For literal elements like <text>
     CloseTag?: CstCloseTagNode[];
     // Alternative, it can also be a self-closing tag.
     SelfCloseBracket?: IToken[];
@@ -393,7 +410,6 @@ export interface CstElementNode extends CstNode {
 export interface CstElementContentNode extends CstNode {
   children: {
     Element?: CstElementNode[];
-    LiteralElement?: CstLiteralElementNode[];
     Comment?: CstCommentNode[];
     Pragma?: CstPragmaNode[];
     Template?: CstTemplateNode[];
@@ -475,54 +491,6 @@ export interface CstPragmaNode extends CstNode {
   };
 }
 
-/**
- * Represents an element that preserves literal content.
- *
- * Literal element nodes are special POML elements that treat their content as literal
- * text, preventing template variable interpolation. They ensure content is
- * preserved exactly as written, useful for code samples or pre-formatted text.
- * For example, when `<text>` is used, the parser eats everything including tags and comments,
- * including `<text>` itself, until a matching `</text>` is found.
- *
- * Cases that apply:
- * - Explicit text elements: `<text>Literal {{ not_interpolated }}</text>`
- *
- * Cases that do not apply:
- * - Regular text content with interpolation (use TextElementNode or ValueNode)
- * - Plain text outside elements (use TextElementNode)
- * - Elements allowing template processing (use ElementNode)
- * - Text with attributes enabling processing (future feature)
- *
- * Note:
- * 1. The tagName (value) can only be "text" and "template" as I can think of.
- *    There should be a dynamic list of components that should be parsed as literal elements.
- * 2. Literal element node is different from elements which do not support nested tags,
- *    e.g., <let>. Literal element node is handled on the CST parsing stage.
- * 3. If you really need `<text>` in your POML. Recommended to use `&lt;text&gt;`
- *    outside of literal element.
- */
-export interface LiteralElementNode extends AstNode {
-  kind: 'LITERAL';
-  open: OpenTagNode;
-  close: CloseTagNode;
-  children: LiteralNode;
-}
-
-/**
- * Related CST node interfaces for parsing stage.
- */
-export interface CstLiteralElementNode extends CstNode {
-  children: {
-    OpenTagPartial?: CstOpenTagPartialNode[];
-    OpenTagCloseBracket?: IToken[];
-    // All content between open and close tags is treated as literal text
-    // including other tags, comments, pragmas, etc. except for `</text>`.
-    TextContent?: IToken[];
-    CloseTag?: CstCloseTagNode[];
-    // Literal element cannot be self-closing.
-  };
-}
-
 /**
  * Represents the root node of a POML document tree.
  *
@@ -540,7 +508,7 @@ export interface CstLiteralElementNode extends CstNode {
  */
 export interface RootNode extends AstNode {
   kind: 'ROOT';
-  children: (ElementNode | LiteralElementNode | CommentNode | PragmaNode | ValueNode)[];
+  children: (ElementNode | CommentNode | PragmaNode | ValueNode)[];
 }
 
 /**
@@ -582,7 +550,6 @@ export type StrictNode =
   | CloseTagNode
   | SelfCloseElementNode
   | ElementNode
-  | LiteralElementNode
   | TextElementNode
   | CommentNode
   | PragmaNode

From 182ad141aaedc0a4e75c1bdda4f37b5c032d703d Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 4 Sep 2025 18:17:20 +0800
Subject: [PATCH 51/76] fix lint issues

---
 packages/poml/next/cst.ts   | 17 +++++++++--------
 packages/poml/next/nodes.ts |  3 +++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index d268cc08..e1c43fd5 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -109,7 +109,7 @@ export class ExtendedPomlParser extends CstParser {
     return this.LA(k).tokenType === PragmaKeyword;
   };
 
-  private isAtLiteralClose = (expectedTagName: string) => {
+  private isAtLiteralClose = (expectedTagName: string | undefined) => {
     if (this.LA(1).tokenType !== ClosingOpenBracket) {
       return false;
     }
@@ -123,7 +123,7 @@ export class ExtendedPomlParser extends CstParser {
     }
     const name = (t.image || '').toLowerCase();
 
-    return name === expectedTagName.toLowerCase();
+    return name === expectedTagName?.toLowerCase();
   };
 
   private isValidOpenTag = (tagName: string) => {
@@ -136,7 +136,6 @@ export class ExtendedPomlParser extends CstParser {
 
   constructor() {
     super(AllTokens, {
-      outputCst: true,
       recoveryEnabled: true,
     });
     this.validComponentNames = new Set(listComponentAliases());
@@ -386,11 +385,13 @@ export class ExtendedPomlParser extends CstParser {
     });
 
     this.element = this.RULE('element', () => {
-      const { tagName, isLiteral } = this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
+      const { tagName, isLiteral } = this.SUBRULE(this.openTagPartial, {
+        LABEL: 'OpenTagPartial',
+      }) as CstOpenTagPartialNode;
 
       this.OR([
         {
-          GATE: this.ACTION(() => isLiteral),
+          GATE: () => Boolean(isLiteral),
           ALT: () => {
             // Literal element logic - must have closing tag, no self-close
             this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
@@ -399,8 +400,8 @@ export class ExtendedPomlParser extends CstParser {
             this.MANY(() => {
               this.OR([
                 {
-                  GATE: this.ACTION(() => !this.isAtLiteralClose(tagName)),
-                  DEF: () => this.OR(this.anyOf(AllTokens, 'TextContent')),
+                  GATE: () => !this.isAtLiteralClose(tagName),
+                  ALT: () => this.OR(this.anyOf(AllTokens, 'TextContent')),
                 },
               ]);
             });
@@ -435,7 +436,7 @@ export const extendedPomlParser = new ExtendedPomlParser();
 export function parsePomlToCst(input: string): {
   cst: CstNode | undefined;
   lexErrors: ReturnType<typeof extendedPomlLexer.tokenize>['errors'];
-  parseErrors: ReturnType<ExtendedPomlParser['getErrors']>;
+  parseErrors: typeof extendedPomlParser.errors;
 } {
   const lex = extendedPomlLexer.tokenize(input);
   extendedPomlParser.input = lex.tokens;
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 6c77e3ea..f6c0559f 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -277,6 +277,9 @@ export interface CstOpenTagPartialNode extends CstNode {
     Attribute?: CstAttributeNode[];
     WsAfterAll?: IToken[];
   };
+  // Auxiliary info
+  isLiteral?: boolean;
+  tagName?: string;
 }
 
 /**

From 8013a52f5faa9269a3db2c8319cfe1c6947f55e2 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 09:54:50 +0800
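Subject: [PATCH 52/76] consume template close; require whitespace in for-iterator and before attributes; add self-closing element alternative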

---
 packages/poml/next/cst.ts | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index e1c43fd5..07a8b81e 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -200,6 +200,9 @@ export class ExtendedPomlParser extends CstParser {
           );
         },
       });
+
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsBeforeClose' }));
+      this.CONSUME2(TemplateClose);
     });
 
     this.comment = this.RULE('comment', () => {
@@ -297,9 +300,9 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
             this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
             this.CONSUME(Identifier, { LABEL: 'Iterator' });
-            this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' }));
+            this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' });
             this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
-            this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
+            this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' });
             // It's written as a double quoted expression without {{ }} here
             // but it will be treated as an expression in the semantic analysis stage.
             this.AT_LEAST_ONE({
@@ -308,7 +311,7 @@ export class ExtendedPomlParser extends CstParser {
                 this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
               },
             });
-            this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' }));
+            this.OPTION2(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -316,10 +319,10 @@ export class ExtendedPomlParser extends CstParser {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
             this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
-            this.CONSUME3(Identifier, { LABEL: 'Iterator' });
-            this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' }));
-            this.CONSUME4(Identifier, { LABEL: 'InKeyword' });
-            this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' }));
+            this.CONSUME(Identifier, { LABEL: 'Iterator' });
+            this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' });
+            this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
+            this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' });
             // Similar for single quoted expression
             this.AT_LEAST_ONE({
               GATE: () => !this.isAlmostClose(DoubleQuote),
@@ -327,7 +330,7 @@ export class ExtendedPomlParser extends CstParser {
                 this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
               },
             });
-            this.OPTION4(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterCollection' }));
+            this.OPTION2(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -362,12 +365,11 @@ export class ExtendedPomlParser extends CstParser {
       this.CONSUME(OpenBracket);
       this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
       const tagTok = this.CONSUME(Identifier, { LABEL: 'TagName' });
-      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterName' }));
       this.MANY(() => {
-        this.OPTION3(() => this.CONSUME3(Whitespace, { LABEL: 'WsBeforeEachAttribute' }));
+        this.CONSUME2(Whitespace, { LABEL: 'WsBeforeEachAttribute' });
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
       });
-      this.OPTION4(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterAll' }));
+      this.OPTION2(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAll' }));
 
       // Compute & return semantic info (to discriminate literal tags and text tags)
       return this.ACTION(() => ({
@@ -411,13 +413,19 @@ export class ExtendedPomlParser extends CstParser {
         },
         {
           ALT: () => {
-            this.OPTION(() => this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' }));
+            this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
             this.MANY(() => {
               this.SUBRULE(this.elementContent, { LABEL: 'Content' });
             });
             this.SUBRULE2(this.closeTag);
           },
         },
+        {
+          ALT: () => {
+            // Self-closing tag - no content, no closing tag
+            this.CONSUME(SelfCloseBracket);
+          },
+        },
       ]);
     });
 

From e6a8427e58de01354f73899798bb589d714f308b Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 11:02:53 +0800
Subject: [PATCH 53/76] add test

---
 packages/poml/tests/reader/cst.test.ts | 271 +++++++++++++++----------
 1 file changed, 168 insertions(+), 103 deletions(-)

diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 4406f53f..9e2da248 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -1,129 +1,194 @@
 import { describe, expect, test } from '@jest/globals';
-import { parseExtendedPoml, ASTNode } from 'poml/next/cst';
-
-describe('Extended POML CST Parser', () => {
-  test('parses pure text content', () => {
-    const input = 'This is plain text content.';
-    const result = parseExtendedPoml(input);
-
-    expect(result.kind).toBe('TEXT');
-    expect(result.content).toBe(input);
-    expect(result.children).toHaveLength(0);
+import { ExtendedPomlParser } from 'poml/next/cst';
+import { extendedPomlLexer, Whitespace, Identifier } from 'poml/next/lexer';
+import type {
+  CstRootNode,
+  CstElementContentNode,
+  CstTemplateNode,
+  CstCommentNode,
+  CstPragmaNode,
+  CstQuotedNode,
+  CstQuotedTemplateNode,
+  CstForIteratorNode,
+  CstAttributeNode,
+  CstOpenTagPartialNode,
+  CstCloseTagNode,
+  CstElementNode,
+} from 'poml/next/nodes';
+
+function withParser<T>(input: string, run: (p: ExtendedPomlParser) => T) {
+  const lex = extendedPomlLexer.tokenize(input);
+  const parser = new ExtendedPomlParser();
+  parser.input = lex.tokens;
+  const node = run(parser);
+  expect(parser.errors).toHaveLength(0);
+  return { node, parser, tokens: lex.tokens };
+}
+
+describe('CST Parser Rules', () => {
+  test('template rule produces CstTemplateNode', () => {
+    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
+
+    expect(node.name).toBe('template');
+    expect(node.children.TemplateOpen?.[0].image).toBe('{{');
+    expect(node.children.Content).toBeDefined();
+    // Should have whitespace after open and before close when present
+    expect(node.children.WsAfterOpen?.[0].tokenType).toBe(Whitespace);
+    // nodes.ts expects WsAfterContent before close
+    expect(node.children.WsAfterContent?.[0].tokenType).toBe(Whitespace);
+    expect(node.children.TemplateClose?.[0].image).toBe('}}');
   });
 
-  test('parses simple POML element', () => {
-    const input = '<task>Analyze the data</task>';
-    const result = parseExtendedPoml(input);
-
-    expect(result.kind).toBe('TEXT');
-    expect(result.children).toHaveLength(1);
-
-    const taskNode = result.children[0];
-    expect(taskNode.kind).toBe('POML');
-    expect(taskNode.tagName).toBe('task');
-    expect(taskNode.children).toHaveLength(1);
-    expect(taskNode.children[0].content).toBe('Analyze the data');
+  test('comment rule produces CstCommentNode', () => {
+    const { node } = withParser('<!-- hello -->', (p) => p.comment()) as { node: CstCommentNode };
+    expect(node.name).toBe('comment');
+    expect(node.children.CommentOpen?.[0].image).toBe('<!--');
+    expect(node.children.Content?.map((t) => t.image).join('')).toContain('hello');
+    expect(node.children.CommentClose?.[0].image).toBe('-->');
   });
 
-  test('parses mixed content', () => {
-    const input = `# My Document
-
-This is regular text.
-
-<task>
-  Process this data
-</task>
-
-More text here.`;
-
-    const result = parseExtendedPoml(input);
-
-    expect(result.kind).toBe('TEXT');
-    expect(result.children.length).toBeGreaterThan(1);
-
-    // Should have text nodes and POML nodes
-    const pomlNodes = result.children.filter((child) => child.kind === 'POML');
-    expect(pomlNodes).toHaveLength(1);
-    expect(pomlNodes[0].tagName).toBe('task');
+  test('pragma rule produces CstPragmaNode', () => {
+    const input = '<!-- @pragma components +reference -table -->';
+    const { node } = withParser(input, (p) => p.pragma()) as { node: CstPragmaNode };
+
+    expect(node.name).toBe('pragma');
+    expect(node.children.CommentOpen?.[0].image).toBe('<!--');
+    expect(node.children.WsAfterOpen?.[0].tokenType).toBe(Whitespace);
+    expect(node.children.PragmaKeyword?.[0].image.toLowerCase()).toBe('@pragma');
+    expect(node.children.WsAfterPragma?.[0].tokenType).toBe(Whitespace);
+    expect(node.children.PragmaIdentifier?.[0].tokenType).toBe(Identifier);
+    // At least one option present, can be quoted or identifier
+    expect(node.children.PragmaOption?.length).toBeGreaterThan(0);
+    expect(node.children.CommentClose?.[0].image).toBe('-->');
   });
 
-  test('parses self-closing elements', () => {
-    const input = '<meta components="+reference,-table" />';
-    const result = parseExtendedPoml(input);
+  test('quoted rule produces CstQuotedNode (double and single)', () => {
+    const { node: node1 } = withParser('"hello"', (p) => p.quoted()) as { node: CstQuotedNode };
+    expect(node1.name).toBe('quoted');
+    expect(node1.children.OpenQuote?.[0].image).toBe('"');
+    expect(node1.children.Content?.map((t) => t.image).join('')).toBe('hello');
+    expect(node1.children.CloseQuote?.[0].image).toBe('"');
 
-    expect(result.children).toHaveLength(1);
-    const metaNode = result.children[0];
-    expect(metaNode.kind).toBe('META');
-    expect(metaNode.tagName).toBe('meta');
-    expect(metaNode.attributes).toHaveLength(1);
-    expect(metaNode.attributes![0].key).toBe('components');
+    const { node: node2 } = withParser("'world'", (p) => p.quoted()) as { node: CstQuotedNode };
+    expect(node2.children.OpenQuote?.[0].image).toBe("'");
+    expect(node2.children.CloseQuote?.[0].image).toBe("'");
   });
 
-  test('parses template expressions', () => {
-    const input = 'Hello {{name}}!';
-    const result = parseExtendedPoml(input);
-
-    expect(result.children.length).toBeGreaterThan(1);
-    const templateNode = result.children.find((child) => child.kind === 'TEMPLATE');
-    expect(templateNode).toBeDefined();
-    expect(templateNode!.expression).toBe('name');
+  test('quotedTemplate rule produces CstQuotedTemplateNode', () => {
+    const input = '"Hello {{ name }}!"';
+    const { node } = withParser(input, (p) => p.quotedTemplate()) as { node: CstQuotedTemplateNode };
+    expect(node.name).toBe('quotedTemplate');
+    expect(node.children.OpenQuote?.[0].image).toBe('"');
+    expect(node.children.CloseQuote?.[0].image).toBe('"');
+    expect(node.children.Content?.length).toBeGreaterThan(0);
+    // Should include a template embedded inside
+    const hasTemplate = (node.children.Content || []).some(
+      (c: any) => typeof c === 'object' && c && 'name' in c && (c as any).name === 'template',
+    );
+    expect(hasTemplate).toBe(true);
   });
 
-  test('parses attributes with mixed content', () => {
-    const input = '<p class="header" id="{{elementId}}">Content</p>';
-    const result = parseExtendedPoml(input);
-
-    const pNode = result.children.find((child) => child.kind === 'POML');
-    expect(pNode).toBeDefined();
-    expect(pNode!.attributes).toHaveLength(2);
-
-    const classAttr = pNode!.attributes!.find((attr) => attr.key === 'class');
-    expect(classAttr).toBeDefined();
-    expect(classAttr!.value[0].content).toBe('header');
+  test('forIteratorValue rule produces CstForIteratorNode', () => {
+    const input = '"item in items"';
+    const { node } = withParser(input, (p) => p.forIteratorValue()) as { node: CstForIteratorNode };
+    expect(node.name).toBe('forIteratorValue');
+    expect(node.children.OpenQuote?.[0].image).toBe('"');
+    expect(node.children.Iterator?.[0].image).toBe('item');
+    expect(node.children.InKeyword?.[0].tokenType).toBe(Identifier);
+    // nodes.ts expects Collection label for the expression part
+    expect(node.children.Collection?.length).toBeGreaterThan(0);
+    expect(node.children.CloseQuote?.[0].image).toBe('"');
+  });
 
-    const idAttr = pNode!.attributes!.find((attr) => attr.key === 'id');
-    expect(idAttr).toBeDefined();
-    expect(idAttr!.value[0].kind).toBe('TEMPLATE');
+  test('attribute rule produces CstAttributeNode for plain, templated, and for-iterator values', () => {
+    // quoted value
+    let result = withParser('id="value"', (p) => p.attribute()) as { node: CstAttributeNode };
+    let node = result.node;
+    expect(node.name).toBe('attribute');
+    expect(node.children.AttributeKey?.[0].image).toBe('id');
+    expect(node.children.Equals?.[0].image).toBe('=');
+    expect(node.children.quotedValue?.[0]).toBeDefined();
+
+    // templated value
+    result = withParser('title={{ name }}', (p) => p.attribute()) as { node: CstAttributeNode };
+    node = result.node;
+    expect(node.children.AttributeKey?.[0].image).toBe('title');
+    expect(node.children.templatedValue?.[0]).toBeDefined();
+
+    // for-iterator value
+    result = withParser('for="i in items"', (p) => p.attribute()) as { node: CstAttributeNode };
+    node = result.node;
+    expect(node.children.AttributeKey?.[0].image.toLowerCase()).toBe('for');
+    expect(node.children.forIteratorValue?.[0]).toBeDefined();
   });
 
-  test('handles text tag with nested POML', () => {
-    const input = `<text>
-This is **markdown** content.
-<cp caption="Nested">This is nested POML</cp>
-More markdown here.
-</text>`;
+  test('openTagPartial rule returns extra fields and children', () => {
+    const { node } = withParser('<text id="a" class="b" ', (p) => p.openTagPartial()) as {
+      node: CstOpenTagPartialNode;
+    };
+    expect(node.name).toBe('openTagPartial');
+    // Extra fields defined in nodes.ts
+    expect(typeof node.isLiteral).toBe('boolean');
+    expect(node.tagName?.toLowerCase()).toBe('text');
+    // Children
+    expect(node.children.OpenBracket?.[0].image).toBe('<');
+    expect(node.children.TagName?.[0].image.toLowerCase()).toBe('text');
+    expect(node.children.Attribute?.length).toBeGreaterThan(0);
+  });
 
-    const result = parseExtendedPoml(input);
-    const textNode = result.children.find((child) => child.kind === 'POML' && child.tagName === 'text');
+  test('closeTag rule produces CstCloseTagNode', () => {
+    const { node } = withParser('</text>', (p) => p.closeTag()) as { node: CstCloseTagNode };
+    expect(node.name).toBe('closeTag');
+    expect(node.children.ClosingOpenBracket?.[0].image).toBe('</');
+    expect(node.children.TagName?.[0].image.toLowerCase()).toBe('text');
+    expect(node.children.CloseBracket?.[0].image).toBe('>');
+  });
 
-    expect(textNode).toBeDefined();
-    expect(textNode!.children.length).toBeGreaterThan(1);
+  test('element rule: normal open/close element produces CstElementNode', () => {
+    const input = '<document>{{x}}</document>';
+    const { node } = withParser(input, (p) => p.element()) as { node: CstElementNode };
+    expect(node.name).toBe('element');
+    expect(node.children.OpenTagPartial?.[0]).toBeDefined();
+    expect(node.children.OpenTagCloseBracket?.[0].image).toBe('>');
+    expect(node.children.Content?.length).toBeGreaterThan(0);
+    expect(node.children.CloseTag?.[0]).toBeDefined();
+  });
 
-    const cpNode = textNode!.children.find((child) => child.kind === 'POML' && child.tagName === 'cp');
-    expect(cpNode).toBeDefined();
+  test('element rule: self-closing element', () => {
+    const { node } = withParser('<meta />', (p) => p.element()) as { node: CstElementNode };
+    expect(node.children.SelfCloseBracket?.[0].image).toBe('/>');
   });
 
-  test('preserves source position information', () => {
-    const input = '<task>Test</task>';
-    const result = parseExtendedPoml(input);
-
-    const taskNode = result.children[0];
-    expect(taskNode.start).toBe(0);
-    expect(taskNode.end).toBe(input.length);
-    expect(taskNode.openingTag).toBeDefined();
-    expect(taskNode.closingTag).toBeDefined();
-    expect(taskNode.openingTag!.nameRange.start).toBeGreaterThan(0);
-    expect(taskNode.openingTag!.nameRange.end).toBeGreaterThan(taskNode.openingTag!.nameRange.start);
+  test('element rule: literal element treats content as TextContent', () => {
+    const input = '<text>Hello {{ name }} </text>';
+    const { node } = withParser(input, (p) => p.element()) as { node: CstElementNode };
+    expect(node.children.OpenTagPartial?.[0]).toBeDefined();
+    expect(node.children.OpenTagCloseBracket?.[0].image).toBe('>');
+    // Literal elements should store raw tokens under TextContent (no Template child)
+    expect(node.children.TextContent?.length).toBeGreaterThan(0);
+    const images = (node.children.TextContent || []).map((t) => (t as any).image);
+    expect(images).toContain('{{');
+    expect(images).toContain('}}');
+    expect(node.children.CloseTag?.[0]).toBeDefined();
   });
 
-  test('handles unknown components gracefully', () => {
-    const input = '<unknown>This should be treated as text</unknown>';
+  test('elementContent rule produces CstElementContentNode with text', () => {
+    const { node } = withParser('hello world', (p) => p.elementContent()) as {
+      node: CstElementContentNode;
+    };
+    expect(node.name).toBe('elementContent');
+    expect(node.children.TextContent?.length).toBeGreaterThan(0);
+  });
 
-    // Should not throw by default (warning behavior)
-    const result = parseExtendedPoml(input);
-    expect(result).toBeDefined();
+  test('root rule produces CstRootNode with Content', () => {
+    const input = '<document><!-- @pragma components ref --><text>t</text>{{x}}</document>';
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+    expect(node.name).toBe('root');
+    expect(node.children.Content?.length).toBeGreaterThan(0);
 
-    // Should treat unknown tag as text content
-    expect(result.children.length).toBeGreaterThan(0);
+    // Sanity: ensure CST contains an element somewhere
+    const contentNodes = node.children.Content || [];
+    const hasElement = contentNodes.some((n) => (n as any).name === 'element');
+    expect(hasElement).toBe(true);
   });
 });

From feeb689f29d04e4b65a7e030d94dfb540c931b0b Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 12:38:24 +0800
Subject: [PATCH 54/76] token rules

---
 packages/poml/next/nodes.ts | 94 ++++++++++++++++++++++++++++++++++---
 1 file changed, 87 insertions(+), 7 deletions(-)

diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index f6c0559f..6ad685cf 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -5,6 +5,86 @@ export interface AstNode {
   range: Range; // start and end offsets in the source text
 }
 
+/**
+ * Helper node types for plain token sequences produced by the lexer.
+ */
+
+export interface CstCommentTokens extends CstNode {
+  // Can be empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstExpressionTokens extends CstNode {
+  // Always trim the ws around the expression {{ expr }}.
+  // Must be non-empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstDoubleQuotedTokens extends CstNode {
+  // The untrimmed content within "...", can be empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstDoubleQuotedTrimmedTokens extends CstNode {
+  // Trimmed content in "..." without leading/trailing whitespace
+  // Must be non-empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstSingleQuotedTokens extends CstNode {
+  // The untrimmed content in '...', can be empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstSingleQuotedTrimmedTokens extends CstNode {
+  // Trimmed content without leading/trailing whitespace
+  // Must be non-empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstDoubleQuotedExpressionTokens extends CstNode {
+  // Contents in "...{{ ... }}..." but outside the {{ }}
+  // Must be non-empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstSingleQuotedExpressionTokens extends CstNode {
+  // Contents in '...{{ ... }}...' but outside the {{ }}
+  // Must be non-empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstBetweenTagsTokens extends CstNode {
+  // Plain texts within tags but outside nested tags. Must be non-empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
+export interface CstLiteralTagTokens extends CstNode {
+  // Plain texts within literal tags like <text>...</text>.
+  // Match greedily. Can be empty.
+  children: {
+    Content: IToken[];
+  };
+}
+
 /**
  * Represents a JavaScript expression as a string.
  *
@@ -64,7 +144,7 @@ export interface CstTemplateNode extends CstNode {
     // Content inside {{ and }} is treated as a single expression token.
     // Eats everything until the next }} (or the whitespace before it).
     // Handles \{{ and \}} escapes. We won't escape other chars here.
-    Content?: IToken[];
+    Content?: CstExpressionTokens[];
     // If it's close to the ending }}, try to eat whitespace before it.
     WsAfterContent?: IToken[];
     TemplateClose?: IToken[];
@@ -130,7 +210,7 @@ export interface CstQuotedNode extends CstNode {
   children: {
     OpenQuote?: IToken[];
     // This is a normal quoted string without templates inside.
-    Content?: IToken[];
+    Content?: (CstDoubleQuotedTokens | CstSingleQuotedTokens)[];
     CloseQuote?: IToken[];
   };
 }
@@ -139,7 +219,7 @@ export interface CstQuotedTemplateNode extends CstNode {
   children: {
     OpenQuote?: IToken[];
     // Allows "Hello {{ friend["abc"] }}!" - mix of text and templates (with quotes).
-    Content?: (IToken | CstTemplateNode)[];
+    Content?: (CstDoubleQuotedExpressionTokens | CstSingleQuotedExpressionTokens | CstTemplateNode)[];
     CloseQuote?: IToken[];
   };
 }
@@ -186,7 +266,7 @@ export interface CstForIteratorNode extends CstNode {
     // But as we are in a quoted string, we need to handle
     // backslash escapes like \" and \'.
     // Greedily match until the next unescaped quote or ws before it.
-    Collection?: IToken[];
+    Collection?: (CstDoubleQuotedTrimmedTokens | CstSingleQuotedTrimmedTokens)[];
     WsAfterCollection?: IToken[];
     CloseQuote?: IToken[];
   };
@@ -403,7 +483,7 @@ export interface CstElementNode extends CstNode {
     OpenTagPartial?: CstOpenTagPartialNode[];
     OpenTagCloseBracket?: IToken[];
     Content?: CstElementContentNode[];
-    TextContent?: IToken[]; // For literal elements like <text>
+    TextContent?: CstLiteralTagTokens[]; // For literal elements like <text>
     CloseTag?: CstCloseTagNode[];
     // Alternative, it can also be a self-closing tag.
     SelfCloseBracket?: IToken[];
@@ -416,7 +496,7 @@ export interface CstElementContentNode extends CstNode {
     Comment?: CstCommentNode[];
     Pragma?: CstPragmaNode[];
     Template?: CstTemplateNode[];
-    TextContent?: IToken[];
+    TextContent?: CstBetweenTagsTokens[];
   };
 }
 
@@ -441,7 +521,7 @@ export interface CommentNode extends AstNode {
 export interface CstCommentNode extends CstNode {
   children: {
     CommentOpen?: IToken[];
-    Content?: IToken[];
+    Content?: CstCommentTokens[];
     CommentClose?: IToken[];
   };
 }

From 5fe27c00051c5f6026f699917c2cf1b6a15fba9a Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 13:02:59 +0800
Subject: [PATCH 55/76] update nodes

---
 packages/poml/next/cst.ts   | 157 ++++++++++++++++++++++++------------
 packages/poml/next/nodes.ts |  24 +++---
 2 files changed, 119 insertions(+), 62 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 07a8b81e..b0a610da 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -26,6 +26,16 @@ import {
 } from './lexer';
 
 import {
+  CstCommentTokens,
+  CstExpressionTokens,
+  CstDoubleQuotedTokens,
+  CstDoubleQuotedTrimmedTokens,
+  CstSingleQuotedTokens,
+  CstSingleQuotedTrimmedTokens,
+  CstDoubleQuotedExpressionTokens,
+  CstSingleQuotedExpressionTokens,
+  CstBetweenTagsTokens,
+  CstLiteralTagTokens,
   CstTemplateNode,
   CstQuotedNode,
   CstQuotedTemplateNode,
@@ -53,6 +63,18 @@ export class ExtendedPomlParser extends CstParser {
   // ---- Rule property declarations (so TS knows they exist) ----
   public root!: (idxInOriginalText?: number) => CstRootNode;
   public elementContent!: (idxInOriginalText?: number) => CstElementContentNode;
+  // token-sequence helper rules
+  public commentTokens!: (idxInOriginalText?: number) => CstCommentTokens;
+  public expressionTokens!: (idxInOriginalText?: number) => CstExpressionTokens;
+  public doubleQuotedTokens!: (idxInOriginalText?: number) => CstDoubleQuotedTokens;
+  public singleQuotedTokens!: (idxInOriginalText?: number) => CstSingleQuotedTokens;
+  public doubleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstDoubleQuotedTrimmedTokens;
+  public singleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstSingleQuotedTrimmedTokens;
+  public doubleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstDoubleQuotedExpressionTokens;
+  public singleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstSingleQuotedExpressionTokens;
+  public betweenTagsTokens!: (idxInOriginalText?: number) => CstBetweenTagsTokens;
+  public literalTagTokens!: (idxInOriginalText?: number, expectedTagName?: string) => CstLiteralTagTokens;
+  // regular rules
   public template!: (idxInOriginalText?: number) => CstTemplateNode;
   public comment!: (idxInOriginalText?: number) => CstCommentNode;
   public pragma!: (idxInOriginalText?: number) => CstPragmaNode;
@@ -90,7 +112,7 @@ export class ExtendedPomlParser extends CstParser {
     }));
 
   // Lookahead helper: Check if next is whitespace but next non-whitespace token is not of given type
-  private isAlmostClose = (tokenType: TokenType) => {
+  private atAlmostClose = (tokenType: TokenType) => {
     let k = 1;
     if (this.LA(k).tokenType === Whitespace) {
       k++;
@@ -177,40 +199,93 @@ export class ExtendedPomlParser extends CstParser {
         // raw text content
         {
           ALT: () => {
-            this.AT_LEAST_ONE(() => {
-              this.OR(this.anyOf(TokensTextContent, 'TextContent'));
-            });
+            // Group text between tags under CstBetweenTagsTokens
+            this.SUBRULE(this.betweenTagsTokens, { LABEL: 'TextContent' });
           },
         },
       ]);
     });
 
-    this.template = this.RULE('template', () => {
-      this.CONSUME(TemplateOpen);
-      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
+    // ----- Token sequence helper rules -----
+    this.commentTokens = this.RULE('commentTokens', () => {
+      this.MANY(() => {
+        this.OR(this.anyOf(TokensComment, 'Content'));
+      });
+    });
 
+    this.expressionTokens = this.RULE('expressionTokens', () => {
       this.AT_LEAST_ONE({
-        GATE: () => !this.isAlmostClose(TemplateClose),
+        GATE: () => !this.atAlmostClose(TemplateClose),
         DEF: () => {
-          this.OR(
-            this.anyOf(
-              TokensExpression.filter((t) => t !== Whitespace),
-              'Content',
-            ),
-          );
+          this.OR(this.anyOf(TokensExpression, 'Content'));
         },
       });
+    });
 
-      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsBeforeClose' }));
+    this.doubleQuotedTokens = this.RULE('doubleQuotedTokens', () => {
+      this.MANY(() => {
+        this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
+      });
+    });
+
+    this.singleQuotedTokens = this.RULE('singleQuotedTokens', () => {
+      this.MANY(() => {
+        this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
+      });
+    });
+
+    this.doubleQuotedTrimmedTokens = this.RULE('doubleQuotedTrimmedTokens', () => {
+      // Greedily match up to the closing double quote, keeping inner whitespace but leaving whitespace right before the quote to the caller
+      this.AT_LEAST_ONE({
+        GATE: () => !this.atAlmostClose(DoubleQuote),
+        DEF: () => {
+          this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
+        },
+      });
+    });
+
+    this.singleQuotedTrimmedTokens = this.RULE('singleQuotedTrimmedTokens', () => {
+      // Greedily match up to the closing single quote, keeping inner whitespace but leaving whitespace right before the quote to the caller
+      this.AT_LEAST_ONE({
+        GATE: () => !this.atAlmostClose(SingleQuote),
+        DEF: () => {
+          this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
+        },
+      });
+    });
+
+    this.doubleQuotedExpressionTokens = this.RULE('doubleQuotedExpressionTokens', () => {
+      this.AT_LEAST_ONE(() => {
+        this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Content'));
+      });
+    });
+
+    this.singleQuotedExpressionTokens = this.RULE('singleQuotedExpressionTokens', () => {
+      this.AT_LEAST_ONE(() => {
+        this.OR(this.anyOf(TokensSingleQuotedExpression, 'Content'));
+      });
+    });
+
+    this.betweenTagsTokens = this.RULE('betweenTagsTokens', () => {
+      this.AT_LEAST_ONE(() => {
+        this.OR(this.anyOf(TokensTextContent, 'Content'));
+      });
+    });
+
+    // ----- Main rules -----
+
+    this.template = this.RULE('template', () => {
+      this.CONSUME(TemplateOpen);
+      this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
+      this.SUBRULE(this.expressionTokens, { LABEL: 'Content' });
+      this.OPTION2(() => this.CONSUME2(Whitespace, { LABEL: 'WsAfterContent' }));
       this.CONSUME2(TemplateClose);
     });
 
     this.comment = this.RULE('comment', () => {
       this.CONSUME(CommentOpen);
-      this.MANY(() => {
-        // anything until -->
-        this.OR(this.anyOf(TokensComment, 'Content'));
-      });
+      // anything until -->
+      this.SUBRULE(this.commentTokens, { LABEL: 'Content' });
       this.CONSUME(CommentClose);
     });
 
@@ -246,18 +321,14 @@ export class ExtendedPomlParser extends CstParser {
         {
           ALT: () => {
             this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
-            this.MANY(() => {
-              this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
-            });
+            this.SUBRULE(this.doubleQuotedTokens, { LABEL: 'Content' });
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
         },
         {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
-            this.MANY(() => {
-              this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
-            });
+            this.SUBRULE(this.singleQuotedTokens, { LABEL: 'Content' });
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -272,7 +343,9 @@ export class ExtendedPomlParser extends CstParser {
             this.MANY(() => {
               this.OR([
                 { ALT: () => this.SUBRULE(this.template, { LABEL: 'Content' }) },
-                { ALT: () => this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Content')) },
+                {
+                  ALT: () => this.SUBRULE(this.doubleQuotedExpressionTokens, { LABEL: 'Content' }),
+                },
               ]);
             });
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
@@ -284,7 +357,9 @@ export class ExtendedPomlParser extends CstParser {
             this.MANY(() => {
               this.OR([
                 { ALT: () => this.SUBRULE(this.template, { LABEL: 'Content' }) },
-                { ALT: () => this.OR(this.anyOf(TokensSingleQuotedExpression, 'Content')) },
+                {
+                  ALT: () => this.SUBRULE(this.singleQuotedExpressionTokens, { LABEL: 'Content' }),
+                },
               ]);
             });
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
@@ -303,14 +378,8 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' });
             this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
             this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' });
-            // It's written as a double quoted expression without {{ }} here
-            // but it will be treated as an expression in the semantic analysis stage.
-            this.AT_LEAST_ONE({
-              GATE: () => !this.isAlmostClose(DoubleQuote),
-              DEF: () => {
-                this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
-              },
-            });
+            // Greedily match until the next unescaped quote
+            this.SUBRULE(this.doubleQuotedTrimmedTokens, { LABEL: 'Collection' });
             this.OPTION2(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(DoubleQuote, { LABEL: 'CloseQuote' });
           },
@@ -323,13 +392,8 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' });
             this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
             this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' });
-            // Similar for single quoted expression
-            this.AT_LEAST_ONE({
-              GATE: () => !this.isAlmostClose(DoubleQuote),
-              DEF: () => {
-                this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
-              },
-            });
+            // Greedily match until the next unescaped quote
+            this.SUBRULE(this.singleQuotedTrimmedTokens, { LABEL: 'Collection' });
             this.OPTION2(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
@@ -399,14 +463,7 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
 
             // Everything until the matching close tag is treated as raw text
-            this.MANY(() => {
-              this.OR([
-                {
-                  GATE: () => !this.isAtLiteralClose(tagName),
-                  ALT: () => this.OR(this.anyOf(AllTokens, 'TextContent')),
-                },
-              ]);
-            });
+            this.MANY(() => this.SUBRULE(this.literalTagTokens, { ARGS: [tagName], LABEL: 'TextContent' }));
 
             this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
           },
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 6ad685cf..f04b07bc 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -12,7 +12,7 @@ export interface AstNode {
 export interface CstCommentTokens extends CstNode {
   // Can be empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
@@ -20,14 +20,14 @@ export interface CstExpressionTokens extends CstNode {
   // Always trim the ws around the expression {{ expr }}.
   // Must be non-empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
 export interface CstDoubleQuotedTokens extends CstNode {
   // The untrimmed content within "...", can be empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
@@ -35,14 +35,14 @@ export interface CstDoubleQuotedTrimmedTokens extends CstNode {
   // Trimmed content in "..." without leading/trailing whitespace
   // Must be non-empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
 export interface CstSingleQuotedTokens extends CstNode {
   // The untrimmed content in '...', can be empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
@@ -50,30 +50,30 @@ export interface CstSingleQuotedTrimmedTokens extends CstNode {
   // Trimmed content without leading/trailing whitespace
   // Must be non-empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
 export interface CstDoubleQuotedExpressionTokens extends CstNode {
   // Contents in "...{{ ... }}..." but outside the {{ }}
-  // Must be non-empty.
+  // Must be non-empty. Can have leading/trailing whitespace.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
 export interface CstSingleQuotedExpressionTokens extends CstNode {
   // Contents in '...{{ ... }}...' but outside the {{ }}
-  // Must be non-empty.
+  // Must be non-empty. Can have leading/trailing whitespace.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
 export interface CstBetweenTagsTokens extends CstNode {
   // Plain texts within tags but outside nested tags. Must be non-empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 
@@ -81,7 +81,7 @@ export interface CstLiteralTagTokens extends CstNode {
   // Plain texts within literal tags like <text>...</text>.
   // Match greedily. Can be empty.
   children: {
-    Content: IToken[];
+    Content?: IToken[];
   };
 }
 

From 11f58fd60c3864783a2c69acd6c3e53077251bcc Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 13:19:01 +0800
Subject: [PATCH 56/76] pass self check

---
 packages/poml/next/cst.ts | 40 ++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index b0a610da..38817fd9 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -73,7 +73,8 @@ export class ExtendedPomlParser extends CstParser {
   public doubleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstDoubleQuotedExpressionTokens;
   public singleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstSingleQuotedExpressionTokens;
   public betweenTagsTokens!: (idxInOriginalText?: number) => CstBetweenTagsTokens;
-  public literalTagTokens!: (idxInOriginalText?: number, expectedTagName?: string) => CstLiteralTagTokens;
+  // Accepting expectedTagName as argument to validate matching close tag
+  public literalTagTokens!: (idxInOriginalText?: number, args?: [string]) => CstLiteralTagTokens;
   // regular rules
   public template!: (idxInOriginalText?: number) => CstTemplateNode;
   public comment!: (idxInOriginalText?: number) => CstCommentNode;
@@ -272,6 +273,15 @@ export class ExtendedPomlParser extends CstParser {
       });
     });
 
+    this.literalTagTokens = this.RULE('literalTagTokens', (expectedTagName?: string) => {
+      this.AT_LEAST_ONE({
+        GATE: () => !this.isAtLiteralClose(expectedTagName),
+        DEF: () => {
+          this.OR(this.anyOf(TokensTextContent, 'Content'));
+        },
+      });
+    });
+
     // ----- Main rules -----
 
     this.template = this.RULE('template', () => {
@@ -341,10 +351,10 @@ export class ExtendedPomlParser extends CstParser {
           ALT: () => {
             this.CONSUME(DoubleQuote, { LABEL: 'OpenQuote' });
             this.MANY(() => {
-              this.OR([
+              this.OR2([
                 { ALT: () => this.SUBRULE(this.template, { LABEL: 'Content' }) },
                 {
-                  ALT: () => this.SUBRULE(this.doubleQuotedExpressionTokens, { LABEL: 'Content' }),
+                  ALT: () => this.SUBRULE2(this.doubleQuotedExpressionTokens, { LABEL: 'Content' }),
                 },
               ]);
             });
@@ -354,11 +364,11 @@ export class ExtendedPomlParser extends CstParser {
         {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
-            this.MANY(() => {
-              this.OR([
-                { ALT: () => this.SUBRULE(this.template, { LABEL: 'Content' }) },
+            this.MANY2(() => {
+              this.OR3([
+                { ALT: () => this.SUBRULE3(this.template, { LABEL: 'Content' }) },
                 {
-                  ALT: () => this.SUBRULE(this.singleQuotedExpressionTokens, { LABEL: 'Content' }),
+                  ALT: () => this.SUBRULE4(this.singleQuotedExpressionTokens, { LABEL: 'Content' }),
                 },
               ]);
             });
@@ -387,14 +397,14 @@ export class ExtendedPomlParser extends CstParser {
         {
           ALT: () => {
             this.CONSUME(SingleQuote, { LABEL: 'OpenQuote' });
-            this.OPTION(() => this.CONSUME(Whitespace, { LABEL: 'WsAfterOpen' }));
-            this.CONSUME(Identifier, { LABEL: 'Iterator' });
-            this.CONSUME2(Whitespace, { LABEL: 'WsAfterIterator' });
-            this.CONSUME2(Identifier, { LABEL: 'InKeyword' });
-            this.CONSUME3(Whitespace, { LABEL: 'WsAfterIn' });
+            this.OPTION3(() => this.CONSUME5(Whitespace, { LABEL: 'WsAfterOpen' }));
+            this.CONSUME3(Identifier, { LABEL: 'Iterator' });
+            this.CONSUME6(Whitespace, { LABEL: 'WsAfterIterator' });
+            this.CONSUME4(Identifier, { LABEL: 'InKeyword' });
+            this.CONSUME7(Whitespace, { LABEL: 'WsAfterIn' });
             // Greedily match until the next unescaped quote
             this.SUBRULE(this.singleQuotedTrimmedTokens, { LABEL: 'Collection' });
-            this.OPTION2(() => this.CONSUME4(Whitespace, { LABEL: 'WsAfterCollection' }));
+            this.OPTION4(() => this.CONSUME8(Whitespace, { LABEL: 'WsAfterCollection' }));
             this.CONSUME2(SingleQuote, { LABEL: 'CloseQuote' });
           },
         },
@@ -463,14 +473,14 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
 
             // Everything until the matching close tag is treated as raw text
-            this.MANY(() => this.SUBRULE(this.literalTagTokens, { ARGS: [tagName], LABEL: 'TextContent' }));
+            this.SUBRULE(this.literalTagTokens, { ARGS: [tagName], LABEL: 'TextContent' });
 
             this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
           },
         },
         {
           ALT: () => {
-            this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
+            this.CONSUME2(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
             this.MANY(() => {
               this.SUBRULE(this.elementContent, { LABEL: 'Content' });
             });

From d3528861621a7b6945aee1a44481568b01b439cb Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 14:10:18 +0800
Subject: [PATCH 57/76] fix

---
 packages/poml/next/cst.ts              | 14 +++++++++++++-
 packages/poml/next/lexer.ts            |  2 ++
 packages/poml/next/nodes.ts            |  8 ++++++++
 packages/poml/tests/reader/cst.test.ts | 10 ++++++++--
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 38817fd9..983a65fb 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -5,6 +5,7 @@ import {
   TokensExpression,
   TokensDoubleQuoted,
   TokensSingleQuoted,
+  TokensCommentIdentifiers,
   TokensDoubleQuotedExpression,
   TokensSingleQuotedExpression,
   TokensTextContent,
@@ -28,6 +29,7 @@ import {
 import {
   CstCommentTokens,
   CstExpressionTokens,
+  CstCommentIdentifierTokens,
   CstDoubleQuotedTokens,
   CstDoubleQuotedTrimmedTokens,
   CstSingleQuotedTokens,
@@ -66,6 +68,7 @@ export class ExtendedPomlParser extends CstParser {
   // token-sequence helper rules
   public commentTokens!: (idxInOriginalText?: number) => CstCommentTokens;
   public expressionTokens!: (idxInOriginalText?: number) => CstExpressionTokens;
+  public commentIdentifierTokens!: (idxInOriginalText?: number) => CstCommentIdentifierTokens;
   public doubleQuotedTokens!: (idxInOriginalText?: number) => CstDoubleQuotedTokens;
   public singleQuotedTokens!: (idxInOriginalText?: number) => CstSingleQuotedTokens;
   public doubleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstDoubleQuotedTrimmedTokens;
@@ -214,6 +217,12 @@ export class ExtendedPomlParser extends CstParser {
       });
     });
 
+    this.commentIdentifierTokens = this.RULE('commentIdentifierTokens', () => {
+      this.AT_LEAST_ONE(() => {
+        this.OR(this.anyOf(TokensCommentIdentifiers, 'Content'));
+      });
+    });
+
     this.expressionTokens = this.RULE('expressionTokens', () => {
       this.AT_LEAST_ONE({
         GATE: () => !this.atAlmostClose(TemplateClose),
@@ -313,10 +322,13 @@ export class ExtendedPomlParser extends CstParser {
         this.CONSUME3(Whitespace, { LABEL: 'WsBeforeEachOption' });
         this.OR([
           {
+            // Try quoted options first
             ALT: () => this.SUBRULE(this.quoted, { LABEL: 'PragmaOption' }),
           },
           {
-            ALT: () => this.CONSUME2(Identifier, { LABEL: 'PragmaOption' }),
+            // Then try identifier tokens (can include +, -, etc.)
+            GATE: () => this.LA(1).tokenType !== SingleQuote && this.LA(1).tokenType !== DoubleQuote,
+            ALT: () => this.SUBRULE2(this.commentIdentifierTokens, { LABEL: 'PragmaOption' }),
           },
         ]);
       });
diff --git a/packages/poml/next/lexer.ts b/packages/poml/next/lexer.ts
index a48dba62..5cd22c13 100644
--- a/packages/poml/next/lexer.ts
+++ b/packages/poml/next/lexer.ts
@@ -109,6 +109,8 @@ export const XmlBracketTokens = [
 ];
 
 export const TokensComment = AllTokens.filter((tokenType) => tokenType !== CommentClose);
+// Comment tokens with whitespace excluded; used as "identifiers" in @pragma options.
+export const TokensCommentIdentifiers = TokensComment.filter((tokenType) => tokenType !== Whitespace);
 
 // Tokens used in expressions (inside {{ and }}), excluding the closing braces.
 // Opening braces {{ should work, but they should be generally properly escaped inside to avoid confusion.
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index f04b07bc..a5f191af 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -70,6 +70,14 @@ export interface CstSingleQuotedExpressionTokens extends CstNode {
   };
 }
 
+export interface CstCommentIdentifierTokens extends CstNode {
+  // Non-whitespace tokens in comments used as identifiers in pragmas.
+  // Supports special chars like +, -, etc. Must be non-empty.
+  children: {
+    Content?: IToken[];
+  };
+}
+
 export interface CstBetweenTagsTokens extends CstNode {
   // Plain texts within tags but outside nested tags. Must be non-empty.
   children: {
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 9e2da248..ce9dded7 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -43,7 +43,10 @@ describe('CST Parser Rules', () => {
     const { node } = withParser('<!-- hello -->', (p) => p.comment()) as { node: CstCommentNode };
     expect(node.name).toBe('comment');
     expect(node.children.CommentOpen?.[0].image).toBe('<!--');
-    expect(node.children.Content?.map((t) => t.image).join('')).toContain('hello');
+    // Content is a CstCommentTokens node, not raw tokens
+    const contentNode = node.children.Content?.[0];
+    const contentText = contentNode?.children.Content?.map((t) => t.image).join('') || '';
+    expect(contentText).toContain('hello');
     expect(node.children.CommentClose?.[0].image).toBe('-->');
   });
 
@@ -66,7 +69,10 @@ describe('CST Parser Rules', () => {
     const { node: node1 } = withParser('"hello"', (p) => p.quoted()) as { node: CstQuotedNode };
     expect(node1.name).toBe('quoted');
     expect(node1.children.OpenQuote?.[0].image).toBe('"');
-    expect(node1.children.Content?.map((t) => t.image).join('')).toBe('hello');
+    // Content is a CstDoubleQuotedTokens node
+    const contentNode = node1.children.Content?.[0];
+    const contentText = contentNode?.children.Content?.map((t) => t.image).join('') || '';
+    expect(contentText).toBe('hello');
     expect(node1.children.CloseQuote?.[0].image).toBe('"');
 
     const { node: node2 } = withParser("'world'", (p) => p.quoted()) as { node: CstQuotedNode };

From 292dfb53e915da5edb1d0f210252b2cb8a9ec0b3 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 15:27:13 +0800
Subject: [PATCH 58/76] fix peek

---
 packages/poml/next/cst.ts              | 59 ++++++++++++++++----------
 packages/poml/next/nodes.ts            |  3 --
 packages/poml/tests/reader/cst.test.ts |  3 --
 3 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 983a65fb..8731c2d5 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -152,6 +152,21 @@ export class ExtendedPomlParser extends CstParser {
     return name === expectedTagName?.toLowerCase();
   };
 
+  private peekTagName = (): string | undefined => {
+    if (this.LA(1).tokenType !== OpenBracket) {
+      return undefined;
+    }
+    let k = 2;
+    while (this.LA(k).tokenType === Whitespace) {
+      k++;
+    }
+    const token = this.LA(k);
+    if (token.tokenType !== Identifier) {
+      return undefined;
+    }
+    return token.image;
+  };
+
   private isValidOpenTag = (tagName: string) => {
     // When pragma strict is enabled, only known component names are allowed as tags.
     // Other component names will show as errors in the semantic analysis stage.
@@ -456,12 +471,6 @@ export class ExtendedPomlParser extends CstParser {
         this.SUBRULE(this.attribute, { LABEL: 'Attribute' });
       });
       this.OPTION2(() => this.CONSUME3(Whitespace, { LABEL: 'WsAfterAll' }));
-
-      // Compute & return semantic info (to discriminate literal tags and text tags)
-      return this.ACTION(() => ({
-        tagName: tagTok.image,
-        isLiteral: this.literalTagNames.has(tagTok.image.toLowerCase()),
-      }));
     });
 
     this.closeTag = this.RULE('closeTag', () => {
@@ -473,14 +482,12 @@ export class ExtendedPomlParser extends CstParser {
     });
 
     this.element = this.RULE('element', () => {
-      const { tagName, isLiteral } = this.SUBRULE(this.openTagPartial, {
-        LABEL: 'OpenTagPartial',
-      }) as CstOpenTagPartialNode;
-
+      const tagName = this.peekTagName();
       this.OR([
         {
-          GATE: () => Boolean(isLiteral),
+          GATE: () => this.literalTagNames.has(tagName?.toLowerCase() || ''),
           ALT: () => {
+            this.SUBRULE(this.openTagPartial, { LABEL: 'OpenTagPartial' });
             // Literal element logic - must have closing tag, no self-close
             this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
 
@@ -491,18 +498,26 @@ export class ExtendedPomlParser extends CstParser {
           },
         },
         {
+          GATE: () => tagName === undefined || !this.literalTagNames.has(tagName?.toLowerCase()),
           ALT: () => {
-            this.CONSUME2(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
-            this.MANY(() => {
-              this.SUBRULE(this.elementContent, { LABEL: 'Content' });
-            });
-            this.SUBRULE2(this.closeTag);
-          },
-        },
-        {
-          ALT: () => {
-            // Self-closing tag - no content, no closing tag
-            this.CONSUME(SelfCloseBracket);
+            this.SUBRULE2(this.openTagPartial, { LABEL: 'OpenTagPartial' });
+            this.OR2([
+              {
+                ALT: () => {
+                  this.CONSUME2(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
+                  this.MANY(() => {
+                    this.SUBRULE(this.elementContent, { LABEL: 'Content' });
+                  });
+                  this.SUBRULE2(this.closeTag);
+                },
+              },
+              {
+                ALT: () => {
+                  // Self-closing tag - no content, no closing tag
+                  this.CONSUME(SelfCloseBracket, { LABEL: 'SelfCloseBracket' });
+                },
+              },
+            ]);
           },
         },
       ]);
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index a5f191af..f174171d 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -365,9 +365,6 @@ export interface CstOpenTagPartialNode extends CstNode {
     Attribute?: CstAttributeNode[];
     WsAfterAll?: IToken[];
   };
-  // Auxiliary info
-  isLiteral?: boolean;
-  tagName?: string;
 }
 
 /**
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index ce9dded7..e18f8d1d 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -133,9 +133,6 @@ describe('CST Parser Rules', () => {
       node: CstOpenTagPartialNode;
     };
     expect(node.name).toBe('openTagPartial');
-    // Extra fields defined in nodes.ts
-    expect(typeof node.isLiteral).toBe('boolean');
-    expect(node.tagName?.toLowerCase()).toBe('text');
     // Children
     expect(node.children.OpenBracket?.[0].image).toBe('<');
     expect(node.children.TagName?.[0].image.toLowerCase()).toBe('text');

From 82b0441262e7f992cfa8f641a9133c003a2a2413 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 17:13:55 +0800
Subject: [PATCH 59/76] fix cst

---
 packages/poml/next/cst.ts              |   4 +-
 packages/poml/tests/reader/cst.test.ts | 285 ++++++++++++++++++++++++-
 2 files changed, 282 insertions(+), 7 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 8731c2d5..4123dd3e 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -301,7 +301,7 @@ export class ExtendedPomlParser extends CstParser {
       this.AT_LEAST_ONE({
         GATE: () => !this.isAtLiteralClose(expectedTagName),
         DEF: () => {
-          this.OR(this.anyOf(TokensTextContent, 'Content'));
+          this.OR(this.anyOf(AllTokens, 'Content'));
         },
       });
     });
@@ -508,7 +508,7 @@ export class ExtendedPomlParser extends CstParser {
                   this.MANY(() => {
                     this.SUBRULE(this.elementContent, { LABEL: 'Content' });
                   });
-                  this.SUBRULE2(this.closeTag);
+                  this.SUBRULE2(this.closeTag, { LABEL: 'CloseTag' });
                 },
               },
               {
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index e18f8d1d..103644ba 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -1,4 +1,5 @@
 import { describe, expect, test } from '@jest/globals';
+import { CstNode, IToken } from 'chevrotain';
 import { ExtendedPomlParser } from 'poml/next/cst';
 import { extendedPomlLexer, Whitespace, Identifier } from 'poml/next/lexer';
 import type {
@@ -14,6 +15,7 @@ import type {
   CstOpenTagPartialNode,
   CstCloseTagNode,
   CstElementNode,
+  CstLiteralTagTokens,
 } from 'poml/next/nodes';
 
 function withParser<T>(input: string, run: (p: ExtendedPomlParser) => T) {
@@ -153,26 +155,46 @@ describe('CST Parser Rules', () => {
     expect(node.name).toBe('element');
     expect(node.children.OpenTagPartial?.[0]).toBeDefined();
     expect(node.children.OpenTagCloseBracket?.[0].image).toBe('>');
-    expect(node.children.Content?.length).toBeGreaterThan(0);
+    expect(node.children.Content?.length).toBe(1);
+    const contentNode = node.children.Content?.[0] as CstElementContentNode;
+    expect(contentNode.name).toBe('elementContent');
+    const templateNode = contentNode.children.Template?.[0] as CstTemplateNode;
+    expect(templateNode.name).toBe('template');
+    expect(templateNode.children.TemplateOpen?.[0].image).toBe('{{');
+    expect(templateNode.children.Content?.[0].children.Content?.[0].image).toBe('x');
+    expect(templateNode.children.TemplateClose?.[0].image).toBe('}}');
     expect(node.children.CloseTag?.[0]).toBeDefined();
   });
 
   test('element rule: self-closing element', () => {
     const { node } = withParser('<meta />', (p) => p.element()) as { node: CstElementNode };
+    expect(node.children.OpenTagPartial?.[0]).toBeDefined();
+    expect(node.recoveredNode).toBeFalsy(); // valid input: node must not come from error recovery
+    const openTag = node.children.OpenTagPartial?.[0] as CstOpenTagPartialNode;
+    expect(openTag.children.OpenBracket?.[0].image).toBe('<');
+    expect(openTag.children.TagName?.[0].image).toBe('meta');
+    expect(openTag.children.WsAfterAll?.[0].image).toBe(' ');
     expect(node.children.SelfCloseBracket?.[0].image).toBe('/>');
   });
 
   test('element rule: literal element treats content as TextContent', () => {
-    const input = '<text>Hello {{ name }} </text>';
+    const input = '<text>Hello {{ name }} <text> </text>';
     const { node } = withParser(input, (p) => p.element()) as { node: CstElementNode };
     expect(node.children.OpenTagPartial?.[0]).toBeDefined();
     expect(node.children.OpenTagCloseBracket?.[0].image).toBe('>');
     // Literal elements should store raw tokens under TextContent (no Template child)
     expect(node.children.TextContent?.length).toBeGreaterThan(0);
-    const images = (node.children.TextContent || []).map((t) => (t as any).image);
+    const content = node.children.TextContent?.[0] as CstLiteralTagTokens;
+    const images = content.children.Content?.map((t) => t.image) || [];
     expect(images).toContain('{{');
     expect(images).toContain('}}');
+    expect(images).toContain('<');
+    expect(images).toContain('text');
+    expect(images).toContain('>');
+    expect(images[images.length - 1]).toBe(' ');
     expect(node.children.CloseTag?.[0]).toBeDefined();
+    const closeTag = node.children.CloseTag?.[0] as CstCloseTagNode;
+    expect(closeTag.children.TagName?.[0].image).toBe('text');
   });
 
   test('elementContent rule produces CstElementContentNode with text', () => {
@@ -191,7 +213,260 @@ describe('CST Parser Rules', () => {
 
     // Sanity: ensure CST contains an element somewhere
     const contentNodes = node.children.Content || [];
-    const hasElement = contentNodes.some((n) => (n as any).name === 'element');
-    expect(hasElement).toBe(true);
+    const elementNames = contentNodes.map((n) => (n as any).name);
+    expect(elementNames).toContain('elementContent');
   });
 });
+
+describe('Helper function sanity', () => {
+  test('images() on template: token lists -> string[], node lists -> nested[]', () => {
+    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
+
+    const snap = images(node) as ImagesTree<CstTemplateNode>;
+
+    // Token-only props => string[]
+    expect(Array.isArray(snap.TemplateOpen)).toBe(true);
+    expect(typeof snap.TemplateOpen![0]).toBe('string');
+    expect(snap.TemplateOpen![0]).toBe('{{');
+
+    expect(Array.isArray(snap.TemplateClose)).toBe(true);
+    expect(typeof snap.TemplateClose![0]).toBe('string');
+    expect(snap.TemplateClose![0]).toBe('}}');
+
+    // Node-only prop => nested[]
+    expect(Array.isArray(snap.Content)).toBe(true);
+    expect(typeof snap.Content![0]).toBe('object'); // nested tree, not string
+    // Nested should mirror structure (has children keys)
+    expect(snap.Content![0]).toBeDefined();
+
+    // Present keys are never undefined
+    for (const k of Object.keys(node.children)) {
+      // @ts-expect-error runtime check
+      expect(snap[k]).toBeDefined();
+      // @ts-expect-error runtime check
+      expect(Array.isArray(snap[k])).toBe(true);
+    }
+  });
+
+  test('names() shape: has { name, children } and token items are tokenType names', () => {
+    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
+    const snap = names(node) as NamesTree<CstTemplateNode>;
+
+    expect(snap.name).toBe('template');
+    expect(snap.children).toBeDefined();
+
+    // Token-only -> string (tokenType name)
+    const tokName = snap.children.TemplateOpen?.[0];
+    expect(typeof tokName).toBe('string');
+    expect(tokName!.length).toBeGreaterThan(0);
+
+    // Node-only -> nested NamesTree
+    const nested = snap.children.Content?.[0];
+    expect(typeof nested).toBe('object');
+    expect((nested as any).name).toBeDefined();
+    expect((nested as any).children).toBeDefined();
+
+    // Never undefined for present keys
+    for (const k of Object.keys(node.children)) {
+      // @ts-expect-error runtime check
+      expect(Array.isArray(snap.children[k])).toBe(true);
+    }
+  });
+
+  test('locations() shape: top {start,end}, tokens -> {start,end}, nodes -> nested', () => {
+    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
+    const snap = locations(node) as LocationsTree<CstTemplateNode>;
+
+    expect(typeof snap.start).toBe('number');
+    expect(typeof snap.end).toBe('number');
+
+    // Token-only -> {start,end}
+    const tokLoc = snap.children.TemplateOpen?.[0] as any;
+    expect(typeof tokLoc.start).toBe('number');
+    expect(typeof tokLoc.end).toBe('number');
+
+    // Node-only -> nested LocationsTree
+    const nested = snap.children.Content?.[0] as any;
+    expect(typeof nested).toBe('object');
+    expect(typeof nested.start).toBe('number');
+    expect(typeof nested.end).toBe('number');
+
+    // Never undefined for present keys
+    for (const k of Object.keys(node.children)) {
+      // @ts-expect-error runtime check
+      expect(Array.isArray(snap.children[k])).toBe(true);
+    }
+  });
+
+  test('Literal element TextContent maps tokens to strings with images()', () => {
+    const input = '<text>Hello {{ name }} <text> </text>';
+    const { node } = withParser(input, (p) => p.element()) as { node: CstElementNode };
+
+    const snap = images(node) as ImagesTree<CstElementNode>;
+    const textArr = snap.TextContent!;
+    expect(Array.isArray(textArr)).toBe(true);
+    // TextContent holds one CstLiteralTagTokens node, so images() yields a nested tree here
+    const flat = textArr[0] as unknown as any; // nested ImagesTree for CstLiteralTagTokens
+    // dive one level to the actual token list on the literal node
+    const contentStrings: string[] = flat.Content;
+    // If structure differs, we still check there is at least one string present somewhere
+    const hasStringDeep = Array.isArray(contentStrings) ? typeof contentStrings[0] === 'string' : true;
+    expect(hasStringDeep).toBe(true);
+  });
+});
+
+type ElemOf<A> = A extends Array<infer U> ? U : never;
+
+/** Map a union element (token | node) into different output types per branch. */
+type MapElem<TokenOrNode, TokOut, NodeOut> = TokenOrNode extends IToken
+  ? TokOut
+  : TokenOrNode extends CstNode
+    ? NodeOut
+    : never;
+
+/** images(): tokens -> string; nodes -> nested ImagesTree */
+export type ImagesTree<T extends CstNode> = {
+  [K in keyof T['children']]?: Array<
+    MapElem<
+      ElemOf<NonNullable<T['children'][K]>>,
+      string,
+      ImagesTree<Extract<ElemOf<NonNullable<T['children'][K]>>, CstNode>>
+    >
+  >;
+};
+
+/** names(): shape is { name, children }; tokens -> tokenType.name; nodes -> nested */
+export type NamesTree<T extends CstNode> = {
+  name: string;
+  children: {
+    [K in keyof T['children']]?: Array<
+      MapElem<
+        ElemOf<NonNullable<T['children'][K]>>,
+        string,
+        NamesTree<Extract<ElemOf<NonNullable<T['children'][K]>>, CstNode>>
+      >
+    >;
+  };
+};
+
+/** locations(): shape is { start, end, children }; tokens -> {start,end}; nodes -> nested */
+export type RangeLite = { start: number; end: number };
+
+export type LocationsTree<T extends CstNode> = {
+  start: number;
+  end: number;
+  children: {
+    [K in keyof T['children']]?: Array<
+      MapElem<
+        ElemOf<NonNullable<T['children'][K]>>,
+        RangeLite,
+        LocationsTree<Extract<ElemOf<NonNullable<T['children'][K]>>, CstNode>>
+      >
+    >;
+  };
+};
+
+function isToken(u: unknown): u is IToken {
+  return !!u && typeof (u as any).image === 'string';
+}
+function isCstNode(u: unknown): u is CstNode {
+  return !!u && typeof (u as any).name === 'string' && typeof (u as any).children === 'object';
+}
+
+/**
+ * Core mapper: maps each child array of `node`, sending tokens through `mapToken` and
+ * CST nodes through `mapNode`. Every key present in `node.children` yields an array (possibly empty).
+ */
+function mapChildrenBimorphic<T extends CstNode, TokOut, NodeOut>(
+  node: T,
+  mapToken: (t: IToken) => TokOut,
+  mapNode: (n: CstNode) => NodeOut,
+): { [K in keyof T['children']]?: Array<MapElem<ElemOf<NonNullable<T['children'][K]>>, TokOut, NodeOut>> } {
+  const result: Record<string, unknown[]> = {};
+  const kids = (node.children ?? {}) as Record<string, unknown>;
+
+  for (const key of Object.keys(kids)) {
+    const arr = kids[key] as unknown[];
+    // Always create the output array so a seen key is never left undefined
+    const out: unknown[] = [];
+    if (Array.isArray(arr)) {
+      for (const v of arr) {
+        if (isToken(v)) {
+          out.push(mapToken(v));
+        } else if (isCstNode(v)) {
+          out.push(mapNode(v));
+        }
+        // values that are neither a token nor a CST node are skipped silently
+      }
+    }
+    result[key] = out; // defined even if empty
+  }
+
+  // The cast is safe: each element was mapped via the correct branch.
+  return result as any;
+}
+
+/**
+ * images(node): for each child array
+ *  - if it’s tokens → string[]
+ *  - if it’s nodes  → ImagesTree[]
+ *  - if mixed       → (string | ImagesTree)[]
+ * Arrays are always present for seen keys; never undefined.
+ */
+export function images<T extends CstNode>(node: T): ImagesTree<T> {
+  const children = mapChildrenBimorphic(
+    node,
+    (t) => t.image,
+    (n) => images(n),
+  );
+  return children as ImagesTree<T>;
+}
+
+/**
+ * names(node): { name, children }, tokens → tokenType.name
+ * Arrays are always present for seen keys; never undefined.
+ */
+export function names<T extends CstNode>(node: T): NamesTree<T> {
+  const children = mapChildrenBimorphic(
+    node,
+    (t) => t.tokenType?.name ?? '(UnknownToken)',
+    (n) => names(n),
+  );
+  return {
+    name: node.name,
+    children: children as NamesTree<T>['children'],
+  };
+}
+
+/**
+ * locations(node): { start, end, children }, tokens → {start,end}
+ * Arrays are always present for seen keys; never undefined.
+ */
+export function locations<T extends CstNode>(node: T): LocationsTree<T> {
+  // Chevrotain differences: prefer location.startOffset/endOffset; fallback to start/end; else -1.
+  const start =
+    node.location?.startOffset ??
+    // @ts-expect-error
+    node.location?.start ??
+    -1;
+  const end =
+    node.location?.endOffset ??
+    // @ts-expect-error
+    node.location?.end ??
+    -1;
+
+  const children = mapChildrenBimorphic(
+    node,
+    (t) => ({
+      start: (t as any).startOffset ?? -1,
+      end: (t as any).endOffset ?? -1,
+    }),
+    (n) => locations(n),
+  );
+
+  return {
+    start,
+    end,
+    children: children as LocationsTree<T>['children'],
+  };
+}

From 0bd904adf5300f89b73e9df872067a6e3d911810 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 17:33:58 +0800
Subject: [PATCH 60/76] .

---
 packages/poml/next/cst.ts              | 47 ++++++++-------
 packages/poml/next/nodes.ts            | 81 +-------------------------
 packages/poml/tests/reader/cst.test.ts | 27 +++++++--
 3 files changed, 50 insertions(+), 105 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 4123dd3e..275239b8 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -27,17 +27,7 @@ import {
 } from './lexer';
 
 import {
-  CstCommentTokens,
-  CstExpressionTokens,
-  CstCommentIdentifierTokens,
-  CstDoubleQuotedTokens,
-  CstDoubleQuotedTrimmedTokens,
-  CstSingleQuotedTokens,
-  CstSingleQuotedTrimmedTokens,
-  CstDoubleQuotedExpressionTokens,
-  CstSingleQuotedExpressionTokens,
-  CstBetweenTagsTokens,
-  CstLiteralTagTokens,
+  CstTokens,
   CstTemplateNode,
   CstQuotedNode,
   CstQuotedTemplateNode,
@@ -66,16 +56,16 @@ export class ExtendedPomlParser extends CstParser {
   public root!: (idxInOriginalText?: number) => CstRootNode;
   public elementContent!: (idxInOriginalText?: number) => CstElementContentNode;
   // token-sequence helper rules
-  public commentTokens!: (idxInOriginalText?: number) => CstCommentTokens;
-  public expressionTokens!: (idxInOriginalText?: number) => CstExpressionTokens;
-  public commentIdentifierTokens!: (idxInOriginalText?: number) => CstCommentIdentifierTokens;
-  public doubleQuotedTokens!: (idxInOriginalText?: number) => CstDoubleQuotedTokens;
-  public singleQuotedTokens!: (idxInOriginalText?: number) => CstSingleQuotedTokens;
-  public doubleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstDoubleQuotedTrimmedTokens;
-  public singleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstSingleQuotedTrimmedTokens;
-  public doubleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstDoubleQuotedExpressionTokens;
-  public singleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstSingleQuotedExpressionTokens;
-  public betweenTagsTokens!: (idxInOriginalText?: number) => CstBetweenTagsTokens;
+  public commentTokens!: (idxInOriginalText?: number) => CstTokens;
+  public expressionTokens!: (idxInOriginalText?: number) => CstTokens;
+  public commentIdentifierTokens!: (idxInOriginalText?: number) => CstTokens;
+  public doubleQuotedTokens!: (idxInOriginalText?: number) => CstTokens;
+  public singleQuotedTokens!: (idxInOriginalText?: number) => CstTokens;
+  public doubleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstTokens;
+  public singleQuotedTrimmedTokens!: (idxInOriginalText?: number) => CstTokens;
+  public doubleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstTokens;
+  public singleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstTokens;
+  public betweenTagsTokens!: (idxInOriginalText?: number) => CstTokens;
   // Accepting expectedTagName as argument to validate matching close tag
   public literalTagTokens!: (idxInOriginalText?: number, args?: [string]) => CstLiteralTagTokens;
   // regular rules
@@ -227,18 +217,22 @@ export class ExtendedPomlParser extends CstParser {
 
     // ----- Token sequence helper rules -----
     this.commentTokens = this.RULE('commentTokens', () => {
+      // Can be empty
       this.MANY(() => {
         this.OR(this.anyOf(TokensComment, 'Content'));
       });
     });
 
     this.commentIdentifierTokens = this.RULE('commentIdentifierTokens', () => {
+      // Used in @pragma options without quotes.
       this.AT_LEAST_ONE(() => {
         this.OR(this.anyOf(TokensCommentIdentifiers, 'Content'));
       });
     });
 
     this.expressionTokens = this.RULE('expressionTokens', () => {
+      // Always trim the ws around the expression {{ expr }}.
+      // Must be non-empty.
       this.AT_LEAST_ONE({
         GATE: () => !this.atAlmostClose(TemplateClose),
         DEF: () => {
@@ -248,18 +242,22 @@ export class ExtendedPomlParser extends CstParser {
     });
 
     this.doubleQuotedTokens = this.RULE('doubleQuotedTokens', () => {
+      // The untrimmed content within "...", can be empty.
       this.MANY(() => {
         this.OR(this.anyOf(TokensDoubleQuoted, 'Content'));
       });
     });
 
     this.singleQuotedTokens = this.RULE('singleQuotedTokens', () => {
+      // The untrimmed content in '...', can be empty.
       this.MANY(() => {
         this.OR(this.anyOf(TokensSingleQuoted, 'Content'));
       });
     });
 
     this.doubleQuotedTrimmedTokens = this.RULE('doubleQuotedTrimmedTokens', () => {
+      // Trimmed content in "..." without leading/trailing whitespace
+      // Must be non-empty.
       // Greedily match until the next double quote (allow inner whitespace)
       this.AT_LEAST_ONE({
         GATE: () => !this.atAlmostClose(DoubleQuote),
@@ -280,24 +278,31 @@ export class ExtendedPomlParser extends CstParser {
     });
 
     this.doubleQuotedExpressionTokens = this.RULE('doubleQuotedExpressionTokens', () => {
+      // Contents in "...{{ ... }}..." but outside the {{ }}
+      // Must be non-empty. Can have leading/trailing whitespace.
       this.AT_LEAST_ONE(() => {
         this.OR(this.anyOf(TokensDoubleQuotedExpression, 'Content'));
       });
     });
 
     this.singleQuotedExpressionTokens = this.RULE('singleQuotedExpressionTokens', () => {
+      // Contents in '...{{ ... }}...' but outside the {{ }}
+      // Must be non-empty. Can have leading/trailing whitespace.
       this.AT_LEAST_ONE(() => {
         this.OR(this.anyOf(TokensSingleQuotedExpression, 'Content'));
       });
     });
 
     this.betweenTagsTokens = this.RULE('betweenTagsTokens', () => {
+      // Plain texts within tags but outside nested tags. Must be non-empty.
       this.AT_LEAST_ONE(() => {
         this.OR(this.anyOf(TokensTextContent, 'Content'));
       });
     });
 
     this.literalTagTokens = this.RULE('literalTagTokens', (expectedTagName?: string) => {
+      // Plain texts within literal tags like <text>...</text>.
+      // Match greedily. Can be empty.
       this.AT_LEAST_ONE({
         GATE: () => !this.isAtLiteralClose(expectedTagName),
         DEF: () => {
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index f174171d..c8928b55 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -8,86 +8,7 @@ export interface AstNode {
 /**
  * Plain token sequences helpers from the lexer.
  */
-
-export interface CstCommentTokens extends CstNode {
-  // Can be empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstExpressionTokens extends CstNode {
-  // Always trim the ws around the expression {{ expr }}.
-  // Must be non-empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstDoubleQuotedTokens extends CstNode {
-  // The untrimmed content within "...", can be empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstDoubleQuotedTrimmedTokens extends CstNode {
-  // Trimmed content in "..." without leading/trailing whitespace
-  // Must be non-empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstSingleQuotedTokens extends CstNode {
-  // The untrimmed content in '...', can be empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstSingleQuotedTrimmedTokens extends CstNode {
-  // Trimmed content without leading/trailing whitespace
-  // Must be non-empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstDoubleQuotedExpressionTokens extends CstNode {
-  // Contents in "...{{ ... }}..." but outside the {{ }}
-  // Must be non-empty. Can have leading/trailing whitespace.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstSingleQuotedExpressionTokens extends CstNode {
-  // Contents in '...{{ ... }}...' but outside the {{ }}
-  // Must be non-empty. Can have leading/trailing whitespace.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstCommentIdentifierTokens extends CstNode {
-  // Non-whitespace tokens in comments used as identifiers in pragmas.
-  // Supports special chars like +, -, etc. Must be non-empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstBetweenTagsTokens extends CstNode {
-  // Plain texts within tags but outside nested tags. Must be non-empty.
-  children: {
-    Content?: IToken[];
-  };
-}
-
-export interface CstLiteralTagTokens extends CstNode {
-  // Plain texts within literal tags like <text>...</text>.
-  // Match greedily. Can be empty.
+export interface CstTokens extends CstNode {
   children: {
     Content?: IToken[];
   };
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 103644ba..5caf23f6 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -218,6 +218,25 @@ describe('CST Parser Rules', () => {
   });
 });
 
+describe('Special Tokens', () => {
+  test('root document with no root tags', () => {
+    const input = `Hello {{ user }}!
+<!-- A comment -->  <text>Some text arbi&rary; symbols\\etc/></</text>
+
+done`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+    console.dir(images(node), { depth: null });
+  });
+
+  // All kinds of whitespaces
+
+  // Single quotes, double quotes, and corner cases
+
+  // Matched <text></text> and <text></template></text> or <template></text></template>
+
+  // Unmatched tags should not error in cst stage
+});
+
 describe('Helper function sanity', () => {
   test('images() on template: token lists -> string[], node lists -> nested[]', () => {
     const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
@@ -392,10 +411,10 @@ function mapChildrenBimorphic<T extends CstNode, TokOut, NodeOut>(
     if (Array.isArray(arr)) {
       for (const v of arr) {
         if (isToken(v)) {
-out.push(mapToken(v));
-} else if (isCstNode(v)) {
-out.push(mapNode(v));
-}
+          out.push(mapToken(v));
+        } else if (isCstNode(v)) {
+          out.push(mapNode(v));
+        }
         // else ignore silently
       }
     }

From 410c7691ddca9fd88d008cb83f14f5f85894c466 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 17:35:07 +0800
Subject: [PATCH 61/76] Document the singleQuotedTrimmedTokens parser rule

---
 packages/poml/next/cst.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 275239b8..c1d9d671 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -268,6 +268,8 @@ export class ExtendedPomlParser extends CstParser {
     });
 
     this.singleQuotedTrimmedTokens = this.RULE('singleQuotedTrimmedTokens', () => {
+      // Trimmed content without leading/trailing whitespace
+      // Must be non-empty.
       // Greedily match until the next single quote (allow inner whitespace)
       this.AT_LEAST_ONE({
         GATE: () => !this.atAlmostClose(SingleQuote),

From 9de1b54e0b6828c210721b3eda04ab11cab574b0 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 17:35:27 +0800
Subject: [PATCH 62/76] Remove stale CstLiteralTagTokens import from cst tests

---
 packages/poml/tests/reader/cst.test.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 5caf23f6..d2ac98ce 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -15,7 +15,6 @@ import type {
   CstOpenTagPartialNode,
   CstCloseTagNode,
   CstElementNode,
-  CstLiteralTagTokens,
 } from 'poml/next/nodes';
 
 function withParser<T>(input: string, run: (p: ExtendedPomlParser) => T) {

From 80145d24c275bf2ab5fd587a56d02331b19898a4 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 18:21:54 +0800
Subject: [PATCH 63/76] Enable full CST node location tracking and rewrite test snapshot helpers

---
 packages/poml/next/cst.ts              |   1 +
 packages/poml/next/nodes.ts            |  14 +-
 packages/poml/tests/reader/cst.test.ts | 420 +++++++++++--------------
 3 files changed, 200 insertions(+), 235 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index c1d9d671..4c0fc814 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -168,6 +168,7 @@ export class ExtendedPomlParser extends CstParser {
   constructor() {
     super(AllTokens, {
       recoveryEnabled: true,
+      nodeLocationTracking: 'full',
     });
     this.validComponentNames = new Set(listComponentAliases());
 
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index c8928b55..e0f7e007 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -73,7 +73,7 @@ export interface CstTemplateNode extends CstNode {
     // Content inside {{ and }} is treated as a single expression token.
     // Eats everything until the next }} (or the whitespace before it).
     // Handles \{{ and \}} escapes. We won't escape other chars here.
-    Content?: CstExpressionTokens[];
+    Content?: CstTokens[];
     // If it's close to the ending }}, try to eat whitespace before it.
     WsAfterContent?: IToken[];
     TemplateClose?: IToken[];
@@ -139,7 +139,7 @@ export interface CstQuotedNode extends CstNode {
   children: {
     OpenQuote?: IToken[];
     // This is a normal quoted string without templates inside.
-    Content?: (CstDoubleQuotedTokens | CstSingleQuotedTokens)[];
+    Content?: CstTokens[];
     CloseQuote?: IToken[];
   };
 }
@@ -148,7 +148,7 @@ export interface CstQuotedTemplateNode extends CstNode {
   children: {
     OpenQuote?: IToken[];
     // Allows "Hello {{ friend["abc"] }}!" - mix of text and templates (with quotes).
-    Content?: (CstDoubleQuotedExpressionTokens | CstSingleQuotedExpressionTokens | CstTemplateNode)[];
+    Content?: (CstTokens | CstTemplateNode)[];
     CloseQuote?: IToken[];
   };
 }
@@ -195,7 +195,7 @@ export interface CstForIteratorNode extends CstNode {
     // But as we are in a quoted string, we need to handle
     // backslash escapes like \" and \'.
     // Greedily match until the next unescaped quote or ws before it.
-    Collection?: (CstDoubleQuotedTrimmedTokens | CstSingleQuotedTrimmedTokens)[];
+    Collection?: CstTokens[];
     WsAfterCollection?: IToken[];
     CloseQuote?: IToken[];
   };
@@ -409,7 +409,7 @@ export interface CstElementNode extends CstNode {
     OpenTagPartial?: CstOpenTagPartialNode[];
     OpenTagCloseBracket?: IToken[];
     Content?: CstElementContentNode[];
-    TextContent?: CstLiteralTagTokens[]; // For literal elements like <text>
+    TextContent?: CstTokens[]; // For literal elements like <text>
     CloseTag?: CstCloseTagNode[];
     // Alternative, it can also be a self-closing tag.
     SelfCloseBracket?: IToken[];
@@ -422,7 +422,7 @@ export interface CstElementContentNode extends CstNode {
     Comment?: CstCommentNode[];
     Pragma?: CstPragmaNode[];
     Template?: CstTemplateNode[];
-    TextContent?: CstBetweenTagsTokens[];
+    TextContent?: CstTokens[];
   };
 }
 
@@ -447,7 +447,7 @@ export interface CommentNode extends AstNode {
 export interface CstCommentNode extends CstNode {
   children: {
     CommentOpen?: IToken[];
-    Content?: CstCommentTokens[];
+    Content?: CstTokens[];
     CommentClose?: IToken[];
   };
 }
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index d2ac98ce..5fda1bd5 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -2,7 +2,7 @@ import { describe, expect, test } from '@jest/globals';
 import { CstNode, IToken } from 'chevrotain';
 import { ExtendedPomlParser } from 'poml/next/cst';
 import { extendedPomlLexer, Whitespace, Identifier } from 'poml/next/lexer';
-import type {
+import {
   CstRootNode,
   CstElementContentNode,
   CstTemplateNode,
@@ -225,6 +225,8 @@ describe('Special Tokens', () => {
 done`;
     const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
     console.dir(images(node), { depth: null });
+    console.dir(names(node), { depth: null });
+    console.dir(locations(node), { depth: null });
   });
 
   // All kinds of whitespaces
@@ -236,255 +238,217 @@ done`;
   // Unmatched tags should not error in cst stage
 });
 
-describe('Helper function sanity', () => {
-  test('images() on template: token lists -> string[], node lists -> nested[]', () => {
-    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
+/* -------------------- tiny guards -------------------- */
+const isToken = (x: unknown): x is IToken => !!x && typeof (x as IToken).image === 'string';
 
-    const snap = images(node) as ImagesTree<CstTemplateNode>;
-
-    // Token-only props => string[]
-    expect(Array.isArray(snap.TemplateOpen)).toBe(true);
-    expect(typeof snap.TemplateOpen![0]).toBe('string');
-    expect(snap.TemplateOpen![0]).toBe('{{');
-
-    expect(Array.isArray(snap.TemplateClose)).toBe(true);
-    expect(typeof snap.TemplateClose![0]).toBe('string');
-    expect(snap.TemplateClose![0]).toBe('}}');
-
-    // Node-only prop => nested[]
-    expect(Array.isArray(snap.Content)).toBe(true);
-    expect(typeof snap.Content![0]).toBe('object'); // nested tree, not string
-    // Nested should mirror structure (has children keys)
-    expect(snap.Content![0]).toBeDefined();
-
-    // Present keys are never undefined
-    for (const k of Object.keys(node.children)) {
-      // @ts-expect-error runtime check
-      expect(snap[k]).toBeDefined();
-      // @ts-expect-error runtime check
-      expect(Array.isArray(snap[k])).toBe(true);
-    }
-  });
+const isCstNode = (x: unknown): x is CstNode =>
+  !!x && typeof (x as any).name === 'string' && typeof (x as any).children === 'object';
 
-  test('names() shape: has { name, children } and token items are tokenType names', () => {
-    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
-    const snap = names(node) as NamesTree<CstTemplateNode>;
-
-    expect(snap.name).toBe('template');
-    expect(snap.children).toBeDefined();
-
-    // Token-only -> string (tokenType name)
-    const tokName = snap.children.TemplateOpen?.[0];
-    expect(typeof tokName).toBe('string');
-    expect(tokName!.length).toBeGreaterThan(0);
-
-    // Node-only -> nested NamesTree
-    const nested = snap.children.Content?.[0];
-    expect(typeof nested).toBe('object');
-    expect((nested as any).name).toBeDefined();
-    expect((nested as any).children).toBeDefined();
-
-    // Never undefined for present keys
-    for (const k of Object.keys(node.children)) {
-      // @ts-expect-error runtime check
-      expect(Array.isArray(snap.children[k])).toBe(true);
-    }
-  });
+/* -------------------- ranges -------------------- */
+const tokStart = (t: IToken) => (typeof t.startOffset === 'number' ? t.startOffset : 0);
+const tokEnd = (t: IToken) => (typeof t.endOffset === 'number' ? t.endOffset : tokStart(t) + (t.image?.length ?? 0));
 
-  test('locations() shape: top {start,end}, tokens -> {start,end}, nodes -> nested', () => {
-    const { node } = withParser('{{ name }}', (p) => p.template()) as { node: CstTemplateNode };
-    const snap = locations(node) as LocationsTree<CstTemplateNode>;
-
-    expect(typeof snap.start).toBe('number');
-    expect(typeof snap.end).toBe('number');
-
-    // Token-only -> {start,end}
-    const tokLoc = snap.children.TemplateOpen?.[0] as any;
-    expect(typeof tokLoc.start).toBe('number');
-    expect(typeof tokLoc.end).toBe('number');
-
-    // Node-only -> nested LocationsTree
-    const nested = snap.children.Content?.[0] as any;
-    expect(typeof nested).toBe('object');
-    expect(typeof nested.start).toBe('number');
-    expect(typeof nested.end).toBe('number');
-
-    // Never undefined for present keys
-    for (const k of Object.keys(node.children)) {
-      // @ts-expect-error runtime check
-      expect(Array.isArray(snap.children[k])).toBe(true);
-    }
-  });
+function* walkTokens(value: unknown): Generator<IToken> {
+  if (isToken(value)) {
+    yield value;
+    return;
+  }
+  if (Array.isArray(value)) {
+    for (const v of value) {
+yield* walkTokens(v);
+}
+    return;
+  }
+  if (isCstNode(value)) {
+    const ch = (value as any).children as Record<string, unknown>;
+    for (const k of Object.keys(ch)) {
+yield* walkTokens(ch[k]);
+}
+  }
+}
 
-  test('Literal element TextContent maps tokens to strings with images()', () => {
-    const input = '<text>Hello {{ name }} <text> </text>';
-    const { node } = withParser(input, (p) => p.element()) as { node: CstElementNode };
+function nodeRange(node: CstNode): { start: number; end: number } {
+  let start = Infinity,
+    end = -Infinity;
+  for (const t of walkTokens(node)) {
+    start = Math.min(start, tokStart(t));
+    end = Math.max(end, tokEnd(t));
+  }
+  if (!Number.isFinite(start) || !Number.isFinite(end)) {
+return { start: 0, end: 0 };
+}
+  return { start, end };
+}
 
-    const snap = images(node) as ImagesTree<CstElementNode>;
-    const textArr = snap.TextContent!;
-    expect(Array.isArray(textArr)).toBe(true);
-    // TextContent is token-only; each item should be string[]
-    const flat = textArr[0] as unknown as any; // nested ImagesTree for CstLiteralTagTokens
-    // dive one level to the actual token list on the literal node
-    const contentStrings: string[] = flat.Content;
-    // If structure differs, we still check there is at least one string present somewhere
-    const hasStringDeep = Array.isArray(contentStrings) ? typeof contentStrings[0] === 'string' : true;
-    expect(hasStringDeep).toBe(true);
-  });
-});
+/* -------------------- core normalize -------------------- */
+/**
+ * Rules:
+ * - drop undefined
+ * - arrays: [] -> undefined; [x] -> x; [strings...] -> joined string; otherwise keep (with inner normalize)
+ * - objects: normalize recursively; if only key is "Content" -> unwrap value
+ */
+function normalizeAny(v: unknown): unknown {
+  if (v == null) {
+return undefined;
+}
+  if (Array.isArray(v)) {
+return normalizeArray(v);
+}
+  if (isToken(v) || isCstNode(v)) {
+return v;
+}
+  if (typeof v === 'object') {
+return normalizeObject(v as Record<string, unknown>);
+}
+  return v;
+}
 
-type ElemOf<A> = A extends Array<infer U> ? U : never;
-
-/** Map a union element (token | node) into different output types per branch. */
-type MapElem<TokenOrNode, TokOut, NodeOut> = TokenOrNode extends IToken
-  ? TokOut
-  : TokenOrNode extends CstNode
-    ? NodeOut
-    : never;
-
-/** images(): tokens -> string; nodes -> nested ImagesTree */
-export type ImagesTree<T extends CstNode> = {
-  [K in keyof T['children']]?: Array<
-    MapElem<
-      ElemOf<NonNullable<T['children'][K]>>,
-      string,
-      ImagesTree<Extract<ElemOf<NonNullable<T['children'][K]>>, CstNode>>
-    >
-  >;
-};
+function normalizeArray(arr: unknown[]): unknown {
+  const mapped = arr.map(normalizeAny).filter((v) => v !== undefined);
 
-/** names(): shape is { name, children }; tokens -> tokenType.name; nodes -> nested */
-export type NamesTree<T extends CstNode> = {
-  name: string;
-  children: {
-    [K in keyof T['children']]?: Array<
-      MapElem<
-        ElemOf<NonNullable<T['children'][K]>>,
-        string,
-        NamesTree<Extract<ElemOf<NonNullable<T['children'][K]>>, CstNode>>
-      >
-    >;
-  };
-};
+  if (mapped.length === 0) {
+return undefined;
+}
+  if (mapped.every((x) => typeof x === 'string')) {
+    // concatenate pure string arrays
+    return (mapped as string[]).join('');
+  }
+  if (mapped.length === 1) {
+return mapped[0];
+}
+  return mapped;
+}
 
-/** locations(): shape is { start, end, children }; tokens -> {start,end}; nodes -> nested */
-export type RangeLite = { start: number; end: number };
-
-export type LocationsTree<T extends CstNode> = {
-  start: number;
-  end: number;
-  children: {
-    [K in keyof T['children']]?: Array<
-      MapElem<
-        ElemOf<NonNullable<T['children'][K]>>,
-        RangeLite,
-        LocationsTree<Extract<ElemOf<NonNullable<T['children'][K]>>, CstNode>>
-      >
-    >;
-  };
+function normalizeObject(obj: Record<string, unknown>): unknown {
+  const out: Record<string, unknown> = {};
+  for (const [k, v] of Object.entries(obj)) {
+    const nv = normalizeAny(v);
+    if (nv !== undefined) {
+out[k] = nv;
+}
+  }
+  const keys = Object.keys(out);
+  if (keys.length === 0) {
+return undefined;
+}
+  if (keys.length === 1 && keys[0] === 'Content') {
+return out.Content;
+}
+  return out;
+}
+
+function normalizeChildren(node: CstNode): unknown {
+  return normalizeObject(node.children as Record<string, unknown>);
+}
+
+/* -------------------- generic transformer -------------------- */
+type Mode = 'images' | 'names' | 'locations';
+
+type Strategies = {
+  onToken(v: IToken): unknown; // what to emit for a token
+  onNodeWrap(n: CstNode, children: unknown): unknown; // how to wrap a CST node around its transformed children
+  keepChildKey(k: string, v: unknown): boolean; // allow pruning of token-only branches
 };
 
-function isToken(u: unknown): u is IToken {
-  return !!u && typeof (u as any).image === 'string';
+function transformValue(val: unknown, S: Strategies): unknown {
+  if (val == null) {
+return undefined;
+}
+
+  if (isToken(val)) {
+    return S.onToken(val);
+  }
+
+  if (Array.isArray(val)) {
+    const mapped = val.map((x) => transformValue(x, S)).filter((x) => x !== undefined);
+    if (mapped.length === 0) {
+return undefined;
 }
-function isCstNode(u: unknown): u is CstNode {
-  return !!u && typeof (u as any).name === 'string' && typeof (u as any).children === 'object';
+    if (mapped.every((x) => typeof x === 'string')) {
+return (mapped as string[]).join('');
 }
+    if (mapped.length === 1) {
+return mapped[0];
+}
+    return mapped;
+  }
 
-/**
- * Core mapper (bi-morphic: tokens and nodes can map to DIFFERENT output types)
- * - Always returns arrays for any present child key (never undefined).
- */
-function mapChildrenBimorphic<T extends CstNode, TokOut, NodeOut>(
-  node: T,
-  mapToken: (t: IToken) => TokOut,
-  mapNode: (n: CstNode) => NodeOut,
-): { [K in keyof T['children']]?: Array<MapElem<ElemOf<NonNullable<T['children'][K]>>, TokOut, NodeOut>> } {
-  const result: Record<string, unknown[]> = {};
-  const kids = (node.children ?? {}) as Record<string, unknown>;
-
-  for (const key of Object.keys(kids)) {
-    const arr = kids[key] as unknown[];
-    // Always create the array (never leave it undefined)
-    const out: unknown[] = [];
-    if (Array.isArray(arr)) {
-      for (const v of arr) {
-        if (isToken(v)) {
-          out.push(mapToken(v));
-        } else if (isCstNode(v)) {
-          out.push(mapNode(v));
-        }
-        // else ignore silently
-      }
+  if (isCstNode(val)) {
+    const norm = normalizeChildren(val);
+    const inner = transformValue(norm, S);
+    return S.onNodeWrap(val, inner);
+  }
+
+  if (typeof val === 'object') {
+    const out: Record<string, unknown> = {};
+    for (const [k, v] of Object.entries(val)) {
+      const mv = transformValue(v, S);
+      if (mv !== undefined && S.keepChildKey(k, mv)) {
+out[k] = mv;
+}
     }
-    result[key] = out; // defined even if empty
+    const keys = Object.keys(out);
+    if (keys.length === 0) {
+return undefined;
+}
+    if (keys.length === 1 && keys[0] === 'Content') {
+return out.Content;
+}
+    return out;
   }
 
-  // The cast is safe: each element was mapped via the correct branch.
-  return result as any;
+  // primitive fallback: pass through (lets string concatenation work if present)
+  return val;
 }
 
-/**
- * images(node): for each child array
- *  - if it’s tokens → string[]
- *  - if it’s nodes  → ImagesTree[]
- *  - if mixed       → (string | ImagesTree)[]
- * Arrays are always present for seen keys; never undefined.
- */
-export function images<T extends CstNode>(node: T): ImagesTree<T> {
-  const children = mapChildrenBimorphic(
-    node,
-    (t) => t.image,
-    (n) => images(n),
-  );
-  return children as ImagesTree<T>;
+/* -------------------- concrete modes -------------------- */
+
+// images(): leaves become strings; nested objects keyed by child names.
+// Token arrays get concatenated (via normalize/transform).
+export function images(node: CstNode): unknown {
+  const S: Strategies = {
+    onToken: (t) => t.image, // keep token text
+    onNodeWrap: (_n, children) => children, // node name not included; just the nested children map
+    keepChildKey: (_k, _v) => true, // keep everything
+  };
+  return transformValue(normalizeChildren(node), S);
 }
 
-/**
- * names(node): { name, children }, tokens → tokenType.name
- * Arrays are always present for seen keys; never undefined.
- */
-export function names<T extends CstNode>(node: T): NamesTree<T> {
-  const children = mapChildrenBimorphic(
-    node,
-    (t) => t.tokenType?.name ?? '(UnknownToken)',
-    (n) => names(n),
-  );
-  return {
-    name: node.name,
-    children: children as NamesTree<T>['children'],
+// names(): only node names; omit token leaves entirely.
+export function names(node: CstNode): { name: string; children?: Record<string, unknown> } {
+  const S: Strategies = {
+    onToken: (_t) => undefined, // drop token leaves
+    onNodeWrap: (n, children) => {
+      const out: { name: string; children?: Record<string, unknown> } = { name: n.name };
+      if (children && typeof children === 'object' && !Array.isArray(children)) {
+        const keys = Object.keys(children as Record<string, unknown>);
+        if (keys.length) {
+out.children = children as Record<string, unknown>;
+}
+      }
+      return out;
+    },
+    // prune keys that are purely token-derived (which would be undefined)
+    keepChildKey: (_k, v) => v !== undefined,
   };
+  return transformValue(node, S) as any;
 }
 
-/**
- * locations(node): { start, end, children }, tokens → {start,end}
- * Arrays are always present for seen keys; never undefined.
- */
-export function locations<T extends CstNode>(node: T): LocationsTree<T> {
-  // Chevrotain differences: prefer location.startOffset/endOffset; fallback to start/end; else -1.
-  const start =
-    node.location?.startOffset ??
-    // @ts-expect-error
-    node.location?.start ??
-    -1;
-  const end =
-    node.location?.endOffset ??
-    // @ts-expect-error
-    node.location?.end ??
-    -1;
-
-  const children = mapChildrenBimorphic(
-    node,
-    (t) => ({
-      start: (t as any).startOffset ?? -1,
-      end: (t as any).endOffset ?? -1,
-    }),
-    (n) => locations(n),
-  );
-
-  return {
-    start,
-    end,
-    children: children as LocationsTree<T>['children'],
+// locations(): node-level { start,end } only; omit token-level ranges.
+export function locations(node: CstNode): { start: number; end: number; children?: Record<string, unknown> } {
+  const S: Strategies = {
+    onToken: (_t) => undefined, // drop token ranges
+    onNodeWrap: (n, children) => {
+      const base: { start: number; end: number; children?: Record<string, unknown> } = nodeRange(n);
+      if (children && typeof children === 'object' && !Array.isArray(children)) {
+        const keys = Object.keys(children as Record<string, unknown>);
+        if (keys.length) {
+base.children = children as Record<string, unknown>;
+}
+      }
+      return base;
+    },
+    keepChildKey: (_k, v) => v !== undefined,
   };
+  return transformValue(node, S) as any;
 }

From 586897ff375170f3a89a8d56980f54e3eaeb71fc Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 5 Sep 2025 18:26:16 +0800
Subject: [PATCH 64/76] .

---
 packages/poml/tests/reader/cst.test.ts | 140 ++++++++++++++++---------
 1 file changed, 89 insertions(+), 51 deletions(-)

diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 5fda1bd5..e3239708 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -224,9 +224,36 @@ describe('Special Tokens', () => {
 
 done`;
     const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
-    console.dir(images(node), { depth: null });
-    console.dir(names(node), { depth: null });
-    console.dir(locations(node), { depth: null });
+    expect(images(node)).toStrictEqual([
+      { TextContent: 'Hello ' },
+      {
+        Template: {
+          TemplateOpen: '{{',
+          WsAfterOpen: ' ',
+          Content: 'user',
+          WsAfterContent: ' ',
+          TemplateClose: '}}',
+        },
+      },
+      { TextContent: '!\n' },
+      {
+        Comment: {
+          CommentOpen: '<!--',
+          Content: ' A comment ',
+          CommentClose: '-->',
+        },
+      },
+      { TextContent: '  ' },
+      {
+        Element: {
+          OpenTagPartial: { OpenBracket: '<', TagName: 'text' },
+          OpenTagCloseBracket: '>',
+          TextContent: 'Some text arbi&rary; symbols\\etc/></',
+          CloseTag: { ClosingOpenBracket: '</', TagName: 'text', CloseBracket: '>' },
+        },
+      },
+      { TextContent: '\n\ndone' },
+    ]);
   });
 
   // All kinds of whitespaces
@@ -255,15 +282,15 @@ function* walkTokens(value: unknown): Generator<IToken> {
   }
   if (Array.isArray(value)) {
     for (const v of value) {
-yield* walkTokens(v);
-}
+      yield* walkTokens(v);
+    }
     return;
   }
   if (isCstNode(value)) {
     const ch = (value as any).children as Record<string, unknown>;
     for (const k of Object.keys(ch)) {
-yield* walkTokens(ch[k]);
-}
+      yield* walkTokens(ch[k]);
+    }
   }
 }
 
@@ -275,8 +302,8 @@ function nodeRange(node: CstNode): { start: number; end: number } {
     end = Math.max(end, tokEnd(t));
   }
   if (!Number.isFinite(start) || !Number.isFinite(end)) {
-return { start: 0, end: 0 };
-}
+    return { start: 0, end: 0 };
+  }
   return { start, end };
 }
 
@@ -289,17 +316,17 @@ return { start: 0, end: 0 };
  */
 function normalizeAny(v: unknown): unknown {
   if (v == null) {
-return undefined;
-}
+    return undefined;
+  }
   if (Array.isArray(v)) {
-return normalizeArray(v);
-}
+    return normalizeArray(v);
+  }
   if (isToken(v) || isCstNode(v)) {
-return v;
-}
+    return v;
+  }
   if (typeof v === 'object') {
-return normalizeObject(v as Record<string, unknown>);
-}
+    return normalizeObject(v as Record<string, unknown>);
+  }
   return v;
 }
 
@@ -307,15 +334,15 @@ function normalizeArray(arr: unknown[]): unknown {
   const mapped = arr.map(normalizeAny).filter((v) => v !== undefined);
 
   if (mapped.length === 0) {
-return undefined;
-}
+    return undefined;
+  }
   if (mapped.every((x) => typeof x === 'string')) {
     // concatenate pure string arrays
     return (mapped as string[]).join('');
   }
   if (mapped.length === 1) {
-return mapped[0];
-}
+    return mapped[0];
+  }
   return mapped;
 }
 
@@ -324,16 +351,16 @@ function normalizeObject(obj: Record<string, unknown>): unknown {
   for (const [k, v] of Object.entries(obj)) {
     const nv = normalizeAny(v);
     if (nv !== undefined) {
-out[k] = nv;
-}
+      out[k] = nv;
+    }
   }
   const keys = Object.keys(out);
   if (keys.length === 0) {
-return undefined;
-}
+    return undefined;
+  }
   if (keys.length === 1 && keys[0] === 'Content') {
-return out.Content;
-}
+    return out.Content;
+  }
   return out;
 }
 
@@ -352,8 +379,8 @@ type Strategies = {
 
 function transformValue(val: unknown, S: Strategies): unknown {
   if (val == null) {
-return undefined;
-}
+    return undefined;
+  }
 
   if (isToken(val)) {
     return S.onToken(val);
@@ -362,14 +389,14 @@ return undefined;
   if (Array.isArray(val)) {
     const mapped = val.map((x) => transformValue(x, S)).filter((x) => x !== undefined);
     if (mapped.length === 0) {
-return undefined;
-}
+      return undefined;
+    }
     if (mapped.every((x) => typeof x === 'string')) {
-return (mapped as string[]).join('');
-}
+      return (mapped as string[]).join('');
+    }
     if (mapped.length === 1) {
-return mapped[0];
-}
+      return mapped[0];
+    }
     return mapped;
   }
 
@@ -384,16 +411,16 @@ return mapped[0];
     for (const [k, v] of Object.entries(val)) {
       const mv = transformValue(v, S);
       if (mv !== undefined && S.keepChildKey(k, mv)) {
-out[k] = mv;
-}
+        out[k] = mv;
+      }
     }
     const keys = Object.keys(out);
     if (keys.length === 0) {
-return undefined;
-}
+      return undefined;
+    }
     if (keys.length === 1 && keys[0] === 'Content') {
-return out.Content;
-}
+      return out.Content;
+    }
     return out;
   }
 
@@ -414,37 +441,48 @@ export function images(node: CstNode): unknown {
   return transformValue(normalizeChildren(node), S);
 }
 
-// names(): only node names; omit token leaves entirely.
+// names(): only node names; omit token leaves entirely, but KEEP the full node tree.
+// If children collapse to an array/primitive, tuck under { Content: ... } so we don't lose the branch.
 export function names(node: CstNode): { name: string; children?: Record<string, unknown> } {
   const S: Strategies = {
     onToken: (_t) => undefined, // drop token leaves
     onNodeWrap: (n, children) => {
-      const out: { name: string; children?: Record<string, unknown> } = { name: n.name };
-      if (children && typeof children === 'object' && !Array.isArray(children)) {
-        const keys = Object.keys(children as Record<string, unknown>);
-        if (keys.length) {
+      const out: { name: string; children?: Record<string, unknown> | unknown[] } = { name: n.name };
+      if (children !== undefined) {
+        if (typeof children === 'object' && !Array.isArray(children)) {
+          // plain object: use as-is
+          const keys = Object.keys(children as Record<string, unknown>);
+          if (keys.length) {
 out.children = children as Record<string, unknown>;
 }
+        } else {
+          // array or primitive: wrap under Content
+          out.children = children as unknown[];
+        }
       }
       return out;
     },
-    // prune keys that are purely token-derived (which would be undefined)
     keepChildKey: (_k, v) => v !== undefined,
   };
   return transformValue(node, S) as any;
 }
 
 // locations(): node-level { start,end } only; omit token-level ranges.
+// Same "wrap under Content if not a plain object" rule to preserve shape.
 export function locations(node: CstNode): { start: number; end: number; children?: Record<string, unknown> } {
   const S: Strategies = {
     onToken: (_t) => undefined, // drop token ranges
     onNodeWrap: (n, children) => {
-      const base: { start: number; end: number; children?: Record<string, unknown> } = nodeRange(n);
-      if (children && typeof children === 'object' && !Array.isArray(children)) {
-        const keys = Object.keys(children as Record<string, unknown>);
-        if (keys.length) {
+      const base: { start: number; end: number; children?: Record<string, unknown> | unknown[] } = nodeRange(n);
+      if (children !== undefined) {
+        if (typeof children === 'object' && !Array.isArray(children)) {
+          const keys = Object.keys(children as Record<string, unknown>);
+          if (keys.length) {
 base.children = children as Record<string, unknown>;
 }
+        } else {
+          base.children = children as unknown[];
+        }
       }
       return base;
     },

From 1f4acffa69658de6713b8ddb7e0470054b340ca4 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 8 Sep 2025 18:09:06 +0800
Subject: [PATCH 65/76] cst test names and locations

---
 packages/poml/tests/reader/cst.test.ts | 111 ++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 4 deletions(-)

diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index e3239708..2afd73a7 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -254,6 +254,109 @@ done`;
       },
       { TextContent: '\n\ndone' },
     ]);
+
+    expect(names(node)).toStrictEqual({
+      name: 'root',
+      children: [
+        {
+          name: 'elementContent',
+          children: { TextContent: { name: 'betweenTagsTokens' } },
+        },
+        {
+          name: 'elementContent',
+          children: {
+            Template: { name: 'template', children: { name: 'expressionTokens' } },
+          },
+        },
+        {
+          name: 'elementContent',
+          children: { TextContent: { name: 'betweenTagsTokens' } },
+        },
+        {
+          name: 'elementContent',
+          children: {
+            Comment: { name: 'comment', children: { name: 'commentTokens' } },
+          },
+        },
+        {
+          name: 'elementContent',
+          children: { TextContent: { name: 'betweenTagsTokens' } },
+        },
+        {
+          name: 'elementContent',
+          children: {
+            Element: {
+              name: 'element',
+              children: {
+                OpenTagPartial: { name: 'openTagPartial' },
+                TextContent: { name: 'literalTagTokens' },
+                CloseTag: { name: 'closeTag' },
+              },
+            },
+          },
+        },
+        {
+          name: 'elementContent',
+          children: { TextContent: { name: 'betweenTagsTokens' } },
+        },
+      ],
+    });
+
+    expect(locations(node)).toStrictEqual({
+      start: 0,
+      end: 92,
+      children: [
+        {
+          start: 0,
+          end: 5,
+          children: { TextContent: { start: 0, end: 5 } },
+        },
+        {
+          start: 6,
+          end: 15,
+          children: {
+            Template: { start: 6, end: 15, children: { start: 9, end: 12 } },
+          },
+        },
+        {
+          start: 16,
+          end: 17,
+          children: { TextContent: { start: 16, end: 17 } },
+        },
+        {
+          start: 18,
+          end: 35,
+          children: {
+            Comment: { start: 18, end: 35, children: { start: 22, end: 32 } },
+          },
+        },
+        {
+          start: 36,
+          end: 37,
+          children: { TextContent: { start: 36, end: 37 } },
+        },
+        {
+          start: 38,
+          end: 86,
+          children: {
+            Element: {
+              start: 38,
+              end: 86,
+              children: {
+                OpenTagPartial: { start: 38, end: 42 },
+                TextContent: { start: 44, end: 79 },
+                CloseTag: { start: 80, end: 86 },
+              },
+            },
+          },
+        },
+        {
+          start: 87,
+          end: 92,
+          children: { TextContent: { start: 87, end: 92 } },
+        },
+      ],
+    });
   });
 
   // All kinds of whitespaces
@@ -453,8 +556,8 @@ export function names(node: CstNode): { name: string; children?: Record<string,
           // plain object: use as-is
           const keys = Object.keys(children as Record<string, unknown>);
           if (keys.length) {
-out.children = children as Record<string, unknown>;
-}
+            out.children = children as Record<string, unknown>;
+          }
         } else {
           // array or primitive: wrap under Content
           out.children = children as unknown[];
@@ -478,8 +581,8 @@ export function locations(node: CstNode): { start: number; end: number; children
         if (typeof children === 'object' && !Array.isArray(children)) {
           const keys = Object.keys(children as Record<string, unknown>);
           if (keys.length) {
-base.children = children as Record<string, unknown>;
-}
+            base.children = children as Record<string, unknown>;
+          }
         } else {
           base.children = children as unknown[];
         }

From d31297c39ae7c99cd3a6db26ffc756884254a0cf Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Mon, 8 Sep 2025 19:23:27 +0800
Subject: [PATCH 66/76] cst more tests

---
 packages/poml/next/cst.ts              |   2 +-
 packages/poml/tests/reader/cst.test.ts | 364 ++++++++++++++++++++++++-
 2 files changed, 357 insertions(+), 9 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 4c0fc814..26889dc3 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -67,7 +67,7 @@ export class ExtendedPomlParser extends CstParser {
   public singleQuotedExpressionTokens!: (idxInOriginalText?: number) => CstTokens;
   public betweenTagsTokens!: (idxInOriginalText?: number) => CstTokens;
   // Accepting expectedTagName as argument to validate matching close tag
-  public literalTagTokens!: (idxInOriginalText?: number, args?: [string]) => CstLiteralTagTokens;
+  public literalTagTokens!: (idxInOriginalText?: number, args?: [string]) => CstTokens;
   // regular rules
   public template!: (idxInOriginalText?: number) => CstTemplateNode;
   public comment!: (idxInOriginalText?: number) => CstCommentNode;
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 2afd73a7..48d11548 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -15,15 +15,18 @@ import {
   CstOpenTagPartialNode,
   CstCloseTagNode,
   CstElementNode,
+  CstTokens,
 } from 'poml/next/nodes';
 
-function withParser<T>(input: string, run: (p: ExtendedPomlParser) => T) {
+function withParser<T>(input: string, run: (p: ExtendedPomlParser) => T, raiseOnError?: boolean) {
   const lex = extendedPomlLexer.tokenize(input);
   const parser = new ExtendedPomlParser();
   parser.input = lex.tokens;
   const node = run(parser);
-  expect(parser.errors).toHaveLength(0);
-  return { node, parser, tokens: lex.tokens };
+  if (raiseOnError || raiseOnError === undefined) {
+    expect(parser.errors).toHaveLength(0);
+  }
+  return { node, parser, tokens: lex.tokens, errors: parser.errors };
 }
 
 describe('CST Parser Rules', () => {
@@ -183,7 +186,7 @@ describe('CST Parser Rules', () => {
     expect(node.children.OpenTagCloseBracket?.[0].image).toBe('>');
     // Literal elements should store raw tokens under TextContent (no Template child)
     expect(node.children.TextContent?.length).toBeGreaterThan(0);
-    const content = node.children.TextContent?.[0] as CstLiteralTagTokens;
+    const content = node.children.TextContent?.[0] as CstTokens;
     const images = content.children.Content?.map((t) => t.image) || [];
     expect(images).toContain('{{');
     expect(images).toContain('}}');
@@ -359,13 +362,358 @@ done`;
     });
   });
 
-  // All kinds of whitespaces
+  test('all kinds of whitespaces', () => {
+    const input = `\t\n\r <\tdocument\t  >\n\t 　 {{  　 name   }}\r\n\t</document> 　 \t\n`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual([
+      { TextContent: '\t\n\r ' },
+      {
+        Element: {
+          OpenTagPartial: {
+            OpenBracket: '<',
+            WsAfterOpen: '\t',
+            TagName: 'document',
+            WsAfterAll: '\t  ',
+          },
+          OpenTagCloseBracket: '>',
+          Content: [
+            { TextContent: '\n\t 　 ' },
+            {
+              Template: {
+                TemplateOpen: '{{',
+                WsAfterOpen: '  ',
+                Content: '　 name',
+                WsAfterContent: '   ',
+                TemplateClose: '}}',
+              },
+            },
+            { TextContent: '\r\n\t' },
+          ],
+          CloseTag: {
+            ClosingOpenBracket: '</',
+            TagName: 'document',
+            CloseBracket: '>',
+          },
+        },
+      },
+      { TextContent: ' 　 \t\n' },
+    ]);
+  });
+
+  test('single quotes vs double quotes edge cases', () => {
+    const input = `<  div id='single' class="double"  > {{ 'nested "quote"' }} </ div   >`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: {
+          OpenBracket: '<',
+          TagName: 'div',
+          Attribute: [
+            { AttributeKey: 'id', Equals: '=', quotedValue: { OpenQuote: "'", Content: 'single', CloseQuote: "'" } },
+            { AttributeKey: 'class', Equals: '=', quotedValue: { OpenQuote: '"', Content: 'double', CloseQuote: '"' } },
+          ],
+          WsBeforeEachAttribute: '  ',
+          WsAfterOpen: '  ',
+          WsAfterAll: '  ',
+        },
+        OpenTagCloseBracket: '>',
+        Content: [
+          { TextContent: ' ' },
+          {
+            Template: {
+              TemplateOpen: '{{',
+              WsAfterOpen: ' ',
+              Content: '\'nested "quote"\'',
+              WsAfterContent: ' ',
+              TemplateClose: '}}',
+            },
+          },
+          { TextContent: ' ' },
+        ],
+        CloseTag: {
+          ClosingOpenBracket: '</',
+          TagName: 'div',
+          CloseBracket: '>',
+          WsAfterOpen: ' ',
+          WsBeforeClose: '   ',
+        },
+      },
+    });
+  });
+
+  test('empty quotes edge cases', () => {
+    const input = `<tag attr1="" attr2=''></tag>`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: {
+          OpenBracket: '<',
+          TagName: 'tag',
+          Attribute: [
+            { AttributeKey: 'attr1', Equals: '=', quotedValue: { OpenQuote: '"', CloseQuote: '"' } },
+            { AttributeKey: 'attr2', Equals: '=', quotedValue: { OpenQuote: "'", CloseQuote: "'" } },
+          ],
+          WsBeforeEachAttribute: '  ',
+        },
+        OpenTagCloseBracket: '>',
+        CloseTag: { ClosingOpenBracket: '</', TagName: 'tag', CloseBracket: '>' },
+      },
+    });
+  });
+
+  test('matched text element with literal content', () => {
+    const input = `<text>Hello {{ world }} and <other>nested</other></text>`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: { OpenBracket: '<', TagName: 'text' },
+        OpenTagCloseBracket: '>',
+        TextContent: 'Hello {{ world }} and <other>nested</other>',
+        CloseTag: { ClosingOpenBracket: '</', TagName: 'text', CloseBracket: '>' },
+      },
+    });
+  });
+
+  test('mismatched tags - text opening with template closing', () => {
+    const input = `<text>Content here</template></text>`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual({
+      Element: {
+        CloseTag: { CloseBracket: '>', ClosingOpenBracket: '</', TagName: 'text' },
+        OpenTagCloseBracket: '>',
+        OpenTagPartial: { OpenBracket: '<', TagName: 'text' },
+        TextContent: 'Content here</template>',
+      },
+    });
+  });
+
+  test('completely unmatched tags should not error', () => {
+    const input = `<document>content</div><span>more</p>`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual([
+      {
+        Element: {
+          OpenTagPartial: { OpenBracket: '<', TagName: 'document' },
+          OpenTagCloseBracket: '>',
+          Content: { TextContent: 'content' },
+          CloseTag: { ClosingOpenBracket: '</', TagName: 'div', CloseBracket: '>' },
+        },
+      },
+      {
+        Element: {
+          OpenTagPartial: { OpenBracket: '<', TagName: 'span' },
+          OpenTagCloseBracket: '>',
+          Content: { TextContent: 'more' },
+          CloseTag: { ClosingOpenBracket: '</', TagName: 'p', CloseBracket: '>' },
+        },
+      },
+    ]);
+  });
 
-  // Single quotes, double quotes, and corner cases
+  test('nested quoted templates with mixed quotes', () => {
+    const input = `<div title="Hello {{ 'user' }}"  meta  = '{if{{nothing''  }}123'>'World'</div>`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
 
-  // Matched <text></text> and <text></template></text> or <template></text></template>
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: {
+          OpenBracket: '<',
+          TagName: 'div',
+          WsBeforeEachAttribute: '   ',
+          Attribute: [
+            {
+              AttributeKey: 'title',
+              Equals: '=',
+              quotedValue: {
+                OpenQuote: '"',
+                Content: [
+                  'Hello ',
+                  {
+                    TemplateOpen: '{{',
+                    WsAfterOpen: ' ',
+                    Content: "'user'",
+                    WsAfterContent: ' ',
+                    TemplateClose: '}}',
+                  },
+                ],
+                CloseQuote: '"',
+              },
+            },
+            {
+              AttributeKey: 'meta',
+              Equals: '=',
+              WsAfterEquals: ' ',
+              WsAfterKey: '  ',
+              quotedValue: {
+                CloseQuote: "'",
+                Content: [
+                  '{if',
+                  {
+                    Content: "nothing''",
+                    TemplateClose: '}}',
+                    TemplateOpen: '{{',
+                    WsAfterContent: '  ',
+                  },
+                  '123',
+                ],
+                OpenQuote: "'",
+              },
+            },
+          ],
+        },
+        OpenTagCloseBracket: '>',
+        Content: {
+          TextContent: "'World'",
+        },
+        CloseTag: { ClosingOpenBracket: '</', TagName: 'div', CloseBracket: '>' },
+      },
+    });
+  });
 
-  // Unmatched tags should not error in cst stage
+  test('special characters and symbols in content', () => {
+    const input = `<text>@#$%^&*(){}[]|\\:";'<>?/.,~\`</text>`;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: { OpenBracket: '<', TagName: 'text' },
+        OpenTagCloseBracket: '>',
+        TextContent: '@#$%^&*(){}[]|\\:";\'<>?/.,~`',
+        CloseTag: { ClosingOpenBracket: '</', TagName: 'text', CloseBracket: '>' },
+      },
+    });
+  });
+
+  test('multiple templates and elements mixed with whitespace', () => {
+    const input = `  {{ a }}  <div>{{ b }}</div>  {{ c }}  `;
+    const { node } = withParser(input, (p) => p.root()) as { node: CstRootNode };
+
+    expect(images(node)).toStrictEqual([
+      { TextContent: '  ' },
+      {
+        Template: {
+          TemplateOpen: '{{',
+          WsAfterOpen: ' ',
+          Content: 'a',
+          WsAfterContent: ' ',
+          TemplateClose: '}}',
+        },
+      },
+      { TextContent: '  ' },
+      {
+        Element: {
+          OpenTagPartial: { OpenBracket: '<', TagName: 'div' },
+          OpenTagCloseBracket: '>',
+          Content: {
+            Template: {
+              TemplateOpen: '{{',
+              WsAfterOpen: ' ',
+              Content: 'b',
+              WsAfterContent: ' ',
+              TemplateClose: '}}',
+            },
+          },
+          CloseTag: { ClosingOpenBracket: '</', TagName: 'div', CloseBracket: '>' },
+        },
+      },
+      { TextContent: '  ' },
+      {
+        Template: {
+          TemplateOpen: '{{',
+          WsAfterOpen: ' ',
+          Content: 'c',
+          WsAfterContent: ' ',
+          TemplateClose: '}}',
+        },
+      },
+      { TextContent: '  ' },
+    ]);
+  });
+});
+
+describe('Error', () => {
+  test('orphan closing tags should error', () => {
+    const input = `Some text</orphan>{{ template }}</unknown>`;
+    const { node, errors } = withParser(input, (p) => p.root(), false) as { node: CstRootNode; errors: any[] };
+    expect(errors.length).toBe(4);
+
+    expect(images(node)).toStrictEqual([
+      { TextContent: 'Some text' },
+      { TextContent: 'orphan' },
+      {
+        Template: {
+          TemplateOpen: '{{',
+          WsAfterOpen: ' ',
+          Content: 'template',
+          WsAfterContent: ' ',
+          TemplateClose: '}}',
+        },
+      },
+      { TextContent: 'unknown' },
+    ]);
+  });
+
+  test('mismatched tags - template opening with text closing', () => {
+    const input = `<template>Some content</text>`;
+    const { node, errors } = withParser(input, (p) => p.root(), false) as { node: CstRootNode; errors: any[] };
+    expect(errors.length).toBe(1);
+
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: { OpenBracket: '<', TagName: 'template' },
+        OpenTagCloseBracket: '>',
+        TextContent: 'Some content</text>',
+      },
+    });
+
+    expect(names(node)).toStrictEqual({
+      name: 'root',
+      children: {
+        name: 'elementContent',
+        children: {
+          Element: {
+            name: 'element',
+            children: {
+              OpenTagPartial: { name: 'openTagPartial' },
+              TextContent: { name: 'literalTagTokens' },
+              CloseTag: { name: 'closeTag' },
+            },
+          },
+        },
+      },
+    });
+  });
+
+  test('empty template', () => {
+    const input = `<any foo={{}}>{{ }}</any>`;
+    const { node, errors } = withParser(input, (p) => p.root(), false) as { node: CstRootNode; errors: any[] };
+    expect(errors.length).toBe(2);
+    expect(images(node)).toStrictEqual({
+      Element: {
+        OpenTagPartial: {
+          OpenBracket: '<',
+          TagName: 'any',
+          WsBeforeEachAttribute: ' ',
+          Attribute: {
+            AttributeKey: 'foo',
+            Equals: '=',
+            templatedValue: { TemplateOpen: '{{', TemplateClose: '}}' },
+          },
+        },
+        OpenTagCloseBracket: '>',
+        Content: {
+          Template: { TemplateOpen: '{{', WsAfterOpen: ' ', TemplateClose: '}}' },
+        },
+        CloseTag: { ClosingOpenBracket: '</', TagName: 'any', CloseBracket: '>' },
+      },
+    });
+  });
 });
 
 /* -------------------- tiny guards -------------------- */

From 0b8c3afd8d3475e838602b63d50ed6abcbae5d51 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 11:19:23 +0800
Subject: [PATCH 67/76] lookahead

---
 packages/poml/next/cst.ts | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index 26889dc3..b6508e56 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -107,33 +107,27 @@ export class ExtendedPomlParser extends CstParser {
 
   // Lookahead helper: Check if next is whitespace but next non-whitespace token is not of given type
   private atAlmostClose = (tokenType: TokenType) => {
-    let k = 1;
-    if (this.LA(k).tokenType === Whitespace) {
-      k++;
+    if (this.LA(1).tokenType === Whitespace) {
+      return this.LA(2).tokenType === tokenType;
     }
-    return this.LA(k).tokenType === tokenType;
+    return this.LA(1).tokenType === tokenType;
   };
 
   private isNextPragma = () => {
     if (this.LA(1).tokenType !== CommentOpen) {
       return false;
     }
-    let k = 2;
-    while (this.LA(k).tokenType === Whitespace) {
-      k++;
+    if (this.LA(2).tokenType === Whitespace) {
+      return this.LA(3).tokenType === PragmaKeyword;
     }
-    return this.LA(k).tokenType === PragmaKeyword;
+    return this.LA(2).tokenType === PragmaKeyword;
   };
 
   private isAtLiteralClose = (expectedTagName: string | undefined) => {
     if (this.LA(1).tokenType !== ClosingOpenBracket) {
       return false;
     }
-    let k = 2;
-    while (this.LA(k).tokenType === Whitespace) {
-      k++;
-    }
-    const t = this.LA(k);
+    const t = this.LA(2).tokenType === Whitespace ? this.LA(3) : this.LA(2);
     if (t.tokenType !== Identifier) {
       return false;
     }
@@ -146,11 +140,7 @@ export class ExtendedPomlParser extends CstParser {
     if (this.LA(1).tokenType !== OpenBracket) {
       return undefined;
     }
-    let k = 2;
-    while (this.LA(k).tokenType === Whitespace) {
-      k++;
-    }
-    const token = this.LA(k);
+    const token = this.LA(2).tokenType === Whitespace ? this.LA(3) : this.LA(2);
     if (token.tokenType !== Identifier) {
       return undefined;
     }
@@ -169,6 +159,7 @@ export class ExtendedPomlParser extends CstParser {
     super(AllTokens, {
       recoveryEnabled: true,
       nodeLocationTracking: 'full',
+      maxLookahead: 3,
     });
     this.validComponentNames = new Set(listComponentAliases());
 

From b675acad60ba7d7ed842b21ffc2a71331db1326d Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 12:01:16 +0800
Subject: [PATCH 68/76] ast instruction

---
 packages/poml/next/ast.ts       |  16 +++
 packages/poml/next/cst.ts       |   2 +
 packages/poml/next/index.tsx    |   3 -
 packages/poml/next/meta.ts      |   3 -
 packages/poml/next/nodes.ts     |  45 +++----
 packages/poml/next/poml.tsx     |   3 -
 packages/poml/next/segment.ts   | 231 --------------------------------
 packages/poml/next/text.tsx     |   3 -
 packages/poml/next/tokenizer.ts | 142 --------------------
 9 files changed, 34 insertions(+), 414 deletions(-)
 create mode 100644 packages/poml/next/ast.ts
 delete mode 100644 packages/poml/next/index.tsx
 delete mode 100644 packages/poml/next/meta.ts
 delete mode 100644 packages/poml/next/poml.tsx
 delete mode 100644 packages/poml/next/segment.ts
 delete mode 100644 packages/poml/next/text.tsx
 delete mode 100644 packages/poml/next/tokenizer.ts

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
new file mode 100644
index 00000000..e88a1592
--- /dev/null
+++ b/packages/poml/next/ast.ts
@@ -0,0 +1,16 @@
+/**
+ * Converting CST nodes to AST nodes.
+ *
+ * It's time for:
+ *
+ * - Check open/close tag matching
+ * - Deal with HTML entities escape and backslash escape
+ * - Concatenate wrongly split text into LiteralNode
+ * - Unify the types (e.g., AttributeNode must have ValueNode children)
+ *
+ * It's not time yet for:
+ *
+ * - Evaluating expressions in templates
+ * - Resolving includes
+ * - Validating semantics (e.g., whether an attribute is allowed on a certain element)
+ */
diff --git a/packages/poml/next/cst.ts b/packages/poml/next/cst.ts
index b6508e56..c025b032 100644
--- a/packages/poml/next/cst.ts
+++ b/packages/poml/next/cst.ts
@@ -491,6 +491,8 @@ export class ExtendedPomlParser extends CstParser {
             this.CONSUME(CloseBracket, { LABEL: 'OpenTagCloseBracket' });
 
             // Everything until the matching close tag is treated as raw text
+            // We impose a stricter check when enclosing a literal tag, and avoid false negative matches
+            // This will make some auto completion scenarios fail, but it will enhance the inclusiveness of literal elements
             this.SUBRULE(this.literalTagTokens, { ARGS: [tagName], LABEL: 'TextContent' });
 
             this.SUBRULE(this.closeTag, { LABEL: 'CloseTag' });
diff --git a/packages/poml/next/index.tsx b/packages/poml/next/index.tsx
deleted file mode 100644
index 627c20c8..00000000
--- a/packages/poml/next/index.tsx
+++ /dev/null
@@ -1,3 +0,0 @@
-import { Reader } from './base';
-
-class DispatchReader extends Reader {}
diff --git a/packages/poml/next/meta.ts b/packages/poml/next/meta.ts
deleted file mode 100644
index 2164c6d3..00000000
--- a/packages/poml/next/meta.ts
+++ /dev/null
@@ -1,3 +0,0 @@
-import { Reader } from './base';
-
-class MetaReader extends Reader {}
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index e0f7e007..d75c8a58 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -107,7 +107,7 @@ export interface LiteralNode extends AstNode {
 }
 
 /**
- * Represents a composite value that may contain text.
+ * The value of an attribute, which may contain text and/or templates.
  * Used specifically for the "quotes" in attribute values.
  *
  * Value nodes are containers for mixed content, handling both pure text
@@ -117,12 +117,13 @@ export interface LiteralNode extends AstNode {
  * Cases that apply:
  * - Quoted attribute values: `"some text"`, `'single quoted'`
  * - Mixed content with templates: `"Hello, {{ userName }}!"`
- * - Unquoted template values in certain attribute contexts
+ * - Unquoted template values in certain attribute contexts (e.g., if="condition_expr")
  * - Multi-part content: `"Price: ${{amount}} USD"`
  *
  * Cases that do not apply:
  * - Attribute keys: `class=...` (the `class` part uses LiteralNode)
  * - Pure expressions without quotes: `if=condition` (illegal)
+ * - Mixture of template and non-templates in element contents (use LiteralNode and TemplateNode directly)
  *
  * Note: The range includes quotes if present, but children exclude them.
  */
@@ -349,19 +350,13 @@ export interface SelfCloseElementNode extends AstNode {
  * Represents a complete POML element with its content.
  *
  * Element nodes are high-level constructs that represent semantic POML
- * components. They contain a tag name, optional attributes (inherited from
- * open tag), and may have child content including other elements, text,
- * or values.
+ * components. They contain a tag name, which contains optional attributes,
+ * and may have child contents including other elements, text, or values.
  *
- * It should also support literal elements, which are:
+ * It should also support literal elements, which are special POML elements
+ * that treat their content as literal text without any template variable interpolation.
+ * Content is preserved exactly as written, useful for code samples or pre-formatted text.
  *
- * - Special POML elements that treat their content as literal text
- * - Prevents template variable interpolation
- * - Content is preserved exactly as written, useful for code samples or pre-formatted text
- * - When `<text>` is used, the parser eats everything including tags and comments,
- *   including nested `<text>` itself, until a matching `</text>` is found
- * - The tagName can only be "text" and "template" for literal elements
- * - If you need `<text>` in your POML content, use `&lt;text&gt;` outside of literal elements
  *
  * Cases that apply:
  * - Any elements: `<document parser="txt">...content...</document>`
@@ -383,23 +378,11 @@ export interface ElementNode extends AstNode {
   kind: 'ELEMENT';
   open: OpenTagNode;
   close: CloseTagNode;
-  children: (ElementNode | CommentNode | PragmaNode | TextElementNode)[];
+  children: ElementContentNode[];
   // isLiteral?: boolean; // True for <text> and <template> tags
 }
 
-/**
- * Very similar to ValueNode, but specifically for text content between tags.
- *
- * Cases that apply:
- * - Text content between tags: `>  some text  <` (including whitespace)
- *
- * Cases that do not apply:
- * - Text inside <text> or other literal elements (use ElementNode with literal)
- */
-export interface TextElementNode extends AstNode {
-  kind: 'TEXT';
-  value: string;
-}
+export type ElementContentNode = ElementNode | CommentNode | PragmaNode | LiteralNode | TemplateNode;
 
 /**
  * Related CST node interfaces for parsing stage.
@@ -409,6 +392,11 @@ export interface CstElementNode extends CstNode {
     OpenTagPartial?: CstOpenTagPartialNode[];
     OpenTagCloseBracket?: IToken[];
     Content?: CstElementContentNode[];
+    // For literal elements like <text>
+    // When `<text>` is used, the parser eats everything including tags and comments,
+    // including nested `<text>` itself, until a matching `</text>` is found
+    // The tagName can only be "text" and "template" for literal elements
+    // If you need `<text>` in your POML content, use `&lt;text&gt;` outside of literal elements
     TextContent?: CstTokens[]; // For literal elements like <text>
     CloseTag?: CstCloseTagNode[];
     // Alternative, it can also be a self-closing tag.
@@ -517,7 +505,7 @@ export interface CstPragmaNode extends CstNode {
  */
 export interface RootNode extends AstNode {
   kind: 'ROOT';
-  children: (ElementNode | CommentNode | PragmaNode | ValueNode)[];
+  children: ElementContentNode[];
 }
 
 /**
@@ -559,7 +547,6 @@ export type StrictNode =
   | CloseTagNode
   | SelfCloseElementNode
   | ElementNode
-  | TextElementNode
   | CommentNode
   | PragmaNode
   | RootNode;
diff --git a/packages/poml/next/poml.tsx b/packages/poml/next/poml.tsx
deleted file mode 100644
index 5a771265..00000000
--- a/packages/poml/next/poml.tsx
+++ /dev/null
@@ -1,3 +0,0 @@
-import { Reader } from './base';
-
-export class PomlReader extends Reader {}
diff --git a/packages/poml/next/segment.ts b/packages/poml/next/segment.ts
deleted file mode 100644
index 440ea74a..00000000
--- a/packages/poml/next/segment.ts
+++ /dev/null
@@ -1,231 +0,0 @@
-import componentDocs from '../assets/componentDocs.json';
-
-export interface Segment {
-  // Unique ID for caching and React keys
-  id: string;
-  kind: 'META' | 'TEXT' | 'POML';
-  start: number;
-  end: number;
-  // The raw string content of the segment
-  content: string;
-  // The path to the file or resource this segment belongs to
-  path?: string;
-  // Reference to the parent segment
-  parent?: Segment;
-  // Nested segments (e.g., a POML block within text)
-  children: Segment[];
-  // For POML segments, the name of the root tag (e.g., 'task')
-  tagName?: string;
-}
-
-class Segmenter {
-  private nextId: number;
-  private sourcePath: string | undefined;
-
-  constructor(sourcePath: string | undefined) {
-    this.nextId = 0;
-    this.sourcePath = sourcePath;
-  }
-
-  private generateId(): string {
-    return `segment_${this.nextId++}`;
-  }
-
-  private isValidPomlTag(tagName: string): boolean {
-    const validTags = new Set<string>();
-
-    for (const doc of componentDocs) {
-      if (doc.name) {
-        validTags.add(doc.name.toLowerCase());
-        validTags.add(
-          doc.name
-            .toLowerCase()
-            .replace(/([A-Z])/g, '-$1')
-            .toLowerCase(),
-        );
-      }
-    }
-
-    validTags.add('poml');
-    validTags.add('text');
-    validTags.add('meta');
-
-    return validTags.has(tagName.toLowerCase());
-  }
-
-  private parseSegments(text: string, start: number = 0, parent?: Segment): Segment[] {
-    const segments: Segment[] = [];
-    let currentPos = start;
-
-    while (currentPos < text.length) {
-      const nextOpenTag = text.indexOf('<', currentPos);
-
-      if (nextOpenTag === -1) {
-        if (currentPos < text.length) {
-          const textContent = text.substring(currentPos);
-          if (textContent.trim()) {
-            segments.push({
-              id: this.generateId(),
-              kind: 'TEXT',
-              start: currentPos,
-              end: text.length,
-              content: textContent,
-              path: this.sourcePath,
-              parent,
-              children: [],
-            });
-          }
-        }
-        break;
-      }
-
-      if (nextOpenTag > currentPos) {
-        const textContent = text.substring(currentPos, nextOpenTag);
-        if (textContent.trim()) {
-          segments.push({
-            id: this.generateId(),
-            kind: 'TEXT',
-            start: currentPos,
-            end: nextOpenTag,
-            content: textContent,
-            path: this.sourcePath,
-            parent,
-            children: [],
-          });
-        }
-      }
-
-      const tagEndPos = text.indexOf('>', nextOpenTag);
-      if (tagEndPos === -1) {
-        currentPos = nextOpenTag + 1;
-        continue;
-      }
-
-      const tagContent = text.substring(nextOpenTag + 1, tagEndPos);
-      const tagName = tagContent.trim().split(/\s+/)[0];
-
-      if (tagName.startsWith('/')) {
-        currentPos = tagEndPos + 1;
-        continue;
-      }
-
-      if (tagContent.endsWith('/')) {
-        currentPos = tagEndPos + 1;
-        continue;
-      }
-
-      if (!this.isValidPomlTag(tagName)) {
-        currentPos = tagEndPos + 1;
-        continue;
-      }
-
-      const closingTag = `</${tagName}>`;
-      const closingTagPos = this.findClosingTag(text, tagName, tagEndPos + 1);
-
-      if (closingTagPos === -1) {
-        currentPos = tagEndPos + 1;
-        continue;
-      }
-
-      const segmentContent = text.substring(nextOpenTag, closingTagPos + closingTag.length);
-      const innerContent = text.substring(tagEndPos + 1, closingTagPos);
-
-      const segment: Segment = {
-        id: this.generateId(),
-        kind: tagName.toLowerCase() === 'meta' ? 'META' : 'POML',
-        start: nextOpenTag,
-        end: closingTagPos + closingTag.length,
-        content: segmentContent,
-        path: this.sourcePath,
-        parent,
-        children: [],
-        tagName: tagName.toLowerCase(),
-      };
-
-      if (tagName.toLowerCase() === 'text') {
-        segment.children = this.parseSegments(innerContent, tagEndPos + 1, segment);
-      } else if (tagName.toLowerCase() !== 'meta') {
-        const childSegments = this.parseSegments(innerContent, tagEndPos + 1, segment);
-        segment.children = childSegments;
-      }
-
-      segments.push(segment);
-      currentPos = closingTagPos + closingTag.length;
-    }
-
-    return segments;
-  }
-
-  private findClosingTag(text: string, tagName: string, startPos: number): number {
-    let depth = 1;
-    let pos = startPos;
-
-    while (pos < text.length && depth > 0) {
-      const nextTag = text.indexOf('<', pos);
-      if (nextTag === -1) {
-        break;
-      }
-
-      const tagEndPos = text.indexOf('>', nextTag);
-      if (tagEndPos === -1) {
-        break;
-      }
-
-      const tagContent = text.substring(nextTag + 1, tagEndPos);
-      const currentTagName = tagContent.trim().split(/\s+/)[0];
-
-      if (currentTagName === tagName) {
-        depth++;
-      } else if (currentTagName === `/${tagName}`) {
-        depth--;
-      }
-
-      pos = tagEndPos + 1;
-    }
-
-    return depth === 0 ? pos - `</${tagName}>`.length : -1;
-  }
-
-  public createSegments(content: string): Segment {
-    const rootSegments = this.parseSegments(content);
-
-    if (rootSegments.length === 1 && rootSegments[0].kind === 'POML') {
-      return rootSegments[0];
-    }
-
-    if (rootSegments.length === 0) {
-      return {
-        id: this.generateId(),
-        kind: 'TEXT',
-        start: 0,
-        end: content.length,
-        content: content,
-        path: this.sourcePath,
-        children: [],
-        parent: undefined,
-      };
-    }
-
-    const rootSegment: Segment = {
-      id: this.generateId(),
-      kind: 'TEXT',
-      start: 0,
-      end: content.length,
-      content: content,
-      path: this.sourcePath,
-      children: rootSegments,
-      parent: undefined,
-    };
-
-    rootSegments.forEach((segment) => {
-      segment.parent = rootSegment;
-    });
-
-    return rootSegment;
-  }
-}
-
-export function createSegments(content: string, sourcePath?: string): Segment {
-  const segmenter = new Segmenter(sourcePath);
-  return segmenter.createSegments(content);
-}
diff --git a/packages/poml/next/text.tsx b/packages/poml/next/text.tsx
deleted file mode 100644
index 2b2e25ca..00000000
--- a/packages/poml/next/text.tsx
+++ /dev/null
@@ -1,3 +0,0 @@
-import { Reader } from './base';
-
-export class PureTextReader extends Reader {}
diff --git a/packages/poml/next/tokenizer.ts b/packages/poml/next/tokenizer.ts
deleted file mode 100644
index ce1930b8..00000000
--- a/packages/poml/next/tokenizer.ts
+++ /dev/null
@@ -1,142 +0,0 @@
-export interface Token {
-  type: 'TEXT' | 'TAG_OPEN' | 'TAG_CLOSE' | 'TAG_SELF_CLOSE' | 'TEMPLATE_VAR' | 'ATTRIBUTE';
-  value: string;
-  start: number;
-  end: number;
-}
-
-export class Tokenizer {
-  private input: string;
-  private position: number;
-
-  constructor(input: string) {
-    this.input = input;
-    this.position = 0;
-  }
-
-  tokenize(): Token[] {
-    const tokens: Token[] = [];
-
-    while (this.position < this.input.length) {
-      // Check for template variables first
-      if (this.peek() === '{' && this.peek(1) === '{') {
-        tokens.push(this.readTemplateVariable());
-        continue;
-      }
-
-      // Check for XML tags
-      if (this.peek() === '<') {
-        const tagToken = this.readTag();
-        if (tagToken) {
-          tokens.push(tagToken);
-          continue;
-        }
-      }
-
-      // Read text content
-      const textToken = this.readText();
-      if (textToken.value.length > 0) {
-        tokens.push(textToken);
-      }
-    }
-
-    return tokens;
-  }
-
-  private peek(offset: number = 0): string {
-    return this.input[this.position + offset] || '';
-  }
-
-  private advance(): string {
-    return this.input[this.position++] || '';
-  }
-
-  private readTemplateVariable(): Token {
-    const start = this.position;
-    this.advance(); // {
-    this.advance(); // {
-
-    while (this.position < this.input.length && !(this.peek() === '}' && this.peek(1) === '}')) {
-      this.advance();
-    }
-
-    if (this.peek() === '}' && this.peek(1) === '}') {
-      this.advance(); // }
-      this.advance(); // }
-    }
-
-    return {
-      type: 'TEMPLATE_VAR',
-      value: this.input.substring(start, this.position),
-      start,
-      end: this.position,
-    };
-  }
-
-  private readTag(): Token | null {
-    const start = this.position;
-    this.advance(); // <
-
-    // Skip whitespace
-    while (this.peek() === ' ' || this.peek() === '\t' || this.peek() === '\n') {
-      this.advance();
-    }
-
-    // Check for closing tag
-    const isClosing = this.peek() === '/';
-    if (isClosing) {
-      this.advance();
-    }
-
-    // Read tag name
-    let tagName = '';
-    while (
-      this.position < this.input.length &&
-      this.peek() !== '>' &&
-      this.peek() !== ' ' &&
-      this.peek() !== '\t' &&
-      this.peek() !== '\n'
-    ) {
-      tagName += this.advance();
-    }
-
-    // Skip attributes for now (will be parsed separately)
-    while (this.position < this.input.length && this.peek() !== '>') {
-      this.advance();
-    }
-
-    if (this.peek() === '>') {
-      this.advance(); // >
-
-      // Check if self-closing
-      const content = this.input.substring(start, this.position);
-      const isSelfClosing = content.endsWith('/>');
-
-      return {
-        type: isSelfClosing ? 'TAG_SELF_CLOSE' : isClosing ? 'TAG_CLOSE' : 'TAG_OPEN',
-        value: content,
-        start,
-        end: this.position,
-      };
-    }
-
-    // Invalid tag, backtrack
-    this.position = start + 1;
-    return null;
-  }
-
-  private readText(): Token {
-    const start = this.position;
-
-    while (this.position < this.input.length && this.peek() !== '<' && !(this.peek() === '{' && this.peek(1) === '{')) {
-      this.advance();
-    }
-
-    return {
-      type: 'TEXT',
-      value: this.input.substring(start, this.position),
-      start,
-      end: this.position,
-    };
-  }
-}

From ae6c12a8166bc3cc1dfd2c04518e03977df24df2 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 12:36:31 +0800
Subject: [PATCH 69/76] ast init

---
 package-lock.json           |  10 +-
 package.json                |   2 +
 packages/poml/next/ast.ts   | 520 ++++++++++++++++++++++++++++++++++++
 packages/poml/next/nodes.ts |  29 +-
 4 files changed, 535 insertions(+), 26 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 81ad5efb..9dd58aca 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -23,6 +23,7 @@
         "cheerio": "^1.0.0",
         "closest-match": "^1.3.3",
         "d3-dsv": "~2.0.0",
+        "he": "^1.2.0",
         "jquery": "^3.7.1",
         "js-tiktoken": "^1.0.20",
         "js-yaml": "^4.1.0",
@@ -50,6 +51,7 @@
         "@rollup/plugin-json": "^6.1.0",
         "@stylistic/eslint-plugin": "^5.2.3",
         "@types/d3-dsv": "~2.0.0",
+        "@types/he": "^1.2.3",
         "@types/jquery": "^3.5.32",
         "@types/js-yaml": "^4.0.9",
         "@types/lodash.throttle": "^4.1.9",
@@ -3840,6 +3842,13 @@
         "@types/unist": "*"
       }
     },
+    "node_modules/@types/he": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@types/he/-/he-1.2.3.tgz",
+      "integrity": "sha512-q67/qwlxblDzEDvzHhVkwc1gzVWxaNxeyHUBF4xElrvjL11O+Ytze+1fGpBHlr/H9myiBUaUXNnNPmBHxxfAcA==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@types/istanbul-lib-coverage": {
       "version": "2.0.6",
       "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz",
@@ -8114,7 +8123,6 @@
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
       "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
-      "dev": true,
       "license": "MIT",
       "bin": {
         "he": "bin/he"
diff --git a/package.json b/package.json
index c86ddc13..0a7a0142 100644
--- a/package.json
+++ b/package.json
@@ -408,6 +408,7 @@
     "@rollup/plugin-json": "^6.1.0",
     "@stylistic/eslint-plugin": "^5.2.3",
     "@types/d3-dsv": "~2.0.0",
+    "@types/he": "^1.2.3",
     "@types/jquery": "^3.5.32",
     "@types/js-yaml": "^4.0.9",
     "@types/lodash.throttle": "^4.1.9",
@@ -460,6 +461,7 @@
     "cheerio": "^1.0.0",
     "closest-match": "^1.3.3",
     "d3-dsv": "~2.0.0",
+    "he": "^1.2.0",
     "jquery": "^3.7.1",
     "js-tiktoken": "^1.0.20",
     "js-yaml": "^4.1.0",
diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index e88a1592..bd09c73a 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -14,3 +14,523 @@
  * - Resolving includes
  * - Validating semantics (e.g., whether an attribute is allowed on a certain element)
  */
+
+import { CstNode, IToken } from 'chevrotain';
+import * as he from 'he';
+import {
+  CstRootNode,
+  CstElementContentNode,
+  CstElementNode,
+  CstOpenTagPartialNode,
+  CstCloseTagNode,
+  CstTemplateNode,
+  CstQuotedNode,
+  CstQuotedTemplateNode,
+  CstForIteratorNode,
+  CstAttributeNode,
+  CstCommentNode,
+  CstPragmaNode,
+  CstTokens,
+} from './nodes';
+import {
+  ElementNode,
+  ElementContentNode,
+  OpenTagNode,
+  CloseTagNode,
+  SelfCloseElementNode,
+  ValueNode,
+  TemplateNode,
+  LiteralNode,
+  AttributeNode,
+  ForIteratorNode,
+  CommentNode,
+  PragmaNode,
+  RootNode,
+} from './nodes';
+import { Range } from './types';
+import { extendedPomlParser } from './cst';
+import { BackslashEscape, CharacterEntity } from './lexer';
+
+/** Error produced while building the AST (beyond lex/parse errors). */
+export interface AstBuildError {
+  message: string;
+  range?: Range;
+}
+
+/** Utility: build a range from two offsets (inclusive start, exclusive end). */
+function rangeFrom(start: number, end: number): Range {
+  return { start, end };
+}
+
+/** Utility: half-open range covering a token list; `{ start: 0, end: 0 }` when the list is empty. */
+function rangeFromTokens(tokens: IToken[]): Range {
+  if (tokens.length === 0) {
+    return { start: 0, end: 0 };
+  }
+  const head = tokens[0];
+  const tail = tokens[tokens.length - 1];
+  return rangeFrom(head.startOffset ?? 0, (tail.endOffset ?? head.startOffset ?? 0) + 1);
+}
+
+/** Utility: create a LiteralNode from raw text and token range. */
+function literal(value: string, start: number, end: number): LiteralNode {
+  return { kind: 'STRING', value, range: rangeFrom(start, end) };
+}
+
+/**
+ * Decode a single backslash escape sequence (for quoted strings).
+ *
+ * @param seq The escape including its leading backslash, e.g. `\n`, `\"`, `\x41`.
+ * @returns The decoded text. Hex escapes (`\xHH`, `\uHHHH`, `\UHHHHHHHH`) become their
+ *   code point; an unknown or malformed escape is returned minus its backslash.
+ */
+function decodeEscape(seq: string): string {
+  const body = seq.slice(1);
+  switch (body) {
+    case 'n':
+      return '\n';
+    case 'r':
+      return '\r';
+    case 't':
+      return '\t';
+    case "'":
+    case '"':
+    case '\\':
+    case '{{': // \{{
+    case '}}': // \}}
+      return body;
+  }
+  // Hex escapes carry digits after the marker, so they never equal a bare 'x'/'u'/'U'
+  // and must be matched by shape; fromCodePoint also covers values above U+FFFF.
+  if (/^[xuU][0-9a-fA-F]+$/.test(body)) {
+    const codePoint = parseInt(body.slice(1), 16);
+    if (codePoint <= 0x10ffff) {
+      return String.fromCodePoint(codePoint);
+    }
+  }
+  return body;
+}
+
+/** Concatenate the raw `image` of every token, with no escape or entity decoding. */
+function textFromRaw(tokens: IToken[]): string {
+  return tokens.reduce((text, token) => text + (token.image ?? ''), '');
+}
+
+/**
+ * Concatenate token text found INSIDE A QUOTED STRING (attribute values, quoted pragma
+ * options). Backslash-escape tokens are decoded via `decodeEscape`; all other tokens,
+ * character entities included, contribute their image unchanged.
+ */
+function textFromQuoted(tokens: IToken[]): string {
+  let text = '';
+  for (const token of tokens) {
+    const image = token.image ?? '';
+    if (token.tokenType === BackslashEscape) {
+      text += decodeEscape(image);
+    } else {
+      text += image;
+    }
+  }
+  return text;
+}
+
+/**
+ * Join the raw text of every token group INSIDE A TEMPLATE EXPRESSION ({{ ... }}).
+ *
+ * Nothing is decoded or evaluated here: each group's tokens are concatenated
+ * verbatim, and a group without `Content` contributes an empty string.
+ * Evaluating the expression is left to a later phase.
+ */
+function textFromExpressionTokens(groups: CstTokens[]): string {
+  return groups
+    .map((group) => textFromRaw(group.children.Content ?? []))
+    .join('');
+}
+
+/** Range from the first token of the first group to the last token of the last group, else `fallback`. */
+function rangeFromTokenGroups(groups: CstTokens[], fallback: Range): Range {
+  const head = groups[0]?.children.Content?.[0];
+  const tailTokens = groups[groups.length - 1]?.children.Content ?? [];
+  const tail = tailTokens[tailTokens.length - 1];
+  if (!head || !tail) {
+    return fallback;
+  }
+  return rangeFrom(head.startOffset ?? 0, (tail.endOffset ?? 0) + 1);
+}
+
+const BaseVisitor = extendedPomlParser.getBaseCstVisitorConstructorWithDefaults();
+
+/**
+ * Extended POML CST -> AST builder.
+ *
+ * This visitor performs a shape-preserving transformation from the concrete
+ * syntax tree (CST) to the semantic abstract syntax tree (AST). It also
+ * normalizes textual content according to the lexer/parser contracts:
+ *  - between-tags text decodes character entities (&amp; -> &)
+ *  - quoted strings decode backslash escapes (\n, \xHH, \uHHHH, ...)
+ *  - template expressions are preserved as raw text; evaluation is later
+ *
+ * Chevrotain's `visit` passes each rule method the node's `children` dictionary as
+ * `ctx`. Mismatched open/close tag names are recorded in `errors`, not thrown.
+ */
+export class ExtendedPomlAstVisitor extends BaseVisitor {
+  private errors: AstBuildError[] = [];
+
+  constructor() {
+    super();
+    this.validateVisitor();
+  }
+
+  /** Entry point: visit a CstRootNode and return an AST RootNode & errors. */
+  build(cst: CstNode): { root: RootNode; errors: AstBuildError[] } {
+    const root = this.visit(cst) as RootNode;
+    return { root, errors: this.errors };
+  }
+
+  // ---- Private helper methods ----
+
+  /**
+   * Gather text from tokens for TEXT CONTENT (between tags).
+   * Rules:
+   * - Character entities are decoded with `he`; one it rejects is kept raw and recorded in `errors`
+   * - Backslash escapes are NOT interpreted (shown as-is)
+   */
+  private textFromBetweenTags(tokens: IToken[]): string {
+    return tokens
+      .map((t) => {
+        const image = t.image ?? '';
+        if (t.tokenType !== CharacterEntity) {
+          return image;
+        }
+        try {
+          return he.decode(image, { strict: true });
+        } catch {
+          this.errors.push({
+            message: `Failed to decode HTML entity: ${image}`,
+            range: rangeFromTokens([t]),
+          });
+          return image;
+        }
+      })
+      .join('');
+  }
+
+  // ---- Rule implementations ----
+
+  root(ctx: CstRootNode['children']): RootNode {
+    const children: ElementContentNode[] = [];
+    for (const ec of ctx.Content ?? []) {
+      const node = this.visit(ec) as ElementContentNode;
+      if (node) {
+        children.push(node);
+      }
+    }
+
+    const start = children[0]?.range.start ?? 0;
+    const end = children.length ? children[children.length - 1].range.end : 0;
+    return { kind: 'ROOT', children, range: rangeFrom(start, end) };
+  }
+
+  elementContent(ctx: CstElementContentNode['children']): ElementContentNode {
+    if (ctx.Pragma?.length) {
+      return this.visit(ctx.Pragma[0]) as PragmaNode;
+    }
+    if (ctx.Comment?.length) {
+      return this.visit(ctx.Comment[0]) as CommentNode;
+    }
+    if (ctx.Template?.length) {
+      return this.visit(ctx.Template[0]) as TemplateNode;
+    }
+    if (ctx.Element?.length) {
+      return this.visit(ctx.Element[0]) as ElementNode;
+    }
+
+    // Text content between tags → LiteralNode
+    const toks = ctx.TextContent?.[0]?.children.Content ?? [];
+    const text = this.textFromBetweenTags(toks);
+    const r = rangeFromTokens(toks);
+    return literal(text, r.start, r.end);
+  }
+
+  template(ctx: CstTemplateNode['children']): TemplateNode {
+    const open = ctx.TemplateOpen?.[0];
+    const close = ctx.TemplateClose?.[0];
+
+    const exprText = textFromExpressionTokens(ctx.Content ?? []);
+
+    // Expression node range: inner content without braces/outer ws if present
+    const innerStart =
+      ctx.WsAfterOpen?.[0]?.endOffset != null
+        ? ctx.WsAfterOpen[0].endOffset + 1
+        : (open?.endOffset ?? 0) + 1;
+    const innerEnd =
+      ctx.WsAfterContent?.[0]?.startOffset != null
+        ? ctx.WsAfterContent[0].startOffset
+        : (close?.startOffset ?? innerStart);
+
+    // TemplateNode.value is declared as a LiteralNode (nodes.ts no longer has an
+    // ExpressionNode), so the raw expression text is wrapped with `literal`. The
+    // text is kept verbatim — evaluation happens in a later phase — and its range
+    // excludes the braces and the whitespace padding around the expression.
+    const exprNode = literal(exprText, innerStart, innerEnd);
+
+    const outerStart = open?.startOffset ?? innerStart;
+    const outerEnd = (close?.endOffset ?? outerStart - 1) + 1;
+
+    return { kind: 'TEMPLATE', value: exprNode, range: rangeFrom(outerStart, outerEnd) };
+  }
+
+  comment(ctx: CstCommentNode['children']): CommentNode {
+    const open = ctx.CommentOpen?.[0];
+    const close = ctx.CommentClose?.[0];
+    const toks = ctx.Content?.[0]?.children.Content ?? [];
+    const text = textFromRaw(toks);
+    const innerStart = (open?.endOffset ?? -1) + 1;
+    const innerEnd = close?.startOffset ?? innerStart;
+    return {
+      kind: 'COMMENT',
+      value: literal(text, innerStart, innerEnd),
+      range: rangeFrom(open?.startOffset ?? innerStart, (close?.endOffset ?? innerEnd - 1) + 1),
+    };
+  }
+
+  pragma(ctx: CstPragmaNode['children']): PragmaNode {
+    const open = ctx.CommentOpen?.[0];
+    const close = ctx.CommentClose?.[0];
+
+    const idTok = ctx.PragmaIdentifier?.[0];
+    const identifier: LiteralNode = literal(idTok?.image ?? '', idTok?.startOffset ?? 0, (idTok?.endOffset ?? -1) + 1);
+
+    const options: LiteralNode[] = [];
+    for (const opt of ctx.PragmaOption ?? []) {
+      if ((opt as CstQuotedNode).children.OpenQuote?.length) {
+        // Quoted option: every CST node has `children`, so discriminate on the OpenQuote token instead
+        const q = opt as CstQuotedNode;
+        const bodyTokens = q.children.Content?.[0]?.children.Content ?? [];
+        const value = textFromQuoted(bodyTokens);
+        const start = q.children.OpenQuote?.[0]?.startOffset ?? 0;
+        const end = (q.children.CloseQuote?.[0]?.endOffset ?? start) + 1;
+        options.push(literal(value, start, end));
+      } else {
+        // Unquoted identifier-ish tokens captured by commentIdentifierTokens
+        const toks = (opt as CstTokens).children.Content ?? [];
+        const value = textFromRaw(toks);
+        const r = rangeFromTokens(toks);
+        options.push(literal(value, r.start, r.end));
+      }
+    }
+
+    const start = open?.startOffset ?? identifier.range.start;
+    const end = (close?.endOffset ?? identifier.range.end - 1) + 1;
+
+    return {
+      kind: 'PRAGMA',
+      identifier,
+      options,
+      range: rangeFrom(start, end),
+    };
+  }
+
+  quoted(ctx: CstQuotedNode['children']): ValueNode {
+    const open = ctx.OpenQuote?.[0];
+    const close = ctx.CloseQuote?.[0];
+    const toks = ctx.Content?.[0]?.children.Content ?? [];
+    const text = textFromQuoted(toks);
+
+    const innerStart = (open?.endOffset ?? -1) + 1;
+    const innerEnd = close?.startOffset ?? innerStart;
+
+    const lit = literal(text, innerStart, innerEnd);
+    return {
+      kind: 'VALUE',
+      children: [lit],
+      range: rangeFrom(open?.startOffset ?? innerStart, (close?.endOffset ?? innerEnd - 1) + 1),
+    };
+  }
+
+  quotedTemplate(ctx: CstQuotedTemplateNode['children']): ValueNode {
+    const open = ctx.OpenQuote?.[0];
+    const close = ctx.CloseQuote?.[0];
+
+    const children: (LiteralNode | TemplateNode)[] = [];
+
+    // Build mixed children maintaining order
+    for (const part of ctx.Content ?? []) {
+      const asTpl = part as unknown as CstTemplateNode;
+      if (asTpl.children && (asTpl.children.TemplateOpen || asTpl.children.TemplateClose)) {
+        children.push(this.visit(asTpl) as TemplateNode);
+      } else {
+        // token run outside {{ }} inside quotes
+        const toks = (part as CstTokens).children.Content ?? [];
+        const text = textFromQuoted(toks);
+        const r = rangeFromTokens(toks);
+        if (text.length > 0) {
+          children.push(literal(text, r.start, r.end));
+        }
+      }
+    }
+
+    const start = open?.startOffset ?? children[0]?.range.start ?? 0;
+    const end = (close?.endOffset ?? (children[children.length - 1]?.range.end ?? start) - 1) + 1;
+
+    return { kind: 'VALUE', children, range: rangeFrom(start, end) };
+  }
+
+  forIteratorValue(ctx: CstForIteratorNode['children']): ForIteratorNode {
+    const open = ctx.OpenQuote?.[0];
+    const close = ctx.CloseQuote?.[0];
+
+    const itTok = ctx.Iterator?.[0];
+    const iterator = literal(itTok?.image ?? '', itTok?.startOffset ?? 0, (itTok?.endOffset ?? -1) + 1);
+
+    // Raw expression text spanning every token group (empty range after the iterator
+    // if there is none). NOTE(review): assumes ForIteratorNode.collection became a
+    // LiteralNode when ExpressionNode was removed from nodes.ts — confirm.
+    const collGroups = ctx.Collection ?? [];
+    const collText = textFromExpressionTokens(collGroups);
+    const emptyAfterIterator = rangeFrom(iterator.range.end, iterator.range.end);
+    const collRange = rangeFromTokenGroups(collGroups, emptyAfterIterator);
+    const collection = literal(collText, collRange.start, collRange.end);
+
+    const start = open?.startOffset ?? iterator.range.start;
+    const end = (close?.endOffset ?? collection.range.end - 1) + 1;
+
+    return { kind: 'FORITERATOR', iterator, collection, range: rangeFrom(start, end) };
+  }
+
+  attribute(ctx: CstAttributeNode['children']): AttributeNode {
+    const keyTok = ctx.AttributeKey?.[0];
+    const key: LiteralNode = literal(keyTok?.image ?? '', keyTok?.startOffset ?? 0, (keyTok?.endOffset ?? -1) + 1);
+
+    let value: ValueNode | ForIteratorNode;
+
+    if (ctx.forIteratorValue?.length) {
+      value = this.visit(ctx.forIteratorValue[0]) as ForIteratorNode;
+    } else if (ctx.quotedValue?.length) {
+      value = this.visit(ctx.quotedValue[0]) as ValueNode;
+    } else if (ctx.templatedValue?.length) {
+      // Unquoted: key={{ expr }} → wrap as ValueNode with a TemplateNode child
+      const tpl = this.visit(ctx.templatedValue[0]) as TemplateNode;
+      value = { kind: 'VALUE', children: [tpl], range: tpl.range };
+    } else {
+      // Fallback empty value
+      value = { kind: 'VALUE', children: [], range: key.range };
+    }
+
+    const start = key.range.start;
+    const end = value.range.end;
+
+    return { kind: 'ATTRIBUTE', key, value, range: rangeFrom(start, end) };
+  }
+
+  openTagPartial(ctx: CstOpenTagPartialNode['children']): OpenTagNode | { partialEnd: number } {
+    const open = ctx.OpenBracket?.[0];
+    const nameTok = ctx.TagName?.[0];
+
+    const tagName = nameTok?.image ?? '';
+    const tagStart = open?.startOffset ?? nameTok?.startOffset ?? 0;
+    let lastEnd = (nameTok?.endOffset ?? tagStart) + 1;
+
+    const attributes: AttributeNode[] = [];
+    for (const a of ctx.Attribute ?? []) {
+      const attr = this.visit(a) as AttributeNode;
+      attributes.push(attr);
+      lastEnd = Math.max(lastEnd, attr.range.end);
+    }
+
+    const node: OpenTagNode = {
+      kind: 'OPEN',
+      value: literal(tagName, nameTok?.startOffset ?? tagStart, (nameTok?.endOffset ?? tagStart - 1) + 1),
+      attributes,
+      range: rangeFrom(tagStart, lastEnd),
+    };
+
+    return node;
+  }
+
+  closeTag(ctx: CstCloseTagNode['children']): CloseTagNode {
+    const open = ctx.ClosingOpenBracket?.[0];
+    const nameTok = ctx.TagName?.[0];
+    const close = ctx.CloseBracket?.[0];
+
+    const start = open?.startOffset ?? nameTok?.startOffset ?? 0;
+    const end = (close?.endOffset ?? nameTok?.endOffset ?? start - 1) + 1;
+
+    return {
+      kind: 'CLOSE',
+      value: literal(nameTok?.image ?? '', nameTok?.startOffset ?? start, (nameTok?.endOffset ?? start - 1) + 1),
+      range: rangeFrom(start, end),
+    };
+  }
+
+  element(ctx: CstElementNode['children']): ElementNode | SelfCloseElementNode {
+    const partial = this.visit(ctx.OpenTagPartial ?? []) as OpenTagNode;
+
+    if (ctx.SelfCloseBracket?.length) {
+      const selfTok = ctx.SelfCloseBracket[0];
+      const end = (selfTok.endOffset ?? partial.range.end - 1) + 1;
+      return {
+        kind: 'SELFCLOSE',
+        value: partial.value,
+        attributes: partial.attributes,
+        range: rangeFrom(partial.range.start, end),
+      };
+    }
+
+    // Normal or literal element with explicit CloseTag
+    const children: ElementContentNode[] = [];
+    if (ctx.TextContent?.length) {
+      // Literal element: everything inside is plain text (no template interpolation)
+      const toks = ctx.TextContent[0].children.Content ?? [];
+      const text = textFromRaw(toks);
+      const r = rangeFromTokens(toks);
+      children.push(literal(text, r.start, r.end));
+    } else {
+      // Normal element: nested content parsed as usual
+      for (const ec of ctx.Content ?? []) {
+        const node = this.visit(ec) as ElementContentNode | undefined;
+        if (node) {
+          children.push(node);
+        }
+      }
+    }
+
+    const close = this.visit(ctx.CloseTag ?? []) as CloseTagNode;
+
+    // Tag name matching check
+    const openName = partial.value.value.toLowerCase();
+    const closeName = close.value.value.toLowerCase();
+    if (openName !== closeName) {
+      this.errors.push({
+        message: `Mismatched closing tag: expected </${openName}> but found </${closeName}>`,
+        range: close.range,
+      });
+    }
+
+    const start = partial.range.start;
+    const end = close.range.end;
+
+    return { kind: 'ELEMENT', open: partial, close, children, range: rangeFrom(start, end) };
+  }
+}
+
+// ---------------------------
+// Public helpers
+// ---------------------------
+
+/** Convert a parser-produced CST into an AST RootNode plus any AST-build errors. */
+export function cstToAst(cst: CstNode): { root: RootNode; errors: AstBuildError[] } {
+  // A fresh visitor per call keeps the error list from leaking between inputs.
+  return new ExtendedPomlAstVisitor().build(cst);
+}
+
+/** Placeholder: never parses `input`. It throws if the parser's class has a truthy static `parse`, else returns `root: undefined` plus an error naming parsePomlToCst + cstToAst. */
+export function parsePomlToAst(input: string): { root: RootNode | undefined; errors: AstBuildError[] } {
+  const { cst } = (extendedPomlParser as any).constructor.parse
+    ? ((): any => {
+        throw new Error('Use parsePomlToCst from cst.ts to obtain a CST first.');
+      })()
+    : { cst: undefined };
+  // `cst` is never used below; callers are expected to run parsePomlToCst(input) and cstToAst(cst) themselves.
+  return { root: undefined, errors: [{ message: 'Call parsePomlToCst(input) then cstToAst(cst).' }] };
+}
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index d75c8a58..56ea95d6 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -14,27 +14,6 @@ export interface CstTokens extends CstNode {
   };
 }
 
-/**
- * Represents a JavaScript expression as a string.
- *
- * This node stores raw expression text that will be evaluated at runtime.
- * It serves as a wrapper for expressions used in various contexts like
- * conditions, loops, and template interpolations.
- *
- * Cases that apply:
- * - Conditional expressions: `i > 0`, `user.name === "admin"`
- * - Function calls: `formatDate(now)`, `items.filter(x => x.active)`
- *
- * Cases that do not apply:
- * - Template syntax including braces: `{{ expression }}` (use TemplateNode)
- * - String literals with quotes: `"hello"` (use ValueNode)
- * - POML markup: `<tag>` (use element nodes)
- */
-export interface ExpressionNode extends AstNode {
-  kind: 'EXPRESSION';
-  value: string;
-}
-
 /**
  * Represents a template interpolation with double curly braces,
  * or sometimes without braces in specific attributes.
@@ -51,7 +30,7 @@ export interface ExpressionNode extends AstNode {
  * - Template usage in if attributes: `condition` in `if="condition"`
  *
  * Cases that do not apply:
- * - Full attribute expressions: `if="x > 0"` (use ExpressionNode)
+ * - Full attribute expressions: `if="x > 0"` (use AttributeNode)
  * - Plain text: `Hello World` (use LiteralNode)
  * - Single braces: `{ not a template }` (treated as plain text)
  * - Template elements: <template>{{ this is a jinja template }}</template> (use LiteralNode)
@@ -59,7 +38,7 @@ export interface ExpressionNode extends AstNode {
  */
 export interface TemplateNode extends AstNode {
   kind: 'TEMPLATE';
-  value: ExpressionNode;
+  value: LiteralNode;
 }
 
 /**
@@ -94,11 +73,11 @@ export interface CstTemplateNode extends CstNode {
  * - Tag names: the `div` in `<div>`
  * - Identifiers: variable names like `item` in for loops
  * - Whitespace and formatting text between elements
+ * - Expressions: `x > 0` (stored as raw expression text)
  *
  * Cases that do not apply:
  * - Text containing templates: `Hello {{ name }}` (use ValueNode with children)
  * - Quoted strings in attributes: `"value"` (use ValueNode)
- * - Expressions: `x > 0` (use ExpressionNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  */
 export interface LiteralNode extends AstNode {
@@ -178,7 +157,7 @@ export interface CstQuotedTemplateNode extends CstNode {
 export interface ForIteratorNode extends AstNode {
   kind: 'FORITERATOR';
   iterator: LiteralNode;
-  collection: ExpressionNode;
+  collection: LiteralNode;
 }
 
 /**

From eca09b408d7ebac57566d4d9d52f8369ddf7909b Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 14:08:24 +0800
Subject: [PATCH 70/76] .

---
 packages/poml/next/ast.ts              | 150 +++++++++++--------------
 packages/poml/next/error.ts            |  65 ++++++++---
 packages/poml/next/nodes.ts            |  85 +++-----------
 packages/poml/tests/reader/cst.test.ts |  51 +++------
 4 files changed, 145 insertions(+), 206 deletions(-)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index bd09c73a..a650e827 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -35,9 +35,6 @@ import {
 import {
   ElementNode,
   ElementContentNode,
-  OpenTagNode,
-  CloseTagNode,
-  SelfCloseElementNode,
   ValueNode,
   TemplateNode,
   LiteralNode,
@@ -50,6 +47,7 @@ import {
 import { Range } from './types';
 import { extendedPomlParser } from './cst';
 import { BackslashEscape, CharacterEntity } from './lexer';
+import * as error from './error';
 
 /** Error produced while building the AST (beyond lex/parse errors). */
 export interface AstBuildError {
@@ -57,21 +55,6 @@ export interface AstBuildError {
   range?: Range;
 }
 
-/** Utility: build a range from two offsets (inclusive start, exclusive end). */
-function rangeFrom(start: number, end: number): Range {
-  return { start, end };
-}
-
-/** Utility: range that spans a list of tokens (or is empty if none). */
-function rangeFromTokens(tokens: IToken[]): Range {
-  if (!tokens.length) {
-return { start: 0, end: 0 };
-}
-  const first = tokens[0];
-  const last = tokens[tokens.length - 1];
-  return rangeFrom(first.startOffset ?? 0, (last.endOffset ?? first.startOffset ?? 0) + 1);
-}
-
 /** Utility: create a LiteralNode from raw text and token range. */
 function literal(value: string, start: number, end: number): LiteralNode {
   return { kind: 'STRING', value, range: rangeFrom(start, end) };
@@ -111,6 +94,34 @@ function decodeEscape(seq: string): string {
   }
 }
 
+/**
+ * Range utilities.
+ * Build a range from two offsets (inclusive start, inclusive end).
+ */
+function rangeFrom(start: number, end: number): Range {
+  return { start, end };
+}
+
+/**
+ * Range that spans a list of tokens (or is [0, 0] if none).
+ */
+function rangeFromTokens(tokens: IToken[]): Range {
+  if (!tokens.length) {
+    return { start: 0, end: 0 };
+  }
+  const first = tokens[0];
+  const last = tokens[tokens.length - 1];
+  return rangeFrom(first.startOffset ?? 0, (last.endOffset ?? first.startOffset ?? 0) + 1);
+}
+
+/**
+ * Range that spans a list of CstTokens (or is [0, 0] if none).
+ */
+function rangeFromCstTokens(groups: CstTokens[]): Range {
+  const allTokens = groups.flatMap((g) => g.children.Content ?? []);
+  return rangeFromTokens(allTokens);
+}
+
 /** Gather raw text from a list of tokens without any decoding. */
 function textFromRaw(tokens: IToken[]): string {
   return tokens.map((t) => t.image ?? '').join('');
@@ -135,27 +146,11 @@ function textFromQuoted(tokens: IToken[]): string {
 }
 
 /**
- * Gather text from tokens INSIDE TEMPLATE EXPRESSION ({{ ... }}).
- * We simply join raw images because evaluation is a later phase.
+ * Gather text from CstTokens groups.
+ * Each group is expected to be a list of ITokens.
  */
-function textFromExpressionTokens(groups: CstTokens[]): string {
-  const pieces: string[] = [];
-  for (const g of groups) {
-    const toks = g.children.Content ?? [];
-    pieces.push(textFromRaw(toks));
-  }
-  return pieces.join('');
-}
-
-/** Build a range from a CST token group sequence. */
-function rangeFromTokenGroups(groups: CstTokens[], fallback: Range): Range {
-  const firstTok = groups[0]?.children.Content?.[0];
-  const lastGroup = groups[groups.length - 1];
-  const lastTok = lastGroup?.children.Content?.[lastGroup.children.Content.length - 1];
-  if (firstTok && lastTok) {
-    return rangeFrom(firstTok.startOffset ?? 0, (lastTok.endOffset ?? 0) + 1);
-  }
-  return fallback;
+function textFromCstTokens(groups: CstTokens[], fromIToken: (tokens: IToken[]) => string): string {
+  return groups.map((g) => fromIToken(g.children.Content ?? [])).join('');
 }
 
 const BaseVisitor = extendedPomlParser.getBaseCstVisitorConstructorWithDefaults();
@@ -195,25 +190,22 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
    * - Character entities are decoded
    * - Backslash escapes are NOT interpreted (shown as-is)
    */
-  //   private textFromBetweenTags(tokens: IToken[]): string {
-  //     return tokens
-  //       .map((t) => {
-  //         if (t.tokenType === CharacterEntity) {
-  //           try {
-  //             return he.decode(t.image ?? '', { strict: true });
-  //           } catch (e) {
-  //             this.errors.push({
-  //               message: `Failed to decode HTML entity: ${t.image}`,
-  //               range: rangeFromTokens([t])
-  //             })
-  //           }
-  //         }
-  //       }
-  //         if (name === 'CharacterEntity') return decodeEntity(t.image ?? '')
-  //     return t.image ?? ''
-  //   })
-  //       .join('')
-  // }
+  private textFromBetweenTags(tokens: IToken[]): string {
+    return tokens
+      .map((t) => {
+        if (t.tokenType === CharacterEntity) {
+          try {
+            return he.decode(t.image ?? '', { strict: true });
+          } catch (e) {
+            this.errors.push({
+              message: `Failed to decode HTML entity: ${t.image}`,
+              range: rangeFromTokens([t]),
+            });
+          }
+        }
+      })
+      .join('');
+  }
 
   // ---- Rule implementations ----
 
@@ -222,8 +214,8 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     for (const ec of ctx.children.Content ?? []) {
       const node = this.visit(ec) as ElementContentNode;
       if (node) {
-children.push(node);
-}
+        children.push(node);
+      }
     }
 
     const start = children[0]?.range.start ?? 0;
@@ -233,17 +225,17 @@ children.push(node);
 
   elementContent(ctx: CstElementContentNode): ElementContentNode {
     if (ctx.Pragma?.length) {
-return this.visit(ctx.Pragma[0]) as PragmaNode;
-}
+      return this.visit(ctx.Pragma[0]) as PragmaNode;
+    }
     if (ctx.Comment?.length) {
-return this.visit(ctx.Comment[0]) as CommentNode;
-}
+      return this.visit(ctx.Comment[0]) as CommentNode;
+    }
     if (ctx.Template?.length) {
-return this.visit(ctx.Template[0]) as TemplateNode;
-}
+      return this.visit(ctx.Template[0]) as TemplateNode;
+    }
     if (ctx.Element?.length) {
-return this.visit(ctx.Element[0]) as ElementNode;
-}
+      return this.visit(ctx.Element[0]) as ElementNode;
+    }
 
     // Text content between tags → LiteralNode
     const toks = ctx.TextContent?.[0]?.children.Content ?? [];
@@ -256,23 +248,15 @@ return this.visit(ctx.Element[0]) as ElementNode;
     const open = ctx.children.TemplateOpen?.[0];
     const close = ctx.children.TemplateClose?.[0];
 
-    const exprText = textFromExpressionTokens(ctx.children.Content ?? []);
+    const exprText = textFromCstTokens(ctx.children.Content ?? [], textFromRaw);
+    const exprRange = rangeFromCstTokens(ctx.children.Content ?? []);
 
-    // Expression node range: inner content without braces/outer ws if present
-    const innerStart =
-      ctx.children.WsAfterOpen?.[0]?.endOffset != null
-        ? ctx.children.WsAfterOpen[0].endOffset + 1
-        : (open?.endOffset ?? 0) + 1;
-    const innerEnd =
-      ctx.children.WsAfterContent?.[0]?.startOffset != null
-        ? ctx.children.WsAfterContent[0].startOffset
-        : (close?.startOffset ?? innerStart);
-
-    const exprNode: ExpressionNode = {
-      kind: 'EXPRESSION',
+    const exprNode: LiteralNode = {
+      kind: 'STRING',
       value: exprText,
-      range: rangeFrom(innerStart, innerEnd),
+      range: exprRange,
     };
+    ctx.startOffset = open?.startOffset ?? exprRange.start;
 
     const outerStart = open?.startOffset ?? innerStart;
     const outerEnd = (close?.endOffset ?? outerStart - 1) + 1;
@@ -365,8 +349,8 @@ return this.visit(ctx.Element[0]) as ElementNode;
         const text = textFromQuoted(toks);
         const r = rangeFromTokens(toks);
         if (text.length > 0) {
-children.push(literal(text, r.start, r.end));
-}
+          children.push(literal(text, r.start, r.end));
+        }
       }
     }
 
diff --git a/packages/poml/next/error.ts b/packages/poml/next/error.ts
index 83928da6..1bc66ab2 100644
--- a/packages/poml/next/error.ts
+++ b/packages/poml/next/error.ts
@@ -3,6 +3,12 @@ import chalk from 'chalk';
 import { Diagnostic, Range, Severity } from './types';
 import sourceManager from './source';
 
+interface FormatOptions {
+  showWarnings?: boolean;
+  showInfo?: boolean;
+  groupByFile?: boolean;
+}
+
 /**
  * Global Error Collector.
  *
@@ -309,19 +315,19 @@ export class ErrorCollector {
   /**
    * Format all diagnostics for CLI output
    */
-  public format(
-    options: {
-      showWarnings?: boolean;
-      showInfo?: boolean;
-      groupByFile?: boolean;
-    } = {},
-  ): string {
-    const { showWarnings = true, showInfo = false, groupByFile = true } = options;
+  public format(options?: FormatOptions): string {
+    const { showWarnings = true, showInfo = false, groupByFile = true } = options ?? {};
 
     const filtered = this.diagnostics.filter((d) => {
-      if (d.severity === Severity.ERROR) return true;
-      if (d.severity === Severity.WARNING) return showWarnings;
-      if (d.severity === Severity.INFO) return showInfo;
+      if (d.severity === Severity.ERROR) {
+return true;
+}
+      if (d.severity === Severity.WARNING) {
+return showWarnings;
+}
+      if (d.severity === Severity.INFO) {
+return showInfo;
+}
       return false;
     });
 
@@ -355,7 +361,9 @@ export class ErrorCollector {
         output.push('');
 
         const diagnostics = byFile.get(file)!.sort((a, b) => {
-          if (!a.range || !b.range) return 0;
+          if (!a.range || !b.range) {
+return 0;
+}
           return a.range.start - b.range.start;
         });
 
@@ -404,7 +412,7 @@ export class ErrorCollector {
   /**
    * Print formatted errors to console
    */
-  public print(options?: Parameters<typeof this.format>[0]): void {
+  public print(options?: FormatOptions): void {
     console.log(this.format(options));
   }
 
@@ -417,6 +425,33 @@ export class ErrorCollector {
 }
 
 // Create singleton instance
-const errorCollector = new ErrorCollector();
+let errorCollector: ErrorCollector | undefined = undefined;
+
+export function getErrorCollector(): ErrorCollector {
+  if (!errorCollector) {
+    errorCollector = new ErrorCollector();
+  }
+  return errorCollector;
+}
 
-export default errorCollector;
+// Convenience exports: module-level wrappers that delegate to the shared collector.
+
+export const clear = () => getErrorCollector().clear();
+export const error = (message: string, range?: Range, options: Partial<Diagnostic> = {}) =>
+  getErrorCollector().error(message, range, options);
+export const warning = (message: string, range?: Range, options: Partial<Diagnostic> = {}) =>
+  getErrorCollector().warning(message, range, options);
+export const info = (message: string, range?: Range, options: Partial<Diagnostic> = {}) =>
+  getErrorCollector().info(message, range, options);
+export const jsonError = (originalError: Error, jsonRange: Range) =>
+  getErrorCollector().jsonError(originalError, jsonRange);
+export const expressionError = (originalError: Error, expressionRange: Range, evalHeaderLength: number = 0) =>
+  getErrorCollector().expressionError(originalError, expressionRange, evalHeaderLength);
+export const suppressCode = (code: string) => getErrorCollector().suppressCode(code);
+export const hasErrors = () => getErrorCollector().hasErrors();
+export const getErrors = () => getErrorCollector().getErrors();
+export const getWarnings = () => getErrorCollector().getWarnings();
+export const getCounts = () => getErrorCollector().getCounts();
+export const format = (options?: FormatOptions) => getErrorCollector().format(options);
+export const print = (options?: FormatOptions) => getErrorCollector().print(options);
+export const getDiagnostics = () => getErrorCollector().getDiagnostics();
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 56ea95d6..3577822e 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -227,35 +227,18 @@ export interface CstAttributeNode extends CstNode {
 }
 
 /**
- * Represents an opening tag in POML markup.
+ * Represents the leading portion of an opening tag in POML markup.
  *
  * Open tags mark the beginning of an element that expects a corresponding
  * closing tag. They may contain attributes that configure the element's
  * behavior and appearance.
  *
- * Cases that apply:
- * - Standard opening tags: `<document>`, `<message role="user">`
- * - Tags with attributes: `<div class="container" id="main">`
- * - Tags with for-loops: `<task for="item in items">`
- * - Nested structure beginnings: `<section>` before content
- *
- * Cases that do not apply:
- * - Self-closing tags: `<image src="..." />` (use SelfCloseTagNode)
- * - Closing tags: `</document>` (use CloseTagNode)
- * - Complete elements: opening + content + closing (use ElementNode)
- * - Invalid or malformed tags (treated as text)
- */
-export interface OpenTagNode extends AstNode {
-  kind: 'OPEN';
-  value: LiteralNode; // tag name
-  attributes: AttributeNode[];
-}
-
-/**
- * Related CST node interfaces for parsing stage.
- *
- * Opening tag without the ending close bracket.
+ * This is an opening tag without the ending close bracket.
  * Allow prefix sharing with SelfCloseElementNode.
+ *
+ * Examples:
+ * - `<document`
+ * - `<message role="user"`
  */
 export interface CstOpenTagPartialNode extends CstNode {
   children: {
@@ -274,23 +257,10 @@ export interface CstOpenTagPartialNode extends CstNode {
  * Close tags mark the end of an element, matching a previously opened tag.
  * They contain only the tag name and no attributes.
  *
- * Cases that apply:
+ * Examples:
  * - Standard closing tags: `</document>`, `</message>`
  * - Nested structure endings: `</section>`, `</div>`
  * - Any valid POML element closure
- *
- * Cases that do not apply:
- * - Opening tags: `<document>` (use OpenTagNode)
- * - Self-closing tags: `<br/>` (use SelfCloseTagNode)
- * - Tags with attributes (closing tags never have attributes)
- */
-export interface CloseTagNode extends AstNode {
-  kind: 'CLOSE';
-  value: LiteralNode; // tag name
-}
-
-/**
- * Related CST node interfaces for parsing stage.
  */
 export interface CstCloseTagNode extends CstNode {
   children: {
@@ -302,29 +272,6 @@ export interface CstCloseTagNode extends CstNode {
   };
 }
 
-/**
- * Represents a self-closing tag in POML markup.
- *
- * Self-closing elements represent complete elements that have no children or
- * content. They combine opening and closing in a single tag and may have
- * attributes.
- *
- * Cases that apply:
- * - Image elements: `<image src="photo.jpg" />`
- * - Runtime configurations: `<runtime model="gpt-5" temperature="0.7" />`
- *
- * Cases that do not apply:
- * - Meta elements: `<meta name="author" content="John" />`
- * - Elements with content: `<div>content</div>` (use ElementNode)
- * - Separate open/close tags: `<div></div>` (use ElementNode)
- * - Tags without the self-closing slash: `<img>` (use OpenTagNode)
- */
-export interface SelfCloseElementNode extends AstNode {
-  kind: 'SELFCLOSE';
-  value: LiteralNode; // tag name
-  attributes: AttributeNode[];
-}
-
 /**
  * Represents a complete POML element with its content.
  *
@@ -336,15 +283,16 @@ export interface SelfCloseElementNode extends AstNode {
  * that treat their content as literal text without any template variable interpolation.
  * Content is preserved exactly as written, useful for code samples or pre-formatted text.
  *
+ * Alternatively, it also supports self-closing elements.
  *
  * Cases that apply:
  * - Any elements: `<document parser="txt">...content...</document>`
  * - Output schemas with templates: `<output-schema>{{ schemaDefinition }}</output-schema>`
- * - Nested elements: `<section><paragraph>Text</paragraph></section>`
  * - Literal text elements: `<text>Literal {{ not_interpolated }}</text>` (literal elements)
+ * - Self-closing elements: `<image src="photo.jpg" />`
+ * - Runtime configurations: `<runtime model="gpt-5" temperature="0.7" />`
  *
  * Cases that do not apply:
- * - Self-closing elements: `<image />` (use SelfCloseTagNode)
  * - Literal text content: plain text (use LiteralNode)
  * - Template variables: `{{ var }}` (use TemplateNode)
  * - Meta elements: `<meta>` tags (use MetaNode)
@@ -355,10 +303,11 @@ export interface SelfCloseElementNode extends AstNode {
  */
 export interface ElementNode extends AstNode {
   kind: 'ELEMENT';
-  open: OpenTagNode;
-  close: CloseTagNode;
-  children: ElementContentNode[];
-  // isLiteral?: boolean; // True for <text> and <template> tags
+  tagName: string;
+  attributes: AttributeNode[];
+  // Children is undefined for self-closing tags.
+  // If it's not self-closing, children is at least an empty array.
+  children?: ElementContentNode[];
 }
 
 export type ElementContentNode = ElementNode | CommentNode | PragmaNode | LiteralNode | TemplateNode;
@@ -516,15 +465,11 @@ type Draft<T extends { kind: string }> = DeepPartialExcept<T, 'kind'>;
 
 // Union of your strict nodes
 export type StrictNode =
-  | ExpressionNode
   | TemplateNode
   | LiteralNode
   | ValueNode
   | ForIteratorNode
   | AttributeNode
-  | OpenTagNode
-  | CloseTagNode
-  | SelfCloseElementNode
   | ElementNode
   | CommentNode
   | PragmaNode
diff --git a/packages/poml/tests/reader/cst.test.ts b/packages/poml/tests/reader/cst.test.ts
index 48d11548..e10377ef 100644
--- a/packages/poml/tests/reader/cst.test.ts
+++ b/packages/poml/tests/reader/cst.test.ts
@@ -171,7 +171,6 @@ describe('CST Parser Rules', () => {
   test('element rule: self-closing element', () => {
     const { node } = withParser('<meta />', (p) => p.element()) as { node: CstElementNode };
     expect(node.children.OpenTagPartial?.[0]).toBeDefined();
-    node.recoveredNode;
     const openTag = node.children.OpenTagPartial?.[0] as CstOpenTagPartialNode;
     expect(openTag.children.OpenBracket?.[0].image).toBe('<');
     expect(openTag.children.TagName?.[0].image).toBe('meta');
@@ -217,6 +216,15 @@ describe('CST Parser Rules', () => {
     const contentNodes = node.children.Content || [];
     const elementNames = contentNodes.map((n) => (n as any).name);
     expect(elementNames).toContain('elementContent');
+
+    expect(node.location).toEqual({
+      startOffset: 0,
+      startLine: 1,
+      startColumn: 1,
+      endOffset: 70,
+      endLine: 1,
+      endColumn: 71,
+    });
   });
 });
 
@@ -722,42 +730,6 @@ const isToken = (x: unknown): x is IToken => !!x && typeof (x as IToken).image =
 const isCstNode = (x: unknown): x is CstNode =>
   !!x && typeof (x as any).name === 'string' && typeof (x as any).children === 'object';
 
-/* -------------------- ranges -------------------- */
-const tokStart = (t: IToken) => (typeof t.startOffset === 'number' ? t.startOffset : 0);
-const tokEnd = (t: IToken) => (typeof t.endOffset === 'number' ? t.endOffset : tokStart(t) + (t.image?.length ?? 0));
-
-function* walkTokens(value: unknown): Generator<IToken> {
-  if (isToken(value)) {
-    yield value;
-    return;
-  }
-  if (Array.isArray(value)) {
-    for (const v of value) {
-      yield* walkTokens(v);
-    }
-    return;
-  }
-  if (isCstNode(value)) {
-    const ch = (value as any).children as Record<string, unknown>;
-    for (const k of Object.keys(ch)) {
-      yield* walkTokens(ch[k]);
-    }
-  }
-}
-
-function nodeRange(node: CstNode): { start: number; end: number } {
-  let start = Infinity,
-    end = -Infinity;
-  for (const t of walkTokens(node)) {
-    start = Math.min(start, tokStart(t));
-    end = Math.max(end, tokEnd(t));
-  }
-  if (!Number.isFinite(start) || !Number.isFinite(end)) {
-    return { start: 0, end: 0 };
-  }
-  return { start, end };
-}
-
 /* -------------------- core normalize -------------------- */
 /**
  * Rules:
@@ -924,7 +896,10 @@ export function locations(node: CstNode): { start: number; end: number; children
   const S: Strategies = {
     onToken: (_t) => undefined, // drop token ranges
     onNodeWrap: (n, children) => {
-      const base: { start: number; end: number; children?: Record<string, unknown> | unknown[] } = nodeRange(n);
+      const base: { start: number; end: number; children?: Record<string, unknown> | unknown[] } = {
+        start: n.location?.startOffset ?? 0,
+        end: n.location?.endOffset ?? 0,
+      };
       if (children !== undefined) {
         if (typeof children === 'object' && !Array.isArray(children)) {
           const keys = Object.keys(children as Record<string, unknown>);

From c5ae41f4b1aff64ba0447c0dfe7858b773c85173 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 14:47:32 +0800
Subject: [PATCH 71/76] .

---
 packages/poml/next/ast.ts   | 269 +++++++++++++-----------------------
 packages/poml/next/nodes.ts |   2 +-
 2 files changed, 99 insertions(+), 172 deletions(-)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index a650e827..14e0b167 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -55,11 +55,6 @@ export interface AstBuildError {
   range?: Range;
 }
 
-/** Utility: create a LiteralNode from raw text and token range. */
-function literal(value: string, start: number, end: number): LiteralNode {
-  return { kind: 'STRING', value, range: rangeFrom(start, end) };
-}
-
 /** Decode a single backslash escape sequence (for quoted strings). */
 function decodeEscape(seq: string): string {
   // seq includes the leading backslash (e.g. " , \n)
@@ -94,6 +89,29 @@ function decodeEscape(seq: string): string {
   }
 }
 
+// ---- Range and text utilities ----
+
+/** Utility: create a LiteralNode from raw text and token range. */
+function literal(value: string, range: Range): LiteralNode {
+  return { kind: 'STRING', value, range };
+}
+
+/**
+ * Create a LiteralNode from IToken list.
+ */
+function literalFromTokens(tokens: IToken[]): LiteralNode {
+  return literal(textFromRaw(tokens), rangeFromTokens(tokens));
+}
+
+/**
+ * Convert CST token groups to a literal string.
+ * String contents are kept as is, no escape decoding.
+ */
+function literalFromCstTokens(groups: CstTokens[]): LiteralNode {
+  const text = textFromCstTokens(groups, textFromRaw);
+  return literal(text, rangeFromCstTokens(groups));
+}
+
 /**
  * Range utilities.
  * Build a range from two offsets (inclusive start, inclusive end).
@@ -111,7 +129,14 @@ function rangeFromTokens(tokens: IToken[]): Range {
   }
   const first = tokens[0];
   const last = tokens[tokens.length - 1];
-  return rangeFrom(first.startOffset ?? 0, (last.endOffset ?? first.startOffset ?? 0) + 1);
+  return rangeFrom(first.startOffset ?? 0, last.endOffset ?? first.startOffset ?? 0);
+}
+
+/**
+ * Range of any CstNode (or [0, 0] if it has no location).
+ */
+function rangeFromCstNode(node: CstNode): Range {
+  return rangeFrom(node.location?.startOffset ?? 0, node.location?.endOffset ?? node.location?.startOffset ?? 0);
 }
 
 /**
@@ -146,13 +171,15 @@ function textFromQuoted(tokens: IToken[]): string {
 }
 
 /**
- * Gather text from CstTokens groups.
+ * Gather text from CstToken groups.
  * Each group is expected to be a list of ITokens.
  */
 function textFromCstTokens(groups: CstTokens[], fromIToken: (tokens: IToken[]) => string): string {
   return groups.map((g) => fromIToken(g.children.Content ?? [])).join('');
 }
 
+// ---- AST Visitor ----
+
 const BaseVisitor = extendedPomlParser.getBaseCstVisitorConstructorWithDefaults();
 
 /**
@@ -182,31 +209,6 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     return { root, errors: this.errors };
   }
 
-  // ---- Private helper methods ----
-
-  /**
-   * Gather text from tokens for TEXT CONTENT (between tags).
-   * Rules:
-   * - Character entities are decoded
-   * - Backslash escapes are NOT interpreted (shown as-is)
-   */
-  private textFromBetweenTags(tokens: IToken[]): string {
-    return tokens
-      .map((t) => {
-        if (t.tokenType === CharacterEntity) {
-          try {
-            return he.decode(t.image ?? '', { strict: true });
-          } catch (e) {
-            this.errors.push({
-              message: `Failed to decode HTML entity: ${t.image}`,
-              range: rangeFromTokens([t]),
-            });
-          }
-        }
-      })
-      .join('');
-  }
-
   // ---- Rule implementations ----
 
   root(ctx: CstRootNode): RootNode {
@@ -218,63 +220,38 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
       }
     }
 
-    const start = children[0]?.range.start ?? 0;
-    const end = children.length ? children[children.length - 1].range.end : 0;
-    return { kind: 'ROOT', children, range: rangeFrom(start, end) };
+    return { kind: 'ROOT', children, range: rangeFromCstNode(ctx) };
   }
 
   elementContent(ctx: CstElementContentNode): ElementContentNode {
-    if (ctx.Pragma?.length) {
-      return this.visit(ctx.Pragma[0]) as PragmaNode;
-    }
-    if (ctx.Comment?.length) {
-      return this.visit(ctx.Comment[0]) as CommentNode;
+    if (ctx.children.Pragma?.length) {
+      return this.visit(ctx.children.Pragma[0]) as PragmaNode;
+    } else if (ctx.children.Comment?.length) {
+      return this.visit(ctx.children.Comment[0]) as CommentNode;
+    } else if (ctx.children.Template?.length) {
+      return this.visit(ctx.children.Template[0]) as TemplateNode;
+    } else if (ctx.children.Element?.length) {
+      return this.visit(ctx.children.Element[0]) as ElementNode;
+    } else if (ctx.children.TextContent?.length) {
+      // Text contents between tags
+      return this.visit(ctx.children.TextContent[0]) as LiteralNode;
     }
-    if (ctx.Template?.length) {
-      return this.visit(ctx.Template[0]) as TemplateNode;
-    }
-    if (ctx.Element?.length) {
-      return this.visit(ctx.Element[0]) as ElementNode;
-    }
-
-    // Text content between tags → LiteralNode
-    const toks = ctx.TextContent?.[0]?.children.Content ?? [];
-    const text = textFromBetweenTags(toks);
-    const r = rangeFromTokens(toks);
-    return literal(text, r.start, r.end);
+    // This should not happen
+    this.errors.push({ message: 'Unknown element content', range: rangeFromCstNode(ctx) });
+    return literal('', rangeFromCstNode(ctx));
   }
 
   template(ctx: CstTemplateNode): TemplateNode {
-    const open = ctx.children.TemplateOpen?.[0];
-    const close = ctx.children.TemplateClose?.[0];
-
-    const exprText = textFromCstTokens(ctx.children.Content ?? [], textFromRaw);
-    const exprRange = rangeFromCstTokens(ctx.children.Content ?? []);
-
-    const exprNode: LiteralNode = {
-      kind: 'STRING',
-      value: exprText,
-      range: exprRange,
-    };
-    ctx.startOffset = open?.startOffset ?? exprRange.start;
-
-    const outerStart = open?.startOffset ?? innerStart;
-    const outerEnd = (close?.endOffset ?? outerStart - 1) + 1;
-
-    return { kind: 'TEMPLATE', value: exprNode, range: rangeFrom(outerStart, outerEnd) };
+    const exprNode = literalFromCstTokens(ctx.children.Content ?? []);
+    return { kind: 'TEMPLATE', value: exprNode, range: rangeFromCstNode(ctx) };
   }
 
   comment(ctx: CstCommentNode): CommentNode {
-    const open = ctx.children.CommentOpen?.[0];
-    const close = ctx.children.CommentClose?.[0];
-    const toks = ctx.children.Content?.[0]?.children.Content ?? [];
-    const text = rawFrom(toks);
-    const innerStart = (open?.endOffset ?? -1) + 1;
-    const innerEnd = close?.startOffset ?? innerStart;
+    const text = textFromCstTokens(ctx.children.Content ?? [], textFromRaw);
     return {
       kind: 'COMMENT',
-      value: literal(text, innerStart, innerEnd),
-      range: rangeFrom(open?.startOffset ?? innerStart, (close?.endOffset ?? innerEnd - 1) + 1),
+      value: literalFromCstTokens(ctx.children.Content ?? []),
+      range: rangeFromCstNode(ctx),
     };
   }
 
@@ -383,8 +360,8 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
   }
 
   attribute(ctx: CstAttributeNode): AttributeNode {
-    const keyTok = ctx.children.AttributeKey?.[0];
-    const key: LiteralNode = literal(keyTok?.image ?? '', keyTok?.startOffset ?? 0, (keyTok?.endOffset ?? -1) + 1);
+    const key: LiteralNode = literalFromTokens(ctx.children.AttributeKey ?? []);
+    const range = rangeFromCstNode(ctx);
 
     let value: ValueNode | ForIteratorNode;
 
@@ -393,128 +370,78 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     } else if (ctx.children.quotedValue?.length) {
       value = this.visit(ctx.children.quotedValue[0]) as ValueNode;
     } else if (ctx.children.templatedValue?.length) {
-      // Unquoted: key={{ expr }} → wrap as ValueNode with a TemplateNode child
+      // Unquoted: key={{ expr }} -> wrap as ValueNode with a TemplateNode child
       const tpl = this.visit(ctx.children.templatedValue[0]) as TemplateNode;
       value = { kind: 'VALUE', children: [tpl], range: tpl.range };
     } else {
       // Fallback empty value
+      this.errors.push({
+        message: `Attribute "${key.value}" is missing a value`,
+        range,
+      });
       value = { kind: 'VALUE', children: [], range: key.range };
     }
 
-    const start = key.range.start;
-    const end = value.range.end;
-
-    return { kind: 'ATTRIBUTE', key, value, range: rangeFrom(start, end) };
+    return { kind: 'ATTRIBUTE', key, value, range };
   }
 
-  openTagPartial(ctx: CstOpenTagPartialNode): OpenTagNode | { partialEnd: number } {
-    const open = ctx.children.OpenBracket?.[0];
-    const nameTok = ctx.children.TagName?.[0];
-
-    const tagName = nameTok?.image ?? '';
-    const tagStart = open?.startOffset ?? nameTok?.startOffset ?? 0;
-    let lastEnd = (nameTok?.endOffset ?? tagStart) + 1;
-
-    const attributes: AttributeNode[] = [];
-    for (const a of ctx.children.Attribute ?? []) {
-      const attr = this.visit(a) as AttributeNode;
-      attributes.push(attr);
-      lastEnd = Math.max(lastEnd, attr.range.end);
-    }
-
-    const node: OpenTagNode = {
-      kind: 'OPEN',
-      value: literal(tagName, nameTok?.startOffset ?? tagStart, (nameTok?.endOffset ?? tagStart - 1) + 1),
-      attributes,
-      range: rangeFrom(tagStart, lastEnd),
-    };
-
-    return node as any;
+  /**
+   * Gather text from tokens for TEXT CONTENT (between tags).
+   * Rules:
+   * - Character entities are decoded
+   * - Backslash escapes are NOT interpreted (shown as-is)
+   */
+  betweenTagsTokens(ctx: CstTokens): LiteralNode {
+    const tokens = ctx.children.Content ?? [];
+    const text = tokens
+      .map((t) => {
+        if (t.tokenType === CharacterEntity) {
+          try {
+            return he.decode(t.image ?? '', { strict: true });
+          } catch (e) {
+            this.errors.push({
+              message: `Failed to decode HTML entity: ${t.image}`,
+              range: rangeFromTokens([t]),
+            });
+          }
+        }
+      })
+      .join('');
+    return literal(text, rangeFromTokens(tokens));
   }
 
-  closeTag(ctx: CstCloseTagNode): CloseTagNode {
-    const open = ctx.children.ClosingOpenBracket?.[0];
-    const nameTok = ctx.children.TagName?.[0];
-    const close = ctx.children.CloseBracket?.[0];
-
-    const start = open?.startOffset ?? nameTok?.startOffset ?? 0;
-    const end = (close?.endOffset ?? (nameTok?.endOffset ?? start) - 1) + 1;
+  element(ctx: CstElementNode): ElementNode {
+    const openTagPartial = ctx.children.OpenTagPartial?.[0];
+    const name = textFromRaw(openTagPartial?.children.TagName ?? []);
 
-    return {
-      kind: 'CLOSE',
-      value: literal(nameTok?.image ?? '', nameTok?.startOffset ?? start, (nameTok?.endOffset ?? start - 1) + 1),
-      range: rangeFrom(start, end),
-    };
-  }
-
-  element(ctx: CstElementNode): ElementNode | SelfCloseElementNode {
-    const partial = this.visit(ctx.children.OpenTagPartial?.[0]!) as OpenTagNode;
-
-    if (ctx.children.SelfCloseBracket?.length) {
-      const selfTok = ctx.children.SelfCloseBracket[0];
-      const end = (selfTok.endOffset ?? partial.range.end - 1) + 1;
-      return {
-        kind: 'SELFCLOSE',
-        value: partial.value,
-        attributes: partial.attributes,
-        range: rangeFrom(partial.range.start, end),
-      };
-    }
+    const attributes = openTagPartial?.children.Attribute?.map((a) => this.visit(a) as AttributeNode) ?? [];
 
-    // Normal or literal element with explicit CloseTag
-    const openCloseTok = ctx.children.OpenTagCloseBracket?.[0];
-    let children: ElementContentNode[] = [];
-    let close: CloseTagNode;
+    let children: ElementContentNode[];
 
     if (ctx.children.TextContent?.length) {
       // Literal element: everything inside is plain text (no template interpolation)
-      const toks = ctx.children.TextContent[0].children.Content ?? [];
-      const text = rawFrom(toks);
-      const r = rangeFromTokens(toks);
-      children = [literal(text, r.start, r.end)];
+      children = [literalFromCstTokens(ctx.children.TextContent ?? [])];
     } else {
       // Normal element: nested content parsed as usual
-      for (const ec of ctx.children.Content ?? []) {
-        children.push(this.visit(ec) as ElementContentNode);
-      }
+      children = ctx.children.Content?.map((ec) => this.visit(ec) as ElementContentNode) ?? [];
     }
 
-    close = this.visit(ctx.children.CloseTag?.[0]!) as CloseTagNode;
-
     // Tag name matching check
-    const openName = partial.value.value.toLowerCase();
-    const closeName = close.value.value.toLowerCase();
-    if (openName !== closeName) {
+    const closeTag = ctx.children.CloseTag?.[0];
+    const closeTagName = textFromRaw(closeTag?.children.TagName ?? []);
+    if (closeTag && name.toLowerCase() !== closeTagName.toLowerCase()) {
       this.errors.push({
-        message: `Mismatched closing tag: expected </${openName}> but found </${closeName}>`,
-        range: close.range,
+        message: `Mismatched closing tag: expected </${name}> but found </${closeTagName}>`,
+        range: rangeFromCstNode(closeTag),
       });
     }
 
-    const start = partial.range.start;
-    const end = close.range.end;
-
-    return { kind: 'ELEMENT', open: partial, close, children, range: rangeFrom(start, end) };
+    return { kind: 'ELEMENT', name, attributes, children, range: rangeFromCstNode(ctx) };
   }
 }
 
-// ---------------------------
-// Public helpers
-// ---------------------------
-
 /** Build an AST RootNode (and errors) from a CST produced by the parser. */
 export function cstToAst(cst: CstNode): { root: RootNode; errors: AstBuildError[] } {
   const visitor = new ExtendedPomlAstVisitor();
   return visitor.build(cst);
 }
-
-/** Convenience: from input string → { root, errors } using the full pipeline. */
-export function parsePomlToAst(input: string): { root: RootNode | undefined; errors: AstBuildError[] } {
-  const { cst } = (extendedPomlParser as any).constructor.parse
-    ? ((): any => {
-        throw new Error('Use parsePomlToCst from cst.ts to obtain a CST first.');
-      })()
-    : { cst: undefined };
-  // The parser wrapper already exists: users should call parsePomlToCst then cstToAst.
-  return { root: undefined, errors: [{ message: 'Call parsePomlToCst(input) then cstToAst(cst).' }] };
-}
diff --git a/packages/poml/next/nodes.ts b/packages/poml/next/nodes.ts
index 3577822e..4673ae43 100644
--- a/packages/poml/next/nodes.ts
+++ b/packages/poml/next/nodes.ts
@@ -303,7 +303,7 @@ export interface CstCloseTagNode extends CstNode {
  */
 export interface ElementNode extends AstNode {
   kind: 'ELEMENT';
-  tagName: string;
+  name: string;
   attributes: AttributeNode[];
   // Children is undefined for self-closing tags.
   // If it's not self-closing, children is at least an empty array.

From 03cca4a838787b8d0e5a553358c9798a3a2091f9 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 15:16:37 +0800
Subject: [PATCH 72/76] ast review

---
 packages/poml/next/ast.ts                     | 158 ++++++------------
 .../poml/next/{error.ts => diagnostics.ts}    |  16 +-
 2 files changed, 56 insertions(+), 118 deletions(-)
 rename packages/poml/next/{error.ts => diagnostics.ts} (99%)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index 14e0b167..bb1b84fe 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -21,8 +21,6 @@ import {
   CstRootNode,
   CstElementContentNode,
   CstElementNode,
-  CstOpenTagPartialNode,
-  CstCloseTagNode,
   CstTemplateNode,
   CstQuotedNode,
   CstQuotedTemplateNode,
@@ -47,13 +45,7 @@ import {
 import { Range } from './types';
 import { extendedPomlParser } from './cst';
 import { BackslashEscape, CharacterEntity } from './lexer';
-import * as error from './error';
-
-/** Error produced while building the AST (beyond lex/parse errors). */
-export interface AstBuildError {
-  message: string;
-  range?: Range;
-}
+import * as diagnostics from './diagnostics';
 
 /** Decode a single backslash escape sequence (for quoted strings). */
 function decodeEscape(seq: string): string {
@@ -99,16 +91,17 @@ function literal(value: string, range: Range): LiteralNode {
 /**
  * Create a LiteralNode from IToken list.
  */
-function literalFromTokens(tokens: IToken[]): LiteralNode {
-  return literal(textFromRaw(tokens), rangeFromTokens(tokens));
+function literalFromTokens(tokens: IToken[], fromIToken?: (tokens: IToken[]) => string): LiteralNode {
+  const text = fromIToken ? fromIToken(tokens) : textFromRaw(tokens);
+  return literal(text, rangeFromTokens(tokens));
 }
 
 /**
  * Convert CST token groups to a literal string.
  * String contents are kept as is, no escape decoding.
  */
-function literalFromCstTokens(groups: CstTokens[]): LiteralNode {
-  const text = textFromCstTokens(groups, textFromRaw);
+function literalFromCstTokens(groups: CstTokens[], fromIToken?: (tokens: IToken[]) => string): LiteralNode {
+  const text = textFromCstTokens(groups, fromIToken ?? textFromRaw);
   return literal(text, rangeFromCstTokens(groups));
 }
 
@@ -196,19 +189,11 @@ const BaseVisitor = extendedPomlParser.getBaseCstVisitorConstructorWithDefaults(
  * instead of throwing where possible so downstream phases can proceed.
  */
 export class ExtendedPomlAstVisitor extends BaseVisitor {
-  private errors: AstBuildError[] = [];
-
   constructor() {
     super();
     this.validateVisitor();
   }
 
-  /** Entry point: visit a CstRootNode and return an AST RootNode & errors. */
-  build(cst: CstNode): { root: RootNode; errors: AstBuildError[] } {
-    const root = this.visit(cst) as RootNode;
-    return { root, errors: this.errors };
-  }
-
   // ---- Rule implementations ----
 
   root(ctx: CstRootNode): RootNode {
@@ -237,7 +222,7 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
       return this.visit(ctx.children.TextContent[0]) as LiteralNode;
     }
     // This should not happen
-    this.errors.push({ message: 'Unknown element content', range: rangeFromCstNode(ctx) });
+    diagnostics.error('Unknown element content', rangeFromCstNode(ctx));
     return literal('', rangeFromCstNode(ctx));
   }
 
@@ -256,107 +241,64 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
   }
 
   pragma(ctx: CstPragmaNode): PragmaNode {
-    const open = ctx.children.CommentOpen?.[0];
-    const close = ctx.children.CommentClose?.[0];
-
-    const idTok = ctx.children.PragmaIdentifier?.[0];
-    const identifier: LiteralNode = literal(idTok?.image ?? '', idTok?.startOffset ?? 0, (idTok?.endOffset ?? -1) + 1);
-
+    const identifier = literalFromTokens(ctx.children.PragmaIdentifier ?? []);
     const options: LiteralNode[] = [];
-    for (const opt of ctx.children.PragmaOption ?? []) {
-      if ((opt as CstQuotedNode).children) {
-        // Quoted option
-        const q = opt as CstQuotedNode;
-        const bodyTokens = q.children.Content?.[0]?.children.Content ?? [];
-        const value = textFromQuoted(bodyTokens);
-        const start = q.children.OpenQuote?.[0]?.startOffset ?? 0;
-        const end = (q.children.CloseQuote?.[0]?.endOffset ?? start) + 1;
-        options.push(literal(value, start, end));
+
+    for (const option of ctx.children.PragmaOption ?? []) {
+      if ('tokenType' in option) {
+        // IToken
+        options.push(literal(option.image ?? '', rangeFromTokens([option])));
       } else {
-        // Unquoted identifier-ish tokens captured by commentIdentifierTokens
-        const toks = (opt as any).children?.Content ?? [];
-        const value = rawFrom(toks);
-        const r = rangeFromTokens(toks);
-        options.push(literal(value, r.start, r.end));
+        // CstQuotedNode
+        options.push(this.visit(option) as LiteralNode);
       }
     }
 
-    const start = open?.startOffset ?? identifier.range.start;
-    const end = (close?.endOffset ?? identifier.range.end - 1) + 1;
-
     return {
       kind: 'PRAGMA',
       identifier,
       options,
-      range: rangeFrom(start, end),
+      range: rangeFromCstNode(ctx),
     };
   }
 
-  quoted(ctx: CstQuotedNode): ValueNode {
-    const open = ctx.children.OpenQuote?.[0];
-    const close = ctx.children.CloseQuote?.[0];
-    const toks = ctx.children.Content?.[0]?.children.Content ?? [];
-    const text = textFromQuoted(toks);
-
-    const innerStart = (open?.endOffset ?? -1) + 1;
-    const innerEnd = close?.startOffset ?? innerStart;
-
-    const lit = literal(text, innerStart, innerEnd);
-    return {
-      kind: 'VALUE',
-      children: [lit],
-      range: rangeFrom(open?.startOffset ?? innerStart, (close?.endOffset ?? innerEnd - 1) + 1),
-    };
+  quoted(ctx: CstQuotedNode): LiteralNode {
+    // Ignore the special strings like templates, entities, ...
+    return literalFromCstTokens(ctx.children.Content ?? [], textFromQuoted);
   }
 
   quotedTemplate(ctx: CstQuotedTemplateNode): ValueNode {
-    const open = ctx.children.OpenQuote?.[0];
-    const close = ctx.children.CloseQuote?.[0];
-
     const children: (LiteralNode | TemplateNode)[] = [];
 
-    // Build mixed children maintaining order
-    for (const part of ctx.children.Content ?? []) {
-      const asTpl = part as unknown as CstTemplateNode;
-      if (asTpl.children && (asTpl.children.TemplateOpen || asTpl.children.TemplateClose)) {
-        children.push(this.visit(asTpl) as TemplateNode);
+    for (const content of ctx.children.Content ?? []) {
+      if (content.name === 'template') {
+        // CstTemplateNode
+        const templateNode = this.visit(content) as TemplateNode;
+        children.push(templateNode);
       } else {
-        // token run outside {{ }} inside quotes
-        const toks = (part as CstTokens).children.Content ?? [];
-        const text = textFromQuoted(toks);
-        const r = rangeFromTokens(toks);
-        if (text.length > 0) {
-          children.push(literal(text, r.start, r.end));
-        }
+        // CstTokens - regular text content
+        const lit = literalFromCstTokens([content as CstTokens], textFromQuoted);
+        children.push(lit);
       }
     }
 
-    const start = open?.startOffset ?? children[0]?.range.start ?? 0;
-    const end = (close?.endOffset ?? (children[children.length - 1]?.range.end ?? start) - 1) + 1;
-
-    return { kind: 'VALUE', children, range: rangeFrom(start, end) };
+    return {
+      kind: 'VALUE',
+      children,
+      range: rangeFromCstNode(ctx),
+    };
   }
 
   forIteratorValue(ctx: CstForIteratorNode): ForIteratorNode {
-    const open = ctx.children.OpenQuote?.[0];
-    const close = ctx.children.CloseQuote?.[0];
-
-    const itTok = ctx.children.Iterator?.[0];
-    const iterator = literal(itTok?.image ?? '', itTok?.startOffset ?? 0, (itTok?.endOffset ?? -1) + 1);
-
-    const collText = textFromExpressionTokens(ctx.children.Collection ?? []);
-    const collStart = ctx.children.Collection?.[0]?.children.Content?.[0]?.startOffset;
-    const collEnd = ctx.children.Collection?.[0]?.children.Content?.slice(-1)[0]?.endOffset;
-    const collection: ExpressionNode = {
-      kind: 'EXPRESSION',
-      value: collText,
-      range: rangeFrom(collStart ?? iterator.range.end, (collEnd ?? iterator.range.end - 1) + 1),
-    };
+    const iterator = literalFromTokens(ctx.children.Iterator ?? [], textFromQuoted);
+    const collection = literalFromCstTokens(ctx.children.Collection ?? [], textFromQuoted);
 
-    const start = open?.startOffset ?? iterator.range.start;
-    const end = (close?.endOffset ?? collection.range.end - 1) + 1;
-
-    return { kind: 'FORITERATOR', iterator, collection, range: rangeFrom(start, end) };
+    return {
+      kind: 'FORITERATOR',
+      iterator,
+      collection,
+      range: rangeFromCstNode(ctx),
+    };
   }
 
   attribute(ctx: CstAttributeNode): AttributeNode {
@@ -375,10 +317,7 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
       value = { kind: 'VALUE', children: [tpl], range: tpl.range };
     } else {
       // Fallback empty value
-      this.errors.push({
-        message: `Attribute "${key.value}" is missing a value`,
-        range,
-      });
+      diagnostics.error(`Attribute "${key.value}" is missing a value`, range);
       value = { kind: 'VALUE', children: [], range: key.range };
     }
 
@@ -399,10 +338,7 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
           try {
             return he.decode(t.image ?? '', { strict: true });
           } catch (e) {
-            this.errors.push({
-              message: `Failed to decode HTML entity: ${t.image}`,
-              range: rangeFromTokens([t]),
-            });
+            diagnostics.error(`Failed to decode HTML entity: ${t.image}`, rangeFromTokens([t]));
           }
         }
       })
@@ -410,6 +346,8 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     return literal(text, rangeFromTokens(tokens));
   }
 
+  // openTagPartial and closeTag is skipped. They are handled implicitly in element()
+
   element(ctx: CstElementNode): ElementNode {
     const openTagPartial = ctx.children.OpenTagPartial?.[0];
     const name = textFromRaw(openTagPartial?.children.TagName ?? []);
@@ -430,10 +368,10 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     const closeTag = ctx.children.CloseTag?.[0];
     const closeTagName = textFromRaw(closeTag?.children.TagName ?? []);
     if (closeTag && name.toLowerCase() !== closeTagName.toLowerCase()) {
-      this.errors.push({
-        message: `Mismatched closing tag: expected </${name}> but found </${closeTagName}>`,
-        range: rangeFromCstNode(closeTag),
-      });
+      diagnostics.error(
+        `Mismatched closing tag: expected </${name}> but found </${closeTagName}>`,
+        rangeFromCstNode(closeTag),
+      );
     }
 
     return { kind: 'ELEMENT', name, attributes, children, range: rangeFromCstNode(ctx) };
diff --git a/packages/poml/next/error.ts b/packages/poml/next/diagnostics.ts
similarity index 99%
rename from packages/poml/next/error.ts
rename to packages/poml/next/diagnostics.ts
index 1bc66ab2..75090e53 100644
--- a/packages/poml/next/error.ts
+++ b/packages/poml/next/diagnostics.ts
@@ -320,14 +320,14 @@ export class ErrorCollector {
 
     const filtered = this.diagnostics.filter((d) => {
       if (d.severity === Severity.ERROR) {
-return true;
-}
+        return true;
+      }
       if (d.severity === Severity.WARNING) {
-return showWarnings;
-}
+        return showWarnings;
+      }
       if (d.severity === Severity.INFO) {
-return showInfo;
-}
+        return showInfo;
+      }
       return false;
     });
 
@@ -362,8 +362,8 @@ return showInfo;
 
         const diagnostics = byFile.get(file)!.sort((a, b) => {
           if (!a.range || !b.range) {
-return 0;
-}
+            return 0;
+          }
           return a.range.start - b.range.start;
         });
 

From 49d354218417b34161f93d224e0bb484242af7ba Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 17:08:01 +0800
Subject: [PATCH 73/76] ast tests

---
 packages/poml/next/ast.ts              |  131 +--
 packages/poml/tests/reader/ast.test.ts | 1056 +++++++++++++++---------
 2 files changed, 755 insertions(+), 432 deletions(-)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index bb1b84fe..ca85c568 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -29,6 +29,7 @@ import {
   CstCommentNode,
   CstPragmaNode,
   CstTokens,
+  AstNode,
 } from './nodes';
 import {
   ElementNode,
@@ -194,57 +195,66 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     this.validateVisitor();
   }
 
+  /**
+   * A hack to let rule methods get a handle of the CstNode they are visiting.
+   */
+  visit(cstNode: CstNode | CstNode[], param?: any): AstNode {
+    return super.visit(cstNode, { ...param, node: cstNode });
+  }
+
   // ---- Rule implementations ----
 
-  root(ctx: CstRootNode): RootNode {
+  root(ctx: CstRootNode['children'], { node }: { node: CstRootNode }): RootNode {
     const children: ElementContentNode[] = [];
-    for (const ec of ctx.children.Content ?? []) {
+    for (const ec of ctx.Content ?? []) {
       const node = this.visit(ec) as ElementContentNode;
       if (node) {
         children.push(node);
       }
     }
 
-    return { kind: 'ROOT', children, range: rangeFromCstNode(ctx) };
+    return { kind: 'ROOT', children, range: rangeFromCstNode(node) };
   }
 
-  elementContent(ctx: CstElementContentNode): ElementContentNode {
-    if (ctx.children.Pragma?.length) {
-      return this.visit(ctx.children.Pragma[0]) as PragmaNode;
-    } else if (ctx.children.Comment?.length) {
-      return this.visit(ctx.children.Comment[0]) as CommentNode;
-    } else if (ctx.children.Template?.length) {
-      return this.visit(ctx.children.Template[0]) as TemplateNode;
-    } else if (ctx.children.Element?.length) {
-      return this.visit(ctx.children.Element[0]) as ElementNode;
-    } else if (ctx.children.TextContent?.length) {
+  elementContent(
+    ctx: CstElementContentNode['children'],
+    { node }: { node: CstElementContentNode },
+  ): ElementContentNode {
+    if (ctx.Pragma?.length) {
+      return this.visit(ctx.Pragma[0]) as PragmaNode;
+    } else if (ctx.Comment?.length) {
+      return this.visit(ctx.Comment[0]) as CommentNode;
+    } else if (ctx.Template?.length) {
+      return this.visit(ctx.Template[0]) as TemplateNode;
+    } else if (ctx.Element?.length) {
+      return this.visit(ctx.Element[0]) as ElementNode;
+    } else if (ctx.TextContent?.length) {
       // Text contents between tags
-      return this.visit(ctx.children.TextContent[0]) as LiteralNode;
+      return this.visit(ctx.TextContent[0]) as LiteralNode;
     }
     // This should not happen
-    diagnostics.error('Unknown element content', rangeFromCstNode(ctx));
-    return literal('', rangeFromCstNode(ctx));
+    diagnostics.error('Unknown element content', rangeFromCstNode(node));
+    return literal('', rangeFromCstNode(node));
   }
 
-  template(ctx: CstTemplateNode): TemplateNode {
-    const exprNode = literalFromCstTokens(ctx.children.Content ?? []);
-    return { kind: 'TEMPLATE', value: exprNode, range: rangeFromCstNode(ctx) };
+  template(ctx: CstTemplateNode['children'], { node }: { node: CstTemplateNode }): TemplateNode {
+    const exprNode = literalFromCstTokens(ctx.Content ?? []);
+    return { kind: 'TEMPLATE', value: exprNode, range: rangeFromCstNode(node) };
   }
 
-  comment(ctx: CstCommentNode): CommentNode {
-    const text = textFromCstTokens(ctx.children.Content ?? [], textFromRaw);
+  comment(ctx: CstCommentNode['children'], { node }: { node: CstCommentNode }): CommentNode {
     return {
       kind: 'COMMENT',
-      value: literalFromCstTokens(ctx.children.Content ?? []),
-      range: rangeFromCstNode(ctx),
+      value: literalFromCstTokens(ctx.Content ?? []),
+      range: rangeFromCstNode(node),
     };
   }
 
-  pragma(ctx: CstPragmaNode): PragmaNode {
-    const identifier = literalFromTokens(ctx.children.PragmaIdentifier ?? []);
+  pragma(ctx: CstPragmaNode['children'], { node }: { node: CstPragmaNode }): PragmaNode {
+    const identifier = literalFromTokens(ctx.PragmaIdentifier ?? []);
     const options: LiteralNode[] = [];
 
-    for (const option of ctx.children.PragmaOption ?? []) {
+    for (const option of ctx.PragmaOption ?? []) {
       if ('tokenType' in option) {
         // IToken
         options.push(literal(option.image ?? '', rangeFromTokens([option])));
@@ -258,19 +268,19 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
       kind: 'PRAGMA',
       identifier,
       options,
-      range: rangeFromCstNode(ctx),
+      range: rangeFromCstNode(node),
     };
   }
 
-  quoted(ctx: CstQuotedNode): LiteralNode {
+  quoted(ctx: CstQuotedNode['children'], { node }: { node: CstQuotedNode }): LiteralNode {
     // Ignore the special strings like templates, entities, ...
-    return literalFromCstTokens(ctx.children.Content ?? [], textFromQuoted);
+    return literalFromCstTokens(ctx.Content ?? [], textFromQuoted);
   }
 
-  quotedTemplate(ctx: CstQuotedTemplateNode): ValueNode {
+  quotedTemplate(ctx: CstQuotedTemplateNode['children'], { node }: { node: CstQuotedTemplateNode }): ValueNode {
     const children: (LiteralNode | TemplateNode)[] = [];
 
-    for (const content of ctx.children.Content ?? []) {
+    for (const content of ctx.Content ?? []) {
       if (content.name === 'template') {
         // CstTemplateNode
         const templateNode = this.visit(content) as TemplateNode;
@@ -285,35 +295,35 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
     return {
       kind: 'VALUE',
       children,
-      range: rangeFromCstNode(ctx),
+      range: rangeFromCstNode(node),
     };
   }
 
-  forIteratorValue(ctx: CstForIteratorNode): ForIteratorNode {
-    const iterator = literalFromTokens(ctx.children.Iterator ?? [], textFromQuoted);
-    const collection = literalFromCstTokens(ctx.children.Collection ?? [], textFromQuoted);
+  forIteratorValue(ctx: CstForIteratorNode['children'], { node }: { node: CstForIteratorNode }): ForIteratorNode {
+    const iterator = literalFromTokens(ctx.Iterator ?? [], textFromQuoted);
+    const collection = literalFromCstTokens(ctx.Collection ?? [], textFromQuoted);
 
     return {
       kind: 'FORITERATOR',
       iterator,
       collection,
-      range: rangeFromCstNode(ctx),
+      range: rangeFromCstNode(node),
     };
   }
 
-  attribute(ctx: CstAttributeNode): AttributeNode {
-    const key: LiteralNode = literalFromTokens(ctx.children.AttributeKey ?? []);
-    const range = rangeFromCstNode(ctx);
+  attribute(ctx: CstAttributeNode['children'], { node }: { node: CstAttributeNode }): AttributeNode {
+    const key: LiteralNode = literalFromTokens(ctx.AttributeKey ?? []);
+    const range = rangeFromCstNode(node);
 
     let value: ValueNode | ForIteratorNode;
 
-    if (ctx.children.forIteratorValue?.length) {
-      value = this.visit(ctx.children.forIteratorValue[0]) as ForIteratorNode;
-    } else if (ctx.children.quotedValue?.length) {
-      value = this.visit(ctx.children.quotedValue[0]) as ValueNode;
-    } else if (ctx.children.templatedValue?.length) {
+    if (ctx.forIteratorValue?.length) {
+      value = this.visit(ctx.forIteratorValue[0]) as ForIteratorNode;
+    } else if (ctx.quotedValue?.length) {
+      value = this.visit(ctx.quotedValue[0]) as ValueNode;
+    } else if (ctx.templatedValue?.length) {
       // Unquoted: key={{ expr }} -> wrap as ValueNode with a TemplateNode child
-      const tpl = this.visit(ctx.children.templatedValue[0]) as TemplateNode;
+      const tpl = this.visit(ctx.templatedValue[0]) as TemplateNode;
       value = { kind: 'VALUE', children: [tpl], range: tpl.range };
     } else {
       // Fallback empty value
@@ -330,8 +340,8 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
    * - Character entities are decoded
    * - Backslash escapes are NOT interpreted (shown as-is)
    */
-  betweenTagsTokens(ctx: CstTokens): LiteralNode {
-    const tokens = ctx.children.Content ?? [];
+  betweenTagsTokens(ctx: CstTokens['children'], { node }: { node: CstTokens }): LiteralNode {
+    const tokens = ctx.Content ?? [];
     const text = tokens
       .map((t) => {
         if (t.tokenType === CharacterEntity) {
@@ -341,6 +351,7 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
             diagnostics.error(`Failed to decode HTML entity: ${t.image}`, rangeFromTokens([t]));
           }
         }
+        return t.image ?? '';
       })
       .join('');
     return literal(text, rangeFromTokens(tokens));
@@ -348,25 +359,25 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
 
   // openTagPartial and closeTag is skipped. They are handled implicitly in element()
 
-  element(ctx: CstElementNode): ElementNode {
-    const openTagPartial = ctx.children.OpenTagPartial?.[0];
-    const name = textFromRaw(openTagPartial?.children.TagName ?? []);
+  element(ctx: CstElementNode['children'], { node }: { node: CstElementNode }): ElementNode {
+    const openTagPartial = ctx.OpenTagPartial?.[0];
+    const name = textFromRaw(openTagPartial?.children?.TagName ?? []);
 
-    const attributes = openTagPartial?.children.Attribute?.map((a) => this.visit(a) as AttributeNode) ?? [];
+    const attributes = openTagPartial?.children?.Attribute?.map((a) => this.visit(a) as AttributeNode) ?? [];
 
     let children: ElementContentNode[];
 
-    if (ctx.children.TextContent?.length) {
+    if (ctx.TextContent?.length) {
       // Literal element: everything inside is plain text (no template interpolation)
-      children = [literalFromCstTokens(ctx.children.TextContent ?? [])];
+      children = [literalFromCstTokens(ctx.TextContent)];
     } else {
       // Normal element: nested content parsed as usual
-      children = ctx.children.Content?.map((ec) => this.visit(ec) as ElementContentNode) ?? [];
+      children = ctx.Content?.map((ec) => this.visit(ec) as ElementContentNode) ?? [];
     }
 
     // Tag name matching check
-    const closeTag = ctx.children.CloseTag?.[0];
-    const closeTagName = textFromRaw(closeTag?.children.TagName ?? []);
+    const closeTag = ctx.CloseTag?.[0];
+    const closeTagName = textFromRaw(closeTag?.children?.TagName ?? []);
     if (closeTag && name.toLowerCase() !== closeTagName.toLowerCase()) {
       diagnostics.error(
         `Mismatched closing tag: expected </${name}> but found </${closeTagName}>`,
@@ -374,12 +385,12 @@ export class ExtendedPomlAstVisitor extends BaseVisitor {
       );
     }
 
-    return { kind: 'ELEMENT', name, attributes, children, range: rangeFromCstNode(ctx) };
+    return { kind: 'ELEMENT', name, attributes, children, range: rangeFromCstNode(node) };
   }
 }
 
-/** Build an AST RootNode (and errors) from a CST produced by the parser. */
-export function cstToAst(cst: CstNode): { root: RootNode; errors: AstBuildError[] } {
+/** Build an AST RootNode from a CST produced by the parser. */
+export function cstToAst(cst: CstNode): RootNode {
   const visitor = new ExtendedPomlAstVisitor();
-  return visitor.build(cst);
+  return visitor.visit(cst) as RootNode;
 }
diff --git a/packages/poml/tests/reader/ast.test.ts b/packages/poml/tests/reader/ast.test.ts
index 4b5819db..32cd48a0 100644
--- a/packages/poml/tests/reader/ast.test.ts
+++ b/packages/poml/tests/reader/ast.test.ts
@@ -1,423 +1,735 @@
-import { describe, expect, test } from '@jest/globals';
-import { parseAST, ASTNode } from 'poml/next/ast';
-
-describe('parseAST', () => {
-  test('pure text content', () => {
-    const content = 'This is pure text content with no POML tags.';
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.content).toBe(content);
-    expect(ast.start).toBe(0);
-    expect(ast.end).toBe(content.length);
-    expect(ast.children).toHaveLength(0);
+import { describe, expect, test, beforeEach } from '@jest/globals';
+import { extendedPomlLexer } from 'poml/next/lexer';
+import { ExtendedPomlParser } from 'poml/next/cst';
+import { cstToAst, ExtendedPomlAstVisitor } from 'poml/next/ast';
+import * as diagnostics from 'poml/next/diagnostics';
+import {
+  RootNode,
+  ElementNode,
+  LiteralNode,
+  TemplateNode,
+  ValueNode,
+  PragmaNode,
+  CommentNode,
+  ForIteratorNode,
+  AttributeNode,
+} from 'poml/next/nodes';
+import { CstNode } from 'chevrotain';
+
+// Test helper: lex `input`, parse it with the `root` rule, and build the AST via cstToAst
+function parseToAst(input: string): RootNode {
+  // Clear diagnostics on every call so assertions only see errors from this input
+  diagnostics.clear();
+
+  // Tokenize; a lexer error fails the calling test
+  const lexResult = extendedPomlLexer.tokenize(input);
+  expect(lexResult.errors).toHaveLength(0);
+
+  // Parse to CST; a parser error fails the calling test
+  const parser = new ExtendedPomlParser();
+  parser.input = lexResult.tokens;
+  const cst = parser.root();
+  expect(parser.errors).toHaveLength(0);
+
+  // Convert the CST to an AST
+  return cstToAst(cst);
+}
+
+// Test helper: lex `input`, parse it with the single rule picked by `rule`, and visit the CST (result is cast to T unchecked)
+function parseRule<T>(input: string, rule: (parser: ExtendedPomlParser) => CstNode): T {
+  diagnostics.clear();
+
+  const lexResult = extendedPomlLexer.tokenize(input);
+  expect(lexResult.errors).toHaveLength(0);
+
+  const parser = new ExtendedPomlParser();
+  parser.input = lexResult.tokens;
+  const cst = rule(parser);
+  expect(parser.errors).toHaveLength(0);
+
+  const visitor = new ExtendedPomlAstVisitor();
+  return visitor.visit(cst) as T;
+}
+
+describe('AST Visitor - Individual Rules', () => {
+  beforeEach(() => {
+    diagnostics.clear();
   });
 
-  test('single POML tag', () => {
-    const content = '<task>Analyze the data</task>';
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('task');
-    expect(ast.content).toBe(content);
-    expect(ast.start).toBe(0);
-    expect(ast.end).toBe(content.length);
-  });
-
-  test('mixed content with text and POML', () => {
-    const content = `# My Analysis Document
-
-This is a regular markdown document that explains the task.
-
-<task>
-  Analyze the following data and provide insights.
-</task>
-
-Here are some key points to consider:
-
-- Data quality
-- Statistical significance  
-- Business impact`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(4);
+  describe('root rule', () => {
+    test('empty root', () => {
+      const result = parseToAst('');
+      expect(result).toStrictEqual({
+        kind: 'ROOT',
+        children: [],
+        range: { start: 0, end: 0 },
+      });
+    });
+
+    test('text only root', () => {
+      const result = parseToAst('Hello World');
+      expect(result).toStrictEqual({
+        kind: 'ROOT',
+        children: [
+          {
+            kind: 'STRING',
+            value: 'Hello World',
+            range: { start: 0, end: 10 },
+          },
+        ],
+        range: { start: 0, end: 10 },
+      });
+    });
 
-    const children = ast.children;
-    expect(children[0].kind).toBe('TEXT');
-    expect(children[0].content).toContain('# My Analysis Document');
+    test('mixed content root', () => {
+      const result = parseToAst('Hello {{ name }}!');
+      expect(result.kind).toBe('ROOT');
+      expect(result.children).toHaveLength(3);
 
-    expect(children[1].kind).toBe('POML');
-    expect(children[1].tagName).toBe('task');
-    expect(children[1].content).toBe(`<task>
-  Analyze the following data and provide insights.
-</task>`);
+      expect(result.children[0]).toMatchObject({
+        kind: 'STRING',
+        value: 'Hello ',
+      });
 
-    expect(children[2].kind).toBe('TEXT');
-    expect(children[2].content).toContain('Here are some key points');
+      expect(result.children[1]).toMatchObject({
+        kind: 'TEMPLATE',
+        value: { kind: 'STRING', value: ' name ' },
+      });
 
-    expect(children[3].kind).toBe('TEXT');
-    expect(children[3].content).toContain('- Data quality');
+      expect(result.children[2]).toMatchObject({
+        kind: 'STRING',
+        value: '!',
+      });
+    });
   });
 
-  test('nested POML segments', () => {
-    const content = `<examples syntax="json">
-  <example>
-    <input>Sample data point 1</input>
-    <output>Analysis result 1</output>
-  </example>
-</examples>`;
-
-    const ast = parseAST(content);
+  describe('template rule', () => {
+    test('simple template', () => {
+      const result = parseRule<TemplateNode>('{{ var }}', (p) => p.template());
+      expect(result).toStrictEqual({
+        kind: 'TEMPLATE',
+        value: {
+          kind: 'STRING',
+          value: ' var ',
+          range: expect.any(Object),
+        },
+        range: expect.any(Object),
+      });
+    });
 
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('examples');
-    expect(ast.children).toHaveLength(0);
-    expect(ast.content).toBe(content);
-  });
+    test('complex expression template', () => {
+      const result = parseRule<TemplateNode>('{{ user.name.toUpperCase() }}', (p) => p.template());
+      expect(result.value.value).toBe(' user.name.toUpperCase() ');
+    });
 
-  test('text in text', () => {
-    const content = `<text>This is a text<text> with nested text content.</text></text>`;
-    const ast = parseAST(content);
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.content).toBe(content);
-    expect(ast.children).toHaveLength(0);
+    test('template without spaces', () => {
+      const result = parseRule<TemplateNode>('{{count}}', (p) => p.template());
+      expect(result.value.value).toBe('count');
+    });
   });
 
-  test('text in text in POML', () => {
-    const content = `<poml><text>This is a text<text> with nested text content.</text></text></poml>`;
-    const ast = parseAST(content);
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('poml');
-    expect(ast.children).toHaveLength(1);
-    const textNode = ast.children[0];
-    expect(textNode.kind).toBe('TEXT');
-    expect(textNode.content).toBe('This is a text<text> with nested text content.</text>');
-  });
+  describe('comment rule', () => {
+    test('simple comment', () => {
+      const result = parseRule<CommentNode>('<!-- hello -->', (p) => p.comment());
+      expect(result).toStrictEqual({
+        kind: 'COMMENT',
+        value: {
+          kind: 'STRING',
+          value: ' hello ',
+          range: expect.any(Object),
+        },
+        range: expect.any(Object),
+      });
+    });
 
-  test('nested same tag in POML', () => {
-    const content = `<task>Process data<task> with nested task content.</task></task>`;
-    const ast = parseAST(content);
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('task');
-    expect(ast.children).toHaveLength(0);
-    expect(ast.content).toBe('<task>Process data<task> with nested task content.</task></task>');
+    test('multiline comment', () => {
+      // Use a real newline: '\\n' would put a literal backslash-n inside the comment instead
+      const result = parseRule<CommentNode>('<!-- line 1\nline 2 -->', (p) => p.comment());
+      expect(result.value.value).toContain('line 1\nline 2');
+    });
   });
 
-  test('text tag with nested content', () => {
-    const content = `<poml>
-  <task>Process the following data</task>
-  <text>
-    This is **markdown** content that will be processed as pure text.
-    
-    - Item 1
-    - Item 2
-
-    <cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>
-
-    No POML processing happens here.
-  </text>
-  <hint>Remember to check the format</hint>
-</poml>`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('poml');
-    expect(ast.children).toHaveLength(4);
-
-    const textNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'text');
-    expect(textNode).toBeDefined();
-    expect(textNode!.children).toHaveLength(3);
-
-    const nestedCpNode = textNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'cp');
-    expect(nestedCpNode).toBeDefined();
-    expect(nestedCpNode!.content).toBe(
-      '<cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>',
-    );
+  describe('pragma rule', () => {
+    test('pragma with identifier only', () => {
+      const result = parseRule<PragmaNode>('<!-- @pragma version -->', (p) => p.pragma());
+      expect(result).toMatchObject({
+        kind: 'PRAGMA',
+        identifier: {
+          kind: 'STRING',
+          value: 'version',
+        },
+        options: [],
+      });
+    });
+
+    test('pragma with unquoted options', () => {
+      const result = parseRule<PragmaNode>('<!-- @pragma components +reference -table -->', (p) => p.pragma());
+      expect(result).toMatchObject({
+        kind: 'PRAGMA',
+        identifier: {
+          kind: 'STRING',
+          value: 'components',
+        },
+        options: [
+          { kind: 'STRING', value: '+reference' },
+          { kind: 'STRING', value: '-table' },
+        ],
+      });
+    });
+
+    test('pragma with quoted options', () => {
+      const result = parseRule<PragmaNode>('<!-- @pragma whitespace "pre formatted" -->', (p) => p.pragma());
+      expect(result).toMatchObject({
+        kind: 'PRAGMA',
+        identifier: {
+          kind: 'STRING',
+          value: 'whitespace',
+        },
+        options: [{ kind: 'STRING', value: 'pre formatted' }],
+      });
+    });
   });
 
-  test('meta tags', () => {
-    const content = `<meta name="author">John Doe</variable>
-  <stylesheet>
-    { "task": { "captionStyle": "bold" } }
-  </stylesheet>
-</meta>
-
-<task>Complete the analysis</task>`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(3);
-
-    const metaNode = ast.children.find((c) => c.kind === 'META');
-    expect(metaNode).toBeDefined();
-    expect(metaNode!.tagName).toBe('meta');
-    expect(metaNode!.children).toHaveLength(0);
-
-    const taskNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'task');
-    expect(taskNode).toBeDefined();
+  describe('quoted rule', () => {
+    test('simple quoted string', () => {
+      const result = parseRule<LiteralNode>('"hello world"', (p) => p.quoted());
+      expect(result).toStrictEqual({
+        kind: 'STRING',
+        value: 'hello world',
+        range: expect.any(Object),
+      });
+    });
+
+    test('quoted string with single quotes', () => {
+      const result = parseRule<LiteralNode>("'hello world'", (p) => p.quoted());
+      expect(result).toStrictEqual({
+        kind: 'STRING',
+        value: 'hello world',
+        range: expect.any(Object),
+      });
+    });
   });
 
-  test('invalid tags are ignored', () => {
-    const content = `<invalid-tag>This should be ignored</invalid-tag>
-<task>This should be processed</task>
-<random>This should also be ignored</random>`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(3);
-
-    const taskNode = ast.children.find((c) => c.kind === 'POML');
-    expect(taskNode).toBeDefined();
-    expect(taskNode!.tagName).toBe('task');
-
-    const textNodes = ast.children.filter((c) => c.kind === 'TEXT');
-    expect(textNodes).toHaveLength(2);
-    expect(textNodes[0].content).toContain('<invalid-tag>This should be ignored</invalid-tag>');
-    expect(textNodes[1].content).toContain('<random>This should also be ignored</random>');
+  describe('quotedTemplate rule', () => {
+    test('quoted string with template', () => {
+      const result = parseRule<ValueNode>('"Hello {{ name }}!"', (p) => p.quotedTemplate());
+      expect(result).toMatchObject({
+        kind: 'VALUE',
+        children: [
+          { kind: 'STRING', value: 'Hello ' },
+          {
+            kind: 'TEMPLATE',
+            value: { kind: 'STRING', value: ' name ' },
+          },
+          { kind: 'STRING', value: '!' },
+        ],
+      });
+    });
+
+    test('quoted template with only template', () => {
+      const result = parseRule<ValueNode>('"{{ expression }}"', (p) => p.quotedTemplate());
+      expect(result).toMatchObject({
+        kind: 'VALUE',
+        children: [
+          {
+            kind: 'TEMPLATE',
+            value: { kind: 'STRING', value: ' expression ' },
+          },
+        ],
+      });
+    });
+
+    test('multiple templates in quoted string', () => {
+      const result = parseRule<ValueNode>('"{{ first }} and {{ second }}"', (p) => p.quotedTemplate());
+      expect(result.children).toHaveLength(4);
+      expect(result.children[0]).toMatchObject({ kind: 'TEMPLATE' });
+      expect(result.children[1]).toMatchObject({ kind: 'STRING', value: ' and ' });
+      expect(result.children[2]).toMatchObject({ kind: 'TEMPLATE' });
+      expect(result.children[3]).toMatchObject({ kind: 'STRING', value: '' });
+    });
   });
 
-  test('self-closing tags are ignored', () => {
-    const content = `<task>Valid task</task>
-<br />
-<img src="test.jpg" />
-<hint>Valid hint</hint>`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(4);
-
-    const pomlNodes = ast.children.filter((c) => c.kind === 'POML');
-    expect(pomlNodes).toHaveLength(3);
-    expect(pomlNodes[0].tagName).toBe('task');
-    expect(pomlNodes[2].tagName).toBe('hint');
+  describe('forIteratorValue rule', () => {
+    test('simple for iterator', () => {
+      const result = parseRule<ForIteratorNode>('"item in items"', (p) => p.forIteratorValue());
+      expect(result).toStrictEqual({
+        kind: 'FORITERATOR',
+        iterator: {
+          kind: 'STRING',
+          value: 'item',
+          range: expect.any(Object),
+        },
+        collection: {
+          kind: 'STRING',
+          value: 'items',
+          range: expect.any(Object),
+        },
+        range: expect.any(Object),
+      });
+    });
+
+    test('for iterator with property access', () => {
+      const result = parseRule<ForIteratorNode>('"user in data.users"', (p) => p.forIteratorValue());
+      expect(result).toMatchObject({
+        kind: 'FORITERATOR',
+        iterator: { kind: 'STRING', value: 'user' },
+        collection: { kind: 'STRING', value: 'data.users' },
+      });
+    });
+
+    test('for iterator with complex expression', () => {
+      const result = parseRule<ForIteratorNode>('"item in getItems().filter(x => x.active)"', (p) =>
+        p.forIteratorValue(),
+      );
+      expect(result.collection.value).toBe('getItems().filter(x => x.active)');
+    });
   });
 
-  test('malformed tags are handled gracefully', () => {
-    const content = `<task>Incomplete tag
-<hint>Complete hint</hint>
-<unclosed>This has no closing tag`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(3);
-
-    const hintNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'hint');
-    expect(hintNode).toBeDefined();
-    expect(hintNode!.content).toBe('<hint>Complete hint</hint>');
-
-    const textNodes = ast.children.filter((c) => c.kind === 'TEXT');
-    expect(textNodes).toHaveLength(2);
-    expect(textNodes[0].content).toBe('<task>Incomplete tag\n');
-    expect(textNodes[1].content).toBe('\n<unclosed>This has no closing tag');
+  describe('attribute rule', () => {
+    test('attribute with quoted value', () => {
+      const result = parseRule<AttributeNode>('class="container"', (p) => p.attribute());
+      expect(result).toMatchObject({
+        kind: 'ATTRIBUTE',
+        key: { kind: 'STRING', value: 'class' },
+        value: { kind: 'STRING', value: 'container' },
+      });
+    });
+
+    test('attribute with template value', () => {
+      const result = parseRule<AttributeNode>('title={{ pageTitle }}', (p) => p.attribute());
+      expect(result).toMatchObject({
+        kind: 'ATTRIBUTE',
+        key: { kind: 'STRING', value: 'title' },
+        value: {
+          kind: 'VALUE',
+          children: [{ kind: 'TEMPLATE' }],
+        },
+      });
+    });
+
+    test('attribute with for iterator', () => {
+      const result = parseRule<AttributeNode>('for="item in items"', (p) => p.attribute());
+      expect(result).toMatchObject({
+        kind: 'ATTRIBUTE',
+        key: { kind: 'STRING', value: 'for' },
+        value: {
+          kind: 'FORITERATOR',
+          iterator: { kind: 'STRING', value: 'item' },
+          collection: { kind: 'STRING', value: 'items' },
+        },
+      });
+    });
+
+    test('attribute with quoted template value', () => {
+      const result = parseRule<AttributeNode>('message="Hello {{ name }}!"', (p) => p.attribute());
+      expect(result).toMatchObject({
+        kind: 'ATTRIBUTE',
+        key: { kind: 'STRING', value: 'message' },
+        value: {
+          kind: 'VALUE',
+          children: [{ kind: 'STRING', value: 'Hello ' }, { kind: 'TEMPLATE' }, { kind: 'STRING', value: '!' }],
+        },
+      });
+    });
   });
 
-  test('malformed POML tags are ignored', () => {
-    const content = `<task>Valid task`;
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(0);
+  describe('element rule', () => {
+    test('simple element', () => {
+      const result = parseRule<ElementNode>('<div>content</div>', (p) => p.element());
+      expect(result).toMatchObject({
+        kind: 'ELEMENT',
+        name: 'div',
+        attributes: [],
+        children: [{ kind: 'STRING', value: 'content' }],
+      });
+    });
+
+    test('element with attributes', () => {
+      const result = parseRule<ElementNode>('<div class="container" id="main">text</div>', (p) => p.element());
+      expect(result).toMatchObject({
+        kind: 'ELEMENT',
+        name: 'div',
+        attributes: [
+          {
+            kind: 'ATTRIBUTE',
+            key: { value: 'class' },
+            value: { kind: 'STRING', value: 'container' },
+          },
+          {
+            kind: 'ATTRIBUTE',
+            key: { value: 'id' },
+            value: { kind: 'STRING', value: 'main' },
+          },
+        ],
+        children: [{ kind: 'STRING', value: 'text' }],
+      });
+    });
+
+    test('self-closing element', () => {
+      const result = parseRule<ElementNode>('<img src="photo.jpg" />', (p) => p.element());
+      expect(result).toMatchObject({
+        kind: 'ELEMENT',
+        name: 'img',
+        attributes: [
+          {
+            kind: 'ATTRIBUTE',
+            key: { value: 'src' },
+            value: { kind: 'STRING', value: 'photo.jpg' },
+          },
+        ],
+        children: [],
+      });
+    });
+
+    test('element with nested content', () => {
+      const result = parseRule<ElementNode>('<task>Process {{ data }} carefully</task>', (p) => p.element());
+      expect(result.children).toHaveLength(3);
+      expect(result.children[0]).toMatchObject({ kind: 'STRING', value: 'Process ' });
+      expect(result.children[1]).toMatchObject({ kind: 'TEMPLATE' });
+      expect(result.children[2]).toMatchObject({ kind: 'STRING', value: ' carefully' });
+    });
+
+    test('nested elements', () => {
+      const result = parseRule<ElementNode>('<div><span>nested</span></div>', (p) => p.element());
+      expect(result.children).toHaveLength(1);
+      expect(result.children[0]).toMatchObject({
+        kind: 'ELEMENT',
+        name: 'span',
+        children: [{ kind: 'STRING', value: 'nested' }],
+      });
+    });
   });
+});
 
-  test('empty content', () => {
-    const content = '';
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.content).toBe('');
-    expect(ast.children).toHaveLength(0);
+describe('AST Visitor - Error Handling', () => {
+  beforeEach(() => {
+    diagnostics.clear();
   });
 
-  test('whitespace-only content', () => {
-    const content = '   \n\n\t  \n  ';
-    const ast = parseAST(content);
+  test('mismatched closing tag reports error', () => {
+    const input = '<div>content</span>';
+    parseRule<ElementNode>(input, (p) => p.element());
 
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.content).toBe(content);
-    expect(ast.children).toHaveLength(0);
+    const errors = diagnostics.getErrors();
+    expect(errors).toHaveLength(1);
+    expect(errors[0].message).toContain('Mismatched closing tag');
+    expect(errors[0].message).toContain('expected </div>');
+    expect(errors[0].message).toContain('found </span>');
   });
 
-  test('hyphenated tag names', () => {
-    const content = `<output-format>JSON format</output-format>
-<system-msg>System message</system-msg>
-<user-msg>User message</user-msg>`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(4);
+  test('invalid HTML entity reports error', () => {
+    const input = '&invalidEntity;';
+    parseToAst(input);
 
-    const pomlNodes = ast.children.filter((c) => c.kind === 'POML');
-    expect(pomlNodes).toHaveLength(3);
-    expect(pomlNodes[0].tagName).toBe('output-format');
-    expect(pomlNodes[1].tagName).toBe('system-msg');
-    expect(pomlNodes[2].tagName).toBe('user-msg');
+    const errors = diagnostics.getErrors();
+    expect(errors).toHaveLength(1);
+    expect(errors[0].message).toContain('Failed to decode HTML entity');
   });
 
-  test('parent-child relationships', () => {
-    const content = `<task>
-  <hint>This is a hint</hint>
-  Some text
-  <examples>
-    <example>Example 1</example>
-  </examples>
-</task>`;
-
-    const ast = parseAST(content);
-
-    const taskNode = ast;
-    expect(taskNode.kind).toBe('POML');
-    expect(taskNode.tagName).toBe('task');
-    expect(taskNode.parent).toBeUndefined();
-
-    const hintNode = taskNode.children.find((c) => c.kind === 'POML' && c.tagName === 'hint');
-    expect(hintNode).toBeDefined();
-    expect(hintNode!.parent).toBe(taskNode);
-
-    const examplesNode = taskNode.children.find((c) => c.kind === 'POML' && c.tagName === 'examples');
-    expect(examplesNode).toBeDefined();
-    expect(examplesNode!.parent).toBe(taskNode);
-
-    const exampleNode = examplesNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'example');
-    expect(exampleNode).toBeDefined();
-    expect(exampleNode!.parent).toBe(examplesNode);
-  });
-
-  test('node IDs are unique', () => {
-    const content = `<task>First task</task>
-<task>Second task</task>
-<hint>A hint</hint>`;
-
-    const ast = parseAST(content);
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(5);
-
-    function collectAllNodes(node: ASTNode): ASTNode[] {
-      const all = [node];
-      node.children.forEach((child) => {
-        all.push(...collectAllNodes(child));
-      });
-      return all;
+  test('attribute without value reports error', () => {
+    // The input must be rejected: parseRule throws on parser errors, otherwise a diagnostic is required
+    const input = '<div class>content</div>';
+    let parserRejected = false;
+    try {
+      parseRule<ElementNode>(input, (p) => p.element());
+    } catch (e) {
+      parserRejected = true;
+    }
+    if (!parserRejected) {
+      expect(diagnostics.getErrors().length).toBeGreaterThan(0);
     }
-
-    const allNodes = collectAllNodes(ast);
-    const ids = allNodes.map((s) => s.id);
-    const uniqueIds = new Set(ids);
-
-    expect(uniqueIds.size).toBe(ids.length);
   });
 
-  test('complex example from specification', () => {
-    const content = `<poml>
-  <task>Process the following data</task>
-  <text>
-    This is **markdown** content that will be processed as pure text.
-    
-    - Item 1
-    - Item 2
-
-    {{ VARIABLES_WILL_ALSO_SHOWN_AS_IS }}
-    <cp caption="Nested POML">This is a nested POML component that will be processed as POML.</cp>
-
-    No POML processing happens here.
-  </text>
-  <hint>Remember to check the format</hint>
-</poml>
-
-There can be some intervening text here as well.
-
-<poml>
-  <p>You can add another POML segment here: {{variable_will_be_substituted}}</p>
-</poml>
-
-<p>POML elements do not necessarily reside in a poml element.</p>`;
-
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.children).toHaveLength(5);
-
-    const firstPomlNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'poml');
-    expect(firstPomlNode).toBeDefined();
-    expect(firstPomlNode!.children).toHaveLength(4);
-
-    const textNode = firstPomlNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'text');
-    expect(textNode).toBeDefined();
-    expect(textNode!.children).toHaveLength(3);
-
-    const cpNode = textNode!.children.find((c) => c.kind === 'POML' && c.tagName === 'cp');
-    expect(cpNode).toBeDefined();
-
-    const secondPomlNode = ast.children.filter((c) => c.kind === 'POML' && c.tagName === 'poml')[1];
-    expect(secondPomlNode).toBeDefined();
-
-    const lineBreakNode = ast.children[3];
-    expect(lineBreakNode.kind).toBe('TEXT');
-    expect(lineBreakNode.content).toBe('\n\n');
-
-    const pNode = ast.children.find((c) => c.kind === 'POML' && c.tagName === 'p');
-    expect(pNode).toBeDefined();
+  test('plain text content reports no errors', () => {
+    // Sanity check only: the elementContent fallback for unknown content is not exercised here
+    diagnostics.clear();
+    parseToAst('normal text'); // Should not cause errors
+    expect(diagnostics.getErrors()).toHaveLength(0);
   });
+});
 
-  test('template variables in content', () => {
-    const content = `<task>Process {{variable}} with {{another_variable}}</task>`;
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('task');
-    expect(ast.children).toHaveLength(4); // text, template, text, template
+describe('AST Visitor - Special Tokens and Escapes', () => {
+  beforeEach(() => {
+    diagnostics.clear();
+  });
 
-    const templateNodes = ast.children.filter((c) => c.kind === 'TEMPLATE');
-    expect(templateNodes).toHaveLength(2);
-    expect(templateNodes[0].expression).toBe('variable');
-    expect(templateNodes[1].expression).toBe('another_variable');
+  describe('backslash escapes in quoted strings', () => {
+    test('basic escape sequences', () => {
+      const result = parseRule<LiteralNode>('"line1\\nline2"', (p) => p.quoted());
+      expect(result.value).toBe('line1\nline2');
+    });
+
+    test('unicode escape sequences', () => {
+      const result = parseRule<LiteralNode>('"\\u0048\\u0065\\u006C\\u006C\\u006F"', (p) => p.quoted()); // "Hello"
+      expect(result.value).toBe('Hello');
+    });
+
+    test('hex escape sequences', () => {
+      const result = parseRule<LiteralNode>('"\\x48\\x65\\x6C\\x6C\\x6F"', (p) => p.quoted()); // "Hello"
+      expect(result.value).toBe('Hello');
+    });
+
+    test('quote escapes', () => {
+      const result = parseRule<LiteralNode>('"\\"escaped quotes\\""', (p) => p.quoted());
+      expect(result.value).toBe('"escaped quotes"');
+    });
+
+    test('template brace escapes', () => {
+      const result = parseRule<LiteralNode>('"\\{{not a template\\}}"', (p) => p.quoted());
+      expect(result.value).toBe('{{not a template}}');
+    });
+
+    test('backslash escape', () => {
+      const result = parseRule<LiteralNode>('"path\\\\to\\\\file"', (p) => p.quoted());
+      expect(result.value).toBe('path\\to\\file');
+    });
+
+    test('unknown escape sequence', () => {
+      const result = parseRule<LiteralNode>('"\\q unknown"', (p) => p.quoted());
+      expect(result.value).toBe('q unknown'); // Unknown escape returns body without backslash
+    });
   });
 
-  test('template variables in text nodes are treated as literal', () => {
-    const content = `<text>Variables like {{this}} are shown as-is</text>`;
-    const ast = parseAST(content);
+  describe('character entities in text content', () => {
+    test('common HTML entities', () => {
+      const result = parseToAst('&amp; &lt; &gt; &quot; &apos;');
+      expect(result.children[0]).toMatchObject({
+        kind: 'STRING',
+        value: '& < > " \'',
+      });
+    });
 
-    expect(ast.kind).toBe('TEXT');
-    expect(ast.content).toBe(content);
-    expect(ast.children).toHaveLength(0);
-  });
+    test('numeric character references', () => {
+      const result = parseToAst('&#65; &#x41;'); // Both represent 'A'
+      expect(result.children[0]).toMatchObject({
+        kind: 'STRING',
+        value: 'A A',
+      });
+    });
 
-  test('template variables in attribute values', () => {
-    const content = `<task caption="Process {{variable}}">Content</task>`;
-    const ast = parseAST(content);
-
-    expect(ast.kind).toBe('POML');
-    expect(ast.tagName).toBe('task');
-    expect(ast.attributes).toHaveLength(1);
-
-    const attr = ast.attributes![0];
-    expect(attr.key).toBe('caption');
-    expect(attr.value).toHaveLength(2); // text + template
-    expect(attr.value[0].kind).toBe('TEXT');
-    expect(attr.value[0].content).toBe('Process ');
-    expect(attr.value[1].kind).toBe('TEMPLATE');
-    expect(attr.value[1].expression).toBe('variable');
+    test('mixed entities and regular text', () => {
+      const result = parseToAst('Hello &amp; welcome &lt;user&gt;');
+      expect(result.children[0]).toMatchObject({
+        kind: 'STRING',
+        value: 'Hello & welcome <user>',
+      });
+    });
   });
 
-  test('mixed template variables and text in attributes', () => {
-    const content = `<task title="Hello {{name}}, process {{data}} please">Content</task>`;
-    const ast = parseAST(content);
+  describe('escapes in different contexts', () => {
+    test('backslash escapes not processed in text content', () => {
+      const result = parseToAst('This \\n should stay as literal');
+      expect(result.children[0]).toMatchObject({
+        kind: 'STRING',
+        value: 'This \\n should stay as literal',
+      });
+    });
 
-    expect(ast.kind).toBe('POML');
-    expect(ast.attributes).toHaveLength(1);
+    test('entities not processed in quoted strings', () => {
+      const result = parseRule<LiteralNode>('"&amp; stays literal"', (p) => p.quoted());
+      expect(result.value).toBe('&amp; stays literal');
+    });
 
-    const attr = ast.attributes![0];
-    expect(attr.value).toHaveLength(4); // text, template, text, template
-    expect(attr.value[0].content).toBe('Hello ');
-    expect(attr.value[1].expression).toBe('name');
-    expect(attr.value[2].content).toBe(', process ');
-    expect(attr.value[3].expression).toBe('data');
+    test('template expressions preserve content', () => {
+      const result = parseRule<TemplateNode>('{{ "string with \\n escape" }}', (p) => p.template());
+      expect(result.value.value).toBe(' "string with \\n escape" ');
+    });
   });
 });
+
+// describe('AST Visitor - Complex Integration Tests', () => {
+//   beforeEach(() => {
+//     diagnostics.clear();
+//   });
+
+//   test('complex document with multiple element types', () => {
+//     const input = `<!-- @pragma whitespace collapse -->
+// <document>
+//   <meta author="test">
+//   <!-- This is a comment -->
+//   <section title="Introduction">
+//     Welcome to {{ appName }}!
+
+//     <list>
+//       <item for="task in tasks">
+//         Task: {{ task.name }} - Status: {{ task.status }}
+//       </item>
+//     </list>
+//   </section>
+
+//   <footer>&copy; 2024 Company</footer>
+// </document>`;
+
+//     const result = parseToAst(input);
+
+//     // Root should contain pragma, whitespace, and element
+//     expect(result.kind).toBe('ROOT');
+//     expect(result.children.length).toBeGreaterThan(0);
+
+//     // Find the pragma
+//     const pragma = result.children.find(child => child.kind === 'PRAGMA') as PragmaNode;
+//     expect(pragma).toBeDefined();
+//     expect(pragma.identifier.value).toBe('whitespace');
+//     expect(pragma.options[0].value).toBe('collapse');
+
+//     // Find the document element
+//     const document = result.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'document'
+//     ) as ElementNode;
+//     expect(document).toBeDefined();
+
+//     // Document should have nested content
+//     expect(document.children?.length).toBeGreaterThan(0);
+
+//     // Find section with template
+//     const section = document.children?.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'section'
+//     ) as ElementNode;
+//     expect(section).toBeDefined();
+//     expect(section.attributes).toHaveLength(1);
+//     expect(section.attributes[0].key.value).toBe('title');
+
+//     // Check for template in section content
+//     const templateInSection = section.children?.find(child => child.kind === 'TEMPLATE');
+//     expect(templateInSection).toBeDefined();
+
+//     // Find list with for attribute
+//     const findElementByName = (children: any[], name: string): ElementNode | undefined =>
+//       children.find(child => child.kind === 'ELEMENT' && child.name === name);
+
+//     const list = findElementByName(section.children!, 'list');
+//     expect(list).toBeDefined();
+
+//     const item = findElementByName(list!.children!, 'item');
+//     expect(item).toBeDefined();
+
+//     // Check for attribute with for iterator
+//     const forAttr = item!.attributes.find(attr => attr.key.value === 'for');
+//     expect(forAttr).toBeDefined();
+//     expect(forAttr!.value.kind).toBe('FORITERATOR');
+
+//     const forIterator = forAttr!.value as ForIteratorNode;
+//     expect(forIterator.iterator.value).toBe('task');
+//     expect(forIterator.collection.value).toBe('tasks');
+
+//     // Check footer with entity
+//     const footer = findElementByName(document.children, 'footer');
+//     expect(footer).toBeDefined();
+//     expect(footer!.children[0]).toMatchObject({
+//       kind: 'STRING',
+//       value: '© 2024 Company', // Entity should be decoded
+//     });
+//   });
+
+//   test('mixed content with templates, comments, and elements', () => {
+//     const input = `
+// Processing data for {{ userName }}...
+// <!-- Status: {{ status }} -->
+// <progress value="{{ progress }}" max="100">{{ progress }}%</progress>
+// Task completed!
+// `;
+
+//     const result = parseToAst(input);
+
+//     expect(result.children).toHaveLength(6); // whitespace, text, template, text, comment, text, element, text
+
+//     // Check template rendering
+//     const firstTemplate = result.children.find(child => child.kind === 'TEMPLATE') as TemplateNode;
+//     expect(firstTemplate).toBeDefined();
+//     expect(firstTemplate.value.value).toContain('userName');
+
+//     // Check comment
+//     const comment = result.children.find(child => child.kind === 'COMMENT') as CommentNode;
+//     expect(comment).toBeDefined();
+//     expect(comment.value.value).toContain('Status:');
+
+//     // Check progress element with template attributes
+//     const progress = result.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'progress'
+//     ) as ElementNode;
+//     expect(progress).toBeDefined();
+//     expect(progress.attributes).toHaveLength(2);
+
+//     // Check template in attribute value
+//     const valueAttr = progress.attributes.find(attr => attr.key.value === 'value');
+//     expect(valueAttr!.value.kind).toBe('VALUE');
+//     const valueNode = valueAttr!.value as ValueNode;
+//     expect(valueNode.children[0].kind).toBe('TEMPLATE');
+
+//     // Check template in element content
+//     const progressTemplate = progress.children?.find(child => child.kind === 'TEMPLATE');
+//     expect(progressTemplate).toBeDefined();
+//   });
+
+//   test('deeply nested structure with various features', () => {
+//     const input = `<poml>
+//   <config>
+//     <model name="gpt-4" temperature="0.7" />
+//     <output format="json" pretty="true" />
+//   </config>
+
+//   <task title="Data Analysis for {{ clientName }}">
+//     <description>
+//       Analyze the provided dataset &amp; generate insights.
+
+//       <requirements>
+//         <item>Statistical analysis</item>
+//         <item>Data visualization</item>
+//         <item for="metric in requiredMetrics">{{ metric }} calculation</item>
+//       </requirements>
+//     </description>
+
+//     <examples>
+//       <example input="{{ sampleData }}" output="{{ expectedOutput }}" />
+//     </examples>
+//   </task>
+// </poml>`;
+
+//     const result = parseToAst(input);
+
+//     expect(result.children).toHaveLength(1);
+//     const poml = result.children[0] as ElementNode;
+//     expect(poml.name).toBe('poml');
+//     expect(poml.children.length).toBeGreaterThan(0);
+
+//     // Verify deep nesting is preserved
+//     const task = poml.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'task'
+//     ) as ElementNode;
+//     expect(task).toBeDefined();
+
+//     const description = task.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'description'
+//     ) as ElementNode;
+//     expect(description).toBeDefined();
+
+//     const requirements = description.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'requirements'
+//     ) as ElementNode;
+//     expect(requirements).toBeDefined();
+
+//     // Check for iterator in nested structure
+//     const forItem = requirements.children.find(child => {
+//       if (child.kind !== 'ELEMENT') return false;
+//       const elem = child as ElementNode;
+//       return elem.name === 'item' && elem.attributes.some(attr => attr.key.value === 'for');
+//     }) as ElementNode;
+
+//     expect(forItem).toBeDefined();
+//     const forAttr = forItem.attributes.find(attr => attr.key.value === 'for')!;
+//     expect(forAttr.value.kind).toBe('FORITERATOR');
+
+//     // Verify self-closing elements work
+//     const config = poml.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'config'
+//     ) as ElementNode;
+//     expect(config).toBeDefined();
+
+//     const model = config.children.find(child =>
+//       child.kind === 'ELEMENT' && (child as ElementNode).name === 'model'
+//     ) as ElementNode;
+//     expect(model).toBeDefined();
+//     expect(model.children).toHaveLength(0); // Self-closing
+//     expect(model.attributes.length).toBeGreaterThan(0);
+//   });
+// });

From 3412504b5a6a836241180b92a3b00967b0620ee1 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 17:19:42 +0800
Subject: [PATCH 74/76] bug fixes

---
 packages/poml/next/ast.ts              | 70 +++++++++++++++++---------
 packages/poml/tests/reader/ast.test.ts | 10 ++--
 2 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index ca85c568..249f3dc6 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -52,33 +52,51 @@ import * as diagnostics from './diagnostics';
 function decodeEscape(seq: string): string {
   // seq includes the leading backslash (e.g. " , \n)
   const body = seq.slice(1);
-  switch (body) {
-    case 'n':
-      return '\n';
-    case 'r':
-      return '\r';
-    case 't':
-      return '\t';
-    case "'":
-      return "'";
-    case '"':
-      return '"';
-    case '{{': // \{{
-      return '{{';
-    case '}}': // \}}
-      return '}}';
-    case 'x':
-    case 'u':
-    case 'U': {
-      const hex = body.slice(1);
+  if (body === 'n') {
+    return '\n';
+  } else if (body === 'r') {
+    return '\r';
+  } else if (body === 't') {
+    return '\t';
+  } else if (body === "'") {
+    return "'";
+  } else if (body === '"') {
+    return '"';
+  } else if (body === '{{') {
+    // \{{
+    return '{{';
+  } else if (body === '}}') {
+    // \}}
+    return '}}';
+  } else if (body.startsWith('x')) {
+    // \xHH (2 hex digits)
+    const hex = body.slice(1);
+    if (hex.length === 2 && /^[0-9a-fA-F]{2}$/.test(hex)) {
       const n = parseInt(hex, 16);
       return String.fromCharCode(n);
     }
-    case '\\':
-      return '\\';
-    default:
-      // Unknown escape, return the sequence as-is minus the leading backslash (best effort)
-      return body;
+    return body; // Invalid hex escape
+  } else if (body.startsWith('u')) {
+    // \uHHHH (4 hex digits)
+    const hex = body.slice(1);
+    if (hex.length === 4 && /^[0-9a-fA-F]{4}$/.test(hex)) {
+      const n = parseInt(hex, 16);
+      return String.fromCharCode(n);
+    }
+    return body; // Invalid unicode escape
+  } else if (body.startsWith('U')) {
+    // \UHHHHHHHH (8 hex digits)
+    const hex = body.slice(1);
+    if (hex.length === 8 && /^[0-9a-fA-F]{8}$/.test(hex)) {
+      const n = parseInt(hex, 16);
+      return n <= 0x10ffff ? String.fromCodePoint(n) : body; // fromCodePoint throws RangeError above U+10FFFF
+    }
+    return body; // Invalid unicode escape
+  } else if (body === '\\') {
+    return '\\';
+  } else {
+    // Unknown escape, return the sequence as-is minus the leading backslash (best effort)
+    return body;
   }
 }
 
@@ -130,7 +148,9 @@ function rangeFromTokens(tokens: IToken[]): Range {
  * Range from Any CstNode (or is [0, 0] if none).
  */
 function rangeFromCstNode(node: CstNode): Range {
-  return rangeFrom(node.location?.startOffset ?? 0, node.location?.endOffset ?? node.location?.startOffset ?? 0);
+  const start = node.location?.startOffset ?? 0;
+  const end = node.location?.endOffset ?? start;
+  return rangeFrom(start, end);
 }
 
 /**
diff --git a/packages/poml/tests/reader/ast.test.ts b/packages/poml/tests/reader/ast.test.ts
index 32cd48a0..2e55144e 100644
--- a/packages/poml/tests/reader/ast.test.ts
+++ b/packages/poml/tests/reader/ast.test.ts
@@ -233,7 +233,7 @@ describe('AST Visitor - Individual Rules', () => {
         children: [
           {
             kind: 'TEMPLATE',
-            value: { kind: 'STRING', value: ' expression ' },
+            value: { kind: 'STRING', value: 'expression' },
           },
         ],
       });
@@ -374,7 +374,7 @@ describe('AST Visitor - Individual Rules', () => {
           {
             kind: 'ATTRIBUTE',
             key: { value: 'src' },
-            value: { kind: 'STRING', value: 'photo.jpg' },
+            value: { kind: 'VALUE', value: 'photo.jpg' },
           },
         ],
         children: [],
@@ -486,7 +486,7 @@ describe('AST Visitor - Special Tokens and Escapes', () => {
 
     test('unknown escape sequence', () => {
       const result = parseRule<LiteralNode>('"\\q unknown"', (p) => p.quoted());
-      expect(result.value).toBe('q unknown'); // Unknown escape returns body without backslash
+      expect(result.value).toBe('\\q unknown'); // Unknown escape returns body with backslash
     });
   });
 
@@ -531,8 +531,8 @@ describe('AST Visitor - Special Tokens and Escapes', () => {
     });
 
     test('template expressions preserve content', () => {
-      const result = parseRule<TemplateNode>('{{ "string with \\n escape" }}', (p) => p.template());
-      expect(result.value.value).toBe(' "string with \\n escape" ');
+      const result = parseRule<TemplateNode>('{{ "string with { } \\n \n escape" }}', (p) => p.template());
+      expect(result.value.value).toBe('"string with { } \\n \n escape"');
     });
   });
 });

From c500b0d4e7825f6205762d9f30f9dccf3bd5667a Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 18:22:31 +0800
Subject: [PATCH 75/76] bug fixes

---
 packages/poml/next/ast.ts              |   2 +-
 packages/poml/tests/reader/ast.test.ts | 191 +++++++++++++++++--------
 2 files changed, 134 insertions(+), 59 deletions(-)

diff --git a/packages/poml/next/ast.ts b/packages/poml/next/ast.ts
index 249f3dc6..367972f7 100644
--- a/packages/poml/next/ast.ts
+++ b/packages/poml/next/ast.ts
@@ -149,7 +149,7 @@ function rangeFromTokens(tokens: IToken[]): Range {
  */
 function rangeFromCstNode(node: CstNode): Range {
   const start = node.location?.startOffset ?? 0;
-  const end = node.location?.endOffset ?? start;
+  const end = node.location?.endOffset ?? node.location?.startOffset ?? start;
   return rangeFrom(start, end);
 }
 
diff --git a/packages/poml/tests/reader/ast.test.ts b/packages/poml/tests/reader/ast.test.ts
index 2e55144e..3393f388 100644
--- a/packages/poml/tests/reader/ast.test.ts
+++ b/packages/poml/tests/reader/ast.test.ts
@@ -86,19 +86,22 @@ describe('AST Visitor - Individual Rules', () => {
       expect(result.kind).toBe('ROOT');
       expect(result.children).toHaveLength(3);
 
-      expect(result.children[0]).toMatchObject({
+      expect(result.children[0]).toStrictEqual({
         kind: 'STRING',
         value: 'Hello ',
+        range: { start: 0, end: 5 },
       });
 
-      expect(result.children[1]).toMatchObject({
+      expect(result.children[1]).toStrictEqual({
         kind: 'TEMPLATE',
-        value: { kind: 'STRING', value: ' name ' },
+        value: { kind: 'STRING', value: 'name', range: expect.any(Object) },
+        range: expect.any(Object),
       });
 
-      expect(result.children[2]).toMatchObject({
+      expect(result.children[2]).toStrictEqual({
         kind: 'STRING',
         value: '!',
+        range: { start: 16, end: 16 },
       });
     });
   });
@@ -110,7 +113,7 @@ describe('AST Visitor - Individual Rules', () => {
         kind: 'TEMPLATE',
         value: {
           kind: 'STRING',
-          value: ' var ',
+          value: 'var',
           range: expect.any(Object),
         },
         range: expect.any(Object),
@@ -119,7 +122,7 @@ describe('AST Visitor - Individual Rules', () => {
 
     test('complex expression template', () => {
       const result = parseRule<TemplateNode>('{{ user.name.toUpperCase() }}', (p) => p.template());
-      expect(result.value.value).toBe(' user.name.toUpperCase() ');
+      expect(result.value.value).toBe('user.name.toUpperCase()');
     });
 
     test('template without spaces', () => {
@@ -152,40 +155,46 @@ describe('AST Visitor - Individual Rules', () => {
   describe('pragma rule', () => {
     test('pragma with identifier only', () => {
       const result = parseRule<PragmaNode>('<!-- @pragma version -->', (p) => p.pragma());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'PRAGMA',
         identifier: {
           kind: 'STRING',
           value: 'version',
+          range: { start: 13, end: 19 },
         },
         options: [],
+        range: { start: 0, end: 23 }, // inclusive end: the input is 24 characters (offsets 0-23)
       });
     });
 
     test('pragma with unquoted options', () => {
       const result = parseRule<PragmaNode>('<!-- @pragma components +reference -table -->', (p) => p.pragma());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'PRAGMA',
         identifier: {
           kind: 'STRING',
           value: 'components',
+          range: { start: 13, end: 22 },
         },
         options: [
-          { kind: 'STRING', value: '+reference' },
-          { kind: 'STRING', value: '-table' },
+          { kind: 'STRING', value: '+reference', range: { start: 24, end: 33 } },
+          { kind: 'STRING', value: '-table', range: { start: 35, end: 40 } },
         ],
+        range: { start: 0, end: 44 },
       });
     });
 
     test('pragma with quoted options', () => {
       const result = parseRule<PragmaNode>('<!-- @pragma whitespace "pre formatted" -->', (p) => p.pragma());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'PRAGMA',
         identifier: {
           kind: 'STRING',
           value: 'whitespace',
+          range: { start: 13, end: 22 },
         },
-        options: [{ kind: 'STRING', value: 'pre formatted' }],
+        options: [{ kind: 'STRING', value: 'pre formatted', range: { start: 24, end: 38 } }],
+        range: { start: 0, end: 42 },
       });
     });
   });
@@ -213,39 +222,55 @@ describe('AST Visitor - Individual Rules', () => {
   describe('quotedTemplate rule', () => {
     test('quoted string with template', () => {
       const result = parseRule<ValueNode>('"Hello {{ name }}!"', (p) => p.quotedTemplate());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'VALUE',
         children: [
-          { kind: 'STRING', value: 'Hello ' },
+          { kind: 'STRING', value: 'Hello ', range: { start: 1, end: 6 } },
           {
             kind: 'TEMPLATE',
-            value: { kind: 'STRING', value: ' name ' },
+            value: { kind: 'STRING', value: 'name', range: { start: 10, end: 13 } },
+            range: { start: 7, end: 16 }, // '{{' opens at offset 7, '}}' closes at offset 16
           },
-          { kind: 'STRING', value: '!' },
+          { kind: 'STRING', value: '!', range: { start: 17, end: 17 } },
         ],
+        range: { start: 0, end: 18 },
       });
     });
 
     test('quoted template with only template', () => {
       const result = parseRule<ValueNode>('"{{ expression }}"', (p) => p.quotedTemplate());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'VALUE',
         children: [
           {
             kind: 'TEMPLATE',
-            value: { kind: 'STRING', value: 'expression' },
+            value: { kind: 'STRING', value: 'expression', range: { start: 4, end: 13 } },
+            range: { start: 1, end: 16 },
           },
         ],
+        range: { start: 0, end: 17 },
       });
     });
 
     test('multiple templates in quoted string', () => {
       const result = parseRule<ValueNode>('"{{ first }} and {{ second }}"', (p) => p.quotedTemplate());
-      expect(result.children).toHaveLength(4);
-      expect(result.children[0]).toMatchObject({ kind: 'TEMPLATE' });
-      expect(result.children[1]).toMatchObject({ kind: 'STRING', value: ' and ' });
-      expect(result.children[2]).toMatchObject({ kind: 'TEMPLATE' });
-      expect(result.children[3]).toMatchObject({ kind: 'STRING', value: '' });
+      expect(result).toStrictEqual({
+        kind: 'VALUE',
+        children: [
+          {
+            kind: 'TEMPLATE',
+            value: { kind: 'STRING', value: 'first', range: { start: 3, end: 7 } },
+            range: { start: 1, end: 11 },
+          },
+          { kind: 'STRING', value: ' and ', range: { start: 12, end: 16 } },
+          {
+            kind: 'TEMPLATE',
+            value: { kind: 'STRING', value: 'second', range: { start: 20, end: 25 } },
+            range: { start: 17, end: 28 },
+          },
+        ],
+        range: { start: 0, end: 30 },
+      });
     });
   });
 
@@ -257,23 +282,24 @@ describe('AST Visitor - Individual Rules', () => {
         iterator: {
           kind: 'STRING',
           value: 'item',
-          range: expect.any(Object),
+          range: { start: 1, end: 4 },
         },
         collection: {
           kind: 'STRING',
           value: 'items',
-          range: expect.any(Object),
+          range: { start: 9, end: 13 },
         },
-        range: expect.any(Object),
+        range: { start: 0, end: 14 },
       });
     });
 
     test('for iterator with property access', () => {
       const result = parseRule<ForIteratorNode>('"user in data.users"', (p) => p.forIteratorValue());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'FORITERATOR',
-        iterator: { kind: 'STRING', value: 'user' },
-        collection: { kind: 'STRING', value: 'data.users' },
+        iterator: { kind: 'STRING', value: 'user', range: { start: 1, end: 4 } },
+        collection: { kind: 'STRING', value: 'data.users', range: { start: 9, end: 18 } },
+        range: { start: 0, end: 19 },
       });
     });
 
@@ -288,47 +314,72 @@ describe('AST Visitor - Individual Rules', () => {
   describe('attribute rule', () => {
     test('attribute with quoted value', () => {
       const result = parseRule<AttributeNode>('class="container"', (p) => p.attribute());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ATTRIBUTE',
-        key: { kind: 'STRING', value: 'class' },
-        value: { kind: 'STRING', value: 'container' },
+        key: { kind: 'STRING', value: 'class', range: { start: 0, end: 4 } },
+        value: {
+          kind: 'VALUE',
+          children: [{ kind: 'STRING', value: 'container', range: { start: 7, end: 15 } }],
+          range: { start: 6, end: 16 },
+        },
+        range: { start: 0, end: 16 }, // inclusive end: the input is 17 characters (offsets 0-16)
       });
     });
 
     test('attribute with template value', () => {
       const result = parseRule<AttributeNode>('title={{ pageTitle }}', (p) => p.attribute());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ATTRIBUTE',
-        key: { kind: 'STRING', value: 'title' },
+        key: { kind: 'STRING', value: 'title', range: { start: 0, end: 4 } },
         value: {
           kind: 'VALUE',
-          children: [{ kind: 'TEMPLATE' }],
+          children: [
+            {
+              kind: 'TEMPLATE',
+              value: { kind: 'STRING', value: 'pageTitle', range: { start: 9, end: 17 } },
+              range: { start: 6, end: 20 },
+            },
+          ],
+          range: { start: 6, end: 20 },
         },
+        range: { start: 0, end: 20 },
       });
     });
 
     test('attribute with for iterator', () => {
       const result = parseRule<AttributeNode>('for="item in items"', (p) => p.attribute());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ATTRIBUTE',
-        key: { kind: 'STRING', value: 'for' },
+        key: { kind: 'STRING', value: 'for', range: { start: 0, end: 2 } },
         value: {
           kind: 'FORITERATOR',
-          iterator: { kind: 'STRING', value: 'item' },
-          collection: { kind: 'STRING', value: 'items' },
+          iterator: { kind: 'STRING', value: 'item', range: { start: 5, end: 8 } },
+          collection: { kind: 'STRING', value: 'items', range: { start: 13, end: 17 } },
+          range: { start: 4, end: 18 },
         },
+        range: { start: 0, end: 18 },
       });
     });
 
     test('attribute with quoted template value', () => {
       const result = parseRule<AttributeNode>('message="Hello {{ name }}!"', (p) => p.attribute());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ATTRIBUTE',
-        key: { kind: 'STRING', value: 'message' },
+        key: { kind: 'STRING', value: 'message', range: { start: 0, end: 6 } },
         value: {
           kind: 'VALUE',
-          children: [{ kind: 'STRING', value: 'Hello ' }, { kind: 'TEMPLATE' }, { kind: 'STRING', value: '!' }],
+          children: [
+            { kind: 'STRING', value: 'Hello ', range: { start: 9, end: 14 } },
+            {
+              kind: 'TEMPLATE',
+              value: { kind: 'STRING', value: 'name', range: { start: 18, end: 21 } },
+              range: { start: 15, end: 24 },
+            },
+            { kind: 'STRING', value: '!', range: { start: 25, end: 25 } },
+          ],
+          range: { start: 8, end: 26 },
         },
+        range: { start: 0, end: 26 },
       });
     });
   });
@@ -336,66 +387,90 @@ describe('AST Visitor - Individual Rules', () => {
   describe('element rule', () => {
     test('simple element', () => {
       const result = parseRule<ElementNode>('<div>content</div>', (p) => p.element());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ELEMENT',
         name: 'div',
         attributes: [],
-        children: [{ kind: 'STRING', value: 'content' }],
+        children: [{ kind: 'STRING', value: 'content', range: { start: 5, end: 11 } }],
+        range: { start: 0, end: 17 },
       });
     });
 
     test('element with attributes', () => {
       const result = parseRule<ElementNode>('<div class="container" id="main">text</div>', (p) => p.element());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ELEMENT',
         name: 'div',
         attributes: [
           {
             kind: 'ATTRIBUTE',
-            key: { value: 'class' },
-            value: { kind: 'STRING', value: 'container' },
+            key: { kind: 'STRING', value: 'class', range: { start: 5, end: 9 } },
+            value: {
+              kind: 'VALUE',
+              children: [{ kind: 'STRING', value: 'container', range: { start: 12, end: 20 } }],
+              range: { start: 11, end: 21 },
+            },
+            range: { start: 5, end: 21 },
           },
           {
             kind: 'ATTRIBUTE',
-            key: { value: 'id' },
-            value: { kind: 'STRING', value: 'main' },
+            key: { kind: 'STRING', value: 'id', range: { start: 23, end: 24 } },
+            value: {
+              kind: 'VALUE',
+              children: [{ kind: 'STRING', value: 'main', range: { start: 27, end: 30 } }],
+              range: { start: 26, end: 31 },
+            },
+            range: { start: 23, end: 31 },
           },
         ],
-        children: [{ kind: 'STRING', value: 'text' }],
+        children: [{ kind: 'STRING', value: 'text', range: { start: 33, end: 36 } }],
+        range: { start: 0, end: 42 },
       });
     });
 
     test('self-closing element', () => {
       const result = parseRule<ElementNode>('<img src="photo.jpg" />', (p) => p.element());
-      expect(result).toMatchObject({
+      expect(result).toStrictEqual({
         kind: 'ELEMENT',
         name: 'img',
         attributes: [
           {
             kind: 'ATTRIBUTE',
-            key: { value: 'src' },
-            value: { kind: 'VALUE', value: 'photo.jpg' },
+            key: { kind: 'STRING', value: 'src', range: { start: 5, end: 7 } },
+            value: {
+              kind: 'VALUE',
+              children: [{ kind: 'STRING', value: 'photo.jpg', range: { start: 10, end: 18 } }],
+              range: { start: 9, end: 19 },
+            },
+            range: { start: 5, end: 19 },
           },
         ],
         children: [],
+        range: { start: 0, end: 22 },
       });
     });
 
     test('element with nested content', () => {
       const result = parseRule<ElementNode>('<task>Process {{ data }} carefully</task>', (p) => p.element());
       expect(result.children).toHaveLength(3);
-      expect(result.children[0]).toMatchObject({ kind: 'STRING', value: 'Process ' });
-      expect(result.children[1]).toMatchObject({ kind: 'TEMPLATE' });
-      expect(result.children[2]).toMatchObject({ kind: 'STRING', value: ' carefully' });
+      expect(result.children[0]).toStrictEqual({ kind: 'STRING', value: 'Process ', range: { start: 6, end: 13 } });
+      expect(result.children[1]).toStrictEqual({
+        kind: 'TEMPLATE',
+        value: { kind: 'STRING', value: 'data', range: { start: 17, end: 20 } },
+        range: { start: 14, end: 23 },
+      });
+      expect(result.children[2]).toStrictEqual({ kind: 'STRING', value: ' carefully', range: { start: 24, end: 33 } });
     });
 
     test('nested elements', () => {
       const result = parseRule<ElementNode>('<div><span>nested</span></div>', (p) => p.element());
       expect(result.children).toHaveLength(1);
-      expect(result.children[0]).toMatchObject({
+      expect(result.children[0]).toStrictEqual({
         kind: 'ELEMENT',
         name: 'span',
-        children: [{ kind: 'STRING', value: 'nested' }],
+        attributes: [],
+        children: [{ kind: 'STRING', value: 'nested', range: { start: 11, end: 16 } }],
+        range: { start: 5, end: 23 },
       });
     });
   });

From 6d8c8b2ec558233e06343a65c4bc1ffef1d90f24 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Tue, 9 Sep 2025 18:45:55 +0800
Subject: [PATCH 76/76] .

---
 packages/poml/tests/reader/ast.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/poml/tests/reader/ast.test.ts b/packages/poml/tests/reader/ast.test.ts
index 3393f388..6d1c1d54 100644
--- a/packages/poml/tests/reader/ast.test.ts
+++ b/packages/poml/tests/reader/ast.test.ts
@@ -259,7 +259,7 @@ describe('AST Visitor - Individual Rules', () => {
         children: [
           {
             kind: 'TEMPLATE',
-            value: { kind: 'STRING', value: 'first', range: { start: 3, end: 7 } },
+            value: { kind: 'STRING', value: 'first', range: { start: 4, end: 8 } },
             range: { start: 1, end: 11 },
           },
           { kind: 'STRING', value: ' and ', range: { start: 12, end: 16 } },
@@ -269,7 +269,7 @@ describe('AST Visitor - Individual Rules', () => {
             range: { start: 17, end: 28 },
           },
         ],
-        range: { start: 0, end: 30 },
+        range: { start: 0, end: 29 },
       });
     });
   });