From ffec829c2c6ae42aca5922f034f415b9b6f10c73 Mon Sep 17 00:00:00 2001
From: Vinayak Mishra <viks@vnykmshr.com>
Date: Tue, 17 Feb 2026 11:49:26 +0545
Subject: [PATCH] fix: replace invalid codepoints with U+FFFD in entity parser

entity() emits a literal null byte for &#0; instead of the Unicode
replacement character. The CommonMark spec (section 6.2) requires
U+FFFD for codepoint 0, surrogates (0xD800-0xDFFF), and values
above 0x10FFFF.

Detect all three invalid codepoint categories in entity() and emit
U+FFFD in their place instead of the raw value.
---
 inline_test.go   | 8 ++++++++
 parser/inline.go | 7 ++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/inline_test.go b/inline_test.go
index f045201c..b5e20b69 100644
--- a/inline_test.go
+++ b/inline_test.go
@@ -1325,3 +1325,11 @@ func BenchmarkSmartDoubleQuotes(b *testing.B) {
 		runMarkdown("this should be normal \"quoted\" text.\n", params)
 	}
 }
+
+func TestEntityNullByte(t *testing.T) {
+	// &#0; must render as U+FFFD (CommonMark spec section 6.2), not as a literal NUL byte.
+	doTestsInlineParam(t, []string{
+		"&#0;",
+		"<p>\uFFFD</p>\n",
+	}, TestParams{})
+}
diff --git a/parser/inline.go b/parser/inline.go
index d526ce22..39c1d5a1 100644
--- a/parser/inline.go
+++ b/parser/inline.go
@@ -817,7 +817,12 @@ func entity(p *Parser, data []byte, offset int) (int, ast.Node) {
 		codepoint, err = strconv.ParseUint(string(ent[2:len(ent)-1]), 10, 64)
 	}
 	if err == nil { // only if conversion was valid return here.
-		return end, newTextNode([]byte(string(rune(codepoint))))
+		r := rune(codepoint) // narrowing a uint64 can wrap, so validate codepoint itself below
+		// Replace invalid codepoints with U+FFFD per CommonMark spec section 6.2
+		if codepoint == 0 || (codepoint >= 0xD800 && codepoint <= 0xDFFF) || codepoint > 0x10FFFF {
+			r = '\uFFFD'
+		}
+		return end, newTextNode([]byte(string(r)))
 	}
 
 	return end, newTextNode(ent)