diff --git a/inline_test.go b/inline_test.go
index f045201c..b5e20b69 100644
--- a/inline_test.go
+++ b/inline_test.go
@@ -1325,3 +1325,11 @@ func BenchmarkSmartDoubleQuotes(b *testing.B) {
 		runMarkdown("this should be normal \"quoted\" text.\n", params)
 	}
 }
+
+func TestEntityNullByte(t *testing.T) {
+	// &#0; should produce U+FFFD per CommonMark spec section 6.2
+	doTestsInlineParam(t, []string{
+		"&#0;",
+		"<p>\uFFFD</p>\n",
+	}, TestParams{})
+}
diff --git a/parser/inline.go b/parser/inline.go
index d526ce22..39c1d5a1 100644
--- a/parser/inline.go
+++ b/parser/inline.go
@@ -817,7 +817,14 @@ func entity(p *Parser, data []byte, offset int) (int, ast.Node) {
 		codepoint, err = strconv.ParseUint(string(ent[2:len(ent)-1]), 10, 64)
 	}
 	if err == nil { // only if conversion was valid return here.
-		return end, newTextNode([]byte(string(rune(codepoint))))
+		// Replace invalid codepoints (NUL, surrogates, values above U+10FFFF)
+		// with U+FFFD per CommonMark spec section 6.2. Validate the parsed
+		// uint64 before narrowing to rune: a value wider than 32 bits would
+		// otherwise wrap around and could land on a valid codepoint.
+		r := '\uFFFD'
+		if codepoint != 0 && codepoint <= 0x10FFFF && (codepoint < 0xD800 || codepoint > 0xDFFF) {
+			r = rune(codepoint)
+		}
+		return end, newTextNode([]byte(string(r)))
 	}
 
 	return end, newTextNode(ent)