From cba1c31c21cf3145e101b7154fd01b86144f38ea Mon Sep 17 00:00:00 2001 From: Stephen Pollei Date: Mon, 7 Sep 2015 18:00:28 -0700 Subject: [PATCH 1/3] added <option> as an optional omitted end tag, as per http://www.w3.org/TR/html5/syntax.html#optional-tags modified: src/HTMLParser.js modified: test/test-slowparse.js --- src/HTMLParser.js | 14 ++++++++++++-- test/test-slowparse.js | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/HTMLParser.js b/src/HTMLParser.js index a54d3dc..6aa380a 100644 --- a/src/HTMLParser.js +++ b/src/HTMLParser.js @@ -116,7 +116,7 @@ module.exports = (function(){ // http://www.w3.org/TR/html5/syntax.html#optional-tags // HTML elements that with omittable close tag - omittableCloseTagHtmlElements: ["p", "li", "td", "th"], + omittableCloseTagHtmlElements: ["p", "li", "td", "th", "option"], // HTML elements that paired with omittable close tag list omittableCloseTags: { @@ -126,7 +126,17 @@ module.exports = (function(){ "section", "table", "ul"], "th": ["th", "td"], "td": ["th", "td"], - "li": ["li"] + "tr": ["tr"], + "li": ["li"], + "rb": ["rb","rt","rtc","rp"], + "rt": ["rb","rt","rtc","rp"], + "rtc": ["rb","rtc","rp"], + "rp": ["rb","rt","rtc","rp"], + "optgroup": ["optgroup"], + "option": ["option"], + "thead": ["tbody", "tfoot"], + "tbody": ["tbody", "tfoot"], + "tfoot": ["tbody"] }, // We keep a list of all valid HTML5 elements. diff --git a/test/test-slowparse.js b/test/test-slowparse.js index b3b4df2..47a029e 100644 --- a/test/test-slowparse.js +++ b/test/test-slowparse.js @@ -597,6 +597,12 @@ module.exports = function(Slowparse, window, document, validators) { ok(!result.error, "no error on omitted

"); }); + test("parsing elements with optional close tags: ", function() { + var html = ''; + var result = parse(html); + ok(!result.error, "no error on omitted "); + }); + test("intentional fail for optional close tag (incorrect use). pass = not accepted", function() { var html = '

text\nmore text

'; var result = parse(html); From db80ca9853f1ee504855fa8495aef415e47aa7aa Mon Sep 17 00:00:00 2001 From: Stephen Pollei Date: Sun, 20 Sep 2015 16:36:10 -0700 Subject: [PATCH 2/3] Added more to omittableCloseTagHtmlElements; fixed dd and dt as per https://bugzilla.mozilla.org/show_bug.cgi?id=946393; got some ruby working; added more tests of omittable tags, 6 of which currently fail -- they fail because omittable tags may nest, so more care is needed * modified: src/HTMLParser.js * modified: test/test-slowparse.js --- src/HTMLParser.js | 7 ++++-- test/test-slowparse.js | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/HTMLParser.js b/src/HTMLParser.js index 6aa380a..a323533 100644 --- a/src/HTMLParser.js +++ b/src/HTMLParser.js @@ -116,7 +116,8 @@ module.exports = (function(){ // http://www.w3.org/TR/html5/syntax.html#optional-tags // HTML elements that with omittable close tag - omittableCloseTagHtmlElements: ["p", "li", "td", "th", "option"], + omittableCloseTagHtmlElements: ["p", "li", "thead", "tbody", "tfoot", "tr", "td", "th", + "dt", "dd", "rb", "rt", "rtc", "rp", "option", "optgroup" ], // HTML elements that paired with omittable close tag list omittableCloseTags: { @@ -128,12 +129,14 @@ module.exports = (function(){ "td": ["th", "td"], "tr": ["tr"], "li": ["li"], + "dt": ["dt", "dd"], + "dd": ["dt", "dd"], "rb": ["rb","rt","rtc","rp"], "rt": ["rb","rt","rtc","rp"], "rtc": ["rb","rtc","rp"], "rp": ["rb","rt","rtc","rp"], "optgroup": ["optgroup"], - "option": ["option"], + "option": ["option", "optgroup"], "thead": ["tbody", "tfoot"], "tbody": ["tbody", "tfoot"], "tfoot": ["tbody"] diff --git a/test/test-slowparse.js b/test/test-slowparse.js index 47a029e..3b93ec6 100644 --- a/test/test-slowparse.js +++ b/test/test-slowparse.js @@ -597,12 +597,60 @@ module.exports = function(Slowparse, window, document, validators) { ok(!result.error, "no error on omitted

"); }); + test("parsing elements with optional close tags: ", function() { + var html = ''; + var result = parse(html); + ok(!result.error, "no error on omitted and "); + }); + test("parsing elements with optional close tags: ", function() { var html = ''; var result = parse(html); ok(!result.error, "no error on omitted "); }); + test("parsing elements with optional close tags:
", function() { + var html = '
'; + var result = parse(html); + ok(!result.error, "no error on omitted before "); + }); + + test("parsing elements with optional close tags:
", function() { + var html = '
'; + var result = parse(html); + ok(!result.error, "no error on omitted "); + }); + + test("parsing elements with optional close tags:
", function() { + var html = '
'; + var result = parse(html); + ok(!result.error, "no error on omitted and "); + }); + + test("parsing elements with optional close tags:
", function() { + var html = '
'; + var result = parse(html); + ok(!result.error, "no error on omitted and "); + }); + + test("parsing elements with optional close tags:
Coffee
Black hot drink
Milk
White cold drink
", function() { + var html = '
Coffee
Black hot drink
Milk
White cold drink
'; + var result = parse(html); + ok(!result.error, "no error on omitted and "); + }); + + test("parsing elements with optional close tags: (Kan)(ji)", function() { + var html = '(Kan)(ji)'; + var result = parse(html); + ok(!result.error, "no error on omitted and "); + }); + + test("parsing elements with optional close tags: 10312002MonthDayYearExpiration Date", function() { + var html = '10312002MonthDayYearExpiration Date'; + var result = parse(html); + ok(!result.error, "no error on omitted , ,and "); + }); + test("intentional fail for optional close tag (incorrect use). pass = not accepted", function() { var html = '

text\nmore text

'; var result = parse(html); From 7320f4a3a2d20bf81c3facf65a4e07e2b9d3f6f3 Mon Sep 17 00:00:00 2001 From: Stephen Pollei Date: Sun, 4 Oct 2015 20:56:01 -0700 Subject: [PATCH 3/3] Fixed a few problems with nested omittable tags; still has serious problems, but down to 2 failures instead of 6 modified: src/DOMBuilder.js modified: src/HTMLParser.js --- src/DOMBuilder.js | 6 ++++++ src/HTMLParser.js | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/DOMBuilder.js b/src/DOMBuilder.js index c3fae32..12046dc 100644 --- a/src/DOMBuilder.js +++ b/src/DOMBuilder.js @@ -35,6 +35,12 @@ module.exports = (function(){ popElement: function() { this.currentNode = this.currentNode.parentNode; }, + popElements: function(n) { + while (n>0) { + this.currentNode = this.currentNode.parentNode; + n--; + } + }, // record the cursor position for a context change (text/html/css/script) pushContext: function(context, position) { this.contexts.push({ diff --git a/src/HTMLParser.js b/src/HTMLParser.js index a323533..3aa2324 100644 --- a/src/HTMLParser.js +++ b/src/HTMLParser.js @@ -237,6 +237,38 @@ module.exports = (function(){ return this.attributeNamespaces.indexOf(ns) !== -1; }, + _isNextTagEnder: function (stream) { + var closeTag=isNextCloseTag(stream); + var currNode = this.domBuilder.currentNode ; + var n=1; + if (!closeTag) {return false;} + closeTag=closeTag.toLowerCase(); + while (currNode && currNode.nodeName) { + var tagName = currNode.nodeName.toLowerCase(); + if (closeTag == tagName) { return {'n': +n}; } + if (!this._knownOmittableCloseTagHtmlElement(tagName)) { return false;} + currNode=currNode.parentNode; + n++; + } + return false; + }, + + _popOmited: function(tag) { + var currNode = this.domBuilder.currentNode ; + var n=1; + tag=tag.toLowerCase(); + while (currNode && currNode.nodeName) { + var tagName = currNode.nodeName.toLowerCase(); + if (!this._knownOmittableCloseTagHtmlElement(tagName)) { return false;} + 
n++; + if (this._knownOmittableCloseTags(tagName,tag)) break; + //if (tag == tagName) { return {'n': +n}; } + currNode=currNode.parentNode; + } + this.domBuilder.popElements(n-1); + + }, + // #### The HTML Master Parse Function // // The HTML master parse function works the same as the CSS @@ -350,12 +382,16 @@ module.exports = (function(){ // If the preceding tag and the active tag is omittableCloseTag pairs, // we tell our DOM builder that we're done. + // FIXME TODO get rid of activeTagNode; add loop that can pop nested omitable FIXME TODO + this._popOmited(tagName); + /* if (activeTagNode && parentTagNode != this.domBuilder.fragment.node){ var activeTagName = activeTagNode.nodeName.toLowerCase(); if(this._knownOmittableCloseTags(activeTagName, tagName)) { this.domBuilder.popElement(); } } + */ // Store currentNode as the parentTagNode parentTagNode = this.domBuilder.currentNode; this.domBuilder.pushElement(tagName, parseInfo, nameSpace); @@ -469,6 +505,7 @@ module.exports = (function(){ // If the open tag represents a optional-omit-close-tag element, there may be // an optional closing element, so we save the currentNode into activeTag for next step check. + // FIXME TODO kill activeTagNode activeTagNode = false; if (tagName && this._knownOmittableCloseTagHtmlElement(tagName)){ activeTagNode = this.domBuilder.currentNode; @@ -499,8 +536,15 @@ module.exports = (function(){ this.domBuilder.pushContext("html", this.stream.pos); } + // FIXME TODO isNextParent is replaced by _isNextTagEnder(stream) + // also it can pop more than one element FIXME TODO // if there is no more content in the parent element, we tell DOM builder that we're done. - if(parentTagNode && parentTagNode != this.domBuilder.fragment.node) { + // this. _isNextTagEnder(this.stream); + var tagEnds=this. 
_isNextTagEnder(this.stream); + if (tagEnds && tagEnds.n > 1) { + this.domBuilder.popElements(tagEnds.n-1); + } + /* if(parentTagNode && parentTagNode != this.domBuilder.fragment.node) { var parentTagName = parentTagNode.nodeName.toLowerCase(), nextIsParent = isNextTagParent(this.stream, parentTagName), needsEndTag = !allowsOmmitedEndTag(parentTagName, tagName), @@ -512,6 +556,7 @@ module.exports = (function(){ } } } + */ return; }