diff --git a/extensions/example/example.c b/extensions/example/example.c index df02cb6c..99c03b80 100644 --- a/extensions/example/example.c +++ b/extensions/example/example.c @@ -6,7 +6,7 @@ * Beginning with 8.4, Tcl API is CONST'ified */ #if (TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION <= 3) -# define CONST84 +# define const #endif extern char *Tdom_InitStubs (Tcl_Interp *interp, char *version, int exact); @@ -139,7 +139,7 @@ TclExampleObjCmd(dummy, interp, objc, objv) simpleCounter *counter; - static CONST84 char *exampleMethods[] = { + static const char *exampleMethods[] = { "enable", "getresult", "remove", NULL }; diff --git a/extensions/tnc/tnc.c b/extensions/tnc/tnc.c index 7049080b..eae060fe 100644 --- a/extensions/tnc/tnc.c +++ b/extensions/tnc/tnc.c @@ -13,7 +13,7 @@ * Beginning with 8.4, Tcl API is CONST'ified */ #if (TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION <= 3) -# define CONST84 +# define const #endif #ifndef TCL_THREADS @@ -2363,7 +2363,7 @@ tnc_ValidateObjCmd ( Tcl_HashEntry *entryPtr; TNC_Content *model; - static CONST84 char *validateMethods[] = { + static const char *validateMethods[] = { "validateTree", "validateDocument", "validateAttributes", "delete", NULL @@ -2824,7 +2824,7 @@ TclTncObjCmd(dummy, interp, objc, objv) int methodIndex, result; TNC_Data *tncdata; - static CONST84 char *tncMethods[] = { + static const char *tncMethods[] = { "enable", "remove", "getValidateCmd", NULL }; diff --git a/generic/dom.c b/generic/dom.c index b1377f52..bf1e3417 100644 --- a/generic/dom.c +++ b/generic/dom.c @@ -1783,7 +1783,7 @@ externalEntityRefHandler ( Tcl_Channel chan = (Tcl_Channel) NULL; enum XML_Status status; XML_Index storedNextFeedbackPosition; - CONST84 char *interpResult; + const char *interpResult; if (info->document->extResolver == NULL) { Tcl_AppendResult (info->interp, "Can't read external entity \"", @@ -5298,7 +5298,7 @@ TclTdomObjCmd (dummy, interp, objc, objv) Tcl_Obj *newObjName = NULL; TEncoding *encoding; - static CONST84 char *tdomMethods[] = { + static const char *tdomMethods[] = { "enable", "getdoc", "setResultEncoding", "setStoreLineColumn", "setExternalEntityResolver", "keepEmpties", diff --git a/generic/dom.h b/generic/dom.h index da8f3e92..7b1a5b12 100644 --- a/generic/dom.h +++ b/generic/dom.h @@ -90,7 +90,6 @@ * Beginning with 8.4, Tcl API is CONST'ified */ #if (TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION <= 3) -# define CONST84 #endif /* diff --git a/generic/domhtml.c b/generic/domhtml.c index d544835c..6c090925 100644 --- a/generic/domhtml.c +++ b/generic/domhtml.c @@ -598,6 +598,7 @@ static void TranslateEntityRefs ( value += c-'a' + 10; } else { /* error */ + break; } i++; } @@ -608,28 +609,36 @@ static void TranslateEntityRefs ( value += c-'0'; } else { /* error */ + break; } i++; } } - if (z[i]!=';') { - /* error */ - } - from = i+1; + if (z[i] == ';') { + from = i+1; #if TclOnly8Bits - z[to++] = value; -#else - if (value < 0x80) { z[to++] = value; - } else if (value <= 0x7FF) { - z[to++] = (char) ((value >> 6) | 0xC0); - z[to++] = (char) ((value | 0x80) & 0xBF); - } else if (value <= 0xFFFF) { - z[to++] = (char) ((value >> 12) | 0xE0); - z[to++] = (char) (((value >> 6) | 0x80) & 0xBF); - z[to++] = (char) ((value | 0x80) & 0xBF); +#else + if (value < 0x80) { + z[to++] = value; + } else if (value <= 0x7FF) { + z[to++] = (char) ((value >> 6) | 0xC0); + z[to++] = (char) ((value | 0x80) & 0xBF); + } else if (value <= 0xFFFF) { + z[to++] = (char) ((value >> 12) | 0xE0); + z[to++] = (char) (((value >> 6) | 0x80) & 0xBF); + z[to++] = (char) ((value | 0x80) & 0xBF); + } else { + /* error */ + while (from < i-1) { + z[to++] = z[from++]; + } + } } else { /* error */ + while (from < i-1) { + z[to++] = z[from++]; + } } #endif } else { @@ -870,7 +879,8 @@ HTML_SimpleParse ( case 'i': if (!strcmp(pn,"i")) autoclose = 1; break; case 'l': if (!strcmp(pn,"li")) autoclose = 1; break; case 'n': if (!strcmp(pn,"noscript")) autoclose = 1; break; - case 'o': if (!strcmp(pn,"option")) autoclose = 1; break; + case 'o': if (!strcmp(pn,"option") || + !strcmp(pn,"ol")) autoclose = 1; break; case 'p': if (!strcmp(pn,"p")) autoclose = 1; break; case 's': if (!strcmp(pn,"span")) autoclose = 1; break; case 't': if (!strcmp(pn,"tbody") || @@ -1436,7 +1446,8 @@ HTML_SimpleParse ( case 'h': if (!strcmp(pn,"head") || !strcmp(pn,"html")) autoclose = 1; break; case 'l': if (!strcmp(pn,"li")) autoclose = 1; break; - case 'o': if (!strcmp(pn,"option")) autoclose = 1; break; + case 'o': if (!strcmp(pn,"option") || + !strcmp(pn,"ol")) autoclose = 1; break; case 'p': if (!strcmp(pn,"p")) autoclose = 1; break; case 't': if (!strcmp(pn,"tbody") || !strcmp(pn,"td") || diff --git a/generic/domxslt.c b/generic/domxslt.c index 27eadbf0..fdb744bd 100644 --- a/generic/domxslt.c +++ b/generic/domxslt.c @@ -5876,7 +5876,7 @@ getExternalDocument ( int resultcode = 0; char *resultType, *extbase, *xmlstring, *channelId, s[20]; Tcl_Obj *extResolver = NULL; - CONST84 char *str; + const char *str; domDocument *doc; xsltSubDoc *sdoc; XML_Parser parser; diff --git a/generic/nodecmd.c b/generic/nodecmd.c index f988f787..6137d58b 100644 --- a/generic/nodecmd.c +++ b/generic/nodecmd.c @@ -295,7 +295,8 @@ NodeObjCmd (arg, interp, objc, objv) parent = (domNode *)StackTop(); if (parent == NULL) { Tcl_AppendResult(interp, "called outside domNode context", NULL); - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } doc = parent->ownerDocument; @@ -323,28 +324,39 @@ NodeObjCmd (arg, interp, objc, objv) Tcl_GetStringFromObj (objv[1], &len))!=0) { Tcl_WrongNumArgs(interp, 1, objv, "?-disableOutputEscaping? text"); - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } else { disableOutputEscaping = 1; index = 2; } } else { Tcl_WrongNumArgs(interp, 1, objv, "text"); - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } } tval = Tcl_GetStringFromObj(objv[index], &len); switch (abs(type)) { case TEXT_NODE_CHK: - if (!tcldom_textCheck (interp, tval, "text")) return TCL_ERROR; + if (!tcldom_textCheck (interp, tval, "text")) { + ret = TCL_ERROR; + goto end; + } createType = TEXT_NODE; break; case COMMENT_NODE_CHK: - if (!tcldom_commentCheck (interp, tval)) return TCL_ERROR; + if (!tcldom_commentCheck (interp, tval)) { + ret = TCL_ERROR; + goto end; + } createType = COMMENT_NODE; break; case CDATA_SECTION_NODE_CHK: - if (!tcldom_CDATACheck (interp, tval)) return TCL_ERROR; + if (!tcldom_CDATACheck (interp, tval)) { + ret = TCL_ERROR; + goto end; + } createType = CDATA_SECTION_NODE; break; default: @@ -364,17 +376,24 @@ NodeObjCmd (arg, interp, objc, objv) case PROCESSING_INSTRUCTION_NODE: if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "target data"); - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } tval = Tcl_GetStringFromObj(objv[1], &len); if (abs(type) == PROCESSING_INSTRUCTION_NODE_NAME_CHK || abs(type) == PROCESSING_INSTRUCTION_NODE_CHK) { - if (!tcldom_PINameCheck (interp, tval)) return TCL_ERROR; + if (!tcldom_PINameCheck (interp, tval)) { + ret = TCL_ERROR; + goto end; + } } aval = Tcl_GetStringFromObj(objv[2], &dlen); if (abs(type) == PROCESSING_INSTRUCTION_NODE_VALUE_CHK || abs(type) == PROCESSING_INSTRUCTION_NODE_CHK) { - if (!tcldom_PIValueCheck (interp, aval)) return TCL_ERROR; + if (!tcldom_PIValueCheck (interp, aval)) { + ret = TCL_ERROR; + goto end; + } } newNode = (domNode *) domNewProcessingInstructionNode(doc, tval, len, aval, dlen); @@ -384,7 +403,8 @@ NodeObjCmd (arg, interp, objc, objv) case PARSER_NODE: /* non-standard node-type : a hack! */ if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "markup"); - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } ret = tcldom_appendXML(interp, parent, objv[1]); break; @@ -424,7 +444,8 @@ NodeObjCmd (arg, interp, objc, objv) if ((len % 2)) { Tcl_AppendResult(interp, "list must have " "an even number of elements", NULL); - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } cmdObj = objv[2]; } else { @@ -440,14 +461,16 @@ NodeObjCmd (arg, interp, objc, objv) if (abs(type) == ELEMENT_NODE_ANAME_CHK || abs(type) == ELEMENT_NODE_CHK) { if (!tcldom_nameCheck (interp, tval, "attribute", 0)) { - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } } aval = Tcl_GetString(opts[i+1]); if (abs(type) == ELEMENT_NODE_AVALUE_CHK || abs(type) == ELEMENT_NODE_CHK) { if (!tcldom_textCheck (interp, aval, "attribute")) { - return TCL_ERROR; + ret = TCL_ERROR; + goto end; } } domSetAttribute(newNode, tval, aval); @@ -458,13 +481,18 @@ NodeObjCmd (arg, interp, objc, objv) break; } - if (type < 0 && newNode != NULL) { +end: + if (ret != TCL_ERROR && type < 0 && newNode != NULL) { char buf[64]; tcldom_createNodeObj(interp, newNode, buf); Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, strlen(buf))); } if (ret == TCL_OK) doc->nodeFlags |= NEEDS_RENUMBERING; + if (ret == TCL_ERROR && newNode) { + /* prevent errors from leaving half-added nodes in the document */ + domDeleteNode(newNode, NULL, NULL); newNode = NULL; + } return ret; } @@ -526,7 +554,7 @@ nodecmd_createNodeCmd (interp, objc, objv, checkName, checkCharData) ELM_NODE, TXT_NODE, CDS_NODE, CMT_NODE, PIC_NODE, PRS_NODE }; - static CONST84 char *subcmds[] = { + static const char *subcmds[] = { "elementNode", "textNode", "cdataNode", "commentNode", "piNode", "parserNode", NULL }; diff --git a/generic/tcldom.c b/generic/tcldom.c index 55ee2094..965a62b9 100644 --- a/generic/tcldom.c +++ b/generic/tcldom.c @@ -503,8 +503,8 @@ static char * tcldom_docTrace ( ClientData clientData, Tcl_Interp *interp, - CONST84 char *name1, - CONST84 char *name2, + const char *name1, + const char *name2, int flags ) { @@ -1606,7 +1606,7 @@ int tcldom_selectNodes ( xpathCBs cbs; xpathParseVarCB parseVarCB; - static CONST84 char *selectNodesOptions[] = { + static const char *selectNodesOptions[] = { "-namespaces", "-cache", NULL }; enum selectNodesOption { @@ -2544,7 +2544,9 @@ void tcldom_treeAsHTML ( int escapeNonASCII, int htmlEntities, int doctypeDeclaration, - int noEscaping + int noEscaping, + int onlyContents, + int breakLines ) { int empty, scriptTag; @@ -2584,7 +2586,8 @@ void tcldom_treeAsHTML ( child = doc->rootNode->firstChild; while (child) { tcldom_treeAsHTML(htmlString, child, chan, escapeNonASCII, - htmlEntities, doctypeDeclaration, 0); + htmlEntities, doctypeDeclaration, 0, + onlyContents, breakLines); child = child->nextSibling; } return; @@ -2637,8 +2640,6 @@ void tcldom_treeAsHTML ( } tcldom_tolower(node->nodeName, tag, 80); - writeChars(htmlString, chan, "<", 1); - writeChars(htmlString, chan, tag, -1); /*----------------------------------------------------------- @@ -2668,18 +2669,26 @@ void tcldom_treeAsHTML ( } - attrs = node->firstAttr; - while (attrs) { - tcldom_tolower(attrs->nodeName, attrName, 80); - writeChars(htmlString, chan, " ", 1); - writeChars (htmlString, chan, attrName, -1); - writeChars(htmlString, chan, "=\"", 2); - tcldom_AppendEscaped(htmlString, chan, attrs->nodeValue, -1, 1, - escapeNonASCII, htmlEntities, 0); - writeChars(htmlString, chan, "\"", 1); - attrs = attrs->nextSibling; + if (!onlyContents) { + writeChars(htmlString, chan, "<", 1); + writeChars(htmlString, chan, tag, -1); + attrs = node->firstAttr; + while (attrs) { + tcldom_tolower(attrs->nodeName, attrName, 80); + writeChars(htmlString, chan, " ", 1); + writeChars (htmlString, chan, attrName, -1); + writeChars(htmlString, chan, "=\"", 2); + tcldom_AppendEscaped(htmlString, chan, attrs->nodeValue, -1, 1, + escapeNonASCII, htmlEntities, 0); + writeChars(htmlString, chan, "\"", 1); + attrs = attrs->nextSibling; + } + if (breakLines) { + writeChars(htmlString, chan, "\n>", 2); + } else { + writeChars(htmlString, chan, ">", 1); + } } - writeChars(htmlString, chan, ">", 1); if (empty) { @@ -2687,7 +2696,8 @@ void tcldom_treeAsHTML ( child = node->firstChild; while (child != NULL) { tcldom_treeAsHTML(htmlString, child, chan, escapeNonASCII, - htmlEntities, doctypeDeclaration, scriptTag); + htmlEntities, doctypeDeclaration, scriptTag, 0, + breakLines); child = child->nextSibling; } return; @@ -2701,7 +2711,8 @@ void tcldom_treeAsHTML ( } while (child != NULL) { tcldom_treeAsHTML(htmlString, child, chan, escapeNonASCII, - htmlEntities, doctypeDeclaration, scriptTag); + htmlEntities, doctypeDeclaration, scriptTag, 0, + breakLines); child = child->nextSibling; } if ((node->firstChild != NULL) && (node->firstChild != node->lastChild) @@ -2709,9 +2720,11 @@ void tcldom_treeAsHTML ( writeChars(htmlString, chan, "\n", 1); } } - writeChars(htmlString, chan, "", 1); + if (!onlyContents) { + writeChars(htmlString, chan, "", 1); + } } @@ -3003,7 +3016,7 @@ static int serializeAsXML ( Tcl_HashEntry *h; Tcl_DString dStr; - static CONST84 char *asXMLOptions[] = { + static const char *asXMLOptions[] = { "-indent", "-channel", "-escapeNonASCII", "-doctypeDeclaration", "-escapeAllQuot", NULL @@ -3146,22 +3159,24 @@ static int serializeAsHTML ( { char *channelId; int optionIndex, mode, escapeNonASCII = 0, htmlEntities = 0; - int doctypeDeclaration = 0; + int doctypeDeclaration = 0, onlyContents = 0, breakLines = 0; Tcl_Obj *resultPtr; Tcl_Channel chan = (Tcl_Channel) NULL; - static CONST84 char *asHTMLOptions[] = { + static const char *asHTMLOptions[] = { "-channel", "-escapeNonASCII", "-htmlEntities", "-doctypeDeclaration", - NULL + "-onlyContents", "-breakLines", NULL }; enum asHTMLOption { - m_channel, m_escapeNonASCII, m_htmlEntities, m_doctypeDeclaration + m_channel, m_escapeNonASCII, m_htmlEntities, m_doctypeDeclaration, + m_onlyContents, m_breakLines }; - if (objc > 8) { + if (objc > 10) { Tcl_WrongNumArgs(interp, 2, objv, "?-channel ? ?-escapeNonASCII? " - "?-htmlEntities? ?-doctypeDeclaration ?"); + "?-htmlEntities? ?-doctypeDeclaration ? " + "?-onlyContents? ?-breakLines?"); return TCL_ERROR; } while (objc > 2) { @@ -3221,11 +3236,23 @@ static int serializeAsHTML ( objc -= 2; objv += 2; break; + + case m_onlyContents: + onlyContents = 1; + objc--; + objv++; + break; + + case m_breakLines: + breakLines = 1; + objc--; + objv++; + break; } } resultPtr = Tcl_NewStringObj("", 0); tcldom_treeAsHTML(resultPtr, node, chan, escapeNonASCII, htmlEntities, - doctypeDeclaration, 0); + doctypeDeclaration, 0, onlyContents, breakLines); Tcl_AppendResult(interp, Tcl_GetString(resultPtr), NULL); Tcl_DecrRefCount(resultPtr); return TCL_OK; @@ -3480,7 +3507,7 @@ static int applyXSLT ( "?-ignoreUndeclaredParameters? ?-xsltmessagecmd cmd? " "?objVar?\""; - static CONST84 char *xsltOptions[] = { + static const char *xsltOptions[] = { "-parameters", "-ignoreUndeclaredParameters", "-xsltmessagecmd", NULL }; @@ -3618,7 +3645,7 @@ static int tcldom_XSLTObjCmd ( int index; char *errMsg = NULL; - static CONST84 char *options[] = { + static const char *options[] = { "transform", "delete", NULL }; enum option { @@ -3733,7 +3760,7 @@ int tcldom_NodeObjCmd ( Tcl_CmdInfo cmdInfo; Tcl_HashEntry *h; - static CONST84 char *nodeMethods[] = { + static const char *nodeMethods[] = { "firstChild", "nextSibling", "getAttribute", "nodeName", "nodeValue", "nodeType", "attributes", "asList", "find", "setAttribute", "removeAttribute", "parentNode", @@ -4755,7 +4782,7 @@ int tcldom_DocObjCmd ( Tcl_CmdInfo cmdInfo; Tcl_Obj * mobjv[MAX_REWRITE_ARGS]; - static CONST84 char *docMethods[] = { + static const char *docMethods[] = { "documentElement", "getElementsByTagName", "delete", "createElement", "createCDATASection", "createTextNode", "createComment", "createProcessingInstruction", @@ -5437,7 +5464,7 @@ int tcldom_parse ( char *xml_string, *option, *errStr, *channelId, *baseURI = NULL; Tcl_Obj *extResolver = NULL; Tcl_Obj *feedbackCmd = NULL; - CONST84 char *interpResult; + const char *interpResult; int optionIndex, value, xml_string_len, mode; int ignoreWhiteSpaces = 1; int takeSimpleParser = 0; @@ -5453,7 +5480,7 @@ int tcldom_parse ( Tcl_Channel chan = (Tcl_Channel) NULL; Tcl_CmdInfo cmdInfo; - static CONST84 char *parseOptions[] = { + static const char *parseOptions[] = { "-keepEmpties", "-simple", "-html", "-feedbackAfter", "-channel", "-baseurl", "-externalentitycommand", "-useForeignDTD", "-paramentityparsing", @@ -5467,7 +5494,7 @@ int tcldom_parse ( o_feedbackcmd }; - static CONST84 char *paramEntityParsingValues[] = { + static const char *paramEntityParsingValues[] = { "always", "never", "notstandalone", @@ -5817,7 +5844,7 @@ int tcldom_featureinfo ( { int featureIndex, result; - static CONST84 char *features[] = { + static const char *features[] = { "expatversion", "expatmajorversion", "expatminorversion", "expatmicroversion", "dtd", "ns", "unknown", "tdomalloc", "lessns", @@ -5913,7 +5940,7 @@ int tcldom_DomObjCmd ( Tcl_CmdInfo cmdInfo; Tcl_Obj * mobjv[MAX_REWRITE_ARGS]; - static CONST84 char *domMethods[] = { + static const char *domMethods[] = { "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setResultEncoding", "setStoreLineColumn", "isCharData", "isName", "isPIName", @@ -5939,7 +5966,7 @@ int tcldom_DomObjCmd ( #endif }; - static CONST84 char *nodeModeValues[] = { + static const char *nodeModeValues[] = { "automatic", "command", "token", NULL }; enum nodeModeValue { diff --git a/generic/tclexpat.c b/generic/tclexpat.c index 9683cbeb..121a6158 100644 --- a/generic/tclexpat.c +++ b/generic/tclexpat.c @@ -672,7 +672,7 @@ TclExpatInstanceCmd (clientData, interp, objc, objv) char *data; int len = 0, optionIndex, result = TCL_OK; - static CONST84 char *options[] = { + static const char *options[] = { "configure", "cget", "free", "get", "parse", "parsechannel", "parsefile", "reset", NULL }; @@ -1055,7 +1055,7 @@ TclExpatConfigure (interp, expat, objc, objv) int objc; Tcl_Obj *CONST objv[]; { - static CONST84 char *switches[] = { + static const char *switches[] = { "-final", "-baseurl", "-elementstartcommand", @@ -1110,7 +1110,7 @@ TclExpatConfigure (interp, expat, objc, objv) EXPAT_HANDLERSET, EXPAT_NOEXPAND }; - static CONST84 char *paramEntityParsingValues[] = { + static const char *paramEntityParsingValues[] = { "always", "never", "notstandalone", @@ -1553,7 +1553,7 @@ TclExpatCget (interp, expat, objc, objv) int objc; Tcl_Obj *CONST objv[]; { - static CONST84 char *switches[] = { + static const char *switches[] = { "-final", "-baseurl", "-elementstartcommand", @@ -1955,7 +1955,7 @@ TclExpatGet (interp, expat, objc, objv) int objc; Tcl_Obj *CONST objv[]; { - static CONST84 char *getSwitches[] = { + static const char *getSwitches[] = { "-specifiedattributecount", "-currentbytecount", "-currentlinenumber", diff --git a/generic/tdominit.c b/generic/tdominit.c index 484073e6..67aeeb65 100644 --- a/generic/tdominit.c +++ b/generic/tdominit.c @@ -74,7 +74,7 @@ Tdom_Init (interp) nrOfBytes = Tcl_UtfToUniChar ("\xF4\xA2\xA2\xA2", &uniChar); -#if (TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION == 6) +#if (TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION == 6 || TCL_MINOR_VERSION == 7) # if TCL_UTF_MAX > 4 if (nrOfBytes != 4) { # else diff --git a/tests/htmlreader.test b/tests/htmlreader.test index 6f5883a6..419f6c28 100644 --- a/tests/htmlreader.test +++ b/tests/htmlreader.test @@ -175,6 +175,13 @@ test html-2.8 {HTML parsing} { } +test html-2.9 {HTML parsing - incomplete character references} { + set doc [dom parse -html "foo は, foo"] + $doc documentElement root + $doc asHTML +} {foo &#12399, foo} + + test html-3.1 {Bad data} { set data {line 6 column 17 - Warning: