Skip to content

Commit ace886e

Browse files
committed
Updated libxml to 2.9.10
1 parent 68e6e4d commit ace886e

File tree

393 files changed

+8142
-6987
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

393 files changed

+8142
-6987
lines changed

ChangeLog

+56-56
Large diffs are not rendered by default.

HTMLparser.c

+23-16
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
#ifdef HAVE_UNISTD_H
2727
#include <unistd.h>
2828
#endif
29-
#ifdef HAVE_ZLIB_H
29+
#ifdef LIBXML_ZLIB_ENABLED
3030
#include <zlib.h>
3131
#endif
3232

@@ -317,7 +317,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
317317

318318
#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
319319

320-
/* Inported from XML */
320+
/* Imported from XML */
321321

322322
/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
323323
#define CUR ((int) (*ctxt->input->cur))
@@ -537,7 +537,7 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
537537
encoding_error:
538538
/*
539539
* If we detect an UTF8 error that probably mean that the
540-
* input encoding didn't get properly advertized in the
540+
* input encoding didn't get properly advertised in the
541541
* declaration header. Report the error and switch the encoding
542542
* to ISO-Latin-1 (if you don't like this policy, just declare the
543543
* encoding !)
@@ -602,8 +602,8 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
602602
************************************************************************/
603603

604604
/*
605-
* Start Tag: 1 means the start tag can be ommited
606-
* End Tag: 1 means the end tag can be ommited
605+
* Start Tag: 1 means the start tag can be omitted
606+
* End Tag: 1 means the end tag can be omitted
607607
* 2 means it's forbidden (empty elements)
608608
* 3 means the tag is stylistic and should be closed easily
609609
* Depr: this element is deprecated
@@ -1084,7 +1084,7 @@ static const char * const htmlStartClose[] = {
10841084
"menu", "p", "head", "ul", NULL,
10851085
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
10861086
"div", "p", "head", NULL,
1087-
"noscript", "p", NULL,
1087+
"noscript", "script", NULL,
10881088
"center", "font", "b", "i", "p", "head", NULL,
10891089
"a", "a", "head", NULL,
10901090
"caption", "p", NULL,
@@ -1342,7 +1342,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
13421342
if (xmlStrEqual(newtag, ctxt->nameTab[i]))
13431343
break;
13441344
/*
1345-
* A missplaced endtag can only close elements with lower
1345+
* A misplaced endtag can only close elements with lower
13461346
* or equal priority, so if we find an element with higher
13471347
* priority before we find an element with
13481348
* matching name, we just ignore this endtag
@@ -2176,6 +2176,7 @@ htmlEncodeEntities(unsigned char* out, int *outlen,
21762176
* *
21772177
************************************************************************/
21782178

2179+
#ifdef LIBXML_PUSH_ENABLED
21792180
/**
21802181
* htmlNewInputStream:
21812182
* @ctxt: an HTML parser context
@@ -2207,6 +2208,7 @@ htmlNewInputStream(htmlParserCtxtPtr ctxt) {
22072208
input->length = 0;
22082209
return(input);
22092210
}
2211+
#endif
22102212

22112213

22122214
/************************************************************************
@@ -2216,9 +2218,9 @@ htmlNewInputStream(htmlParserCtxtPtr ctxt) {
22162218
************************************************************************/
22172219
/*
22182220
* all tags allowing pc data from the html 4.01 loose dtd
2219-
* NOTE: it might be more apropriate to integrate this information
2221+
* NOTE: it might be more appropriate to integrate this information
22202222
* into the html40ElementTable array but I don't want to risk any
2221-
* binary incomptibility
2223+
* binary incompatibility
22222224
*/
22232225
static const char *allowPCData[] = {
22242226
"a", "abbr", "acronym", "address", "applet", "b", "bdo", "big",
@@ -2959,6 +2961,7 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
29592961
}
29602962
COPY_BUF(l,buf,nbchar,cur);
29612963
if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
2964+
buf[nbchar] = 0;
29622965
if (ctxt->sax->cdataBlock!= NULL) {
29632966
/*
29642967
* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
@@ -2983,6 +2986,7 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
29832986
}
29842987

29852988
if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2989+
buf[nbchar] = 0;
29862990
if (ctxt->sax->cdataBlock!= NULL) {
29872991
/*
29882992
* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
@@ -3028,6 +3032,8 @@ htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
30283032
COPY_BUF(l,buf,nbchar,cur);
30293033
}
30303034
if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3035+
buf[nbchar] = 0;
3036+
30313037
/*
30323038
* Ok the segment is to be consumed as chars.
30333039
*/
@@ -3636,12 +3642,12 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
36363642
processed = ctxt->input->cur - ctxt->input->base;
36373643
xmlBufShrink(ctxt->input->buf->buffer, processed);
36383644
nbchars = xmlCharEncInput(ctxt->input->buf, 1);
3645+
xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
36393646
if (nbchars < 0) {
36403647
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
36413648
"htmlCheckEncoding: encoder error\n",
36423649
NULL, NULL);
36433650
}
3644-
xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
36453651
}
36463652
}
36473653
}
@@ -5762,13 +5768,13 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
57625768
break;
57635769
}
57645770
case XML_PARSER_CONTENT: {
5771+
xmlChar chr[2] = { 0, 0 };
57655772
long cons;
5773+
57665774
/*
57675775
* Handle preparsed entities and charRef
57685776
*/
57695777
if (ctxt->token != 0) {
5770-
xmlChar chr[2] = { 0 , 0 } ;
5771-
57725778
chr[0] = (xmlChar) ctxt->token;
57735779
htmlCheckParagraph(ctxt);
57745780
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
@@ -5780,21 +5786,22 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
57805786
cur = in->cur[0];
57815787
if ((cur != '<') && (cur != '&')) {
57825788
if (ctxt->sax != NULL) {
5789+
chr[0] = cur;
57835790
if (IS_BLANK_CH(cur)) {
57845791
if (ctxt->keepBlanks) {
57855792
if (ctxt->sax->characters != NULL)
57865793
ctxt->sax->characters(
5787-
ctxt->userData, &in->cur[0], 1);
5794+
ctxt->userData, chr, 1);
57885795
} else {
57895796
if (ctxt->sax->ignorableWhitespace != NULL)
57905797
ctxt->sax->ignorableWhitespace(
5791-
ctxt->userData, &in->cur[0], 1);
5798+
ctxt->userData, chr, 1);
57925799
}
57935800
} else {
57945801
htmlCheckParagraph(ctxt);
57955802
if (ctxt->sax->characters != NULL)
57965803
ctxt->sax->characters(
5797-
ctxt->userData, &in->cur[0], 1);
5804+
ctxt->userData, chr, 1);
57985805
}
57995806
}
58005807
ctxt->token = 0;
@@ -6674,7 +6681,7 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
66746681
xmlInitNodeInfoSeq(&ctxt->node_seq);
66756682

66766683
if (ctxt->attsDefault != NULL) {
6677-
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
6684+
xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
66786685
ctxt->attsDefault = NULL;
66796686
}
66806687
if (ctxt->attsSpecial != NULL) {

HTMLtree.c

+45-79
Original file line number | Diff line number | Diff line change
@@ -502,16 +502,16 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
502502
if (handler == NULL)
503503
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
504504
}
505+
} else {
506+
/*
507+
* Fallback to HTML or ASCII when the encoding is unspecified
508+
*/
509+
if (handler == NULL)
510+
handler = xmlFindCharEncodingHandler("HTML");
511+
if (handler == NULL)
512+
handler = xmlFindCharEncodingHandler("ascii");
505513
}
506514

507-
/*
508-
* Fallback to HTML or ASCII when the encoding is unspecified
509-
*/
510-
if (handler == NULL)
511-
handler = xmlFindCharEncodingHandler("HTML");
512-
if (handler == NULL)
513-
handler = xmlFindCharEncodingHandler("ascii");
514-
515515
/*
516516
* save the content to a temp buffer.
517517
*/
@@ -570,33 +570,22 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
570570
xmlCharEncoding enc;
571571

572572
enc = xmlParseCharEncoding(encoding);
573-
if (enc != cur->charset) {
574-
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
575-
/*
576-
* Not supported yet
577-
*/
578-
*mem = NULL;
579-
*size = 0;
580-
return;
581-
}
582-
573+
if (enc != XML_CHAR_ENCODING_UTF8) {
583574
handler = xmlFindCharEncodingHandler(encoding);
584575
if (handler == NULL)
585576
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
586577

587-
} else {
588-
handler = xmlFindCharEncodingHandler(encoding);
589578
}
579+
} else {
580+
/*
581+
* Fallback to HTML or ASCII when the encoding is unspecified
582+
*/
583+
if (handler == NULL)
584+
handler = xmlFindCharEncodingHandler("HTML");
585+
if (handler == NULL)
586+
handler = xmlFindCharEncodingHandler("ascii");
590587
}
591588

592-
/*
593-
* Fallback to HTML or ASCII when the encoding is unspecified
594-
*/
595-
if (handler == NULL)
596-
handler = xmlFindCharEncodingHandler("HTML");
597-
if (handler == NULL)
598-
handler = xmlFindCharEncodingHandler("ascii");
599-
600589
buf = xmlAllocOutputBufferInternal(handler);
601590
if (buf == NULL) {
602591
*mem = NULL;
@@ -1059,7 +1048,7 @@ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
10591048
* @cur: the document
10601049
* @encoding: the encoding string
10611050
*
1062-
* Dump an HTML document. Formating return/spaces are added.
1051+
* Dump an HTML document. Formatting return/spaces are added.
10631052
*/
10641053
void
10651054
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
@@ -1101,30 +1090,21 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
11011090
xmlCharEncoding enc;
11021091

11031092
enc = xmlParseCharEncoding(encoding);
1104-
if (enc != cur->charset) {
1105-
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1106-
/*
1107-
* Not supported yet
1108-
*/
1109-
return(-1);
1110-
}
1111-
1093+
if (enc != XML_CHAR_ENCODING_UTF8) {
11121094
handler = xmlFindCharEncodingHandler(encoding);
11131095
if (handler == NULL)
11141096
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1115-
} else {
1116-
handler = xmlFindCharEncodingHandler(encoding);
11171097
}
1098+
} else {
1099+
/*
1100+
* Fallback to HTML or ASCII when the encoding is unspecified
1101+
*/
1102+
if (handler == NULL)
1103+
handler = xmlFindCharEncodingHandler("HTML");
1104+
if (handler == NULL)
1105+
handler = xmlFindCharEncodingHandler("ascii");
11181106
}
11191107

1120-
/*
1121-
* Fallback to HTML or ASCII when the encoding is unspecified
1122-
*/
1123-
if (handler == NULL)
1124-
handler = xmlFindCharEncodingHandler("HTML");
1125-
if (handler == NULL)
1126-
handler = xmlFindCharEncodingHandler("ascii");
1127-
11281108
buf = xmlOutputBufferCreateFile(f, handler);
11291109
if (buf == NULL) return(-1);
11301110
htmlDocContentDumpOutput(buf, cur, NULL);
@@ -1160,28 +1140,21 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
11601140
xmlCharEncoding enc;
11611141

11621142
enc = xmlParseCharEncoding(encoding);
1163-
if (enc != cur->charset) {
1164-
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1165-
/*
1166-
* Not supported yet
1167-
*/
1168-
return(-1);
1169-
}
1170-
1143+
if (enc != XML_CHAR_ENCODING_UTF8) {
11711144
handler = xmlFindCharEncodingHandler(encoding);
11721145
if (handler == NULL)
11731146
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
11741147
}
1148+
} else {
1149+
/*
1150+
* Fallback to HTML or ASCII when the encoding is unspecified
1151+
*/
1152+
if (handler == NULL)
1153+
handler = xmlFindCharEncodingHandler("HTML");
1154+
if (handler == NULL)
1155+
handler = xmlFindCharEncodingHandler("ascii");
11751156
}
11761157

1177-
/*
1178-
* Fallback to HTML or ASCII when the encoding is unspecified
1179-
*/
1180-
if (handler == NULL)
1181-
handler = xmlFindCharEncodingHandler("HTML");
1182-
if (handler == NULL)
1183-
handler = xmlFindCharEncodingHandler("ascii");
1184-
11851158
/*
11861159
* save the content to a temp buffer.
11871160
*/
@@ -1221,30 +1194,23 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
12211194
xmlCharEncoding enc;
12221195

12231196
enc = xmlParseCharEncoding(encoding);
1224-
if (enc != cur->charset) {
1225-
if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1226-
/*
1227-
* Not supported yet
1228-
*/
1229-
return(-1);
1230-
}
1231-
1197+
if (enc != XML_CHAR_ENCODING_UTF8) {
12321198
handler = xmlFindCharEncodingHandler(encoding);
12331199
if (handler == NULL)
12341200
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
12351201
}
12361202
htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
12371203
} else {
12381204
htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1239-
}
12401205

1241-
/*
1242-
* Fallback to HTML or ASCII when the encoding is unspecified
1243-
*/
1244-
if (handler == NULL)
1245-
handler = xmlFindCharEncodingHandler("HTML");
1246-
if (handler == NULL)
1247-
handler = xmlFindCharEncodingHandler("ascii");
1206+
/*
1207+
* Fallback to HTML or ASCII when the encoding is unspecified
1208+
*/
1209+
if (handler == NULL)
1210+
handler = xmlFindCharEncodingHandler("HTML");
1211+
if (handler == NULL)
1212+
handler = xmlFindCharEncodingHandler("ascii");
1213+
}
12481214

12491215
/*
12501216
* save the content to a temp buffer.

0 commit comments

Comments
 (0)