summaryrefslogtreecommitdiff
path: root/HTMLparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'HTMLparser.c')
-rw-r--r--HTMLparser.c96
1 files changed, 77 insertions, 19 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index 947e4aa..4f84d7c 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -109,13 +109,15 @@ htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
- ctxt->errNo = error;
+ if (ctxt != NULL)
+ ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
XML_ERR_ERROR, NULL, 0,
(const char *) str1, (const char *) str2,
NULL, 0, 0,
msg, str1, str2);
- ctxt->wellFormed = 0;
+ if (ctxt != NULL)
+ ctxt->wellFormed = 0;
}
/**
@@ -134,11 +136,13 @@ htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
- ctxt->errNo = error;
+ if (ctxt != NULL)
+ ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
XML_ERR_ERROR, NULL, 0, NULL, NULL,
NULL, val, 0, msg, val);
- ctxt->wellFormed = 0;
+ if (ctxt != NULL)
+ ctxt->wellFormed = 0;
}
/************************************************************************
@@ -394,13 +398,15 @@ encoding_error:
* to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !)
*/
- htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
- "Input is not proper UTF-8, indicate encoding !\n",
- NULL, NULL);
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
- ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ {
+ char buffer[150];
+
+ snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
+ htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
+ "Input is not proper UTF-8, indicate encoding !\n",
+ BAD_CAST buffer, NULL);
}
ctxt->charset = XML_CHAR_ENCODING_8859_1;
@@ -1788,6 +1794,7 @@ UTF8ToHtml(unsigned char* out, int *outlen,
unsigned int c, d;
int trailing;
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
if (in == NULL) {
/*
* initialization nothing to do
@@ -1882,13 +1889,17 @@ int
htmlEncodeEntities(unsigned char* out, int *outlen,
const unsigned char* in, int *inlen, int quoteChar) {
const unsigned char* processed = in;
- const unsigned char* outend = out + (*outlen);
+ const unsigned char* outend;
const unsigned char* outstart = out;
const unsigned char* instart = in;
- const unsigned char* inend = in + (*inlen);
+ const unsigned char* inend;
unsigned int c, d;
int trailing;
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
+ return(-1);
+ outend = out + (*outlen);
+ inend = in + (*inlen);
while (in < inend) {
d = *in++;
if (d < 0x80) { c= d; trailing= 0; }
@@ -2417,7 +2428,9 @@ const htmlEntityDesc *
htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
const xmlChar *name;
const htmlEntityDesc * ent = NULL;
- *str = NULL;
+
+ if (str != NULL) *str = NULL;
+ if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
if (CUR == '&') {
NEXT;
@@ -2428,7 +2441,8 @@ htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
} else {
GROW;
if (CUR == ';') {
- *str = name;
+ if (str != NULL)
+ *str = name;
/*
* Lookup the entity in the table.
@@ -2440,7 +2454,8 @@ htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
"htmlParseEntityRef: expecting ';'\n",
NULL, NULL);
- *str = name;
+ if (str != NULL)
+ *str = name;
}
}
}
@@ -3018,6 +3033,12 @@ int
htmlParseCharRef(htmlParserCtxtPtr ctxt) {
int val = 0;
+ if ((ctxt == NULL) || (ctxt->input == NULL)) {
+ htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "htmlParseCharRef: context error\n",
+ NULL, NULL);
+ return(0);
+ }
if ((CUR == '&') && (NXT(1) == '#') &&
((NXT(2) == 'x') || NXT(2) == 'X')) {
SKIP(3);
@@ -3341,6 +3362,11 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
int meta = 0;
int i;
+ if ((ctxt == NULL) || (ctxt->input == NULL)) {
+ htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "htmlParseStartTag: context error\n", NULL, NULL);
+ return;
+ }
if (CUR != '<') return;
NEXT;
@@ -3822,9 +3848,15 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
const htmlElemDesc * info;
htmlParserNodeInfo node_info;
const xmlChar *oldname;
- int depth = ctxt->nameNr;
+ int depth;
const xmlChar *oldptr;
+ if ((ctxt == NULL) || (ctxt->input == NULL)) {
+ htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "htmlParseElement: context error\n", NULL, NULL);
+ return;
+ }
+ depth = ctxt->nameNr;
/* Capture start position */
if (ctxt->record_info) {
node_info.begin_pos = ctxt->input->consumed +
@@ -3947,8 +3979,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
xmlInitParser();
htmlDefaultSAXHandlerInit();
- ctxt->html = 1;
+ if ((ctxt == NULL) || (ctxt->input == NULL)) {
+ htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "htmlParseDocument: context error\n", NULL, NULL);
+ return(XML_ERR_INTERNAL_ERROR);
+ }
+ ctxt->html = 1;
GROW;
/*
* SAX: beginning of the document processing.
@@ -4136,6 +4173,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
ctxt->replaceEntities = 0;
ctxt->linenumbers = xmlLineNumbersDefaultValue;
ctxt->html = 1;
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
ctxt->vctxt.userData = ctxt;
ctxt->vctxt.error = xmlParserValidityError;
ctxt->vctxt.warning = xmlParserValidityWarning;
@@ -5131,6 +5169,11 @@ done:
int
htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
int terminate) {
+ if ((ctxt == NULL) || (ctxt->input == NULL)) {
+ htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
+ "htmlParseChunk: context error\n", NULL, NULL);
+ return(XML_ERR_INTERNAL_ERROR);
+ }
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
@@ -5360,6 +5403,9 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding)
/* htmlCharEncoding enc; */
xmlChar *content, *content_line = (xmlChar *) "charset=";
+ if (filename == NULL)
+ return(NULL);
+
ctxt = htmlNewParserCtxt();
if (ctxt == NULL) {
return(NULL);
@@ -5615,7 +5661,12 @@ void
htmlCtxtReset(htmlParserCtxtPtr ctxt)
{
xmlParserInputPtr input;
- xmlDictPtr dict = ctxt->dict;
+ xmlDictPtr dict;
+
+ if (ctxt == NULL)
+ return;
+
+ dict = ctxt->dict;
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
xmlFreeInputStream(input);
@@ -5624,8 +5675,12 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
ctxt->input = NULL;
ctxt->spaceNr = 0;
- ctxt->spaceTab[0] = -1;
- ctxt->space = &ctxt->spaceTab[0];
+ if (ctxt->spaceTab != NULL) {
+ ctxt->spaceTab[0] = -1;
+ ctxt->space = &ctxt->spaceTab[0];
+ } else {
+ ctxt->space = NULL;
+ }
ctxt->nodeNr = 0;
@@ -5695,6 +5750,9 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
int
htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
{
+ if (ctxt == NULL)
+ return(-1);
+
if (options & HTML_PARSE_NOWARNING) {
ctxt->sax->warning = NULL;
ctxt->vctxt.warning = NULL;