diff options
Diffstat (limited to 'lang/php5/patches/patch-az')
-rw-r--r-- | lang/php5/patches/patch-az | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/lang/php5/patches/patch-az b/lang/php5/patches/patch-az
new file mode 100644
index 00000000000..c2a2190c26b
--- /dev/null
+++ b/lang/php5/patches/patch-az
@@ -0,0 +1,368 @@
+$NetBSD$
+
+* Fix for htmlspecialchars(): r289411, r289554, r289565, r289567, r289605.
+
+--- ext/standard/html.c.orig	2008-12-31 20:17:49.000000000 +0900
++++ ext/standard/html.c
+@@ -484,15 +484,31 @@ struct basic_entities_dec {
+ 			} \
+ 			mbseq[mbpos++] = (mbchar); }
+ 
+-#define CHECK_LEN(pos, chars_need) \
+-	if((str_len - (pos)) < chars_need) { \
+-		*status = FAILURE; \
+-		return 0; \
++/* skip one byte and return */
++#define MB_FAILURE(pos) do { \
++		*newpos = pos + 1; \
++		*status = FAILURE; \
++		return 0; \
++	} while (0)
++
++#define CHECK_LEN(pos, chars_need) \
++	if (chars_need < 1) { \
++		if((str_len - (pos)) < chars_need) { \
++			*newpos = pos; \
++			*status = FAILURE; \
++			return 0; \
++		} \
++	} else { \
++		if((str_len - (pos)) < chars_need) { \
++			*newpos = pos + 1; \
++			*status = FAILURE; \
++			return 0; \
++		} \
+ 	}
+ 
+ /* {{{ get_next_char
+  */
+-inline static unsigned short get_next_char(enum entity_charset charset,
++inline static unsigned int get_next_char(enum entity_charset charset,
+ 		unsigned char * str,
+ 		int str_len,
+ 		int * newpos,
+@@ -503,205 +519,189 @@ inline static unsigned short get_next_ch
+ 	int pos = *newpos;
+ 	int mbpos = 0;
+ 	int mbspace = *mbseqlen;
+-	unsigned short this_char = str[pos++];
++	unsigned int this_char = 0;
+ 	unsigned char next_char;
+ 
+ 	*status = SUCCESS;
+-
++	
+ 	if (mbspace <= 0) {
+ 		*mbseqlen = 0;
+-		return this_char;
++		CHECK_LEN(pos, 1);
++		*newpos = pos + 1;
++		return str[pos];
+ 	}
+-
+-	MB_WRITE((unsigned char)this_char);
+-
++	
+ 	switch (charset) {
+ 		case cs_utf_8:
+ 			{
+-				unsigned long utf = 0;
+-				int stat = 0;
+-				int more = 1;
+-
+-				/* unpack utf-8 encoding into a wide char.
+-				 * Code stolen from the mbstring extension */
+-
+-				do {
+-					if (this_char < 0x80) {
+-						more = 0;
+-						if(stat) {
+-							/* we didn't finish the UTF sequence correctly */
+-							*status = FAILURE;
+-						}
+-						break;
+-					} else if (this_char < 0xc0) {
+-						switch (stat) {
+-							case 0x10: /* 2, 2nd */
+-							case 0x21: /* 3, 3rd */
+-							case 0x32: /* 4, 4th */
+-							case 0x43: /* 5, 5th */
+-							case 0x54: /* 6, 6th */
+-								/* last byte in sequence */
+-								more = 0;
+-								utf |= (this_char & 0x3f);
+-								this_char = (unsigned short)utf;
+-								break;
+-							case 0x20: /* 3, 2nd */
+-							case 0x31: /* 4, 3rd */
+-							case 0x42: /* 5, 4th */
+-							case 0x53: /* 6, 5th */
+-								/* penultimate char */
+-								utf |= ((this_char & 0x3f) << 6);
+-								stat++;
+-								break;
+-							case 0x30: /* 4, 2nd */
+-							case 0x41: /* 5, 3rd */
+-							case 0x52: /* 6, 4th */
+-								utf |= ((this_char & 0x3f) << 12);
+-								stat++;
+-								break;
+-							case 0x40: /* 5, 2nd */
+-							case 0x51:
+-								utf |= ((this_char & 0x3f) << 18);
+-								stat++;
+-								break;
+-							case 0x50: /* 6, 2nd */
+-								utf |= ((this_char & 0x3f) << 24);
+-								stat++;
+-								break;
+-							default:
+-								/* invalid */
+-								*status = FAILURE;
+-								more = 0;
+-						}
+-					}
+-					/* lead byte */
+-					else if (this_char < 0xe0) {
+-						stat = 0x10; /* 2 byte */
+-						utf = (this_char & 0x1f) << 6;
+-						CHECK_LEN(pos, 1);
+-					} else if (this_char < 0xf0) {
+-						stat = 0x20; /* 3 byte */
+-						utf = (this_char & 0xf) << 12;
+-						CHECK_LEN(pos, 2);
+-					} else if (this_char < 0xf8) {
+-						stat = 0x30; /* 4 byte */
+-						utf = (this_char & 0x7) << 18;
+-						CHECK_LEN(pos, 3);
+-					} else if (this_char < 0xfc) {
+-						stat = 0x40; /* 5 byte */
+-						utf = (this_char & 0x3) << 24;
+-						CHECK_LEN(pos, 4);
+-					} else if (this_char < 0xfe) {
+-						stat = 0x50; /* 6 byte */
+-						utf = (this_char & 0x1) << 30;
+-						CHECK_LEN(pos, 5);
+-					} else {
+-						/* invalid; bail */
+-						more = 0;
+-						*status = FAILURE;
+-						break;
++				unsigned char c;
++				CHECK_LEN(pos, 1);
++				c = str[pos];
++				if (c < 0x80) {
++					MB_WRITE(c);
++					this_char = c;
++					pos++;
++				} else if (c < 0xc0) {
++					MB_FAILURE(pos);
++				} else if (c < 0xe0) {
++					CHECK_LEN(pos, 2);
++					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
++						MB_FAILURE(pos);
+ 					}
+-
+-					if (more) {
+-						this_char = str[pos++];
+-						MB_WRITE((unsigned char)this_char);
++					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
++					if (this_char < 0x80) {
++						MB_FAILURE(pos);
+ 					}
+-				} while (more);
++					MB_WRITE((unsigned char)c);
++					MB_WRITE((unsigned char)str[pos + 1]);
++					pos += 2;
++				} else if (c < 0xf0) {
++					CHECK_LEN(pos, 3);
++					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
++						MB_FAILURE(pos);
++					}
++					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
++						MB_FAILURE(pos);
++					}
++					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
++					if (this_char < 0x800) {
++						MB_FAILURE(pos);
++					}
++					MB_WRITE((unsigned char)c);
++					MB_WRITE((unsigned char)str[pos + 1]);
++					MB_WRITE((unsigned char)str[pos + 2]);
++					pos += 3;
++				} else if (c < 0xf8) {
++					CHECK_LEN(pos, 4);
++					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
++						MB_FAILURE(pos);
++					}
++					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
++						MB_FAILURE(pos);
++					}
++					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
++						MB_FAILURE(pos);
++					}
++					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
++					if (this_char < 0x10000) {
++						MB_FAILURE(pos);
++					}
++					MB_WRITE((unsigned char)c);
++					MB_WRITE((unsigned char)str[pos + 1]);
++					MB_WRITE((unsigned char)str[pos + 2]);
++					MB_WRITE((unsigned char)str[pos + 3]);
++					pos += 4;
++				} else {
++					MB_FAILURE(pos);
++				}
+ 			}
+ 			break;
+ 		case cs_big5:
+ 		case cs_gb2312:
+ 		case cs_big5hkscs:
+ 			{
++				CHECK_LEN(pos, 1);
++				this_char = str[pos++];
+ 				/* check if this is the first of a 2-byte sequence */
+-				if (this_char >= 0xa1 && this_char <= 0xfe) {
++				if (this_char >= 0x81 && this_char <= 0xfe) {
+ 					/* peek at the next char */
+ 					CHECK_LEN(pos, 1);
+-					next_char = str[pos];
++					next_char = str[pos++];
+ 					if ((next_char >= 0x40 && next_char <= 0x7e) ||
+ 							(next_char >= 0xa1 && next_char <= 0xfe)) {
+ 						/* yes, this a wide char */
+-						this_char <<= 8;
++						MB_WRITE(this_char);
+ 						MB_WRITE(next_char);
+-						this_char |= next_char;
+-						pos++;
++						this_char = (this_char << 8) | next_char;
++					} else {
++						MB_FAILURE(pos);
+ 					}
+-
++				} else {
++					MB_WRITE(this_char);
+ 				}
+-				break;
+ 			}
++			break;
+ 		case cs_sjis:
+ 			{
++				CHECK_LEN(pos, 1);
++				this_char = str[pos++];
+ 				/* check if this is the first of a 2-byte sequence */
+-				if ( (this_char >= 0x81 && this_char <= 0x9f) ||
+-					 (this_char >= 0xe0 && this_char <= 0xef)
+-					) {
++				if ((this_char >= 0x81 && this_char <= 0x9f) ||
++					(this_char >= 0xe0 && this_char <= 0xfc)) {
+ 					/* peek at the next char */
+ 					CHECK_LEN(pos, 1);
+-					next_char = str[pos];
++					next_char = str[pos++];
+ 					if ((next_char >= 0x40 && next_char <= 0x7e) ||
+ 						(next_char >= 0x80 && next_char <= 0xfc))
+ 					{
+ 						/* yes, this a wide char */
+-						this_char <<= 8;
++						MB_WRITE(this_char);
+ 						MB_WRITE(next_char);
+-						this_char |= next_char;
+-						pos++;
++						this_char = (this_char << 8) | next_char;
++					} else {
++						MB_FAILURE(pos);
+ 					}
+-
++				} else {
++					MB_WRITE(this_char);
+ 				}
+ 				break;
+ 			}
+ 		case cs_eucjp:
+ 			{
++				CHECK_LEN(pos, 1);
++				this_char = str[pos++];
+ 				/* check if this is the first of a multi-byte sequence */
+ 				if (this_char >= 0xa1 && this_char <= 0xfe) {
+ 					/* peek at the next char */
+ 					CHECK_LEN(pos, 1);
+-					next_char = str[pos];
++					next_char = str[pos++];
+ 					if (next_char >= 0xa1 && next_char <= 0xfe) {
+ 						/* yes, this a jis kanji char */
+-						this_char <<= 8;
++						MB_WRITE(this_char);
+ 						MB_WRITE(next_char);
+-						this_char |= next_char;
+-						pos++;
++						this_char = (this_char << 8) | next_char;
++					} else {
++						MB_FAILURE(pos);
+ 					}
+-
+ 				} else if (this_char == 0x8e) {
+ 					/* peek at the next char */
+ 					CHECK_LEN(pos, 1);
+-					next_char = str[pos];
++					next_char = str[pos++];
+ 					if (next_char >= 0xa1 && next_char <= 0xdf) {
+ 						/* JIS X 0201 kana */
+-						this_char <<= 8;
++						MB_WRITE(this_char);
+ 						MB_WRITE(next_char);
+-						this_char |= next_char;
+-						pos++;
++						this_char = (this_char << 8) | next_char;
++					} else {
++						MB_FAILURE(pos);
+ 					}
+-
+ 				} else if (this_char == 0x8f) {
+ 					/* peek at the next two char */
+ 					unsigned char next2_char;
+ 					CHECK_LEN(pos, 2);
+ 					next_char = str[pos];
+-					next2_char = str[pos+1];
++					next2_char = str[pos + 1];
++					pos += 2;
+ 					if ((next_char >= 0xa1 && next_char <= 0xfe) &&
+ 						(next2_char >= 0xa1 && next2_char <= 0xfe)) {
+ 						/* JIS X 0212 hojo-kanji */
+-						this_char <<= 8;
++						MB_WRITE(this_char);
+ 						MB_WRITE(next_char);
+-						this_char |= next_char;
+-						pos++;
+-						this_char <<= 8;
+ 						MB_WRITE(next2_char);
+-						this_char |= next2_char;
+-						pos++;
++						this_char = (this_char << 16) | (next_char << 8) | next2_char;
++					} else {
++						MB_FAILURE(pos);
+ 					}
+-
++				} else {
++					MB_WRITE(this_char);
+ 				}
+ 				break;
+ 			}
+ 		default:
++			/* single-byte charsets */
++			CHECK_LEN(pos, 1);
++			this_char = str[pos++];
++			MB_WRITE(this_char);
+ 			break;
+ 	}
+ 	MB_RETURN;
+@@ -1132,7 +1132,7 @@ PHPAPI char *php_escape_html_entities_ex
+ 		unsigned char mbsequence[16];	/* allow up to 15 characters in a multibyte sequence */
+ 		int mbseqlen = sizeof(mbsequence);
+ 		int status = SUCCESS;
+-		unsigned short this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
++		unsigned int this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
+ 
+ 		if(status == FAILURE) {
+ 			/* invalid MB sequence */