diff options
| author | Mark A. Hershberger <mah@debian.(none)> | 2009-03-25 00:37:27 -0400 |
|---|---|---|
| committer | Mark A. Hershberger <mah@debian.(none)> | 2009-03-25 00:37:27 -0400 |
| commit | 2d4e5b09576bb4f0ba716cc82cdf29ea04d9184b (patch) | |
| tree | 41ccc042009cba53e4ce43e727fcba4c1cfbf7f3 /ext/pdo_sqlite/sqlite/src/utf.c | |
| parent | d29a4fd2dd3b5d4cf6e80b602544d7b71d794e76 (diff) | |
| download | php-upstream/5.2.2.tar.gz | |
Imported Upstream version 5.2.2upstream/5.2.2
Diffstat (limited to 'ext/pdo_sqlite/sqlite/src/utf.c')
| -rw-r--r-- | ext/pdo_sqlite/sqlite/src/utf.c | 61 |
1 files changed, 44 insertions, 17 deletions
diff --git a/ext/pdo_sqlite/sqlite/src/utf.c b/ext/pdo_sqlite/sqlite/src/utf.c index 05d238433..58c964759 100644 --- a/ext/pdo_sqlite/sqlite/src/utf.c +++ b/ext/pdo_sqlite/sqlite/src/utf.c @@ -63,8 +63,14 @@ #include "vdbeInt.h" /* +** The following constant value is used by the SQLITE_BIGENDIAN and +** SQLITE_LITTLEENDIAN macros. +*/ +const int sqlite3one = 1; + +/* ** This table maps from the first byte of a UTF-8 character to the number -** of trailing bytes expected. A value '255' indicates that the table key +** of trailing bytes expected. A value '4' indicates that the table key ** is not a legal first byte for a UTF-8 character. */ static const u8 xtra_utf8_bytes[256] = { @@ -79,10 +85,10 @@ static const u8 xtra_utf8_bytes[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10wwwwww */ -255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 110yyyyy */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -92,7 +98,7 @@ static const u8 xtra_utf8_bytes[256] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 11110yyy */ -3, 3, 3, 3, 3, 3, 3, 3, 255, 255, 255, 255, 255, 255, 255, 255, +3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, }; /* @@ -101,11 +107,24 @@ static const u8 xtra_utf8_bytes[256] = { ** read by a naive implementation of a UTF-8 character reader. The code ** in the READ_UTF8 macro explains things best. */ -static const int xtra_utf8_bits[4] = { -0, -12416, /* (0xC0 << 6) + (0x80) */ -925824, /* (0xE0 << 12) + (0x80 << 6) + (0x80) */ -63447168 /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ +static const int xtra_utf8_bits[] = { + 0, + 12416, /* (0xC0 << 6) + (0x80) */ + 925824, /* (0xE0 << 12) + (0x80 << 6) + (0x80) */ + 63447168 /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ +}; + +/* +** If a UTF-8 character contains N bytes extra bytes (N bytes follow +** the initial byte so that the total character length is N+1) then +** masking the character with utf8_mask[N] must produce a non-zero +** result. Otherwise, we have an (illegal) overlong encoding. +*/ +static const int utf_mask[] = { + 0x00000000, + 0xffffff80, + 0xfffff800, + 0xffff0000, }; #define READ_UTF8(zIn, c) { \ @@ -113,11 +132,14 @@ static const int xtra_utf8_bits[4] = { c = *(zIn)++; \ xtra = xtra_utf8_bytes[c]; \ switch( xtra ){ \ - case 255: c = (int)0xFFFD; break; \ + case 4: c = (int)0xFFFD; break; \ case 3: c = (c<<6) + *(zIn)++; \ case 2: c = (c<<6) + *(zIn)++; \ case 1: c = (c<<6) + *(zIn)++; \ c -= xtra_utf8_bits[xtra]; \ + if( (utf_mask[xtra]&c)==0 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ } \ } int sqlite3ReadUtf8(const unsigned char *z){ @@ -181,6 +203,7 @@ int sqlite3ReadUtf8(const unsigned char *z){ int c2 = (*zIn++); \ c2 += ((*zIn++)<<8); \ c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ + if( (c & 0xFFFF0000)==0 ) c = 0xFFFD; \ } \ } @@ -191,6 +214,7 @@ int sqlite3ReadUtf8(const unsigned char *z){ int c2 = ((*zIn++)<<8); \ c2 += (*zIn++); \ c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ + if( (c & 0xFFFF0000)==0 ) c = 0xFFFD; \ } \ } @@ -245,7 +269,7 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ unsigned char *zIn; /* Input iterator */ unsigned char *zTerm; /* End of input */ unsigned char *z; /* Output iterator */ - int c; + unsigned int c; assert( pMem->flags&MEM_Str ); assert( pMem->enc!=desiredEnc ); @@ -475,7 +499,7 @@ char *sqlite3utf16to8(const void *z, int nByte){ ** in pZ (or up until the first pair of 0x00 bytes, whichever comes first). */ int sqlite3utf16ByteLen(const void *zIn, int nChar){ - int c = 1; + unsigned int c = 1; char const *z = zIn; int n = 0; if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ @@ -556,11 +580,11 @@ void sqlite3utf16Substr( ** characters in each encoding are inverses of each other. */ void sqlite3utfSelfTest(){ - int i; + unsigned int i, t; unsigned char zBuf[20]; unsigned char *z; int n; - int c; + unsigned int c; for(i=0; i<0x00110000; i++){ z = zBuf; @@ -568,7 +592,10 @@ void sqlite3utfSelfTest(){ n = z-zBuf; z = zBuf; READ_UTF8(z, c); - assert( c==i ); + t = i; + if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; + if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; + assert( c==t ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ |
