Imported Upstream version 5.2.2 (upstream/5.2.2)

author: Mark A. Hershberger <mah@debian.(none)> 2009-03-25 00:37:27 -0400
committer: Mark A. Hershberger <mah@debian.(none)> 2009-03-25 00:37:27 -0400
commit: 2d4e5b09576bb4f0ba716cc82cdf29ea04d9184b (patch)
tree: 41ccc042009cba53e4ce43e727fcba4c1cfbf7f3 /ext/pdo_sqlite/sqlite/src/utf.c
parent: d29a4fd2dd3b5d4cf6e80b602544d7b71d794e76 (diff)
download: php-upstream/5.2.2.tar.gz
1 files changed, 44 insertions, 17 deletions
diff --git a/ext/pdo_sqlite/sqlite/src/utf.c b/ext/pdo_sqlite/sqlite/src/utf.c
index 05d238433..58c964759 100644
--- a/ext/pdo_sqlite/sqlite/src/utf.c
+++ b/ext/pdo_sqlite/sqlite/src/utf.c
@@ -63,8 +63,14 @@
 #include "vdbeInt.h"
 
 /*
+** The following constant value is used by the SQLITE_BIGENDIAN and
+** SQLITE_LITTLEENDIAN macros.
+*/
+const int sqlite3one = 1;
+
+/*
 ** This table maps from the first byte of a UTF-8 character to the number
-** of trailing bytes expected. A value '255' indicates that the table key
+** of trailing bytes expected. A value '4' indicates that the table key
 ** is not a legal first byte for a UTF-8 character.
 */
 static const u8 xtra_utf8_bytes[256]  = {
@@ -79,10 +85,10 @@ static const u8 xtra_utf8_bytes[256]  = {
 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
 
 /* 10wwwwww */
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
+4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
+4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
+4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
 
 /* 110yyyyy */
 1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
@@ -92,7 +98,7 @@ static const u8 xtra_utf8_bytes[256]  = {
 2, 2, 2, 2, 2, 2, 2, 2,     2, 2, 2, 2, 2, 2, 2, 2,
 
 /* 11110yyy */
-3, 3, 3, 3, 3, 3, 3, 3,     255, 255, 255, 255, 255, 255, 255, 255,
+3, 3, 3, 3, 3, 3, 3, 3,     4, 4, 4, 4, 4, 4, 4, 4,
 };
 
 /*
@@ -101,11 +107,24 @@ static const u8 xtra_utf8_bytes[256]  = {
 ** read by a naive implementation of a UTF-8 character reader. The code
 ** in the READ_UTF8 macro explains things best.
 */
-static const int xtra_utf8_bits[4] =  {
-0,
-12416,          /* (0xC0 << 6) + (0x80) */
-925824,         /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
-63447168        /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+static const int xtra_utf8_bits[] =  {
+  0,
+  12416,          /* (0xC0 << 6) + (0x80) */
+  925824,         /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
+  63447168        /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+};
+
+/*
+** If a UTF-8 character contains N extra bytes (N bytes follow
+** the initial byte so that the total character length is N+1) then
+** masking the character with utf_mask[N] must produce a non-zero
+** result.  Otherwise, we have an (illegal) overlong encoding.
+*/
+static const int utf_mask[] = {
+  0x00000000,
+  0xffffff80,
+  0xfffff800,
+  0xffff0000,
 };
 
 #define READ_UTF8(zIn, c) { \
@@ -113,11 +132,14 @@ static const int xtra_utf8_bits[4] =  {
   c = *(zIn)++;                                        \
   xtra = xtra_utf8_bytes[c];                           \
   switch( xtra ){                                      \
-    case 255: c = (int)0xFFFD; break;                  \
+    case 4: c = (int)0xFFFD; break;                    \
     case 3: c = (c<<6) + *(zIn)++;                     \
     case 2: c = (c<<6) + *(zIn)++;                     \
     case 1: c = (c<<6) + *(zIn)++;                     \
     c -= xtra_utf8_bits[xtra];                         \
+    if( (utf_mask[xtra]&c)==0                          \
+        || (c&0xFFFFF800)==0xD800                      \
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }    \
   }                                                    \
 }
 int sqlite3ReadUtf8(const unsigned char *z){
@@ -181,6 +203,7 @@ int sqlite3ReadUtf8(const unsigned char *z){
     int c2 = (*zIn++);                                                \
     c2 += ((*zIn++)<<8);                                              \
     c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
+    if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
   }                                                                   \
 }
 
@@ -191,6 +214,7 @@ int sqlite3ReadUtf8(const unsigned char *z){
     int c2 = ((*zIn++)<<8);                                           \
     c2 += (*zIn++);                                                   \
     c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
+    if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
   }                                                                   \
 }
 
@@ -245,7 +269,7 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
   unsigned char *zIn;                   /* Input iterator */
   unsigned char *zTerm;                 /* End of input */
   unsigned char *z;                     /* Output iterator */
-  int c;
+  unsigned int c;
 
   assert( pMem->flags&MEM_Str );
   assert( pMem->enc!=desiredEnc );
@@ -475,7 +499,7 @@ char *sqlite3utf16to8(const void *z, int nByte){
 ** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
 */
 int sqlite3utf16ByteLen(const void *zIn, int nChar){
-  int c = 1;
+  unsigned int c = 1;
   char const *z = zIn;
   int n = 0;
   if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
@@ -556,11 +580,11 @@ void sqlite3utf16Substr(
 ** characters in each encoding are inverses of each other.
 */
 void sqlite3utfSelfTest(){
-  int i;
+  unsigned int i, t;
   unsigned char zBuf[20];
   unsigned char *z;
   int n;
-  int c;
+  unsigned int c;
 
   for(i=0; i<0x00110000; i++){
     z = zBuf;
@@ -568,7 +592,10 @@ void sqlite3utfSelfTest(){
     n = z-zBuf;
     z = zBuf;
     READ_UTF8(z, c);
-    assert( c==i );
+    t = i;
+    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
+    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
+    assert( c==t );
     assert( (z-zBuf)==n );
   }
   for(i=0; i<0x00110000; i++){
author	Mark A. Hershberger <mah@debian.(none)>	2009-03-25 00:37:27 -0400
committer	Mark A. Hershberger <mah@debian.(none)>	2009-03-25 00:37:27 -0400
commit	2d4e5b09576bb4f0ba716cc82cdf29ea04d9184b (patch)
tree	41ccc042009cba53e4ce43e727fcba4c1cfbf7f3 /ext/pdo_sqlite/sqlite/src/utf.c
parent	d29a4fd2dd3b5d4cf6e80b602544d7b71d794e76 (diff)
download	php-upstream/5.2.2.tar.gz