diff options
| author | Michael Stapelberg <stapelberg@debian.org> | 2013-03-04 21:27:36 +0100 | 
|---|---|---|
| committer | Michael Stapelberg <michael@stapelberg.de> | 2013-03-04 21:27:36 +0100 | 
| commit | 04b08da9af0c450d645ab7389d1467308cfc2db8 (patch) | |
| tree | db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/lib9/utf | |
| parent | 917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff) | |
| download | golang-upstream/1.1_hg20130304.tar.gz | |
Imported Upstream version 1.1~hg20130304 (upstream/1.1_hg20130304)
Diffstat (limited to 'src/lib9/utf')
| -rw-r--r-- | src/lib9/utf/Makefile | 4 | ||||
| -rw-r--r-- | src/lib9/utf/rune.c | 21 | ||||
| -rw-r--r-- | src/lib9/utf/runetype.c | 2 | ||||
| -rw-r--r-- | src/lib9/utf/runetypebody-6.2.0.h (renamed from src/lib9/utf/runetypebody-6.0.0.h) | 122 | 
4 files changed, 116 insertions, 33 deletions
| diff --git a/src/lib9/utf/Makefile b/src/lib9/utf/Makefile index bbb2da6a9..5c9cdf051 100644 --- a/src/lib9/utf/Makefile +++ b/src/lib9/utf/Makefile @@ -15,13 +15,13 @@ UnicodeData-%.txt:  mkrunetype: mkrunetype.c  	cc -I../../../include -o mkrunetype -L$(GOROOT)/pkg/obj/$(GOOS)_$(GOARCH)/ mkrunetype.c -l9 -runetypebody-%.c: mkrunetype UnicodeData-%.txt +runetypebody-%.h: mkrunetype UnicodeData-%.txt  	mkrunetype -p UnicodeData-$*.txt >_$@  	mv _$@ $@  CLEANFILES+=UnicodeData.txt -UNICODE_VERSION=6.0.0 +UNICODE_VERSION=6.2.0  test: mkrunetype UnicodeData-$(UNICODE_VERSION).txt  	mkrunetype -c UnicodeData-$(UNICODE_VERSION).txt diff --git a/src/lib9/utf/rune.c b/src/lib9/utf/rune.c index cf98bab15..818771cfd 100644 --- a/src/lib9/utf/rune.c +++ b/src/lib9/utf/rune.c @@ -36,12 +36,14 @@ enum  	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */  	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */  	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */ -	Rune4	= (1<<(Bit4+3*Bitx))-1, -                                        /* 0001 1111 1111 1111 1111 1111 */ +	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */  	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */  	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */ +	SurrogateMin	= 0xD800, +	SurrogateMax	= 0xDFFF, +  	Bad	= Runeerror,  }; @@ -122,6 +124,8 @@ charntorune(Rune *rune, const char *str, int length)  		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;  		if(l <= Rune2)  			goto bad; +		if (SurrogateMin <= l && l <= SurrogateMax) +			goto bad;  		*rune = l;  		return 3;  	} @@ -138,7 +142,7 @@ charntorune(Rune *rune, const char *str, int length)  		goto bad;  	if (c < T5) {  		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; -		if (l <= Rune3) +		if (l <= Rune3 || l > Runemax)  			goto bad;  		*rune = l;  		return 4; @@ -208,6 +212,8 @@ chartorune(Rune *rune, const char *str)  		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;  		if(l <= Rune2)  			goto bad; +		if (SurrogateMin <= l 
&& l <= SurrogateMax) +			goto bad;  		*rune = l;  		return 3;  	} @@ -221,7 +227,7 @@ chartorune(Rune *rune, const char *str)  		goto bad;  	if (c < T5) {  		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; -		if (l <= Rune3) +		if (l <= Rune3 || l > Runemax)  			goto bad;  		*rune = l;  		return 4; @@ -241,7 +247,8 @@ bad:  }  int -isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) { +isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) +{  	*consumed = charntorune(rune, str, length);  	return *rune != Runeerror || *consumed == 3;  } @@ -273,13 +280,15 @@ runetochar(char *str, const Rune *rune)  	}  	/* -	 * If the Rune is out of range, convert it to the error rune. +	 * If the Rune is out of range or a surrogate half, convert it to the error rune.  	 * Do this test here because the error rune encodes to three bytes.  	 * Doing it earlier would duplicate work, since an out of range  	 * Rune wouldn't have fit in one or two bytes.  	 
*/  	if (c > Runemax)  		c = Runeerror; +	if (SurrogateMin <= c && c <= SurrogateMax) +		c = Runeerror;  	/*  	 * three character sequence diff --git a/src/lib9/utf/runetype.c b/src/lib9/utf/runetype.c index 51729fb01..b3634965f 100644 --- a/src/lib9/utf/runetype.c +++ b/src/lib9/utf/runetype.c @@ -35,4 +35,4 @@ rbsearch(Rune c, Rune *t, int n, int ne)  	return 0;  } -#include "runetypebody-6.0.0.h" +#include "runetypebody-6.2.0.h" diff --git a/src/lib9/utf/runetypebody-6.0.0.h b/src/lib9/utf/runetypebody-6.2.0.h index 47c0faf73..a603af0df 100644 --- a/src/lib9/utf/runetypebody-6.0.0.h +++ b/src/lib9/utf/runetypebody-6.2.0.h @@ -1,4 +1,4 @@ -/* generated automatically by mkrunetype.c from UnicodeData-6.0.0.txt */ +/* generated automatically by mkrunetype.c from UnicodeData-6.2.0.txt */  static Rune __isspacer[] = {  	0x0009, 0x000d, @@ -64,6 +64,10 @@ static Rune __isdigitr[] = {  	0xff10, 0xff19,  	0x104a0, 0x104a9,  	0x11066, 0x1106f, +	0x110f0, 0x110f9, +	0x11136, 0x1113f, +	0x111d0, 0x111d9, +	0x116c0, 0x116c9,  	0x1d7ce, 0x1d7ff,  }; @@ -110,6 +114,7 @@ static Rune __isalphar[] = {  	0x07f4, 0x07f5,  	0x0800, 0x0815,  	0x0840, 0x0858, +	0x08a2, 0x08ac,  	0x0904, 0x0939,  	0x0958, 0x0961,  	0x0971, 0x0977, @@ -189,7 +194,7 @@ static Rune __isalphar[] = {  	0x0ead, 0x0eb0,  	0x0eb2, 0x0eb3,  	0x0ec0, 0x0ec4, -	0x0edc, 0x0edd, +	0x0edc, 0x0edf,  	0x0f40, 0x0f47,  	0x0f49, 0x0f6c,  	0x0f88, 0x0f8c, @@ -201,7 +206,7 @@ static Rune __isalphar[] = {  	0x1075, 0x1081,  	0x10a0, 0x10c5,  	0x10d0, 0x10fa, -	0x1100, 0x1248, +	0x10fc, 0x1248,  	0x124a, 0x124d,  	0x1250, 0x1256,  	0x125a, 0x125d, @@ -242,12 +247,13 @@ static Rune __isalphar[] = {  	0x1b45, 0x1b4b,  	0x1b83, 0x1ba0,  	0x1bae, 0x1baf, -	0x1bc0, 0x1be5, +	0x1bba, 0x1be5,  	0x1c00, 0x1c23,  	0x1c4d, 0x1c4f,  	0x1c5a, 0x1c7d,  	0x1ce9, 0x1cec,  	0x1cee, 0x1cf1, +	0x1cf5, 0x1cf6,  	0x1d00, 0x1dbf,  	0x1e00, 0x1f15,  	0x1f18, 0x1f1d, @@ -276,8 +282,9 @@ static Rune __isalphar[] = {  	0x2c30, 0x2c5e,  	0x2c60, 
0x2ce4,  	0x2ceb, 0x2cee, +	0x2cf2, 0x2cf3,  	0x2d00, 0x2d25, -	0x2d30, 0x2d65, +	0x2d30, 0x2d67,  	0x2d80, 0x2d96,  	0x2da0, 0x2da6,  	0x2da8, 0x2dae, @@ -299,7 +306,7 @@ static Rune __isalphar[] = {  	0x31a0, 0x31ba,  	0x31f0, 0x31ff,  	0x3400, 0x4db5, -	0x4e00, 0x9fcb, +	0x4e00, 0x9fcc,  	0xa000, 0xa48c,  	0xa4d0, 0xa4fd,  	0xa500, 0xa60c, @@ -311,9 +318,9 @@ static Rune __isalphar[] = {  	0xa717, 0xa71f,  	0xa722, 0xa788,  	0xa78b, 0xa78e, -	0xa790, 0xa791, -	0xa7a0, 0xa7a9, -	0xa7fa, 0xa801, +	0xa790, 0xa793, +	0xa7a0, 0xa7aa, +	0xa7f8, 0xa801,  	0xa803, 0xa805,  	0xa807, 0xa80a,  	0xa80c, 0xa822, @@ -332,6 +339,8 @@ static Rune __isalphar[] = {  	0xaab5, 0xaab6,  	0xaab9, 0xaabd,  	0xaadb, 0xaadd, +	0xaae0, 0xaaea, +	0xaaf2, 0xaaf4,  	0xab01, 0xab06,  	0xab09, 0xab0e,  	0xab11, 0xab16, @@ -341,8 +350,7 @@ static Rune __isalphar[] = {  	0xac00, 0xd7a3,  	0xd7b0, 0xd7c6,  	0xd7cb, 0xd7fb, -	0xf900, 0xfa2d, -	0xfa30, 0xfa6d, +	0xf900, 0xfa6d,  	0xfa70, 0xfad9,  	0xfb00, 0xfb06,  	0xfb13, 0xfb17, @@ -387,6 +395,8 @@ static Rune __isalphar[] = {  	0x1083f, 0x10855,  	0x10900, 0x10915,  	0x10920, 0x10939, +	0x10980, 0x109b7, +	0x109be, 0x109bf,  	0x10a10, 0x10a13,  	0x10a15, 0x10a17,  	0x10a19, 0x10a33, @@ -397,9 +407,16 @@ static Rune __isalphar[] = {  	0x10c00, 0x10c48,  	0x11003, 0x11037,  	0x11083, 0x110af, +	0x110d0, 0x110e8, +	0x11103, 0x11126, +	0x11183, 0x111b2, +	0x111c1, 0x111c4, +	0x11680, 0x116aa,  	0x12000, 0x1236e,  	0x13000, 0x1342e,  	0x16800, 0x16a38, +	0x16f00, 0x16f44, +	0x16f93, 0x16f9f,  	0x1b000, 0x1b001,  	0x1d400, 0x1d454,  	0x1d456, 0x1d49c, @@ -428,6 +445,23 @@ static Rune __isalphar[] = {  	0x1d78a, 0x1d7a8,  	0x1d7aa, 0x1d7c2,  	0x1d7c4, 0x1d7cb, +	0x1ee00, 0x1ee03, +	0x1ee05, 0x1ee1f, +	0x1ee21, 0x1ee22, +	0x1ee29, 0x1ee32, +	0x1ee34, 0x1ee37, +	0x1ee4d, 0x1ee4f, +	0x1ee51, 0x1ee52, +	0x1ee61, 0x1ee62, +	0x1ee67, 0x1ee6a, +	0x1ee6c, 0x1ee72, +	0x1ee74, 0x1ee77, +	0x1ee79, 0x1ee7c, +	0x1ee80, 0x1ee89, +	0x1ee8b, 0x1ee9b, +	0x1eea1, 
0x1eea3, +	0x1eea5, 0x1eea9, +	0x1eeab, 0x1eebb,  	0x20000, 0x2a6d6,  	0x2a700, 0x2b734,  	0x2b740, 0x2b81d, @@ -451,6 +485,7 @@ static Rune __isalphas[] = {  	0x081a,  	0x0824,  	0x0828, +	0x08a0,  	0x093d,  	0x0950,  	0x09b2, @@ -481,7 +516,8 @@ static Rune __isalphas[] = {  	0x103f,  	0x1061,  	0x108e, -	0x10fc, +	0x10c7, +	0x10cd,  	0x1258,  	0x12c0,  	0x17d7, @@ -501,6 +537,8 @@ static Rune __isalphas[] = {  	0x2126,  	0x2128,  	0x214e, +	0x2d27, +	0x2d2d,  	0x2d6f,  	0x2e2f,  	0xa8fb, @@ -514,9 +552,26 @@ static Rune __isalphas[] = {  	0x10808,  	0x1083c,  	0x10a00, +	0x16f50,  	0x1d4a2,  	0x1d4bb,  	0x1d546, +	0x1ee24, +	0x1ee27, +	0x1ee39, +	0x1ee3b, +	0x1ee42, +	0x1ee47, +	0x1ee49, +	0x1ee4b, +	0x1ee54, +	0x1ee57, +	0x1ee59, +	0x1ee5b, +	0x1ee5d, +	0x1ee5f, +	0x1ee64, +	0x1ee7e,  };  int @@ -652,7 +707,8 @@ static Rune __isupperp[] = {  	0xa779, 0xa77b,  	0xa780, 0xa786,  	0xa78b, 0xa78d, -	0xa7a0, 0xa7a8, +	0xa790, 0xa792, +	0xa7a0, 0xa7aa,  };  static Rune __isuppers[] = { @@ -673,6 +729,8 @@ static Rune __isuppers[] = {  	0x03cf,  	0x03f4,  	0x03f7, +	0x10c7, +	0x10cd,  	0x2102,  	0x2107,  	0x2115, @@ -681,7 +739,7 @@ static Rune __isuppers[] = {  	0x2c60,  	0x2c72,  	0x2c75, -	0xa790, +	0x2cf2,  	0x1d49c,  	0x1d4a2,  	0x1d546, @@ -733,7 +791,7 @@ static Rune __islowerr[] = {  	0x04ce, 0x04cf,  	0x0561, 0x0587,  	0x1d00, 0x1d2b, -	0x1d62, 0x1d77, +	0x1d6b, 0x1d77,  	0x1d79, 0x1d9a,  	0x1e95, 0x1e9d,  	0x1eff, 0x1f07, @@ -764,7 +822,7 @@ static Rune __islowerr[] = {  	0x2c30, 0x2c5e,  	0x2c65, 0x2c66,  	0x2c73, 0x2c74, -	0x2c76, 0x2c7c, +	0x2c76, 0x2c7b,  	0x2ce3, 0x2ce4,  	0x2d00, 0x2d25,  	0xa72f, 0xa731, @@ -832,13 +890,12 @@ static Rune __islowerp[] = {  	0xa77a, 0xa77c,  	0xa77f, 0xa787,  	0xa78c, 0xa78e, +	0xa791, 0xa793,  	0xa7a1, 0xa7a9,  };  static Rune __islowers[] = { -	0x00aa,  	0x00b5, -	0x00ba,  	0x0188,  	0x0192,  	0x0195, @@ -864,7 +921,9 @@ static Rune __islowers[] = {  	0x2184,  	0x2c61,  	0x2c71, -	0xa791, +	0x2cf3, +	0x2d27, +	0x2d2d, 
 	0xa7fa,  	0x1d4bb,  	0x1d7cb, @@ -973,7 +1032,8 @@ static Rune __istitlep[] = {  	0xa779, 0xa77b,  	0xa780, 0xa786,  	0xa78b, 0xa78d, -	0xa7a0, 0xa7a8, +	0xa790, 0xa792, +	0xa7a0, 0xa7aa,  };  static Rune __istitles[] = { @@ -990,12 +1050,14 @@ static Rune __istitles[] = {  	0x038c,  	0x03cf,  	0x03f7, +	0x10c7, +	0x10cd,  	0x2132,  	0x2183,  	0x2c60,  	0x2c72,  	0x2c75, -	0xa790, +	0x2cf2,  };  int @@ -1088,6 +1150,7 @@ static Rune __toupperp[] = {  	0xa733, 0xa76f, 1048575,  	0xa77a, 0xa77c, 1048575,  	0xa77f, 0xa787, 1048575, +	0xa791, 0xa793, 1048575,  	0xa7a1, 0xa7a9, 1048575,  }; @@ -1132,6 +1195,7 @@ static Rune __touppers[] = {  	0x0260, 1048371,  	0x0263, 1048369,  	0x0265, 1090856, +	0x0266, 1090884,  	0x0268, 1048367,  	0x0269, 1048365,  	0x026b, 1059319, @@ -1178,8 +1242,10 @@ static Rune __touppers[] = {  	0x2c66, 1037784,  	0x2c73, 1048575,  	0x2c76, 1048575, +	0x2cf3, 1048575, +	0x2d27, 1041312, +	0x2d2d, 1041312,  	0xa78c, 1048575, -	0xa791, 1048575,  };  Rune @@ -1271,6 +1337,7 @@ static Rune __tolowerp[] = {  	0xa732, 0xa76e, 1048577,  	0xa779, 0xa77b, 1048577,  	0xa780, 0xa786, 1048577, +	0xa790, 0xa792, 1048577,  	0xa7a0, 0xa7a8, 1048577,  }; @@ -1337,6 +1404,8 @@ static Rune __tolowers[] = {  	0x03fa, 1048577,  	0x04c0, 1048591,  	0x04c1, 1048577, +	0x10c7, 1055840, +	0x10cd, 1055840,  	0x1e9e, 1040961,  	0x1fbc, 1048567,  	0x1fcc, 1048567, @@ -1357,11 +1426,12 @@ static Rune __tolowers[] = {  	0x2c70, 1037794,  	0x2c72, 1048577,  	0x2c75, 1048577, +	0x2cf2, 1048577,  	0xa77d, 1013244,  	0xa77e, 1048577,  	0xa78b, 1048577,  	0xa78d, 1006296, -	0xa790, 1048577, +	0xa7aa, 1006268,  };  Rune @@ -1455,6 +1525,7 @@ static Rune __totitlep[] = {  	0xa733, 0xa76f, 1048575,  	0xa77a, 0xa77c, 1048575,  	0xa77f, 0xa787, 1048575, +	0xa791, 0xa793, 1048575,  	0xa7a1, 0xa7a9, 1048575,  }; @@ -1496,6 +1567,7 @@ static Rune __totitles[] = {  	0x0260, 1048371,  	0x0263, 1048369,  	0x0265, 1090856, +	0x0266, 1090884,  	0x0268, 1048367,  	0x0269, 1048365,  	
0x026b, 1059319, @@ -1542,8 +1614,10 @@ static Rune __totitles[] = {  	0x2c66, 1037784,  	0x2c73, 1048575,  	0x2c76, 1048575, +	0x2cf3, 1048575, +	0x2d27, 1041312, +	0x2d2d, 1041312,  	0xa78c, 1048575, -	0xa791, 1048575,  };  Rune | 
