diff options
Diffstat (limited to 'src/lib9/utf/rune.c')
-rw-r--r-- | src/lib9/utf/rune.c | 21 |
1 files changed, 15 insertions, 6 deletions
diff --git a/src/lib9/utf/rune.c b/src/lib9/utf/rune.c
index cf98bab15..818771cfd 100644
--- a/src/lib9/utf/rune.c
+++ b/src/lib9/utf/rune.c
@@ -36,12 +36,14 @@ enum
 	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
 	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
 	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
-	Rune4	= (1<<(Bit4+3*Bitx))-1,
-					/* 0001 1111 1111 1111 1111 1111 */
+	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */
 
 	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
 	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
 
+	SurrogateMin	= 0xD800,
+	SurrogateMax	= 0xDFFF,
+
 	Bad	= Runeerror,
 };
 
@@ -122,6 +124,8 @@ charntorune(Rune *rune, const char *str, int length)
 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
 		if(l <= Rune2)
 			goto bad;
+		if (SurrogateMin <= l && l <= SurrogateMax)
+			goto bad;
 		*rune = l;
 		return 3;
 	}
@@ -138,7 +142,7 @@ charntorune(Rune *rune, const char *str, int length)
 		goto bad;
 	if (c < T5) {
 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-		if (l <= Rune3)
+		if (l <= Rune3 || l > Runemax)
 			goto bad;
 		*rune = l;
 		return 4;
@@ -208,6 +212,8 @@ chartorune(Rune *rune, const char *str)
 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
 		if(l <= Rune2)
 			goto bad;
+		if (SurrogateMin <= l && l <= SurrogateMax)
+			goto bad;
 		*rune = l;
 		return 3;
 	}
@@ -221,7 +227,7 @@ chartorune(Rune *rune, const char *str)
 		goto bad;
 	if (c < T5) {
 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-		if (l <= Rune3)
+		if (l <= Rune3 || l > Runemax)
 			goto bad;
 		*rune = l;
 		return 4;
@@ -241,7 +247,8 @@ bad:
 }
 
 int
-isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed)
+{
 	*consumed = charntorune(rune, str, length);
 	return *rune != Runeerror || *consumed == 3;
 }
@@ -273,13 +280,15 @@ runetochar(char *str, const Rune *rune)
 	}
 
 	/*
-	 * If the Rune is out of range, convert it to the error rune.
+	 * If the Rune is out of range or a surrogate half, convert it to the error rune.
 	 * Do this test here because the error rune encodes to three bytes.
 	 * Doing it earlier would duplicate work, since an out of range
 	 * Rune wouldn't have fit in one or two bytes.
 	 */
 	if (c > Runemax)
 		c = Runeerror;
+	if (SurrogateMin <= c && c <= SurrogateMax)
+		c = Runeerror;
 
 	/*
 	 * three character sequence