summary | refs | log | tree | commit | diff
path: root/src/lib9/utf/rune.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib9/utf/rune.c')
-rw-r--r-- src/lib9/utf/rune.c 21
1 files changed, 15 insertions, 6 deletions
diff --git a/src/lib9/utf/rune.c b/src/lib9/utf/rune.c
index cf98bab15..818771cfd 100644
--- a/src/lib9/utf/rune.c
+++ b/src/lib9/utf/rune.c
@@ -36,12 +36,14 @@ enum
Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
- Rune4 = (1<<(Bit4+3*Bitx))-1,
- /* 0001 1111 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
Maskx = (1<<Bitx)-1, /* 0011 1111 */
Testx = Maskx ^ 0xFF, /* 1100 0000 */
+ SurrogateMin = 0xD800,
+ SurrogateMax = 0xDFFF,
+
Bad = Runeerror,
};
@@ -122,6 +124,8 @@ charntorune(Rune *rune, const char *str, int length)
l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
if(l <= Rune2)
goto bad;
+ if (SurrogateMin <= l && l <= SurrogateMax)
+ goto bad;
*rune = l;
return 3;
}
@@ -138,7 +142,7 @@ charntorune(Rune *rune, const char *str, int length)
goto bad;
if (c < T5) {
l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if (l <= Rune3)
+ if (l <= Rune3 || l > Runemax)
goto bad;
*rune = l;
return 4;
@@ -208,6 +212,8 @@ chartorune(Rune *rune, const char *str)
l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
if(l <= Rune2)
goto bad;
+ if (SurrogateMin <= l && l <= SurrogateMax)
+ goto bad;
*rune = l;
return 3;
}
@@ -221,7 +227,7 @@ chartorune(Rune *rune, const char *str)
goto bad;
if (c < T5) {
l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if (l <= Rune3)
+ if (l <= Rune3 || l > Runemax)
goto bad;
*rune = l;
return 4;
@@ -241,7 +247,8 @@ bad:
}
int
-isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed)
+{
*consumed = charntorune(rune, str, length);
return *rune != Runeerror || *consumed == 3;
}
@@ -273,13 +280,15 @@ runetochar(char *str, const Rune *rune)
}
/*
- * If the Rune is out of range, convert it to the error rune.
+ * If the Rune is out of range or a surrogate half, convert it to the error rune.
* Do this test here because the error rune encodes to three bytes.
* Doing it earlier would duplicate work, since an out of range
* Rune wouldn't have fit in one or two bytes.
*/
if (c > Runemax)
c = Runeerror;
+ if (SurrogateMin <= c && c <= SurrogateMax)
+ c = Runeerror;
/*
* three character sequence