diff options
author | Joey Hess <joey@gnu.kitenet.net> | 2009-05-05 15:06:34 -0400 |
---|---|---|
committer | Joey Hess <joey@gnu.kitenet.net> | 2009-05-05 15:06:34 -0400 |
commit | a250ae89f37849be1caf204a07d2e4e563503390 (patch) | |
tree | 0a0f8cbaf709e4e13a574312f0d7f59382090e15 /isutf8.c | |
parent | 0aa82b9e712f62170d7f433b9fb181cdc6a60a92 (diff) | |
download | moreutils-a250ae89f37849be1caf204a07d2e4e563503390.tar.gz |
isutf8: Reject UTF-8-encoded UTF-16 surrogates. Closes: #525301 (Thanks, Jakub Wilk and liw)
Diffstat (limited to 'isutf8.c')
-rw-r--r-- | isutf8.c | 10 |
1 files changed, 9 insertions, 1 deletions
```diff
@@ -127,6 +127,14 @@ static unsigned long decodeutf8(unsigned char *buf, int nbytes)
             return INVALID_CHAR;
         u = (u << 6) | (buf[j] & 0x3f);
     }
+
+    /* Conforming UTF-8 cannot contain codes 0xd800–0xdfff (UTF-16
+       surrogates) as well as 0xfffe and 0xffff. */
+    if (u >= 0xD800 && u <= 0xDFFF)
+        return INVALID_CHAR;
+    if (u == 0xFFFE || u == 0xFFFF)
+        return INVALID_CHAR;
+
     return u;
 }
 
@@ -145,7 +153,7 @@ static int is_utf8_byte_stream(FILE *file, char *filename, int quiet) {
     int nbytes, nbytes2;
     int c;
     unsigned long code;
-    unsigned long line, col, byteoff;
+    unsigned long line, col, byteoff;
 
     nbytes = 0;
     line = 1;
```