diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/race.c')
-rw-r--r-- | contrib/idn/idnkit-1.0-src/lib/race.c | 427 |
1 files changed, 427 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/race.c b/contrib/idn/idnkit-1.0-src/lib/race.c new file mode 100644 index 00000000..1b44a989 --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/lib/race.c @@ -0,0 +1,427 @@ +#ifndef lint +static char *rcsid = "$Id: race.c,v 1.1.1.1 2003/06/04 00:26:07 marka Exp $"; +#endif + +/* + * Copyright (c) 2000,2001,2002 Japan Network Information Center. + * All rights reserved. + * + * By using this file, you agree to the terms and conditions set forth bellow. + * + * LICENSE TERMS AND CONDITIONS + * + * The following License Terms and Conditions apply, unless a different + * license is obtained from Japan Network Information Center ("JPNIC"), + * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, + * Chiyoda-ku, Tokyo 101-0047, Japan. + * + * 1. Use, Modification and Redistribution (including distribution of any + * modified or derived work) in source and/or binary forms is permitted + * under this License Terms and Conditions. + * + * 2. Redistribution of source code must retain the copyright notices as they + * appear in each source code file, this License Terms and Conditions. + * + * 3. Redistribution in binary form must reproduce the Copyright Notice, + * this License Terms and Conditions, in the documentation and/or other + * materials provided with the distribution. For the purposes of binary + * distribution the "Copyright Notice" refers to the following language: + * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." + * + * 4. The name of JPNIC may not be used to endorse or promote products + * derived from this Software without specific prior written approval of + * JPNIC. + * + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <config.h> + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include <idn/result.h> +#include <idn/assert.h> +#include <idn/logmacro.h> +#include <idn/converter.h> +#include <idn/ucs4.h> +#include <idn/debug.h> +#include <idn/race.h> +#include <idn/util.h> + +#ifndef IDN_RACE_PREFIX +#define IDN_RACE_PREFIX "bq--" +#endif +#define RACE_2OCTET_MODE 0xd8 +#define RACE_ESCAPE 0xff +#define RACE_ESCAPE_2ND 0x99 + +#define RACE_BUF_SIZE 128 /* more than enough */ + +/* + * Unicode surrogate pair. + */ +#define IS_SURROGATE_HIGH(v) (0xd800 <= (v) && (v) <= 0xdbff) +#define IS_SURROGATE_LOW(v) (0xdc00 <= (v) && (v) <= 0xdfff) +#define SURROGATE_HIGH(v) (SURROGATE_H_OFF + (((v) - 0x10000) >> 10)) +#define SURROGATE_LOW(v) (SURROGATE_L_OFF + ((v) & 0x3ff)) +#define SURROGATE_BASE 0x10000 +#define SURROGATE_H_OFF 0xd800 +#define SURROGATE_L_OFF 0xdc00 +#define COMBINE_SURROGATE(h, l) \ + (SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF)) + +/* + * Compression type. + */ +enum { + compress_one, /* all characters are in a single row */ + compress_two, /* row 0 and another row */ + compress_none /* nope */ +}; + +static idn_result_t race_decode_decompress(const char *from, + unsigned short *buf, + size_t buflen); +static idn_result_t race_compress_encode(const unsigned short *p, + int compress_mode, + char *to, size_t tolen); +static int get_compress_mode(unsigned short *p); + +idn_result_t +idn__race_decode(idn_converter_t ctx, void *privdata, + const char *from, unsigned long *to, size_t tolen) { + unsigned short *buf = NULL; + size_t prefixlen = strlen(IDN_RACE_PREFIX); + size_t fromlen; + size_t buflen; + idn_result_t r; + + assert(ctx != NULL); + + TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n", + idn__debug_xstring(from, 50), (int)tolen)); + + if (!idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) { + if (*from == '\0') { + r = idn_ucs4_utf8toucs4(from, to, tolen); + goto ret; + } + r = idn_invalid_encoding; + goto ret; + } + from += prefixlen; + fromlen = strlen(from); + + /* + * Allocate sufficient buffer. + */ + buflen = fromlen + 1; + buf = malloc(sizeof(*buf) * buflen); + if (buf == NULL) { + r = idn_nomemory; + goto ret; + } + + /* + * Decode base32 and decompress. + */ + r = race_decode_decompress(from, buf, buflen); + if (r != idn_success) + goto ret; + + /* + * Now 'buf' points the decompressed string, which must contain + * UTF-16 characters. + */ + + /* + * Convert to UCS4. + */ + r = idn_ucs4_utf16toucs4(buf, to, tolen); + if (r != idn_success) + goto ret; + +ret: + free(buf); + if (r == idn_success) { + TRACE(("idn__race_decode(): succcess (to=\"%s\")\n", + idn__debug_ucs4xstring(to, 50))); + } else { + TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r))); + } + return (r); +} + +static idn_result_t +race_decode_decompress(const char *from, unsigned short *buf, size_t buflen) +{ + unsigned short *p = buf; + unsigned int bitbuf = 0; + int bitlen = 0; + int i, j; + size_t len; + + while (*from != '\0') { + int c = *from++; + int x; + + if ('a' <= c && c <= 'z') + x = c - 'a'; + else if ('A' <= c && c <= 'Z') + x = c - 'A'; + else if ('2' <= c && c <= '7') + x = c - '2' + 26; + else + return (idn_invalid_encoding); + + bitbuf = (bitbuf << 5) + x; + bitlen += 5; + if (bitlen >= 8) { + *p++ = (bitbuf >> (bitlen - 8)) & 0xff; + bitlen -= 8; + } + } + len = p - buf; + + /* + * Now 'buf' holds the decoded string. + */ + + /* + * Decompress. + */ + if (buf[0] == RACE_2OCTET_MODE) { + if ((len - 1) % 2 != 0) + return (idn_invalid_encoding); + for (i = 1, j = 0; i < len; i += 2, j++) + buf[j] = (buf[i] << 8) + buf[i + 1]; + len = j; + } else { + unsigned short c = buf[0] << 8; /* higher octet */ + + for (i = 1, j = 0; i < len; j++) { + if (buf[i] == RACE_ESCAPE) { + if (i + 1 >= len) + return (idn_invalid_encoding); + else if (buf[i + 1] == RACE_ESCAPE_2ND) + buf[j] = c | 0xff; + else + buf[j] = buf[i + 1]; + i += 2; + + } else if (buf[i] == 0x99 && c == 0x00) { + /* + * The RACE specification says this is error. + */ + return (idn_invalid_encoding); + + } else { + buf[j] = c | buf[i++]; + } + } + len = j; + } + buf[len] = '\0'; + + return (idn_success); +} + +idn_result_t +idn__race_encode(idn_converter_t ctx, void *privdata, + const unsigned long *from, char *to, size_t tolen) { + char *to_org = to; + unsigned short *p, *buf = NULL; + size_t prefixlen = strlen(IDN_RACE_PREFIX); + size_t buflen; + size_t fromlen; + idn_result_t r; + int compress_mode; + + assert(ctx != NULL); + + TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n", + idn__debug_ucs4xstring(from, 50), (int)tolen)); + + if (*from == '\0') { + r = idn_ucs4_ucs4toutf8(from, to, tolen); + goto ret; + } else if (idn__util_ucs4haveaceprefix(from, IDN_RACE_PREFIX)) { + r = idn_prohibited; + goto ret; + } + + if (tolen < prefixlen) { + r = idn_buffer_overflow; + goto ret; + } + memcpy(to, IDN_RACE_PREFIX, prefixlen); + to += prefixlen; + tolen -= prefixlen; + + fromlen = idn_ucs4_strlen(from); + buflen = fromlen * 2 + 2; + + /* + * Convert to UTF-16. + * Preserve space for a character at the top of the buffer. + */ + for (;;) { + unsigned short *new_buf; + + new_buf = realloc(buf, sizeof(*buf) * buflen); + if (new_buf == NULL) { + r = idn_nomemory; + goto ret; + } + buf = new_buf; + + r = idn_ucs4_ucs4toutf16(from, buf + 1, buflen - 1); + if (r == idn_success) + break; + else if (r != idn_buffer_overflow) + goto ret; + + buflen = fromlen * 2 + 2; + } + p = buf + 1; + + /* + * Now 'p' contains UTF-16 encoded string. + */ + + /* + * Check U+0099. + * RACE doesn't permit U+0099 in an input string. + */ + for (p = buf + 1; *p != '\0'; p++) { + if (*p == 0x0099) { + r = idn_invalid_encoding; + goto ret; + } + } + + /* + * Compress, encode in base-32 and output. + */ + compress_mode = get_compress_mode(buf + 1); + r = race_compress_encode(buf, compress_mode, to, tolen); + +ret: + free(buf); + if (r == idn_success) { + TRACE(("idn__race_encode(): succcess (to=\"%s\")\n", + idn__debug_xstring(to_org, 50))); + } else { + TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r))); + } + return (r); +} + +static idn_result_t +race_compress_encode(const unsigned short *p, int compress_mode, + char *to, size_t tolen) +{ + unsigned long bitbuf = *p++; /* bit stream buffer */ + int bitlen = 8; /* # of bits in 'bitbuf' */ + + while (*p != '\0' || bitlen > 0) { + unsigned int c = *p; + + if (c == '\0') { + /* End of data. Flush. */ + bitbuf <<= (5 - bitlen); + bitlen = 5; + } else if (compress_mode == compress_none) { + /* Push 16 bit data. */ + bitbuf = (bitbuf << 16) | c; + bitlen += 16; + p++; + } else {/* compress_mode == compress_one/compress_two */ + /* Push 8 or 16 bit data. */ + if (compress_mode == compress_two && + (c & 0xff00) == 0) { + /* Upper octet is zero (and not U1). */ + bitbuf = (bitbuf << 16) | 0xff00 | c; + bitlen += 16; + } else if ((c & 0xff) == 0xff) { + /* Lower octet is 0xff. */ + bitbuf = (bitbuf << 16) | + (RACE_ESCAPE << 8) | RACE_ESCAPE_2ND; + bitlen += 16; + } else { + /* Just output lower octet. */ + bitbuf = (bitbuf << 8) | (c & 0xff); + bitlen += 8; + } + p++; + } + + /* + * Output bits in 'bitbuf' in 5-bit unit. + */ + while (bitlen >= 5) { + int x; + + /* Get top 5 bits. */ + x = (bitbuf >> (bitlen - 5)) & 0x1f; + bitlen -= 5; + + /* Encode. */ + if (x < 26) + x += 'a'; + else + x = (x - 26) + '2'; + + if (tolen < 1) + return (idn_buffer_overflow); + + *to++ = x; + tolen--; + } + } + + if (tolen <= 0) + return (idn_buffer_overflow); + + *to = '\0'; + return (idn_success); +} + +static int +get_compress_mode(unsigned short *p) { + int zero = 0; + unsigned int upper = 0; + unsigned short *modepos = p - 1; + + while (*p != '\0') { + unsigned int hi = *p++ & 0xff00; + + if (hi == 0) { + zero++; + } else if (hi == upper) { + ; + } else if (upper == 0) { + upper = hi; + } else { + *modepos = RACE_2OCTET_MODE; + return (compress_none); + } + } + *modepos = upper >> 8; + if (upper > 0 && zero > 0) + return (compress_two); + else + return (compress_one); +} |