diff options
author | Internet Software Consortium, Inc <@isc.org> | 2007-09-07 14:13:42 -0600 |
---|---|---|
committer | LaMont Jones <lamont@debian.org> | 2007-09-07 14:13:42 -0600 |
commit | b62706a673dc58ca390b47342036e3f3206a14bc (patch) | |
tree | 55df184668ce3b1ca9af85d3aab09e6470634a88 /contrib/idn/mdnkit/lib/utf6.c | |
parent | 4dd5eedee98c3fc2f40a45078bc7006cd5efd7f0 (diff) | |
download | bind9-b62706a673dc58ca390b47342036e3f3206a14bc.tar.gz |
9.2.0a2
Diffstat (limited to 'contrib/idn/mdnkit/lib/utf6.c')
-rw-r--r-- | contrib/idn/mdnkit/lib/utf6.c | 551 |
1 files changed, 551 insertions, 0 deletions
diff --git a/contrib/idn/mdnkit/lib/utf6.c b/contrib/idn/mdnkit/lib/utf6.c new file mode 100644 index 00000000..6f0d9423 --- /dev/null +++ b/contrib/idn/mdnkit/lib/utf6.c @@ -0,0 +1,551 @@ +#ifndef lint +static char *rcsid = "$Id: utf6.c,v 1.1 2001/06/09 00:30:30 tale Exp $"; +#endif + +/* + * Copyright (c) 2000 Japan Network Information Center. All rights reserved. + * + * By using this file, you agree to the terms and conditions set forth bellow. + * + * LICENSE TERMS AND CONDITIONS + * + * The following License Terms and Conditions apply, unless a different + * license is obtained from Japan Network Information Center ("JPNIC"), + * a Japanese association, Fuundo Bldg., 1-2 Kanda Ogawamachi, Chiyoda-ku, + * Tokyo, Japan. + * + * 1. Use, Modification and Redistribution (including distribution of any + * modified or derived work) in source and/or binary forms is permitted + * under this License Terms and Conditions. + * + * 2. Redistribution of source code must retain the copyright notices as they + * appear in each source code file, this License Terms and Conditions. + * + * 3. Redistribution in binary form must reproduce the Copyright Notice, + * this License Terms and Conditions, in the documentation and/or other + * materials provided with the distribution. For the purposes of binary + * distribution the "Copyright Notice" refers to the following language: + * "Copyright (c) Japan Network Information Center. All rights reserved." + * + * 4. Neither the name of JPNIC may be used to endorse or promote products + * derived from this Software without specific prior written approval of + * JPNIC. + * + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + * + * 6. Indemnification by Licensee + * Any person or entities using and/or redistributing this Software under + * this License Terms and Conditions shall defend indemnify and hold + * harmless JPNIC from and against any and all judgements damages, + * expenses, settlement liabilities, cost and other liabilities of any + * kind as a result of use and redistribution of this Software or any + * claim, suite, action, litigation or proceeding by any third party + * arising out of or relates to this License Terms and Conditions. + * + * 7. Governing Law, Jurisdiction and Venue + * This License Terms and Conditions shall be governed by and and + * construed in accordance with the law of Japan. Any person or entities + * using and/or redistributing this Software under this License Terms and + * Conditions hereby agrees and consent to the personal and exclusive + * jurisdiction and venue of Tokyo District Court of Japan. + */ + +#include <config.h> + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include <mdn/result.h> +#include <mdn/assert.h> +#include <mdn/logmacro.h> +#include <mdn/converter.h> +#include <mdn/utf8.h> +#include <mdn/debug.h> +#include <mdn/utf6.h> +#include <mdn/ace.h> +#include <mdn/util.h> + +#ifndef MDN_UTF6_PREFIX +#define MDN_UTF6_PREFIX "wq--" +#endif + +#define UTF6_SAME_BYTE_MASK 0x00ff +#define UTF6_SAME_NIBBLE_MASK 0x0fff +#define UTF6_PLAIN_MASK 0xffff + +#define UTF6_BUF_SIZE 128 /* more than enough */ + +/* + * Compression type. + */ +enum { + same_byte_mode, /* the most significant byte of all non + '-' characters is the same value */ + same_nibble_mode, /* the most significant nibble of all non + '-' characters is the same value */ + plain_mode /* not compressed */ +}; + +static mdn_result_t utf6_decode(const char *from, size_t fromlen, + char *to, size_t tolen); +static mdn_result_t utf6_decode_utf16(const char *from, size_t fromlen, + unsigned short *buf, size_t *lenp); +static mdn_result_t utf6_decode_vlhex(const char *from, size_t len, + size_t *reslen, + unsigned short *value); +static mdn_result_t utf6_encode(const char *from, size_t fromlen, + char *to, size_t tolen); +static mdn_result_t utf6_encode_utf16(const unsigned short *p, + size_t len, char *to, size_t tolen, + int compress); +static mdn_result_t utf6_encode_vlhex(unsigned short value, char *to, + size_t tolen, size_t *reslen); +static int get_compress_mode(const unsigned short *p, size_t len); + +static mdn__ace_t utf6_ctx = { + mdn__ace_prefix, + MDN_UTF6_PREFIX, + utf6_encode, + utf6_decode, +}; + +/* ARGSUSED */ +mdn_result_t +mdn__utf6_open(mdn_converter_t ctx, mdn_converter_dir_t dir, void **privdata) { + return (mdn_success); +} + +/* ARGSUSED */ +mdn_result_t +mdn__utf6_close(mdn_converter_t ctx, void *privdata, mdn_converter_dir_t dir) { + return (mdn_success); +} + +mdn_result_t +mdn__utf6_convert(mdn_converter_t ctx, void *privdata, mdn_converter_dir_t dir, + const char *from, char *to, size_t tolen) +{ + mdn_result_t r; + + assert(ctx != NULL && + (dir == mdn_converter_l2u || dir == mdn_converter_u2l)); + + TRACE(("mdn__utf6_convert(dir=%s,from=\"%s\")\n", + dir == mdn_converter_l2u ? "l2u" : "u2l", + mdn_debug_xstring(from, 20))); + + r = mdn__ace_convert(&utf6_ctx, dir, from, to, tolen); + if (r != mdn_success) + return (r); + + DUMP(("mdn__utf6_convert: \"%s\"\n", mdn_debug_xstring(to, 70))); + + return (r); +} + +static mdn_result_t +utf6_decode(const char *from, size_t fromlen, char *to, size_t tolen) { + unsigned short *buf; + unsigned short local_buf[UTF6_BUF_SIZE]; + size_t len, reslen; + mdn_result_t r; + + /* + * Allocate sufficient buffer. + */ + if (fromlen > UTF6_BUF_SIZE) { + if ((buf = malloc(sizeof(*buf) * fromlen)) == NULL) + return (mdn_nomemory); + } else { + /* Use local buffer. */ + buf = local_buf; + } + + /* + * Decode base32 and decompress. + */ + r = utf6_decode_utf16(from, fromlen, buf, &len); + if (r != mdn_success) + goto ret; + + /* + * Now 'buf' holds the decompressed string, which must contain + * UTF-16 characters. Convert them into UTF-8. + */ + r = mdn_util_utf16toutf8(buf, len, to, tolen, &reslen); + if (r != mdn_success) + goto ret; + + /* + * Terminate with NUL. + */ + if (tolen <= reslen) { + r = mdn_buffer_overflow; + goto ret; + } + + to += reslen; + *to = '\0'; + tolen -= reslen; + + r = mdn_success; + +ret: + if (buf != local_buf) + free(buf); + return (r); +} + +static mdn_result_t +utf6_decode_utf16(const char *from, size_t fromlen, + unsigned short *buf, size_t *lenp) +{ + mdn_result_t r; + unsigned short value; + unsigned short cpart; + unsigned short vmax; + size_t len; + size_t reslen; + + /* + * Decode Base32 and put the result bytes to 'buf'. + * Since decoded string will be shorter in length, and + * the caller allocated 'buf' so that its length is not + * less than 'fromlen', we don't have to worry about overflow. + */ + + if (fromlen <= 0) + return mdn_success; + + switch (*from) { + case 'y': + case 'Y': + /* + * same_byte_mode. + */ + fromlen--; + from++; + r = utf6_decode_vlhex(from, fromlen, &reslen, &value); + if (r != mdn_success) + return (mdn_invalid_encoding); + from += reslen; + fromlen -= reslen; + + cpart = value * 0x0100; + vmax = 0x00ff; + break; + + case 'z': + case 'Z': + /* + * same_nibble_mode. + */ + fromlen--; + from++; + r = utf6_decode_vlhex(from, fromlen, &reslen, &value); + if (r != mdn_success) + return (mdn_invalid_encoding); + from += reslen; + fromlen -= reslen; + + cpart = value * 0x1000; + vmax = 0x0fff; + break; + + default: + /* + * plain_mode. + */ + cpart = 0x0000; + vmax = 0xffff; + break; + } + + + for (len = 0; fromlen > 0; len++) { + if (*from == '-') { + *buf++ = '-'; + from++; + fromlen--; + } else { + r = utf6_decode_vlhex(from, fromlen, &reslen, &value); + if (r != mdn_success) + return (mdn_invalid_encoding); + if (value > vmax) + return (mdn_invalid_encoding); + *buf++ = cpart + value; + from += reslen; + fromlen -= reslen; + } + } + + *buf = '\0'; + *lenp = len; + return (mdn_success); +} + +static mdn_result_t +utf6_decode_vlhex(const char *from, size_t len, size_t *reslen, + unsigned short *value) { + unsigned short v; + int i; + + /* + * Decode the first character of a variable length HEX string. + * The character must be in set of [ghijklmnopqrstuv]. + */ + if (len <= 0) + return (mdn_invalid_encoding); + + if ('G' <= *from && *from <= 'V') + v = *from - 'G'; + else if ('g' <= *from && *from <= 'v') + v = *from - 'g'; + else + return (mdn_invalid_encoding); + from++; + len--; + i = 1; + + /* + * Decode the rest characters of a variable length HEX string. + * The every character must be in set of [0123456789abcdef]. + */ + for (;;) { + if (len <= 0) + break; + if ('0' <= *from && *from <= '9') + v = (v << 4) + (*from - '0'); + else if ('A' <= *from && *from <= 'F') + v = (v << 4) + 0x0a + (*from - 'A'); + else if ('a' <= *from && *from <= 'f') + v = (v << 4) + 0x0a + (*from - 'a'); + else + break; + from++; + len--; + i++; + } + + *value = v; + *reslen = i; + return (mdn_success); +} + +static mdn_result_t +utf6_encode(const char *from, size_t fromlen, char *to, size_t tolen) { + unsigned short *buf; + unsigned short local_buf[UTF6_BUF_SIZE]; /* UTF-16 */ + mdn_result_t r; + size_t buflen, len; + + /* + * Convert to UTF-16. + */ + buf = local_buf; + buflen = UTF6_BUF_SIZE; + for (;;) { + r = mdn_util_utf8toutf16(from, fromlen, + buf, buflen, &len); + if (r == mdn_buffer_overflow) { + buflen *= 2; + if (buf == local_buf) + buf = malloc(sizeof(*buf) * buflen); + else + buf = realloc(buf, sizeof(*buf) * buflen); + if (buf == NULL) + return (mdn_nomemory); + } else if (r == mdn_success) { + break; + } else { + goto ret; + } + } + + /* + * Compress, encode in base-32 and output. + */ + r = utf6_encode_utf16(buf, len, to, tolen, + get_compress_mode(buf, len)); + +ret: + if (buf != local_buf) + free(buf); + return (r); +} + +static mdn_result_t +utf6_encode_utf16(const unsigned short *p, size_t len, + char *to, size_t tolen, int compress_mode) +{ + mdn_result_t r; + unsigned short mask; + size_t reslen; + int i; + + if (len <= 0) + return mdn_success; + + switch (compress_mode) { + case same_byte_mode: + mask = UTF6_SAME_BYTE_MASK; + + if (tolen < 1) + return (mdn_buffer_overflow); + *to++ = 'y'; + tolen--; + r = utf6_encode_vlhex((p[0] >> 8) & 0x00ff, to, tolen, + &reslen); + if (r != mdn_success) + return (r); + to += reslen; + tolen -= reslen; + + break; + + case same_nibble_mode: + mask = UTF6_SAME_NIBBLE_MASK; + + if (tolen < 1) + return (mdn_buffer_overflow); + *to++ = 'z'; + tolen--; + r = utf6_encode_vlhex((p[0] >> 4) & 0x0fff, to, tolen, + &reslen); + if (r != mdn_success) + return (r); + to += reslen; + tolen -= reslen; + + break; + + default: + mask = UTF6_PLAIN_MASK; + break; + } + + for (i = 0; i < len; i++) { + if (p[i] == '-') { + if (tolen < 1) + return (mdn_buffer_overflow); + *to++ = '-'; + tolen--; + + } else { + r = utf6_encode_vlhex(p[i] & mask, to, tolen, &reslen); + if (r != mdn_success) + return (r); + to += reslen; + tolen -= reslen; + } + } + + if (tolen <= 0) + return (mdn_buffer_overflow); + + *to = '\0'; + return (mdn_success); +} + +static mdn_result_t +utf6_encode_vlhex(unsigned short value, char *to, size_t tolen, + size_t *reslen) { + + static const char *vlhex_string1 = "ghijklmnopqrstuv"; + static const char *vlhex_string2 = "0123456789abcdef"; + + /* + * Encode an integer in the range of 0x000 - 0xffff as variable + * length HEX string. + */ + if (value <= 0x000f) { + if (tolen < 1) + return (mdn_buffer_overflow); + *to++ = vlhex_string1[ value & 0x0f]; + *reslen = 1; + + } else if (value <= 0x00ff) { + if (tolen < 2) + return (mdn_buffer_overflow); + *to++ = vlhex_string1[(value >> 4) & 0x0f]; + *to++ = vlhex_string2[ value & 0x0f]; + *reslen = 2; + + } else if (value <= 0x0fff) { + if (tolen < 3) + return (mdn_buffer_overflow); + *to++ = vlhex_string1[(value >> 8) & 0x0f]; + *to++ = vlhex_string2[(value >> 4) & 0x0f]; + *to++ = vlhex_string2[ value & 0x0f]; + *reslen = 3; + + } else { + if (tolen < 4) + return (mdn_buffer_overflow); + *to++ = vlhex_string1[(value >> 12) & 0x0f]; + *to++ = vlhex_string2[(value >> 8) & 0x0f]; + *to++ = vlhex_string2[(value >> 4) & 0x0f]; + *to++ = vlhex_string2[ value & 0x0f]; + *reslen = 4; + } + + return (mdn_success); +} + +static int +get_compress_mode(const unsigned short *p, size_t len) { + int non_hyphens = 0; + unsigned short same_bytes = 0; + unsigned short same_nibbles = 0; + int i; + + if (len <= 0) + return plain_mode; + + for (i = 0; i < len; i++) { + if (p[i] != '-') { + non_hyphens++; + if ((p[0] & 0xff00) == (p[i] & 0xff00)) + same_bytes++; + else if ((p[0] & 0xf000) == (p[i] & 0xf000)) + same_nibbles++; + } + } + + if (non_hyphens < 2) { + /* + * The number of non '-' characters is less than 2. + */ + return plain_mode; + } else if (same_bytes == non_hyphens) { + /* + * The same most significant byte of the every non '-' + * character is the same value. + */ + return same_byte_mode; + } else if (same_nibbles == non_hyphens) { + /* + * The same most significant nibble of the every non '-' + * character is the same value. + */ + return same_nibble_mode; + } else { + /* + * Not matched above. + */ + return plain_mode; + } + + /* Not reached */ +} |