diff options
Diffstat (limited to 'contrib/idn/mdnkit/lib/utf6.c')
-rw-r--r-- | contrib/idn/mdnkit/lib/utf6.c | 551 |
1 files changed, 0 insertions, 551 deletions
diff --git a/contrib/idn/mdnkit/lib/utf6.c b/contrib/idn/mdnkit/lib/utf6.c deleted file mode 100644 index 66f2efe4..00000000 --- a/contrib/idn/mdnkit/lib/utf6.c +++ /dev/null @@ -1,551 +0,0 @@ -#ifndef lint -static char *rcsid = "$Id: utf6.c,v 1.1.2.1 2002/02/08 12:14:35 marka Exp $"; -#endif - -/* - * Copyright (c) 2000 Japan Network Information Center. All rights reserved. - * - * By using this file, you agree to the terms and conditions set forth bellow. - * - * LICENSE TERMS AND CONDITIONS - * - * The following License Terms and Conditions apply, unless a different - * license is obtained from Japan Network Information Center ("JPNIC"), - * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, - * Chiyoda-ku, Tokyo 101-0047, Japan. - * - * 1. Use, Modification and Redistribution (including distribution of any - * modified or derived work) in source and/or binary forms is permitted - * under this License Terms and Conditions. - * - * 2. Redistribution of source code must retain the copyright notices as they - * appear in each source code file, this License Terms and Conditions. - * - * 3. Redistribution in binary form must reproduce the Copyright Notice, - * this License Terms and Conditions, in the documentation and/or other - * materials provided with the distribution. For the purposes of binary - * distribution the "Copyright Notice" refers to the following language: - * "Copyright (c) Japan Network Information Center. All rights reserved." - * - * 4. Neither the name of JPNIC may be used to endorse or promote products - * derived from this Software without specific prior written approval of - * JPNIC. - * - * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - * - * 6. Indemnification by Licensee - * Any person or entities using and/or redistributing this Software under - * this License Terms and Conditions shall defend indemnify and hold - * harmless JPNIC from and against any and all judgements damages, - * expenses, settlement liabilities, cost and other liabilities of any - * kind as a result of use and redistribution of this Software or any - * claim, suite, action, litigation or proceeding by any third party - * arising out of or relates to this License Terms and Conditions. - * - * 7. Governing Law, Jurisdiction and Venue - * This License Terms and Conditions shall be governed by and and - * construed in accordance with the law of Japan. Any person or entities - * using and/or redistributing this Software under this License Terms and - * Conditions hereby agrees and consent to the personal and exclusive - * jurisdiction and venue of Tokyo District Court of Japan. - */ - -#include <config.h> - -#include <stddef.h> -#include <stdlib.h> -#include <string.h> - -#include <mdn/result.h> -#include <mdn/assert.h> -#include <mdn/logmacro.h> -#include <mdn/converter.h> -#include <mdn/utf8.h> -#include <mdn/debug.h> -#include <mdn/utf6.h> -#include <mdn/ace.h> -#include <mdn/util.h> - -#ifndef MDN_UTF6_PREFIX -#define MDN_UTF6_PREFIX "wq--" -#endif - -#define UTF6_SAME_BYTE_MASK 0x00ff -#define UTF6_SAME_NIBBLE_MASK 0x0fff -#define UTF6_PLAIN_MASK 0xffff - -#define UTF6_BUF_SIZE 128 /* more than enough */ - -/* - * Compression type. - */ -enum { - same_byte_mode, /* the most significant byte of all non - '-' characters is the same value */ - same_nibble_mode, /* the most significant nibble of all non - '-' characters is the same value */ - plain_mode /* not compressed */ -}; - -static mdn_result_t utf6_decode(const char *from, size_t fromlen, - char *to, size_t tolen); -static mdn_result_t utf6_decode_utf16(const char *from, size_t fromlen, - unsigned short *buf, size_t *lenp); -static mdn_result_t utf6_decode_vlhex(const char *from, size_t len, - size_t *reslen, - unsigned short *value); -static mdn_result_t utf6_encode(const char *from, size_t fromlen, - char *to, size_t tolen); -static mdn_result_t utf6_encode_utf16(const unsigned short *p, - size_t len, char *to, size_t tolen, - int compress); -static mdn_result_t utf6_encode_vlhex(unsigned short value, char *to, - size_t tolen, size_t *reslen); -static int get_compress_mode(const unsigned short *p, size_t len); - -static mdn__ace_t utf6_ctx = { - mdn__ace_prefix, - MDN_UTF6_PREFIX, - utf6_encode, - utf6_decode, -}; - -/* ARGSUSED */ -mdn_result_t -mdn__utf6_open(mdn_converter_t ctx, mdn_converter_dir_t dir, void **privdata) { - return (mdn_success); -} - -/* ARGSUSED */ -mdn_result_t -mdn__utf6_close(mdn_converter_t ctx, void *privdata, mdn_converter_dir_t dir) { - return (mdn_success); -} - -mdn_result_t -mdn__utf6_convert(mdn_converter_t ctx, void *privdata, mdn_converter_dir_t dir, - const char *from, char *to, size_t tolen) -{ - mdn_result_t r; - - assert(ctx != NULL && - (dir == mdn_converter_l2u || dir == mdn_converter_u2l)); - - TRACE(("mdn__utf6_convert(dir=%s,from=\"%s\")\n", - dir == mdn_converter_l2u ? "l2u" : "u2l", - mdn_debug_xstring(from, 20))); - - r = mdn__ace_convert(&utf6_ctx, dir, from, to, tolen); - if (r != mdn_success) - return (r); - - DUMP(("mdn__utf6_convert: \"%s\"\n", mdn_debug_xstring(to, 70))); - - return (r); -} - -static mdn_result_t -utf6_decode(const char *from, size_t fromlen, char *to, size_t tolen) { - unsigned short *buf; - unsigned short local_buf[UTF6_BUF_SIZE]; - size_t len, reslen; - mdn_result_t r; - - /* - * Allocate sufficient buffer. - */ - if (fromlen > UTF6_BUF_SIZE) { - if ((buf = malloc(sizeof(*buf) * fromlen)) == NULL) - return (mdn_nomemory); - } else { - /* Use local buffer. */ - buf = local_buf; - } - - /* - * Decode base32 and decompress. - */ - r = utf6_decode_utf16(from, fromlen, buf, &len); - if (r != mdn_success) - goto ret; - - /* - * Now 'buf' holds the decompressed string, which must contain - * UTF-16 characters. Convert them into UTF-8. - */ - r = mdn_util_utf16toutf8(buf, len, to, tolen, &reslen); - if (r != mdn_success) - goto ret; - - /* - * Terminate with NUL. - */ - if (tolen <= reslen) { - r = mdn_buffer_overflow; - goto ret; - } - - to += reslen; - *to = '\0'; - tolen -= reslen; - - r = mdn_success; - -ret: - if (buf != local_buf) - free(buf); - return (r); -} - -static mdn_result_t -utf6_decode_utf16(const char *from, size_t fromlen, - unsigned short *buf, size_t *lenp) -{ - mdn_result_t r; - unsigned short value; - unsigned short cpart; - unsigned short vmax; - size_t len; - size_t reslen; - - /* - * Decode Base32 and put the result bytes to 'buf'. - * Since decoded string will be shorter in length, and - * the caller allocated 'buf' so that its length is not - * less than 'fromlen', we don't have to worry about overflow. - */ - - if (fromlen <= 0) - return mdn_success; - - switch (*from) { - case 'y': - case 'Y': - /* - * same_byte_mode. - */ - fromlen--; - from++; - r = utf6_decode_vlhex(from, fromlen, &reslen, &value); - if (r != mdn_success) - return (mdn_invalid_encoding); - from += reslen; - fromlen -= reslen; - - cpart = value * 0x0100; - vmax = 0x00ff; - break; - - case 'z': - case 'Z': - /* - * same_nibble_mode. - */ - fromlen--; - from++; - r = utf6_decode_vlhex(from, fromlen, &reslen, &value); - if (r != mdn_success) - return (mdn_invalid_encoding); - from += reslen; - fromlen -= reslen; - - cpart = value * 0x1000; - vmax = 0x0fff; - break; - - default: - /* - * plain_mode. - */ - cpart = 0x0000; - vmax = 0xffff; - break; - } - - - for (len = 0; fromlen > 0; len++) { - if (*from == '-') { - *buf++ = '-'; - from++; - fromlen--; - } else { - r = utf6_decode_vlhex(from, fromlen, &reslen, &value); - if (r != mdn_success) - return (mdn_invalid_encoding); - if (value > vmax) - return (mdn_invalid_encoding); - *buf++ = cpart + value; - from += reslen; - fromlen -= reslen; - } - } - - *buf = '\0'; - *lenp = len; - return (mdn_success); -} - -static mdn_result_t -utf6_decode_vlhex(const char *from, size_t len, size_t *reslen, - unsigned short *value) { - unsigned short v; - int i; - - /* - * Decode the first character of a variable length HEX string. - * The character must be in set of [ghijklmnopqrstuv]. - */ - if (len <= 0) - return (mdn_invalid_encoding); - - if ('G' <= *from && *from <= 'V') - v = *from - 'G'; - else if ('g' <= *from && *from <= 'v') - v = *from - 'g'; - else - return (mdn_invalid_encoding); - from++; - len--; - i = 1; - - /* - * Decode the rest characters of a variable length HEX string. - * The every character must be in set of [0123456789abcdef]. - */ - for (;;) { - if (len <= 0) - break; - if ('0' <= *from && *from <= '9') - v = (v << 4) + (*from - '0'); - else if ('A' <= *from && *from <= 'F') - v = (v << 4) + 0x0a + (*from - 'A'); - else if ('a' <= *from && *from <= 'f') - v = (v << 4) + 0x0a + (*from - 'a'); - else - break; - from++; - len--; - i++; - } - - *value = v; - *reslen = i; - return (mdn_success); -} - -static mdn_result_t -utf6_encode(const char *from, size_t fromlen, char *to, size_t tolen) { - unsigned short *buf; - unsigned short local_buf[UTF6_BUF_SIZE]; /* UTF-16 */ - mdn_result_t r; - size_t buflen, len; - - /* - * Convert to UTF-16. - */ - buf = local_buf; - buflen = UTF6_BUF_SIZE; - for (;;) { - r = mdn_util_utf8toutf16(from, fromlen, - buf, buflen, &len); - if (r == mdn_buffer_overflow) { - buflen *= 2; - if (buf == local_buf) - buf = malloc(sizeof(*buf) * buflen); - else - buf = realloc(buf, sizeof(*buf) * buflen); - if (buf == NULL) - return (mdn_nomemory); - } else if (r == mdn_success) { - break; - } else { - goto ret; - } - } - - /* - * Compress, encode in base-32 and output. - */ - r = utf6_encode_utf16(buf, len, to, tolen, - get_compress_mode(buf, len)); - -ret: - if (buf != local_buf) - free(buf); - return (r); -} - -static mdn_result_t -utf6_encode_utf16(const unsigned short *p, size_t len, - char *to, size_t tolen, int compress_mode) -{ - mdn_result_t r; - unsigned short mask; - size_t reslen; - int i; - - if (len <= 0) - return mdn_success; - - switch (compress_mode) { - case same_byte_mode: - mask = UTF6_SAME_BYTE_MASK; - - if (tolen < 1) - return (mdn_buffer_overflow); - *to++ = 'y'; - tolen--; - r = utf6_encode_vlhex((p[0] >> 8) & 0x00ff, to, tolen, - &reslen); - if (r != mdn_success) - return (r); - to += reslen; - tolen -= reslen; - - break; - - case same_nibble_mode: - mask = UTF6_SAME_NIBBLE_MASK; - - if (tolen < 1) - return (mdn_buffer_overflow); - *to++ = 'z'; - tolen--; - r = utf6_encode_vlhex((p[0] >> 4) & 0x0fff, to, tolen, - &reslen); - if (r != mdn_success) - return (r); - to += reslen; - tolen -= reslen; - - break; - - default: - mask = UTF6_PLAIN_MASK; - break; - } - - for (i = 0; i < len; i++) { - if (p[i] == '-') { - if (tolen < 1) - return (mdn_buffer_overflow); - *to++ = '-'; - tolen--; - - } else { - r = utf6_encode_vlhex(p[i] & mask, to, tolen, &reslen); - if (r != mdn_success) - return (r); - to += reslen; - tolen -= reslen; - } - } - - if (tolen <= 0) - return (mdn_buffer_overflow); - - *to = '\0'; - return (mdn_success); -} - -static mdn_result_t -utf6_encode_vlhex(unsigned short value, char *to, size_t tolen, - size_t *reslen) { - - static const char *vlhex_string1 = "ghijklmnopqrstuv"; - static const char *vlhex_string2 = "0123456789abcdef"; - - /* - * Encode an integer in the range of 0x000 - 0xffff as variable - * length HEX string. - */ - if (value <= 0x000f) { - if (tolen < 1) - return (mdn_buffer_overflow); - *to++ = vlhex_string1[ value & 0x0f]; - *reslen = 1; - - } else if (value <= 0x00ff) { - if (tolen < 2) - return (mdn_buffer_overflow); - *to++ = vlhex_string1[(value >> 4) & 0x0f]; - *to++ = vlhex_string2[ value & 0x0f]; - *reslen = 2; - - } else if (value <= 0x0fff) { - if (tolen < 3) - return (mdn_buffer_overflow); - *to++ = vlhex_string1[(value >> 8) & 0x0f]; - *to++ = vlhex_string2[(value >> 4) & 0x0f]; - *to++ = vlhex_string2[ value & 0x0f]; - *reslen = 3; - - } else { - if (tolen < 4) - return (mdn_buffer_overflow); - *to++ = vlhex_string1[(value >> 12) & 0x0f]; - *to++ = vlhex_string2[(value >> 8) & 0x0f]; - *to++ = vlhex_string2[(value >> 4) & 0x0f]; - *to++ = vlhex_string2[ value & 0x0f]; - *reslen = 4; - } - - return (mdn_success); -} - -static int -get_compress_mode(const unsigned short *p, size_t len) { - int non_hyphens = 0; - unsigned short same_bytes = 0; - unsigned short same_nibbles = 0; - int i; - - if (len <= 0) - return plain_mode; - - for (i = 0; i < len; i++) { - if (p[i] != '-') { - non_hyphens++; - if ((p[0] & 0xff00) == (p[i] & 0xff00)) - same_bytes++; - else if ((p[0] & 0xf000) == (p[i] & 0xf000)) - same_nibbles++; - } - } - - if (non_hyphens < 2) { - /* - * The number of non '-' characters is less than 2. - */ - return plain_mode; - } else if (same_bytes == non_hyphens) { - /* - * The same most significant byte of the every non '-' - * character is the same value. - */ - return same_byte_mode; - } else if (same_nibbles == non_hyphens) { - /* - * The same most significant nibble of the every non '-' - * character is the same value. - */ - return same_nibble_mode; - } else { - /* - * Not matched above. - */ - return plain_mode; - } - - /* Not reached */ -} |