summaryrefslogtreecommitdiff
path: root/contrib/idn/mdnkit/lib/utf6.c
diff options
context:
space:
mode:
authorInternet Software Consortium, Inc <@isc.org>2007-09-07 14:13:42 -0600
committerLaMont Jones <lamont@debian.org>2007-09-07 14:13:42 -0600
commitb62706a673dc58ca390b47342036e3f3206a14bc (patch)
tree55df184668ce3b1ca9af85d3aab09e6470634a88 /contrib/idn/mdnkit/lib/utf6.c
parent4dd5eedee98c3fc2f40a45078bc7006cd5efd7f0 (diff)
downloadbind9-b62706a673dc58ca390b47342036e3f3206a14bc.tar.gz
9.2.0a2
Diffstat (limited to 'contrib/idn/mdnkit/lib/utf6.c')
-rw-r--r--contrib/idn/mdnkit/lib/utf6.c551
1 files changed, 551 insertions, 0 deletions
diff --git a/contrib/idn/mdnkit/lib/utf6.c b/contrib/idn/mdnkit/lib/utf6.c
new file mode 100644
index 00000000..6f0d9423
--- /dev/null
+++ b/contrib/idn/mdnkit/lib/utf6.c
@@ -0,0 +1,551 @@
+#ifndef lint
+/* RCS identification string for this file; compiled only when 'lint' is not defined. */
+static char *rcsid = "$Id: utf6.c,v 1.1 2001/06/09 00:30:30 tale Exp $";
+#endif
+
+/*
+ * Copyright (c) 2000 Japan Network Information Center. All rights reserved.
+ *
+ * By using this file, you agree to the terms and conditions set forth bellow.
+ *
+ * LICENSE TERMS AND CONDITIONS
+ *
+ * The following License Terms and Conditions apply, unless a different
+ * license is obtained from Japan Network Information Center ("JPNIC"),
+ * a Japanese association, Fuundo Bldg., 1-2 Kanda Ogawamachi, Chiyoda-ku,
+ * Tokyo, Japan.
+ *
+ * 1. Use, Modification and Redistribution (including distribution of any
+ * modified or derived work) in source and/or binary forms is permitted
+ * under this License Terms and Conditions.
+ *
+ * 2. Redistribution of source code must retain the copyright notices as they
+ * appear in each source code file, this License Terms and Conditions.
+ *
+ * 3. Redistribution in binary form must reproduce the Copyright Notice,
+ * this License Terms and Conditions, in the documentation and/or other
+ * materials provided with the distribution. For the purposes of binary
+ * distribution the "Copyright Notice" refers to the following language:
+ * "Copyright (c) Japan Network Information Center. All rights reserved."
+ *
+ * 4. Neither the name of JPNIC may be used to endorse or promote products
+ * derived from this Software without specific prior written approval of
+ * JPNIC.
+ *
+ * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ *
+ * 6. Indemnification by Licensee
+ * Any person or entities using and/or redistributing this Software under
+ * this License Terms and Conditions shall defend indemnify and hold
+ * harmless JPNIC from and against any and all judgements damages,
+ * expenses, settlement liabilities, cost and other liabilities of any
+ * kind as a result of use and redistribution of this Software or any
+ * claim, suite, action, litigation or proceeding by any third party
+ * arising out of or relates to this License Terms and Conditions.
+ *
+ * 7. Governing Law, Jurisdiction and Venue
+ * This License Terms and Conditions shall be governed by and and
+ * construed in accordance with the law of Japan. Any person or entities
+ * using and/or redistributing this Software under this License Terms and
+ * Conditions hereby agrees and consent to the personal and exclusive
+ * jurisdiction and venue of Tokyo District Court of Japan.
+ */
+
+#include <config.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <mdn/result.h>
+#include <mdn/assert.h>
+#include <mdn/logmacro.h>
+#include <mdn/converter.h>
+#include <mdn/utf8.h>
+#include <mdn/debug.h>
+#include <mdn/utf6.h>
+#include <mdn/ace.h>
+#include <mdn/util.h>
+
+/*
+ * Prefix string stored in the converter description 'utf6_ctx'.
+ * A build may override it by defining MDN_UTF6_PREFIX beforehand.
+ */
+#ifndef MDN_UTF6_PREFIX
+#define MDN_UTF6_PREFIX "wq--"
+#endif
+
+/*
+ * Per-mode masks applied by the encoder to every UTF-16 unit before it is
+ * written as a variable length HEX string: only the low 8, 12 or 16 bits
+ * are emitted for each character.
+ */
+#define UTF6_SAME_BYTE_MASK 0x00ff
+#define UTF6_SAME_NIBBLE_MASK 0x0fff
+#define UTF6_PLAIN_MASK 0xffff
+
+/* Number of UTF-16 units in the on-stack scratch buffers of the codec. */
+#define UTF6_BUF_SIZE 128 /* more than enough */
+
+/*
+ * Compression type, as returned by get_compress_mode() and consumed by
+ * utf6_encode_utf16().
+ */
+enum {
+ same_byte_mode, /* the most significant byte of all non
+ '-' characters is the same value */
+ same_nibble_mode, /* the most significant nibble of all non
+ '-' characters is the same value */
+ plain_mode /* not compressed */
+};
+
+static mdn_result_t utf6_decode(const char *from, size_t fromlen,
+ char *to, size_t tolen);
+static mdn_result_t utf6_decode_utf16(const char *from, size_t fromlen,
+ unsigned short *buf, size_t *lenp);
+static mdn_result_t utf6_decode_vlhex(const char *from, size_t len,
+ size_t *reslen,
+ unsigned short *value);
+static mdn_result_t utf6_encode(const char *from, size_t fromlen,
+ char *to, size_t tolen);
+static mdn_result_t utf6_encode_utf16(const unsigned short *p,
+ size_t len, char *to, size_t tolen,
+ int compress);
+static mdn_result_t utf6_encode_vlhex(unsigned short value, char *to,
+ size_t tolen, size_t *reslen);
+static int get_compress_mode(const unsigned short *p, size_t len);
+
+/*
+ * Converter description handed to mdn__ace_convert() by
+ * mdn__utf6_convert().
+ * NOTE(review): the field layout is declared in <mdn/ace.h>, which is not
+ * visible here; the initializers are presumably the prefix/suffix selector,
+ * the prefix string, the encoder and the decoder -- confirm against the
+ * header.
+ */
+static mdn__ace_t utf6_ctx = {
+ mdn__ace_prefix,
+ MDN_UTF6_PREFIX,
+ utf6_encode,
+ utf6_decode,
+};
+
+/*
+ * Open routine of the UTF-6 converter.  The converter keeps no private
+ * state, so all arguments are ignored and '*privdata' is left untouched.
+ * Always returns mdn_success.
+ */
+/* ARGSUSED */
+mdn_result_t
+mdn__utf6_open(mdn_converter_t ctx, mdn_converter_dir_t dir, void **privdata) {
+	(void)ctx;
+	(void)dir;
+	(void)privdata;
+
+	return mdn_success;
+}
+
+/*
+ * Close routine of the UTF-6 converter.  Nothing was set up by the open
+ * routine, so there is nothing to release; all arguments are ignored.
+ * Always returns mdn_success.
+ */
+/* ARGSUSED */
+mdn_result_t
+mdn__utf6_close(mdn_converter_t ctx, void *privdata, mdn_converter_dir_t dir) {
+	(void)ctx;
+	(void)privdata;
+	(void)dir;
+
+	return mdn_success;
+}
+
+/*
+ * Convert 'from' in direction 'dir' (mdn_converter_l2u or
+ * mdn_converter_u2l) and store the result in 'to', whose capacity is
+ * 'tolen' bytes.  The actual work is delegated to mdn__ace_convert() using
+ * the UTF-6 description 'utf6_ctx'; 'privdata' is not used.
+ *
+ * Returns the status reported by mdn__ace_convert().
+ */
+mdn_result_t
+mdn__utf6_convert(mdn_converter_t ctx, void *privdata, mdn_converter_dir_t dir,
+		  const char *from, char *to, size_t tolen)
+{
+	mdn_result_t status;
+
+	assert(ctx != NULL &&
+	       (dir == mdn_converter_l2u || dir == mdn_converter_u2l));
+
+	TRACE(("mdn__utf6_convert(dir=%s,from=\"%s\")\n",
+	       dir == mdn_converter_l2u ? "l2u" : "u2l",
+	       mdn_debug_xstring(from, 20)));
+
+	status = mdn__ace_convert(&utf6_ctx, dir, from, to, tolen);
+
+	/* Only dump the output when the conversion actually produced one. */
+	if (status == mdn_success) {
+		DUMP(("mdn__utf6_convert: \"%s\"\n", mdn_debug_xstring(to, 70)));
+	}
+
+	return (status);
+}
+
+/*
+ * Decode the UTF-6 string 'from' ('fromlen' bytes) into a NUL-terminated
+ * UTF-8 string stored in 'to' (capacity 'tolen' bytes).
+ *
+ * The input is first expanded to UTF-16 in a scratch buffer (on the stack
+ * for short input, on the heap otherwise) and then converted to UTF-8.
+ *
+ * Returns mdn_success on success, mdn_nomemory if the scratch buffer
+ * cannot be allocated, mdn_buffer_overflow if 'to' has no room for the
+ * terminating NUL, or the error reported by utf6_decode_utf16() or
+ * mdn_util_utf16toutf8().
+ */
+static mdn_result_t
+utf6_decode(const char *from, size_t fromlen, char *to, size_t tolen) {
+	unsigned short *buf;
+	unsigned short local_buf[UTF6_BUF_SIZE];
+	size_t len = 0;		/* stays 0 if the decoder stores nothing */
+	size_t reslen = 0;
+	mdn_result_t r;
+
+	/*
+	 * Allocate sufficient buffer.  Each input character produces at
+	 * most one UTF-16 unit; one spare element is reserved in case the
+	 * decoder stores a terminator after the last unit.  Hence
+	 * 'fromlen + 1' elements are required, and the local buffer is
+	 * only usable when fromlen < UTF6_BUF_SIZE.
+	 */
+	if (fromlen >= UTF6_BUF_SIZE) {
+		/* Refuse sizes whose byte count would wrap around. */
+		if (fromlen >= (size_t)-1 / sizeof(*buf))
+			return (mdn_nomemory);
+		buf = malloc(sizeof(*buf) * (fromlen + 1));
+		if (buf == NULL)
+			return (mdn_nomemory);
+	} else {
+		/* Use local buffer. */
+		buf = local_buf;
+	}
+
+	/*
+	 * Decode the variable length HEX string and decompress.
+	 */
+	r = utf6_decode_utf16(from, fromlen, buf, &len);
+	if (r != mdn_success)
+		goto ret;
+
+	/*
+	 * Now 'buf' holds the decompressed string, which must contain
+	 * UTF-16 characters.  Convert them into UTF-8.
+	 */
+	r = mdn_util_utf16toutf8(buf, len, to, tolen, &reslen);
+	if (r != mdn_success)
+		goto ret;
+
+	/*
+	 * Terminate with NUL.
+	 */
+	if (tolen <= reslen) {
+		r = mdn_buffer_overflow;
+		goto ret;
+	}
+	to[reslen] = '\0';
+
+ret:
+	if (buf != local_buf)
+		free(buf);
+	return (r);
+}
+
+/*
+ * Expand the UTF-6 string 'from' ('fromlen' bytes) into UTF-16 units.
+ *
+ * An optional leading 'y'/'Y' (same byte mode) or 'z'/'Z' (same nibble
+ * mode) is followed by a variable length HEX value giving the common most
+ * significant byte or nibble of all characters.  After that, each '-' is
+ * copied literally and every other character is a variable length HEX
+ * value that is combined with the common part.
+ *
+ *   buf   receives the decoded units; it must have room for at least
+ *         'fromlen' elements.  It is NOT terminated: use '*lenp'.
+ *   lenp  receives the number of units stored (0 for empty input).
+ *
+ * Returns mdn_success, or mdn_invalid_encoding for malformed input.
+ */
+static mdn_result_t
+utf6_decode_utf16(const char *from, size_t fromlen,
+		  unsigned short *buf, size_t *lenp)
+{
+	mdn_result_t r;
+	unsigned short value;
+	unsigned short cpart;	/* common high bits shared by all characters */
+	unsigned short vmax;	/* largest per-character value for the mode */
+	size_t len;
+	size_t reslen;
+
+	/*
+	 * Every decoded unit consumes at least one input character, so at
+	 * most 'fromlen' elements of 'buf' are ever written.
+	 */
+
+	*lenp = 0;
+	if (fromlen == 0)
+		return (mdn_success);
+
+	switch (*from) {
+	case 'y':
+	case 'Y':
+		/*
+		 * same_byte_mode.
+		 */
+		from++;
+		fromlen--;
+		r = utf6_decode_vlhex(from, fromlen, &reslen, &value);
+		/* The common part is a single byte; reject anything wider. */
+		if (r != mdn_success || value > 0x00ff)
+			return (mdn_invalid_encoding);
+		from += reslen;
+		fromlen -= reslen;
+
+		cpart = (unsigned short)(value << 8);
+		vmax = 0x00ff;
+		break;
+
+	case 'z':
+	case 'Z':
+		/*
+		 * same_nibble_mode.
+		 */
+		from++;
+		fromlen--;
+		r = utf6_decode_vlhex(from, fromlen, &reslen, &value);
+		/* The common part is a single nibble; reject anything wider. */
+		if (r != mdn_success || value > 0x000f)
+			return (mdn_invalid_encoding);
+		from += reslen;
+		fromlen -= reslen;
+
+		cpart = (unsigned short)(value << 12);
+		vmax = 0x0fff;
+		break;
+
+	default:
+		/*
+		 * plain_mode.
+		 */
+		cpart = 0x0000;
+		vmax = 0xffff;
+		break;
+	}
+
+	for (len = 0; fromlen > 0; len++) {
+		if (*from == '-') {
+			/* Hyphens are never compressed. */
+			buf[len] = '-';
+			from++;
+			fromlen--;
+		} else {
+			r = utf6_decode_vlhex(from, fromlen, &reslen, &value);
+			if (r != mdn_success || value > vmax)
+				return (mdn_invalid_encoding);
+			buf[len] = cpart + value;
+			from += reslen;
+			fromlen -= reslen;
+		}
+	}
+
+	*lenp = len;
+	return (mdn_success);
+}
+
+/*
+ * Decode one variable length HEX string starting at 'from'; at most 'len'
+ * bytes are examined.
+ *
+ * The first character must be in [g-v] (or [G-V]) and encodes the most
+ * significant nibble.  It may be followed by up to three characters in
+ * [0-9a-fA-F], one nibble each, so the result always fits in 16 bits.
+ *
+ *   reslen  receives the number of input bytes consumed (1..4)
+ *   value   receives the decoded value
+ *
+ * Returns mdn_success, or mdn_invalid_encoding if the input is empty, the
+ * first character is not a valid leading digit, or more than three HEX
+ * digits follow it.  On failure 'reslen' and 'value' are not modified.
+ */
+static mdn_result_t
+utf6_decode_vlhex(const char *from, size_t len, size_t *reslen,
+		  unsigned short *value) {
+	unsigned int v;
+	size_t consumed;
+
+	/*
+	 * Decode the first character of a variable length HEX string.
+	 * The character must be in the set [ghijklmnopqrstuv].
+	 */
+	if (len == 0)
+		return (mdn_invalid_encoding);
+
+	if ('G' <= *from && *from <= 'V')
+		v = (unsigned int)(*from - 'G');
+	else if ('g' <= *from && *from <= 'v')
+		v = (unsigned int)(*from - 'g');
+	else
+		return (mdn_invalid_encoding);
+	consumed = 1;
+
+	/*
+	 * Decode the remaining characters of the variable length HEX
+	 * string.  Every character must be in the set [0123456789abcdef].
+	 */
+	while (consumed < len) {
+		int c = from[consumed];
+		unsigned int digit;
+
+		if ('0' <= c && c <= '9')
+			digit = (unsigned int)(c - '0');
+		else if ('A' <= c && c <= 'F')
+			digit = 0x0a + (unsigned int)(c - 'A');
+		else if ('a' <= c && c <= 'f')
+			digit = 0x0a + (unsigned int)(c - 'a');
+		else
+			break;
+
+		/*
+		 * A fifth digit would not fit in 16 bits; reject it
+		 * instead of silently discarding the high bits.
+		 */
+		if (consumed >= 4)
+			return (mdn_invalid_encoding);
+
+		v = (v << 4) | digit;
+		consumed++;
+	}
+
+	*value = (unsigned short)v;
+	*reslen = consumed;
+	return (mdn_success);
+}
+
+/*
+ * Encode the UTF-8 string 'from' ('fromlen' bytes) as UTF-6 and store the
+ * result in 'to' (capacity 'tolen' bytes).
+ *
+ * The input is converted to UTF-16 in a scratch buffer that starts on the
+ * stack and is replaced by a heap buffer of twice the size each time
+ * mdn_util_utf8toutf16() reports mdn_buffer_overflow.  The UTF-16 string
+ * is then compressed and written by utf6_encode_utf16() using the mode
+ * chosen by get_compress_mode().
+ *
+ * Returns mdn_nomemory if the scratch buffer cannot be grown, otherwise
+ * the status of the UTF-16 conversion or of utf6_encode_utf16().
+ */
+static mdn_result_t
+utf6_encode(const char *from, size_t fromlen, char *to, size_t tolen) {
+	unsigned short *buf;
+	unsigned short local_buf[UTF6_BUF_SIZE];	/* UTF-16 */
+	mdn_result_t r;
+	size_t buflen, len;
+
+	/*
+	 * Convert to UTF-16.
+	 */
+	buf = local_buf;
+	buflen = UTF6_BUF_SIZE;
+	for (;;) {
+		r = mdn_util_utf8toutf16(from, fromlen, buf, buflen, &len);
+		if (r != mdn_buffer_overflow)
+			break;
+
+		/*
+		 * The conversion restarts from scratch, so the old
+		 * contents are not needed: release the old heap buffer
+		 * first.  This also avoids losing it if the new
+		 * allocation fails.
+		 */
+		if (buf != local_buf)
+			free(buf);
+
+		/* Refuse sizes whose byte count would wrap around. */
+		if (buflen > (size_t)-1 / (2 * sizeof(*buf)))
+			return (mdn_nomemory);
+		buflen *= 2;
+
+		buf = malloc(sizeof(*buf) * buflen);
+		if (buf == NULL)
+			return (mdn_nomemory);
+	}
+
+	/*
+	 * Compress, encode as variable length HEX and output.
+	 */
+	if (r == mdn_success)
+		r = utf6_encode_utf16(buf, len, to, tolen,
+				      get_compress_mode(buf, len));
+
+	if (buf != local_buf)
+		free(buf);
+	return (r);
+}
+
+/*
+ * Write the UTF-16 string 'p' ('len' units) to 'to' (capacity 'tolen'
+ * bytes) as a NUL-terminated UTF-6 string.
+ *
+ * Depending on 'compress_mode' the output starts with
+ *   same_byte_mode:    'y' + HEX of the common most significant byte
+ *   same_nibble_mode:  'z' + HEX of the common most significant nibble
+ *   otherwise:         nothing (plain mode)
+ * followed, for every unit, by either a literal '-' or the variable
+ * length HEX form of the bits that are not covered by the common part.
+ *
+ * The common part is taken from the first unit that is not '-', because a
+ * hyphen is never compressed and says nothing about the other characters.
+ *
+ * Returns mdn_success, or mdn_buffer_overflow if 'to' is too small
+ * (including for the terminating NUL).
+ */
+static mdn_result_t
+utf6_encode_utf16(const unsigned short *p, size_t len,
+		  char *to, size_t tolen, int compress_mode)
+{
+	mdn_result_t r;
+	unsigned short mask;
+	unsigned short ref = 0;	/* first non '-' unit, source of common part */
+	size_t reslen;
+	size_t i;
+
+	/* Empty input encodes to an empty, but still terminated, string. */
+	if (len == 0) {
+		if (tolen < 1)
+			return (mdn_buffer_overflow);
+		*to = '\0';
+		return (mdn_success);
+	}
+
+	for (i = 0; i < len; i++) {
+		if (p[i] != '-') {
+			ref = p[i];
+			break;
+		}
+	}
+
+	switch (compress_mode) {
+	case same_byte_mode:
+		mask = UTF6_SAME_BYTE_MASK;
+
+		if (tolen < 1)
+			return (mdn_buffer_overflow);
+		*to++ = 'y';
+		tolen--;
+		r = utf6_encode_vlhex((ref >> 8) & 0x00ff, to, tolen,
+				      &reslen);
+		if (r != mdn_success)
+			return (r);
+		to += reslen;
+		tolen -= reslen;
+		break;
+
+	case same_nibble_mode:
+		mask = UTF6_SAME_NIBBLE_MASK;
+
+		if (tolen < 1)
+			return (mdn_buffer_overflow);
+		*to++ = 'z';
+		tolen--;
+		/*
+		 * Emit only the top nibble: the decoder multiplies this
+		 * value by 0x1000 to rebuild the common part.
+		 */
+		r = utf6_encode_vlhex((ref >> 12) & 0x000f, to, tolen,
+				      &reslen);
+		if (r != mdn_success)
+			return (r);
+		to += reslen;
+		tolen -= reslen;
+		break;
+
+	default:
+		mask = UTF6_PLAIN_MASK;
+		break;
+	}
+
+	for (i = 0; i < len; i++) {
+		if (p[i] == '-') {
+			/* Hyphens are copied through unchanged. */
+			if (tolen < 1)
+				return (mdn_buffer_overflow);
+			*to++ = '-';
+			tolen--;
+		} else {
+			r = utf6_encode_vlhex(p[i] & mask, to, tolen, &reslen);
+			if (r != mdn_success)
+				return (r);
+			to += reslen;
+			tolen -= reslen;
+		}
+	}
+
+	if (tolen < 1)
+		return (mdn_buffer_overflow);
+
+	*to = '\0';
+	return (mdn_success);
+}
+
+/*
+ * Encode 'value' (0x0000 - 0xffff) as a variable length HEX string of one
+ * to four characters, without leading zero digits.  The first character is
+ * taken from [g-v], every following one from [0-9a-f].
+ *
+ *   to, tolen  output position and the number of bytes available there
+ *   reslen     receives the number of characters written
+ *
+ * Returns mdn_success, or mdn_buffer_overflow if the string does not fit;
+ * in that case nothing is written and '*reslen' is left untouched.  No
+ * terminating NUL is appended.
+ */
+static mdn_result_t
+utf6_encode_vlhex(unsigned short value, char *to, size_t tolen,
+		  size_t *reslen) {
+
+	static const char *lead_digits = "ghijklmnopqrstuv";
+	static const char *hex_digits = "0123456789abcdef";
+	size_t ndigits;
+	size_t pos;
+
+	/* Number of nibbles needed to represent 'value'. */
+	if (value <= 0x000f)
+		ndigits = 1;
+	else if (value <= 0x00ff)
+		ndigits = 2;
+	else if (value <= 0x0fff)
+		ndigits = 3;
+	else
+		ndigits = 4;
+
+	if (tolen < ndigits)
+		return (mdn_buffer_overflow);
+
+	/* Most significant nibble first; only it uses the [g-v] alphabet. */
+	for (pos = 0; pos < ndigits; pos++) {
+		unsigned int shift = (unsigned int)(4 * (ndigits - 1 - pos));
+		unsigned int nibble = (value >> shift) & 0x0f;
+
+		if (pos == 0)
+			to[pos] = lead_digits[nibble];
+		else
+			to[pos] = hex_digits[nibble];
+	}
+
+	*reslen = ndigits;
+	return (mdn_success);
+}
+
+/*
+ * Choose the compression mode for the UTF-16 string 'p' ('len' units).
+ *
+ * Hyphens are ignored.  Every other unit is compared with p[0]:
+ *   - all share its most significant byte    -> same_byte_mode
+ *   - none shares the byte, but all share
+ *     its most significant nibble            -> same_nibble_mode
+ *   - anything else, or fewer than two
+ *     non '-' units                          -> plain_mode
+ *
+ * NOTE(review): the reference is p[0] even when p[0] itself is '-', and a
+ * unit that matches the byte is not counted as matching the nibble, so
+ * same_nibble_mode is selected only when p[0] is '-' and every other unit
+ * lies in 0x0100 - 0x0fff.  The selection rule is kept as is because it
+ * determines the emitted encoding; confirm against the UTF-6
+ * specification before changing it.
+ */
+static int
+get_compress_mode(const unsigned short *p, size_t len) {
+	/* size_t counters: cannot wrap and compare cleanly with 'len'. */
+	size_t non_hyphens = 0;
+	size_t same_bytes = 0;
+	size_t same_nibbles = 0;
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (p[i] == '-')
+			continue;
+		non_hyphens++;
+		if ((p[0] & 0xff00) == (p[i] & 0xff00))
+			same_bytes++;
+		else if ((p[0] & 0xf000) == (p[i] & 0xf000))
+			same_nibbles++;
+	}
+
+	/*
+	 * With fewer than two non '-' characters (including empty input)
+	 * the mode prefix would not save anything.
+	 */
+	if (non_hyphens < 2)
+		return plain_mode;
+
+	/* Every non '-' character has the same most significant byte. */
+	if (same_bytes == non_hyphens)
+		return same_byte_mode;
+
+	/* Every non '-' character has the same most significant nibble. */
+	if (same_nibbles == non_hyphens)
+		return same_nibble_mode;
+
+	/* Not matched above. */
+	return plain_mode;
+}