summaryrefslogtreecommitdiff
path: root/usr/src/cmd/ssh/libssh/common/g11n.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/ssh/libssh/common/g11n.c')
-rw-r--r--usr/src/cmd/ssh/libssh/common/g11n.c1024
1 files changed, 1024 insertions, 0 deletions
diff --git a/usr/src/cmd/ssh/libssh/common/g11n.c b/usr/src/cmd/ssh/libssh/common/g11n.c
new file mode 100644
index 0000000000..52e5ddf4d7
--- /dev/null
+++ b/usr/src/cmd/ssh/libssh/common/g11n.c
@@ -0,0 +1,1024 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <iconv.h>
+#include <ctype.h>
+#include <strings.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "includes.h"
+#include "xmalloc.h"
+#include "xlist.h"
+
+#ifdef MIN
+#undef MIN
+#endif /* MIN */
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y)) /* NOTE: evaluates its arguments more than once */
+
+#define LOCALE_PATH "/usr/bin/locale" /* run with -a to enumerate installed locales */
+
+#define LANGTAG_MAX 5 /* two-char language code, '-' and two-char country code */
+
+static u_char * do_iconv(iconv_t cd, u_int *mul_ptr,
+ const void *buf, u_int len,
+ u_int *outlen, int *err,
+ u_char **err_str); /* iconv() wrapper; returns a heap buffer or NULL */
+
+static int locale_cmp(const void *d1, const void *d2); /* qsort() comparator for locale names */
+static char *g11n_locale2langtag(char *locale); /* locale name -> language tag, NULL if unmappable */
+
+u_int
+g11n_validate_ascii(const char *str, u_int len, u_char **error_str); /* 0 if valid, else EILSEQ */
+
+u_int
+g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str); /* 0 if valid, else EILSEQ */
+
+/*
+ * Map a locale name of the form ll[_CC][.codeset][@modifier] to a
+ * language tag of the form "ll" or "ll-CC".
+ *
+ * "C" and "POSIX" map to "i-default".  Locale names whose language part
+ * is not exactly two letters cannot be mapped and yield NULL.  A country
+ * part that is not exactly two letters is ignored and only the primary
+ * language sub-tag is returned.
+ *
+ * Every non-NULL result is freshly allocated, so the caller can release
+ * it uniformly with xfree().
+ */
+static
+char *
+g11n_locale2langtag(char *locale)
+{
+	char *langtag;
+
+	/* base cases */
+	if (!locale || !*locale)
+		return NULL;
+
+	if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
+		return xstrdup("i-default");
+
+	/*
+	 * Punt for language codes which are not exactly 2 letters.  The
+	 * checks short-circuit on the terminating NUL, so no byte past the
+	 * end of the string is read.  The casts keep isalpha() defined for
+	 * bytes with the high bit set.
+	 */
+	if (!isalpha((unsigned char)locale[0]) ||
+	    !isalpha((unsigned char)locale[1]) ||
+	    (locale[2] != '\0' && locale[2] != '_' &&
+	    locale[2] != '.' && locale[2] != '@'))
+		return NULL;
+
+	/* We have a primary language sub-tag */
+	langtag = xmalloc(LANGTAG_MAX + 1);
+	langtag[0] = locale[0];
+	langtag[1] = locale[1];
+	langtag[2] = '\0';
+
+	/* Append the country sub-tag only if it is exactly two letters */
+	if (locale[2] == '_' &&
+	    isalpha((unsigned char)locale[3]) &&
+	    isalpha((unsigned char)locale[4]) &&
+	    (locale[5] == '\0' || locale[5] == '.' || locale[5] == '@')) {
+		langtag[2] = '-';
+		langtag[3] = locale[3];
+		langtag[4] = locale[4];
+		langtag[5] = '\0';
+	}
+
+	return langtag;
+}
+
+/*
+ * Report whether langtag is exactly the default language tag
+ * "i-default" (case-sensitive).  Returns 1 if so, 0 otherwise.
+ */
+u_int
+g11n_langtag_is_default(char *langtag)
+{
+	static const char default_tag[] = "i-default";
+
+	if (strcmp(langtag, default_tag) != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Score how well a language tag matches a locale name.
+ *
+ * This lang tag / locale matching function works only for two-character
+ * language primary sub-tags and two-character country sub-tags.
+ *
+ * Returns:
+ *   0 - no match (including NULL arguments or malformed names)
+ *   1 - the languages match but the country parts are absent on one
+ *       side, malformed, or different
+ *   2 - exact match: same language and either no country on both sides
+ *       or the same country on both sides
+ *
+ * "i-default" matches only the "C" and "POSIX" locales (score 1).
+ */
+u_int
+g11n_langtag_matches_locale(char *langtag, char *locale)
+{
+	int tag_has_subtag, locale_has_country;
+
+	if (langtag == NULL || locale == NULL)
+		return 0;
+
+	/* Match "i-default" to the default locales only */
+	if (g11n_langtag_is_default(langtag)) {
+		return (strcasecmp(locale, "POSIX") == 0 ||
+		    strcasecmp(locale, "C") == 0);
+	}
+
+	/*
+	 * The language part of the locale must be exactly two letters.
+	 * The tests short-circuit on the terminating NUL; the casts keep
+	 * isalpha() defined for bytes with the high bit set.
+	 */
+	if (!isalpha((unsigned char)locale[0]) ||
+	    !isalpha((unsigned char)locale[1]) ||
+	    (locale[2] != '\0' && locale[2] != '_' &&
+	    locale[2] != '.' && locale[2] != '@'))
+		return 0;
+
+	/* same thing with the langtag */
+	if (!isalpha((unsigned char)langtag[0]) ||
+	    !isalpha((unsigned char)langtag[1]) ||
+	    (langtag[2] != '\0' && langtag[2] != '-'))
+		return 0;
+
+	/* primary language sub-tag and the locale's language must match */
+	if (strncasecmp(langtag, locale, 2) != 0)
+		return 0;
+
+	/* the languages match; now fuzzy-check the country part */
+	tag_has_subtag = (langtag[2] == '-');
+	locale_has_country = (locale[2] == '_');
+
+	/* neither side has more than the language */
+	if (!tag_has_subtag && !locale_has_country)
+		return 2;
+
+	/* only one side has more than the language */
+	if (!tag_has_subtag || !locale_has_country)
+		return 1;
+
+	/* ignore the second sub-tag if it is not exactly two letters */
+	if (!isalpha((unsigned char)langtag[3]) ||
+	    !isalpha((unsigned char)langtag[4]) ||
+	    (langtag[5] != '\0' && langtag[5] != '-'))
+		return 1;
+
+	/* ignore the locale's country if it is not exactly two letters */
+	if (!isalpha((unsigned char)locale[3]) ||
+	    !isalpha((unsigned char)locale[4]) ||
+	    (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
+		return 1;
+
+	/* if the country part matches, return 2 */
+	if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
+		return 2;
+
+	return 1;
+}
+
+/*
+ * Return the name of the current LC_CTYPE locale as reported by
+ * setlocale().  If the query yields nothing, the locale is first set
+ * from the environment and then queried again.  The text domain is
+ * (re)selected on every call.
+ */
+char *
+g11n_getlocale()
+{
+	char *current;
+
+	/* We have one text domain - always set it */
+	(void) textdomain(TEXT_DOMAIN);
+
+	current = setlocale(LC_CTYPE, NULL);
+	if (current == NULL) {
+		/* not set yet: take it from the environment variables */
+		(void) setlocale(LC_CTYPE, "");
+		current = setlocale(LC_CTYPE, NULL);
+	}
+
+	return current;
+}
+
+/*
+ * Switch the given locale category to <locale> and mirror the choice
+ * into the matching environment variable(s).
+ *
+ * A NULL locale is a no-op.  An empty locale means "take it from the
+ * environment", in which case no environment variable is rewritten.
+ * Nothing is done when the category is already set to <locale> or when
+ * setlocale() rejects the request.  The text domain is (re)selected on
+ * every call.
+ */
+void
+g11n_setlocale(int category, const char *locale)
+{
+	const char *env_name = NULL;
+	char *active;
+
+	/* We have one text domain - always set it */
+	(void) textdomain(TEXT_DOMAIN);
+
+	if (locale == NULL)
+		return;
+
+	/* Already in effect?  Then there is nothing to change. */
+	if (*locale != '\0') {
+		active = setlocale(category, NULL);
+		if (active != NULL && strcmp(active, locale) == 0)
+			return;
+	}
+
+	/* A bogus category or locale makes setlocale() return NULL */
+	if (setlocale(category, locale) == NULL)
+		return;
+
+	/* Locale came from the environment: leave the variables alone */
+	if (*locale == '\0')
+		return;
+
+	/* Pick the environment variable that corresponds to the category */
+	switch (category) {
+	case LC_ALL:
+		/* LC_ALL additionally updates LANG */
+		setenv("LANG", locale, 1);
+		env_name = "LC_ALL";
+		break;
+	case LC_CTYPE:
+		env_name = "LC_CTYPE";
+		break;
+	case LC_NUMERIC:
+		env_name = "LC_NUMERIC";
+		break;
+	case LC_TIME:
+		env_name = "LC_TIME";
+		break;
+	case LC_COLLATE:
+		env_name = "LC_COLLATE";
+		break;
+	case LC_MONETARY:
+		env_name = "LC_MONETARY";
+		break;
+	case LC_MESSAGES:
+		env_name = "LC_MESSAGES";
+		break;
+	default:
+		break;
+	}
+
+	if (env_name != NULL)
+		setenv(env_name, locale, 1);
+}
+
+/*
+ * Build a NULL-terminated, sorted list of the locale names printed by
+ * "locale -a".
+ *
+ * Names that do not fit in the 64-byte line buffer and names starting
+ * with "iso_8859" are skipped.  The list is ordered with locale_cmp().
+ * Both the array and each name are heap-allocated and belong to the
+ * caller.  Returns NULL if the locale utility cannot be started.
+ */
+char **
+g11n_getlocales()
+{
+	FILE *locale_out;
+	u_int n_elems, list_size, long_line = 0;
+	char **list;
+	char *nl;
+	char locale[64];	/* 64 bytes is plenty for locale names */
+
+	if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
+		return NULL;
+
+	/*
+	 * Start with room for 192 locales - that's a lot fewer than all
+	 * the locales available for installation, but a lot more than
+	 * what most users will need and install.  One extra slot holds
+	 * the NULL terminator.
+	 */
+	n_elems = 0;
+	list_size = 192;
+	list = xmalloc(sizeof (char *) * (list_size + 1));
+	memset(list, 0, sizeof (char *) * (list_size + 1));
+
+	while (fgets(locale, sizeof (locale), locale_out)) {
+		nl = strchr(locale, '\n');
+
+		/* skip long locale names (if any), chunk by chunk */
+		if (nl == NULL) {
+			long_line = 1;
+			continue;
+		}
+		if (long_line) {
+			/* this chunk is the tail of an over-long name */
+			long_line = 0;
+			continue;
+		}
+
+		if (strncmp(locale, "iso_8859", 8) == 0)
+			continue;	/* ignore names like "iso_8859-1" */
+
+		if (n_elems == list_size) {
+			list_size *= 2;
+			list = xrealloc(list,
+			    (list_size + 1) * sizeof (char *));
+			/* clear exactly the slots from n_elems to the end */
+			memset(&list[n_elems], 0,
+			    sizeof (char *) * (list_size + 1 - n_elems));
+		}
+
+		*nl = '\0';	/* remove the trailing \n */
+
+		list[n_elems++] = xstrdup(locale);
+	}
+	list[n_elems] = NULL;
+	(void) pclose(locale_out);
+
+	/* sort every collected name; an empty list is fine for qsort() */
+	qsort(list, n_elems, sizeof (char *), locale_cmp);
+	return list;
+}
+
+/*
+ * Return the language tag(s) to offer for this process.
+ *
+ * The SSH_LANGS environment variable, when set, is returned verbatim.
+ * Otherwise the current LC_CTYPE locale is converted to a language tag;
+ * an unset, "C" or "POSIX" locale yields "i-default".
+ *
+ * Every non-NULL result is heap-allocated and belongs to the caller.
+ * NULL is returned when the locale name cannot be mapped to a tag.
+ */
+char *
+g11n_getlangs()
+{
+	char *langs, *locale;
+
+	if ((langs = getenv("SSH_LANGS")) != NULL)
+		return xstrdup(langs);
+
+	locale = g11n_getlocale();
+
+	/*
+	 * Handle the default locales here so that the result is always a
+	 * private copy, never a shared constant.
+	 */
+	if (!locale || !*locale ||
+	    strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0)
+		return xstrdup("i-default");
+
+	return g11n_locale2langtag(locale);
+}
+
+/*
+ * Convert a NULL-terminated list of locale names into a comma-separated
+ * string of distinct language tags, in order of first appearance.
+ * Locales that cannot be mapped to a tag are skipped.
+ *
+ * The result is whatever xjoin() produces for the collected tags; the
+ * temporary tag list is released before returning.
+ */
+char *
+g11n_locales2langs(char **locale_set)
+{
+	char **p, **q, **tags;
+	char *langtag, *joined;
+	int locales, n_tags, dup;
+
+	for (locales = 0, p = locale_set; p && *p; p++)
+		locales++;
+
+	/* at most one tag per locale, plus the NULL terminator */
+	tags = xmalloc((locales + 1) * sizeof (char *));
+	memset(tags, 0, (locales + 1) * sizeof (char *));
+	n_tags = 0;
+
+	for (p = locale_set; p && *p; p++) {
+		if ((langtag = g11n_locale2langtag(*p)) == NULL)
+			continue;
+
+		dup = 0;
+		for (q = tags; *q; q++) {
+			if (strcmp(*q, langtag) == 0) {
+				dup = 1;
+				break;
+			}
+		}
+
+		if (!dup) {
+			tags[n_tags++] = langtag;
+			continue;
+		}
+
+		/*
+		 * Duplicate: release it.  "i-default" is left alone
+		 * because the mapping function may hand it out as a
+		 * shared constant rather than as heap memory.
+		 */
+		if (strcmp(langtag, "i-default") != 0)
+			xfree(langtag);
+	}
+	tags[n_tags] = NULL;
+
+	joined = xjoin(tags, ',');
+
+	/* the joined string is independent of the list; drop the list */
+	for (q = tags; *q; q++) {
+		if (strcmp(*q, "i-default") != 0)
+			xfree(*q);
+	}
+	xfree(tags);
+
+	return joined;
+}
+
+/*
+ * qsort() comparator for arrays of C strings: orders the pointed-to
+ * strings with strcmp().
+ */
+static
+int
+sortcmp(const void *d1, const void *d2)
+{
+	const char *const *lhs = d1;
+	const char *const *rhs = d2;
+
+	return strcmp(*lhs, *rhs);
+}
+
+/*
+ * Length of the leading sub-tag of a language tag, i.e. the number of
+ * characters before the first '-' or the end of the string.
+ */
+static
+size_t
+langtag_subtag_len(const char *tag)
+{
+	const char *dash = strchr(tag, '-');
+
+	if (dash == NULL)
+		return strlen(tag);
+	return (size_t)(dash - tag);
+}
+
+/*
+ * Compare two language tags sub-tag by sub-tag (case-sensitive).
+ *
+ * Returns:
+ *   0 - the primary sub-tags differ
+ *   1 - the primary sub-tags agree, but the second sub-tags differ or
+ *       only one of the tags has a second sub-tag
+ *   2 - the tags agree on the primary sub-tag and, if present, on the
+ *       second sub-tag; anything after the second sub-tag is ignored
+ */
+int
+g11n_langtag_match(char *langtag1, char *langtag2)
+{
+	size_t n1 = langtag_subtag_len(langtag1);
+	size_t n2 = langtag_subtag_len(langtag2);
+
+	/* different primary sub-tags: no match */
+	if (n1 != n2 || strncmp(langtag1, langtag2, n1) != 0)
+		return 0;
+
+	/* neither tag continues: exact match */
+	if (langtag1[n1] == '\0' && langtag2[n2] == '\0')
+		return 2;
+
+	/* only one of the tags continues with a second sub-tag */
+	if (langtag1[n1] == '\0' || langtag2[n2] == '\0')
+		return 1;
+
+	/* both continue after a '-': step over it and compare again */
+	langtag1 += n1 + 1;
+	langtag2 += n2 + 1;
+	n1 = langtag_subtag_len(langtag1);
+	n2 = langtag_subtag_len(langtag2);
+
+	if (n1 == n2 && strncmp(langtag1, langtag2, n1) == 0)
+		return 2;
+
+	return 1;
+}
+
+/*
+ * Report whether a NULL-terminated list of tags already contains tag.
+ */
+static
+int
+langtag_list_has(char **list, const char *tag)
+{
+	for (; *list != NULL; list++) {
+		if (strcmp(tag, *list) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Intersect two comma-separated language tag sets.
+ *
+ * The result lists, without repeats, first every tag of set1 that
+ * matches a tag of set2 exactly, then the primary sub-tag of every tag
+ * of set1 that matches a tag of set2 on the language only.  Within each
+ * group the order follows set1.  The result is the string built by
+ * xjoin() from that list.
+ */
+char *
+g11n_langtag_set_intersect(char *set1, char *set2)
+{
+	char **user_tags, **srvr_tags, **common, **u, **s;
+	char *joined, *primary, *dash;
+	u_int n_user, n_srvr, n_common, capacity;
+
+	user_tags = xsplit(set1, ',');
+	srvr_tags = xsplit(set2, ',');
+
+	n_user = 0;
+	for (u = user_tags; u && *u; u++)
+		n_user++;
+	n_srvr = 0;
+	for (s = srvr_tags; s && *s; s++)
+		n_srvr++;
+
+	capacity = n_user + n_srvr;
+	common = (char **) xmalloc(sizeof (char *) * (capacity + 1));
+	common[0] = NULL;
+	n_common = 0;
+
+	/*
+	 * Only the second list is sorted.  The first list keeps its
+	 * order because that order determines the order of the result.
+	 */
+	qsort(srvr_tags, n_srvr, sizeof (char *), sortcmp);
+
+	/* pass 1: exact matches, copied as they appear in set1 */
+	for (u = user_tags; u && *u; u++) {
+		for (s = srvr_tags; s && *s; s++) {
+			if (g11n_langtag_match(*u, *s) != 2)
+				continue;
+			if (!langtag_list_has(common, *u) &&
+			    n_common <= capacity) {
+				common[n_common++] = xstrdup(*u);
+				common[n_common] = NULL;
+			}
+		}
+	}
+
+	/* pass 2: language-only matches contribute the primary sub-tag */
+	for (u = user_tags; u && *u; u++) {
+		for (s = srvr_tags; s && *s; s++) {
+			if (g11n_langtag_match(*u, *s) != 1)
+				continue;
+
+			primary = xstrdup(*u);
+			if ((dash = strchr(primary, '-')) != NULL)
+				*dash = '\0';
+
+			if (!langtag_list_has(common, primary) &&
+			    n_common <= capacity) {
+				common[n_common++] = primary;
+				common[n_common] = NULL;
+			} else {
+				xfree(primary);
+			}
+		}
+	}
+
+	joined = xjoin(common, ',');
+	xfree_split_list(user_tags);
+	xfree_split_list(srvr_tags);
+	xfree_split_list(common);
+
+	return joined;
+}
+
+/*
+ * Pick the client's preferred language tag among those the server also
+ * supports: the first element of the intersection of the two sets.
+ *
+ * Returns a heap-allocated tag owned by the caller, or NULL when the
+ * sets have nothing in common.
+ */
+char *
+g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
+{
+	char *list, *result = NULL;
+	char **xlist;
+
+	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
+	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);
+
+	if (!list)
+		return NULL;
+
+	xlist = xsplit(list, ',');
+
+	xfree(list);
+
+	if (!xlist)
+		return NULL;
+
+	if (*xlist)
+		result = xstrdup(*xlist);
+
+	/* release the split list on every path, including the empty one */
+	xfree_split_list(xlist);
+
+	return result;
+}
+
+/*
+ * Report whether a locale name carries the codeset "UTF-8": the text
+ * after its first '.' must be exactly "UTF-8", optionally followed by
+ * an '@' modifier.
+ */
+static
+int
+locale_has_utf8_codeset(const char *name)
+{
+	const char *dot = strchr(name, '.');
+
+	if (dot == NULL)
+		return 0;
+	if (strncmp(dot + 1, "UTF-8", 5) != 0)
+		return 0;
+	return (dot[6] == '\0' || dot[6] == '@');
+}
+
+/*
+ * Report whether a locale name is one of the default locales
+ * "C", "POSIX" or "common".
+ */
+static
+int
+locale_is_default_name(const char *name)
+{
+	return (strcmp(name, "C") == 0 ||
+	    strcmp(name, "POSIX") == 0 ||
+	    strcmp(name, "common") == 0);
+}
+
+/*
+ * qsort() comparator for locale names.  UTF-8 locales sort before all
+ * others, the default locales ("C", "POSIX", "common") sort after all
+ * others, and ties are broken with a straight strcmp().
+ */
+static
+int
+locale_cmp(const void *d1, const void *d2)
+{
+	char *s1 = *(char **)d1;
+	char *s2 = *(char **)d2;
+	int utf8_1 = locale_has_utf8_codeset(s1);
+	int utf8_2 = locale_has_utf8_codeset(s2);
+	int dflt_1, dflt_2;
+
+	/* prefer UTF-8 locales */
+	if (utf8_1 != utf8_2)
+		return (utf8_1 ? -1 : 1);
+
+	/* prefer any locale over the default locales */
+	dflt_1 = locale_is_default_name(s1);
+	dflt_2 = locale_is_default_name(s2);
+	if (dflt_1 != dflt_2)
+		return (dflt_1 ? 1 : -1);
+
+	return strcmp(s1, s2);
+}
+
+
+/*
+ * For each language tag, find the first locale whose match score (see
+ * g11n_langtag_matches_locale()) equals <wanted> and append a copy of
+ * its name to <result> unless it is already there or the list is full.
+ * <result> stays NULL-terminated and <*n_results> is kept up to date.
+ */
+static
+void
+g11n_append_locale_matches(char **langtags, char **locales, u_int wanted,
+    char **result, u_int *n_results, u_int max_results)
+{
+	char **t, **l;
+	u_int i, seen;
+
+	for (t = langtags; t && *t; t++) {
+		for (l = locales; l && *l; l++) {
+			if (g11n_langtag_matches_locale(*t, *l) != wanted)
+				continue;
+
+			/* look at every stored result, not just a prefix */
+			seen = 0;
+			for (i = 0; i < *n_results; i++) {
+				if (strcmp(*l, result[i]) == 0) {
+					seen = 1;
+					break;
+				}
+			}
+
+			if (!seen && *n_results < max_results) {
+				result[(*n_results)++] = xstrdup(*l);
+				result[*n_results] = NULL;
+			}
+
+			/* only the first matching locale counts per tag */
+			break;
+		}
+	}
+}
+
+/*
+ * Select the locales that match a comma-separated set of language tags.
+ *
+ * The locale list is sorted in place with locale_cmp() - note that this
+ * reorders the caller's array.  The result lists locales with an exact
+ * (language and country) match first, then locales that match on the
+ * language only; within each group the order follows the language tags.
+ *
+ * Returns a NULL-terminated, heap-allocated list of heap-allocated
+ * names that belongs to the caller; it is empty when nothing matches.
+ */
+char **
+g11n_langtag_set_locale_set_intersect(char *langtag_set,
+    char **locale_set)
+{
+	char **langtag_list, **result, **p;
+	u_int n_langtags, n_locales, n_results, max_results;
+
+	/* Count and sort the locales */
+	for (n_locales = 0, p = locale_set; p && *p; p++)
+		n_locales++;
+	if (n_locales > 0)
+		qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
+
+	/* Split and count the language tags */
+	langtag_list = langtag_set ? xsplit(langtag_set, ',') : NULL;
+	for (n_langtags = 0, p = langtag_list; p && *p; p++)
+		n_langtags++;
+
+	max_results = MIN(n_locales, n_langtags) * 2;
+	result = xmalloc(sizeof (char *) * (max_results + 1));
+	*result = NULL;
+	n_results = 0;
+
+	/* More specific matches first, then language-only matches */
+	g11n_append_locale_matches(langtag_list, locale_set, 2,
+	    result, &n_results, max_results);
+	g11n_append_locale_matches(langtag_list, locale_set, 1,
+	    result, &n_results, max_results);
+
+	if (langtag_list)
+		xfree_split_list(langtag_list);
+
+	return result;
+}
+
+/*
+ * Pick the server locale that best matches the client's language tags.
+ *
+ * When srvr_locales is NULL the installed locales are enumerated with
+ * g11n_getlocales() and that temporary list is released again before
+ * returning.  A list supplied by the caller is never freed here, but it
+ * is sorted in place by the intersection step.
+ *
+ * Returns a heap-allocated locale name owned by the caller, or NULL if
+ * no locale matches.
+ */
+char *
+g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
+{
+	char **locales, **results, *result = NULL;
+
+	locales = srvr_locales ? srvr_locales : g11n_getlocales();
+
+	results = g11n_langtag_set_locale_set_intersect(clnt_langtags,
+	    locales);
+
+	if (results != NULL) {
+		if (*results != NULL)
+			result = xstrdup(*results);
+		xfree_split_list(results);
+	}
+
+	/* release the list only if it was obtained here */
+	if (srvr_locales == NULL && locales != NULL)
+		xfree_split_list(locales);
+
+	return result;
+}
+
+
+/*
+ * Functions for validating ASCII and UTF-8 strings
+ *
+ * The error_str parameter is an optional pointer to a u_char *
+ * variable in which to store a string suitable for use with error()
+ * or fatal() or friends.
+ *
+ * The return value is 0 if success, EILSEQ or EINVAL.
+ *
+ */
+
+/*
+ * Check that a string consists only of non-NUL 7-bit characters.
+ *
+ * str        the bytes to check
+ * len        number of bytes to check; 0 means "up to the terminating
+ *            NUL"
+ * error_str  accepted for interface symmetry; not written here
+ *
+ * Returns 0 if all len bytes are 7-bit and non-NUL, EILSEQ otherwise.
+ * A NULL str is valid only together with len == 0.
+ */
+u_int
+g11n_validate_ascii(const char *str, u_int len, u_char **error_str)
+{
+	const u_char *p, *end;
+
+	if (str == NULL)
+		return (len ? EILSEQ : 0);
+
+	if (len == 0)
+		len = strlen(str);
+
+	/* look at exactly len bytes - never past the caller's buffer */
+	p = (const u_char *)str;
+	end = p + len;
+	for (; p < end; p++) {
+		if (*p == '\0' || (*p & 0x80))
+			return EILSEQ;
+	}
+
+	return 0;
+}
+
+/*
+ * Check that a string is well-formed UTF-8.
+ *
+ * str        the bytes to check
+ * len        number of bytes to check; 0 means "up to the terminating
+ *            NUL"
+ * error_str  accepted for interface symmetry; not written here
+ *
+ * Rejected with EILSEQ: stray continuation bytes, the bytes 0xfe and
+ * 0xff, sequences cut short by the end of the input, overlong
+ * encodings, UTF-16 surrogates (U+D800..U+DFFF), U+FFFE and U+FFFF.
+ * The historical 5- and 6-byte forms are still accepted, as before.
+ * Checking stops at the first NUL byte.  A NULL str is treated as
+ * having nothing to check.
+ *
+ * Returns 0 if the input is valid, EILSEQ otherwise.
+ */
+u_int
+g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str)
+{
+	/* smallest code point that needs a sequence of the given length */
+	static const u_int min_codepoint[7] = {
+		0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
+	};
+	const u_char *p, *end;
+	u_int c, l, i;
+
+	if (str == NULL)
+		return 0;
+
+	if (len == 0)
+		len = strlen((const char *)str);
+	end = str + len;
+
+	for (p = str; p < end && *p != '\0'; ) {
+		/* 7-bit chars are fine */
+		if (!(*p & 0x80)) {
+			p++;
+			continue;
+		}
+
+		/*
+		 * The lead byte gives the sequence length; keep only its
+		 * payload bits, whose number shrinks as the length grows.
+		 */
+		if (*p < 0xc0) {
+			return EILSEQ;	/* stray continuation byte */
+		} else if (*p < 0xe0) {
+			l = 2;
+			c = *p & 0x1f;
+		} else if (*p < 0xf0) {
+			l = 3;
+			c = *p & 0x0f;
+		} else if (*p < 0xf8) {
+			l = 4;
+			c = *p & 0x07;
+		} else if (*p < 0xfc) {
+			l = 5;
+			c = *p & 0x03;
+		} else if (*p < 0xfe) {
+			l = 6;
+			c = *p & 0x01;
+		} else {
+			return EILSEQ;
+		}
+
+		/* the whole sequence must fit; ending exactly at end is ok */
+		if ((size_t)(end - p) < l)
+			return EILSEQ;
+
+		/* every following byte must look like 10xxxxxx */
+		for (i = 1; i < l; i++) {
+			if ((p[i] & 0xc0) != 0x80)
+				return EILSEQ;
+			c = (c << 6) | (p[i] & 0x3f);
+		}
+
+		/* overlong detection */
+		if (c < min_codepoint[l])
+			return EILSEQ;
+
+		/* UTF-16 surrogates and other illegal code points */
+		if ((c >= 0xd800 && c <= 0xdfff) ||
+		    c == 0xfffe || c == 0xffff)
+			return EILSEQ;
+
+		p += l;
+	}
+
+	return 0;
+}
+
+/*
+ * Functions for converting to ASCII or UTF-8 from the local codeset
+ * Functions for converting from ASCII or UTF-8 to the local codeset
+ *
+ * The error_str parameter is an optional pointer to a u_char *
+ * variable in which to store a string suitable for use with error()
+ * or fatal() or friends.
+ *
+ * The err parameter is an optional pointer to an integer where 0
+ * (success) or EILSEQ or EINVAL will be stored (failure).
+ *
+ * These functions return NULL if the conversion fails.
+ *
+ */
+
+/*
+ * Convert an ASCII string to the codeset of the current locale.
+ *
+ * The codeset is looked up on the first successful call only; the
+ * conversion descriptor is then kept for all later calls.  If the local
+ * codeset is ASCII itself the string is merely validated and copied.
+ *
+ * str        NUL-terminated ASCII input
+ * err_ptr    optional; receives an errno value on failure
+ * error_str  optional; receives a constant message when the converter
+ *            cannot be opened
+ *
+ * Returns a heap-allocated string owned by the caller, or NULL on
+ * failure.
+ */
+u_char *
+g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str)
+{
+	static u_int initialized = 0;
+	static u_int do_convert = 0;
+	static iconv_t cd;	/* must outlive the call that opened it */
+	const char *codeset;
+	int err;
+
+	if (!initialized) {
+		/*
+		 * iconv_open() fails if the to/from codesets are the
+		 * same, and there are aliases of codesets to boot...
+		 */
+		codeset = nl_langinfo(CODESET);
+		if (strcmp("646", codeset) == 0 ||
+		    strcmp("ASCII", codeset) == 0 ||
+		    strcmp("US-ASCII", codeset) == 0) {
+			do_convert = 0;
+		} else {
+			cd = iconv_open(codeset, "646");
+			if (cd == (iconv_t) -1) {
+				if (err_ptr)
+					*err_ptr = errno;
+				if (error_str)
+					*error_str = (u_char *)
+					    "Cannot convert ASCII strings to the local codeset";
+				/* nothing usable was opened; try again later */
+				return NULL;
+			}
+			do_convert = 1;
+		}
+		initialized = 1;
+	}
+
+	if (!do_convert) {
+		if ((err = g11n_validate_ascii(str, 0, error_str))) {
+			if (err_ptr)
+				*err_ptr = err;
+			return NULL;
+		}
+		return (u_char *) xstrdup(str);
+	}
+
+	return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
+}
+
+/*
+ * Convert a UTF-8 string to the codeset of the current locale.
+ *
+ * The codeset is looked up on the first successful call only; the
+ * conversion descriptor is then kept for all later calls.  If the local
+ * codeset is UTF-8 itself the string is merely validated and copied.
+ *
+ * str        NUL-terminated UTF-8 input
+ * err_ptr    optional; receives an errno value on failure
+ * error_str  optional; receives a constant message when the converter
+ *            cannot be opened
+ *
+ * Returns a heap-allocated string owned by the caller, or NULL on
+ * failure.
+ */
+u_char *
+g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str)
+{
+	static u_int initialized = 0;
+	static u_int do_convert = 0;
+	static iconv_t cd;	/* must outlive the call that opened it */
+	const char *codeset;
+	int err;
+
+	if (!initialized) {
+		/*
+		 * iconv_open() fails if the to/from codesets are the
+		 * same, and there are aliases of codesets to boot...
+		 */
+		codeset = nl_langinfo(CODESET);
+		if (strcmp("UTF-8", codeset) == 0 ||
+		    strcmp("UTF8", codeset) == 0) {
+			do_convert = 0;
+		} else {
+			cd = iconv_open(codeset, "UTF-8");
+			if (cd == (iconv_t) -1) {
+				if (err_ptr)
+					*err_ptr = errno;
+				if (error_str)
+					*error_str = (u_char *)
+					    "Cannot convert UTF-8 strings to the local codeset";
+				/* nothing usable was opened; try again later */
+				return NULL;
+			}
+			do_convert = 1;
+		}
+		initialized = 1;
+	}
+
+	if (!do_convert) {
+		if ((err = g11n_validate_utf8(str, 0, error_str))) {
+			if (err_ptr)
+				*err_ptr = err;
+			return NULL;
+		}
+		return (u_char *) xstrdup((char *) str);
+	}
+
+	return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
+}
+
+/*
+ * Convert a string in the codeset of the current locale to ASCII.
+ *
+ * The codeset is looked up on the first successful call only; the
+ * conversion descriptor is then kept for all later calls.  If the local
+ * codeset is ASCII itself the string is copied unchanged.
+ *
+ * str        NUL-terminated input in the local codeset
+ * err_ptr    optional; receives an errno value on failure
+ * error_str  optional; receives a constant message when the converter
+ *            cannot be opened
+ *
+ * Returns a heap-allocated string owned by the caller, or NULL on
+ * failure.
+ */
+char *
+g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str)
+{
+	static u_int initialized = 0;
+	static u_int do_convert = 0;
+	static iconv_t cd;	/* must outlive the call that opened it */
+	const char *codeset;
+
+	if (!initialized) {
+		/*
+		 * iconv_open() fails if the to/from codesets are the
+		 * same, and there are aliases of codesets to boot...
+		 */
+		codeset = nl_langinfo(CODESET);
+		if (strcmp("646", codeset) == 0 ||
+		    strcmp("ASCII", codeset) == 0 ||
+		    strcmp("US-ASCII", codeset) == 0) {
+			do_convert = 0;
+		} else {
+			cd = iconv_open("646", codeset);
+			if (cd == (iconv_t) -1) {
+				if (err_ptr)
+					*err_ptr = errno;
+				if (error_str)
+					*error_str = (u_char *)
+					    "Cannot convert local codeset strings to ASCII";
+				/* nothing usable was opened; try again later */
+				return NULL;
+			}
+			do_convert = 1;
+		}
+		initialized = 1;
+	}
+
+	if (!do_convert)
+		return xstrdup((char *) str);
+
+	return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
+}
+
+/*
+ * Convert a string in the codeset of the current locale to UTF-8.
+ *
+ * The codeset is looked up on the first successful call only; the
+ * conversion descriptor is then kept for all later calls.  If the local
+ * codeset is UTF-8 itself the string is copied unchanged.
+ *
+ * str        NUL-terminated input in the local codeset
+ * err_ptr    optional; receives an errno value on failure
+ * error_str  optional; receives a constant message when the converter
+ *            cannot be opened
+ *
+ * Returns a heap-allocated string owned by the caller, or NULL on
+ * failure.
+ */
+u_char *
+g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str)
+{
+	static u_int initialized = 0;
+	static u_int do_convert = 0;
+	static iconv_t cd;	/* must outlive the call that opened it */
+	const char *codeset;
+
+	if (!initialized) {
+		/*
+		 * iconv_open() fails if the to/from codesets are the
+		 * same, and there are aliases of codesets to boot...
+		 */
+		codeset = nl_langinfo(CODESET);
+		if (strcmp("UTF-8", codeset) == 0 ||
+		    strcmp("UTF8", codeset) == 0) {
+			do_convert = 0;
+		} else {
+			cd = iconv_open("UTF-8", codeset);
+			if (cd == (iconv_t) -1) {
+				if (err_ptr)
+					*err_ptr = errno;
+				if (error_str)
+					*error_str = (u_char *)
+					    "Cannot convert local codeset strings to UTF-8";
+				/* nothing usable was opened; try again later */
+				return NULL;
+			}
+			do_convert = 1;
+		}
+		initialized = 1;
+	}
+
+	if (!do_convert)
+		return (u_char *) xstrdup((char *) str);
+
+	return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
+}
+
+
+/*
+ * Wrapper around iconv()
+ *
+ * cd       open conversion descriptor
+ * mul_ptr  optional, in/out: the output buffer starts at (len << mul)
+ *          bytes and is doubled on E2BIG; the final mul is stored back
+ * buf      input bytes
+ * len      input length; 0 means "up to the terminating NUL"
+ * outlen   optional; receives the number of bytes produced
+ * err      optional; receives an errno value on failure
+ * err_str  optional; receives a constant message when the output would
+ *          need more than eight times the input size
+ *
+ * Returns a NUL-terminated heap buffer, or NULL on failure or when the
+ * input is NULL or empty (err is not set in the latter case).
+ *
+ * The caller is responsible for freeing the result and for handling
+ * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
+ */
+
+static
+u_char *
+do_iconv(iconv_t cd, u_int *mul_ptr,
+    const void *buf, u_int len,
+    u_int *outlen, int *err,
+    u_char **err_str)
+{
+	size_t inbytesleft, outbytesleft, converted_size, used;
+	char *outbuf;
+	u_char *converted;
+	const char *inbuf;
+	u_int mul = 0;
+
+	if (!buf || !(*(char *)buf))
+		return NULL;
+	if (len == 0)
+		len = strlen(buf);
+
+	/* put the conversion descriptor back into its initial state */
+	(void) iconv(cd, NULL, NULL, NULL, NULL);
+
+	inbuf = (const char *) buf;
+	inbytesleft = len;
+	if (mul_ptr)
+		mul = *mul_ptr;
+	converted_size = ((size_t)len << mul);
+	converted = xmalloc(converted_size + 1);	/* for null */
+	outbuf = (char *) converted;
+	outbytesleft = converted_size;
+
+	while (inbytesleft > 0) {
+		if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) !=
+		    (size_t) -1)
+			continue;
+
+		if (errno != E2BIG) {
+			/*
+			 * Let the caller deal with iconv() errors, probably
+			 * by calling fatal(); record errno before freeing.
+			 */
+			if (err)
+				*err = errno;
+			xfree(converted);
+			return NULL;
+		}
+
+		/* Give up once the output is eight times the input size */
+		if (mul > 2) {
+			if (err_str)
+				*err_str = (u_char *)
+				    "Conversion to UTF-8 failed due to "
+				    "preposterous space requirements";
+			if (err)
+				*err = EILSEQ;
+			xfree(converted);
+			return NULL;
+		}
+
+		/*
+		 * Double the output buffer.  The write position and the
+		 * remaining space are rebuilt from the number of bytes
+		 * already produced, since the buffer may have moved.
+		 */
+		used = converted_size - outbytesleft;
+		mul++;
+		converted_size = ((size_t)len << mul);
+		converted = xrealloc(converted, converted_size + 1);
+		outbuf = (char *) converted + used;
+		outbytesleft = converted_size - used;
+	}
+
+	*outbuf = '\0';	/* ensure null-termination */
+	if (outlen)
+		*outlen = converted_size - outbytesleft;
+	if (mul_ptr)
+		*mul_ptr = mul;
+	return converted;
+}