diff options
author | Nobutomo Nakano <Nobutomo.Nakano@Sun.COM> | 2009-05-11 12:11:53 -0700 |
---|---|---|
committer | Nobutomo Nakano <Nobutomo.Nakano@Sun.COM> | 2009-05-11 12:11:53 -0700 |
commit | 6f786ace10b9c0c7c5515e525fb660fbccfda6a3 (patch) | |
tree | a455902edb891743770d97ebb458821963392a26 /usr/src/cmd/ssh/libssh/common/g11n.c | |
parent | 1c7408c96b4914bcab86c14af29f1af003397b16 (diff) | |
download | illumos-gate-6f786ace10b9c0c7c5515e525fb660fbccfda6a3.tar.gz |
6740240 ssh: password prompt is garbled on ja_JP.PCK/ja_JP.eucJP locale
Diffstat (limited to 'usr/src/cmd/ssh/libssh/common/g11n.c')
-rw-r--r-- | usr/src/cmd/ssh/libssh/common/g11n.c | 589 |
1 files changed, 238 insertions, 351 deletions
diff --git a/usr/src/cmd/ssh/libssh/common/g11n.c b/usr/src/cmd/ssh/libssh/common/g11n.c index 2ea20d9467..ac35a1c8c5 100644 --- a/usr/src/cmd/ssh/libssh/common/g11n.c +++ b/usr/src/cmd/ssh/libssh/common/g11n.c @@ -18,17 +18,16 @@ * * CDDL HEADER END * - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <errno.h> #include <locale.h> #include <langinfo.h> #include <iconv.h> #include <ctype.h> +#include <wctype.h> #include <strings.h> #include <string.h> #include <stdio.h> @@ -36,6 +35,8 @@ #include "includes.h" #include "xmalloc.h" #include "xlist.h" +#include "compat.h" +#include "log.h" #ifdef MIN #undef MIN @@ -48,14 +49,17 @@ /* two-char country code, '-' and two-char region code */ #define LANGTAG_MAX 5 -static uchar_t *do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, - uint_t len, uint_t *outlen, int *err, uchar_t **err_str); - static int locale_cmp(const void *d1, const void *d2); static char *g11n_locale2langtag(char *locale); -uint_t g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str); -uint_t g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str); +static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str); + +/* + * native_codeset records the codeset of the default system locale. + * It is used to convert the contents of file (eg /etc/issue) which is + * supposed to be in the codeset of default system locale. + */ +static char *native_codeset; /* * Convert locale string name into a language tag. The caller is responsible for @@ -213,6 +217,13 @@ g11n_setlocale(int category, const char *locale) { char *curr; + if (native_codeset == NULL) { + /* set default locale, and record current codeset */ + (void) setlocale(LC_ALL, ""); + curr = nl_langinfo(CODESET); + native_codeset = xstrdup(curr); + } + /* we have one text domain - always set it */ (void) textdomain(TEXT_DOMAIN); @@ -662,398 +673,274 @@ err: return (result); } - /* - * Functions for validating ASCII and UTF-8 strings + * Functions for converting to UTF-8 from the local codeset and + * converting from UTF-8 to the local codeset. * - * The error_str parameter is an optional pointer to a char variable - * where to store a string suitable for use with error() or fatal() or - * friends. - * - * The return value is 0 if success, EILSEQ or EINVAL. + * The error_str parameter is an pointer to a char variable where to + * store a string suitable for use with error() or fatal() or friends. + * It is also used for an error indicator when NULL is returned. * + * If conversion isn't necessary, *error_str is set to NULL, and + * NULL is returned. + * If conversion error occured, *error_str points to an error message, + * and NULL is returned. */ -uint_t -g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str) +char * +g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str) { - uchar_t *p; + static char *last_codeset; + static iconv_t cd = (iconv_t)-1; + char *codeset; - for (p = (uchar_t *)str; p && *p && (!(*p & 0x80)); p++) - ; + *error_str = NULL; - if (len && ((p - (uchar_t *)str) != len)) - return (EILSEQ); + codeset = nl_langinfo(CODESET); - return (0); -} - -uint_t -g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str) -{ - uchar_t *p; - uint_t c, l; - - if (len == 0) - len = strlen((const char *)str); - - for (p = (uchar_t *)str; p && (p - str < len) && *p; ) { - /* 8-bit chars begin a UTF-8 sequence */ - if (*p & 0x80) { - /* get sequence length and sanity check first byte */ - if (*p < 0xc0) - return (EILSEQ); - else if (*p < 0xe0) - l = 2; - else if (*p < 0xf0) - l = 3; - else if (*p < 0xf8) - l = 4; - else if (*p < 0xfc) - l = 5; - else if (*p < 0xfe) - l = 6; - else - return (EILSEQ); - - if ((p + l - str) >= len) - return (EILSEQ); - - /* overlong detection - build codepoint */ - c = *p & 0x3f; - /* shift c bits from first byte */ - c = c << (6 * (l - 1)); - - if (l > 1) { - if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80)) - c = c | ((*(p + 1) & 0x3f) << - (6 * (l - 2))); - else - return (EILSEQ); - - if (c < 0x80) - return (EILSEQ); - } - - if (l > 2) { - if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80)) - c = c | ((*(p + 2) & 0x3f) << - (6 * (l - 3))); - else - return (EILSEQ); + if (strcmp(codeset, "UTF-8") == 0) + return (NULL); - if (c < 0x800) - return (EILSEQ); - } + if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { + if (last_codeset != NULL) { + xfree(last_codeset); + last_codeset = NULL; + } + if (cd != (iconv_t)-1) + (void) iconv_close(cd); - if (l > 3) { - if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80)) - c = c | ((*(p + 3) & 0x3f) << - (6 * (l - 4))); - else - return (EILSEQ); + if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) { + *error_str = gettext("Cannot convert UTF-8 " + "strings to the local codeset"); + return (NULL); + } + last_codeset = xstrdup(codeset); + } + return (do_iconv(cd, str, lenp, error_str)); +} - if (c < 0x10000) - return (EILSEQ); - } +char * +g11n_convert_to_utf8(const char *str, uint_t *lenp, + int native, char **error_str) +{ + static char *last_codeset; + static iconv_t cd = (iconv_t)-1; + char *codeset; - if (l > 4) { - if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80)) - c = c | ((*(p + 4) & 0x3f) << - (6 * (l - 5))); - else - return (EILSEQ); + *error_str = NULL; - if (c < 0x200000) - return (EILSEQ); - } + if (native) + codeset = native_codeset; + else + codeset = nl_langinfo(CODESET); - if (l > 5) { - if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80)) - c = c | (*(p + 5) & 0x3f); - else - return (EILSEQ); + if (strcmp(codeset, "UTF-8") == 0) + return (NULL); - if (c < 0x4000000) - return (EILSEQ); - } + if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { + if (last_codeset != NULL) { + xfree(last_codeset); + last_codeset = NULL; + } + if (cd != (iconv_t)-1) + (void) iconv_close(cd); - /* - * check for UTF-16 surrogates ifs other illegal - * UTF-8 * points - */ - if (((c <= 0xdfff) && (c >= 0xd800)) || - (c == 0xfffe) || (c == 0xffff)) - return (EILSEQ); - p += l; + if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) { + *error_str = gettext("Cannot convert the " + "local codeset strings to UTF-8"); + return (NULL); } - /* 7-bit chars are fine */ - else - p++; + last_codeset = xstrdup(codeset); } - return (0); + return (do_iconv(cd, str, lenp, error_str)); } /* - * Functions for converting to ASCII or UTF-8 from the local codeset - * Functions for converting from ASCII or UTF-8 to the local codeset - * - * The error_str parameter is an optional pointer to a char variable - * where to store a string suitable for use with error() or fatal() or - * friends. - * - * The err parameter is an optional pointer to an integer where 0 - * (success) or EILSEQ or EINVAL will be stored (failure). - * - * These functions return NULL if the conversion fails. + * Wrapper around iconv() * + * The caller is responsible for freeing the result. NULL is returned when + * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). + * The caller must ensure that the input string isn't NULL pointer. */ -uchar_t * -g11n_convert_from_ascii(const char *str, int *err_ptr, uchar_t **error_str) +static char * +do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str) { - static uint_t initialized = 0; - static uint_t do_convert = 0; - iconv_t cd; - int err; - - if (!initialized) { - /* - * iconv_open() fails if the to/from codesets are the - * same, and there are aliases of codesets to boot... - */ - if (strcmp("646", nl_langinfo(CODESET)) == 0 || - strcmp("ASCII", nl_langinfo(CODESET)) == 0 || - strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { - initialized = 1; - do_convert = 0; - } else { - cd = iconv_open(nl_langinfo(CODESET), "646"); - if (cd == (iconv_t)-1) { - if (err_ptr) - *err_ptr = errno; - if (error_str) - *error_str = (uchar_t *)"Cannot " - "convert ASCII strings to the local" - " codeset"; + int ilen, olen; + size_t ileft, oleft; + char *ostr, *optr; + const char *istr; + + ilen = *lenp; + olen = ilen + 1; + + ostr = NULL; + for (;;) { + olen *= 2; + oleft = olen; + ostr = optr = xrealloc(ostr, olen); + istr = (const char *)str; + if ((ileft = ilen) == 0) + break; + + if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) { + /* success: generate reset sequence */ + if (iconv(cd, NULL, NULL, + &optr, &oleft) == (size_t)-1 && errno == E2BIG) { + continue; } - initialized = 1; - do_convert = 1; + break; } - } - - if (!do_convert) { - if ((err = g11n_validate_ascii(str, 0, error_str))) { - if (err_ptr) - *err_ptr = err; + /* failed */ + if (errno != E2BIG) { + oleft = olen; + (void) iconv(cd, NULL, NULL, &ostr, &oleft); + xfree(ostr); + *err_str = gettext("Codeset conversion failed"); return (NULL); - } else - return ((uchar_t *)xstrdup(str)); - } - - return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); -} - -uchar_t * -g11n_convert_from_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) -{ - static uint_t initialized = 0; - static uint_t do_convert = 0; - iconv_t cd; - int err; - - if (!initialized) { - /* - * iconv_open() fails if the to/from codesets are the - * same, and there are aliases of codesets to boot... - */ - if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || - strcmp("UTF8", nl_langinfo(CODESET)) == 0) { - initialized = 1; - do_convert = 0; - } else { - cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); - if (cd == (iconv_t)-1) { - if (err_ptr) - *err_ptr = errno; - if (error_str) - *error_str = (uchar_t *)"Cannot " - "convert UTF-8 strings to the " - "local codeset"; - } - initialized = 1; - do_convert = 1; } } + olen = optr - ostr; + optr = xmalloc(olen + 1); + (void) memcpy(optr, ostr, olen); + xfree(ostr); - if (!do_convert) { - if ((err = g11n_validate_utf8(str, 0, error_str))) { - if (err_ptr) - *err_ptr = err; - return (NULL); - } else - return ((uchar_t *)xstrdup((char *)str)); - } + optr[olen] = '\0'; + *lenp = olen; - return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); + return (optr); } +/* + * A filter for output string. Control and unprintable characters + * are converted into visible form (eg "\ooo"). + */ char * -g11n_convert_to_ascii(const uchar_t *str, int *err_ptr, uchar_t **error_str) +g11n_filter_string(char *s) { - static uint_t initialized = 0; - static uint_t do_convert = 0; - iconv_t cd; - - if (!initialized) { - /* - * iconv_open() fails if the to/from codesets are the - * same, and there are aliases of codesets to boot... - */ - if (strcmp("646", nl_langinfo(CODESET)) == 0 || - strcmp("ASCII", nl_langinfo(CODESET)) == 0 || - strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { - initialized = 1; - do_convert = 0; - } else { - cd = iconv_open("646", nl_langinfo(CODESET)); - if (cd == (iconv_t)-1) { - if (err_ptr) - *err_ptr = errno; - if (error_str) - *error_str = (uchar_t *)"Cannot " - "convert UTF-8 strings to the " - "local codeset"; - } - initialized = 1; - do_convert = 1; + int mb_cur_max = MB_CUR_MAX; + int mblen, len; + char *os = s; + wchar_t wc; + char *obuf, *op; + + /* all character may be converted into the form of \ooo */ + obuf = op = xmalloc(strlen(s) * 4 + 1); + + while (*s != '\0') { + mblen = mbtowc(&wc, s, mb_cur_max); + if (mblen <= 0) { + mblen = 1; + wc = (unsigned char)*s; } - } - - if (!do_convert) - return (xstrdup((char *)str)); - - return ((char *)do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); -} - -uchar_t * -g11n_convert_to_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) -{ - static uint_t initialized = 0; - static uint_t do_convert = 0; - iconv_t cd; - - if (!initialized) { - /* - * iconv_open() fails if the to/from codesets are the - * same, and there are aliases of codesets to boot... - */ - if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || - strcmp("UTF8", nl_langinfo(CODESET)) == 0) { - initialized = 1; - do_convert = 0; + if (!iswprint(wc) && + wc != L'\n' && wc != L'\r' && wc != L'\t') { + /* + * control chars which need to be replaced + * with safe character sequence. + */ + while (mblen != 0) { + op += sprintf(op, "\\%03o", + (unsigned char)*s++); + mblen--; + } } else { - cd = iconv_open("UTF-8", nl_langinfo(CODESET)); - if (cd == (iconv_t)-1) { - if (err_ptr) - *err_ptr = errno; - if (error_str) - *error_str = (uchar_t *)"Cannot " - "convert UTF-8 strings to the " - "local codeset"; + while (mblen != 0) { + *op++ = *s++; + mblen--; } - initialized = 1; - do_convert = 1; } } - - if (!do_convert) - return ((uchar_t *)xstrdup((char *)str)); - - return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); + *op = '\0'; + len = op - obuf + 1; + op = xrealloc(os, len); + (void) memcpy(op, obuf, len); + xfree(obuf); + return (op); } - /* - * Wrapper around iconv() + * Once we negotiated with a langtag, server need to map it to a system + * locale. That is done based on the locale supported on the server side. + * We know (with the locale supported on Solaris) how the langtag is + * mapped to. However, from the client point of view, there is no way to + * know exactly what locale(encoding) will be used. * - * The caller is responsible for freeing the result and for handling - * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). + * With the bug fix of SSH_BUG_STRING_ENCODING, it is guaranteed that the + * UTF-8 characters always come over the wire, so it is no longer the problem + * as long as both side has the bug fix. However if the server side doesn't + * have the fix, client can't safely perform the code conversion since the + * incoming character encoding is unknown. + * + * To alleviate this situation, we take an empirical approach to find + * encoding from langtag. + * + * If langtag has a subtag, we can directly map the langtag to UTF-8 locale + * (eg en-US can be mapped to en_US.UTF-8) with a few exceptions. + * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack + * of L10N support ..). Those are: + * + * no_NO, no_NY, sr_SP, sr_YU + * + * They all use ISO8859-X encoding. + * + * For those "xx" langtags, some of them can be mapped to "xx.UTF-8", + * but others cannot. So we need to use the "xx" as the locale name. + * Those locales are: + * + * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr + * + * Their encoding vary. They could be ISO8859-X or EUC or something else. + * So we don't perform code conversion for these langtags. */ -static uchar_t * -do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, uint_t len, - uint_t *outlen, int *err, uchar_t **err_str) -{ - size_t inbytesleft, outbytesleft, converted_size; - char *outbuf; - uchar_t *converted; - const char *inbuf; - uint_t mul = 0; +static const char *non_utf8_langtag[] = { + "no-NO", "no-NY", "sr-SP", "sr-YU", + "ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja", + "lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL}; - if (!buf || !(*(char *)buf)) - return (NULL); +void +g11n_test_langtag(const char *lang, int server) +{ + const char **lp; - if (len == 0) - len = strlen(buf); - - /* reset conversion descriptor */ - /* XXX Do we need initial shift sequences for UTF-8??? */ - (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); - inbuf = (const char *) buf; - - if (mul_ptr) - mul = *mul_ptr; - - converted_size = (len << mul); - outbuf = (char *)xmalloc(converted_size + 1); /* for null */ - converted = (uchar_t *)outbuf; - outbytesleft = len; - - do { - if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == - (size_t)-1) { - if (errno == E2BIG) { - /* UTF-8 codepoints are at most 8 bytes long */ - if (mul > 2) { - if (err_str) - *err_str = (uchar_t *) - "Conversion to UTF-8 failed" - " due to preposterous space" - " requirements"; - if (err) - *err = EILSEQ; - return (NULL); - } + if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) { + /* + * We negotiated with real locale name (not lang tag). + * We shouldn't expect UTF-8, thus shouldn't do code + * conversion. + */ + datafellows |= SSH_BUG_STRING_ENCODING; + return; + } - /* - * re-alloc output and ensure that the outbuf - * and outbytesleft values are adjusted - */ - converted = xrealloc(converted, - converted_size << 1 + 1); - outbuf = (char *)converted + converted_size - - outbytesleft; - converted_size = (len << ++(mul)); - outbytesleft = converted_size - outbytesleft; - } else { - /* - * let the caller deal with iconv() errors, - * probably by calling fatal(); xfree() does - * not set errno - */ - if (err) - *err = errno; - xfree(converted); - return (NULL); - } + if (datafellows & SSH_BUG_STRING_ENCODING) { + if (server) { + /* + * Whatever bug exists in the client side, server + * side has nothing to do, since server has no way + * to know what actual encoding is used on the client + * side. For example, even if we negotiated with + * en_US, client locale could be en_US.ISO8859-X or + * en_US.UTF-8. + */ + return; } - } while (inbytesleft); - - *outbuf = '\0'; /* ensure null-termination */ - if (outlen) - *outlen = converted_size - outbytesleft; - if (mul_ptr) - *mul_ptr = mul; - - return (converted); + /* + * We are on the client side. We'll check with known + * locales to see if non-UTF8 characters could come in. + */ + for (lp = non_utf8_langtag; *lp != NULL; lp++) { + if (strcmp(lang, *lp) == 0) + break; + } + if (*lp == NULL) { + debug2("Server is expected to use UTF-8 locale"); + datafellows &= ~SSH_BUG_STRING_ENCODING; + } else { + /* + * Server is expected to use non-UTF8 encoding. + */ + debug2("Enforcing no code conversion: %s", lang); + } + } } /* |