diff options
Diffstat (limited to 'usr/src/common')
-rw-r--r-- | usr/src/common/smbsrv/smb_match.c | 2 | ||||
-rw-r--r-- | usr/src/common/smbsrv/smb_msgbuf.c | 452 | ||||
-rw-r--r-- | usr/src/common/smbsrv/smb_string.c | 18 | ||||
-rw-r--r-- | usr/src/common/smbsrv/smb_utf8.c | 467 |
4 files changed, 590 insertions, 349 deletions
diff --git a/usr/src/common/smbsrv/smb_match.c b/usr/src/common/smbsrv/smb_match.c index e687e3cc9f..b35833ff29 100644 --- a/usr/src/common/smbsrv/smb_match.c +++ b/usr/src/common/smbsrv/smb_match.c @@ -142,7 +142,7 @@ smb_match_private(const char *pat, const char *str, struct match_priv *priv) const char *limit; char pc; /* current pattern char */ int rc; - smb_wchar_t wcpat, wcstr; /* current wchar in pat, str */ + uint32_t wcpat, wcstr; /* current wchar in pat, str */ int nbpat, nbstr; /* multi-byte length of it */ if (priv->depth >= SMB_MATCH_DEPTH_MAX) diff --git a/usr/src/common/smbsrv/smb_msgbuf.c b/usr/src/common/smbsrv/smb_msgbuf.c index 54cb75e066..b11cd39a50 100644 --- a/usr/src/common/smbsrv/smb_msgbuf.c +++ b/usr/src/common/smbsrv/smb_msgbuf.c @@ -22,7 +22,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ /* @@ -53,6 +53,12 @@ static int buf_encode(smb_msgbuf_t *, char *, va_list ap); static void *smb_msgbuf_malloc(smb_msgbuf_t *, size_t); static int smb_msgbuf_chkerc(char *text, int erc); +static int msgbuf_get_oem_string(smb_msgbuf_t *, char **, int); +static int msgbuf_get_unicode_string(smb_msgbuf_t *, char **, int); +static int msgbuf_put_oem_string(smb_msgbuf_t *, char *, int); +static int msgbuf_put_unicode_string(smb_msgbuf_t *, char *, int); + + /* * Returns the offset or number of bytes used within the buffer. */ @@ -177,7 +183,7 @@ smb_msgbuf_term(smb_msgbuf_t *mb) * Decode a smb_msgbuf buffer as indicated by the format string into * the variable arg list. This is similar to a scanf operation. * - * On success, returns the number of bytes encoded. Otherwise + * On success, returns the number of bytes decoded. Otherwise * returns a -ve error code. */ int @@ -213,15 +219,12 @@ smb_msgbuf_decode(smb_msgbuf_t *mb, char *fmt, ...) 
static int buf_decode(smb_msgbuf_t *mb, char *fmt, va_list ap) { - uint32_t ival; uint8_t c; uint8_t *bvalp; uint16_t *wvalp; uint32_t *lvalp; uint64_t *llvalp; - char *cvalp; char **cvalpp; - smb_wchar_t wchar; boolean_t repc_specified; int repc; int rc; @@ -324,75 +327,23 @@ buf_decode(smb_msgbuf_t *mb, char *fmt, va_list ap) goto unicode_translation; /*FALLTHROUGH*/ - case 's': /* get string */ - if (!repc_specified) - repc = strlen((const char *)mb->scan) + 1; - if (smb_msgbuf_has_space(mb, repc) == 0) - return (SMB_MSGBUF_UNDERFLOW); - if ((cvalp = smb_msgbuf_malloc(mb, repc * 2)) == 0) - return (SMB_MSGBUF_UNDERFLOW); + case 's': /* get OEM string */ cvalpp = va_arg(ap, char **); - *cvalpp = cvalp; - /* Translate OEM to mbs */ - while (repc > 0) { - wchar = *mb->scan++; - repc--; - if (wchar == 0) - break; - ival = smb_wctomb(cvalp, wchar); - cvalp += ival; - } - *cvalp = '\0'; - if (repc > 0) - mb->scan += repc; + if (!repc_specified) + repc = 0; + rc = msgbuf_get_oem_string(mb, cvalpp, repc); + if (rc != 0) + return (rc); break; - case 'U': /* get unicode string */ + case 'U': /* get UTF-16 string */ unicode_translation: - /* - * Unicode strings are always word aligned. - * The malloc'd area is larger than the - * original string because the UTF-8 chars - * may be longer than the wide-chars. - */ - smb_msgbuf_word_align(mb); - if (!repc_specified) { - /* - * Count bytes, including the null. - */ - uint8_t *tmp_scan = mb->scan; - repc = 2; /* the null */ - while ((wchar = LE_IN16(tmp_scan)) != 0) { - tmp_scan += 2; - repc += 2; - } - } - if (smb_msgbuf_has_space(mb, repc) == 0) - return (SMB_MSGBUF_UNDERFLOW); - /* - * Get space for translated string - * Allocates worst-case size. - */ - if ((cvalp = smb_msgbuf_malloc(mb, repc * 2)) == 0) - return (SMB_MSGBUF_UNDERFLOW); cvalpp = va_arg(ap, char **); - *cvalpp = cvalp; - /* - * Translate unicode to mbs, stopping after - * null or repc limit. 
- */ - while (repc >= 2) { - wchar = LE_IN16(mb->scan); - mb->scan += 2; - repc -= 2; - if (wchar == 0) - break; - ival = smb_wctomb(cvalp, wchar); - cvalp += ival; - } - *cvalp = '\0'; - if (repc > 0) - mb->scan += repc; + if (!repc_specified) + repc = 0; + rc = msgbuf_get_unicode_string(mb, cvalpp, repc); + if (rc != 0) + return (rc); break; case 'M': @@ -416,6 +367,151 @@ unicode_translation: return (SMB_MSGBUF_SUCCESS); } +/* + * msgbuf_get_oem_string + * + * Decode an OEM string, returning its UTF-8 form in strpp, + * allocated using smb_msgbuf_malloc (automatically freed). + * If max_bytes != 0, consume at most max_bytes of the mb. + * See also: mbc_marshal_get_oem_string + */ +static int +msgbuf_get_oem_string(smb_msgbuf_t *mb, char **strpp, int max_bytes) +{ + char *mbs; + uint8_t *oembuf = NULL; + int oemlen; // len of OEM string, w/o null + int datalen; // OtW data len + int mbsmax; // max len of ret str + int rlen; + + if (max_bytes == 0) + max_bytes = 0xffff; + + /* + * Determine the OtW data length and OEM string length + * Note: oemlen is the string length (w/o null) and + * datalen is how much we move mb->scan + */ + datalen = 0; + oemlen = 0; + for (;;) { + if (datalen >= max_bytes) + break; + /* in-line smb_msgbuf_has_space */ + if ((mb->scan + datalen) >= mb->end) + return (SMB_MSGBUF_UNDERFLOW); + datalen++; + if (mb->scan[datalen - 1] == 0) + break; + oemlen++; + } + + /* + * Get datalen bytes into a temp buffer + * sized with room to add a null. + * Free oembuf in smb_msgbuf_term + */ + oembuf = smb_msgbuf_malloc(mb, datalen + 1); + if (oembuf == NULL) + return (SMB_MSGBUF_UNDERFLOW); + bcopy(mb->scan, oembuf, datalen); + mb->scan += datalen; + oembuf[oemlen] = '\0'; + + /* + * Get the buffer we'll return and convert to UTF-8. + * May take as much as double the space. 
+ */ + mbsmax = oemlen * 2; + mbs = smb_msgbuf_malloc(mb, mbsmax + 1); + if (mbs == NULL) + return (SMB_MSGBUF_UNDERFLOW); + rlen = smb_oemtombs(mbs, oembuf, mbsmax); + if (rlen < 0) + return (SMB_MSGBUF_UNDERFLOW); + if (rlen > mbsmax) + rlen = mbsmax; + mbs[rlen] = '\0'; + *strpp = mbs; + return (0); +} + +/* + * msgbuf_get_unicode_string + * + * Decode a UTF-16 string, returning its UTF-8 form in strpp, + * allocated using smb_msgbuf_malloc (automatically freed). + * If max_bytes != 0, consume at most max_bytes of the mb. + * See also: mbc_marshal_get_unicode_string + */ +static int +msgbuf_get_unicode_string(smb_msgbuf_t *mb, char **strpp, int max_bytes) +{ + char *mbs; + uint16_t *wcsbuf = NULL; + int wcslen; // wchar count + int datalen; // OtW data len + size_t mbsmax; // max len of ret str + size_t rlen; + + if (max_bytes == 0) + max_bytes = 0xffff; + + /* + * Unicode strings are always word aligned. + */ + smb_msgbuf_word_align(mb); + + /* + * Determine the OtW data length and (WC) string length + * Note: wcslen counts 16-bit wide_chars (w/o null), + * and datalen is how much we move mb->scan + */ + datalen = 0; + wcslen = 0; + for (;;) { + if (datalen >= max_bytes) + break; + /* in-line smb_msgbuf_has_space */ + if ((mb->scan + datalen) >= mb->end) + return (SMB_MSGBUF_UNDERFLOW); + datalen += 2; + if (mb->scan[datalen - 2] == 0 && + mb->scan[datalen - 1] == 0) + break; + wcslen++; + } + + /* + * Get datalen bytes into a temp buffer + * sized with room to add a (WC) null. + * Note: wcsbuf has little-endian order + */ + wcsbuf = smb_msgbuf_malloc(mb, datalen + 2); + if (wcsbuf == NULL) + return (SMB_MSGBUF_UNDERFLOW); + bcopy(mb->scan, wcsbuf, datalen); + mb->scan += datalen; + wcsbuf[wcslen] = 0; + + /* + * Get the buffer we'll return and convert to UTF-8. + * May take as much as 4X the number of wide chars. 
+ */ + mbsmax = wcslen * MTS_MB_CUR_MAX; + mbs = smb_msgbuf_malloc(mb, mbsmax + 1); + if (mbs == NULL) + return (SMB_MSGBUF_UNDERFLOW); + rlen = smb_wcstombs(mbs, wcsbuf, mbsmax); + if (rlen == (size_t)-1) + return (SMB_MSGBUF_UNDERFLOW); + if (rlen > mbsmax) + rlen = mbsmax; + mbs[rlen] = '\0'; + *strpp = mbs; + return (0); +} /* * smb_msgbuf_encode @@ -466,8 +562,6 @@ buf_encode(smb_msgbuf_t *mb, char *fmt, va_list ap) uint8_t *bvalp; char *cvalp; uint8_t c; - smb_wchar_t wchar; - int count; boolean_t repc_specified; int repc; int rc; @@ -571,80 +665,23 @@ buf_encode(smb_msgbuf_t *mb, char *fmt, va_list ap) goto unicode_translation; /* FALLTHROUGH */ - case 's': /* put string */ + case 's': /* put OEM string */ cvalp = va_arg(ap, char *); - if (!repc_specified) { - repc = smb_sbequiv_strlen(cvalp); - if (repc == -1) - return (SMB_MSGBUF_OVERFLOW); - if (!(mb->flags & SMB_MSGBUF_NOTERM)) - repc++; - } - if (smb_msgbuf_has_space(mb, repc) == 0) - return (SMB_MSGBUF_OVERFLOW); - while (repc > 0) { - count = smb_mbtowc(&wchar, cvalp, - MTS_MB_CHAR_MAX); - if (count < 0) - return (SMB_MSGBUF_DATA_ERROR); - cvalp += count; - if (wchar == 0) - break; - *mb->scan++ = (uint8_t)wchar; - repc--; - if (wchar & 0xff00) { - *mb->scan++ = wchar >> 8; - repc--; - } - } - if (*cvalp == '\0' && repc > 0 && - (mb->flags & SMB_MSGBUF_NOTERM) == 0) { - *mb->scan++ = 0; - repc--; - } - while (repc > 0) { - *mb->scan++ = 0; - repc--; - } + if (!repc_specified) + repc = 0; + rc = msgbuf_put_oem_string(mb, cvalp, repc); + if (rc != 0) + return (rc); break; - case 'U': /* put unicode string */ + case 'U': /* put UTF-16 string */ unicode_translation: - /* - * Unicode strings are always word aligned. 
- */ - smb_msgbuf_word_align(mb); cvalp = va_arg(ap, char *); - if (!repc_specified) { - repc = smb_wcequiv_strlen(cvalp); - if (!(mb->flags & SMB_MSGBUF_NOTERM)) - repc += 2; - } - if (!smb_msgbuf_has_space(mb, repc)) - return (SMB_MSGBUF_OVERFLOW); - while (repc >= 2) { - count = smb_mbtowc(&wchar, cvalp, - MTS_MB_CHAR_MAX); - if (count < 0) - return (SMB_MSGBUF_DATA_ERROR); - cvalp += count; - if (wchar == 0) - break; - - LE_OUT16(mb->scan, wchar); - mb->scan += 2; - repc -= 2; - } - if (*cvalp == '\0' && repc >= 2 && - (mb->flags & SMB_MSGBUF_NOTERM) == 0) { - LE_OUT16(mb->scan, 0); - mb->scan += 2; - repc -= 2; - } - while (repc > 0) { - *mb->scan++ = 0; - repc--; - } + if (!repc_specified) + repc = 0; + rc = msgbuf_put_unicode_string(mb, cvalp, repc); + if (rc != 0) + return (rc); break; case 'M': @@ -665,6 +702,141 @@ unicode_translation: return (SMB_MSGBUF_SUCCESS); } +/* + * Marshal a UTF-8 string (str) into mbc, converting to OEM codeset. + * Also write a null unless the repc count limits the length we put. + * When (repc > 0) the length we marshal must be exactly repc, and + * truncate or pad the mb data as necessary. + * See also: mbc_marshal_put_oem_string + */ +static int +msgbuf_put_oem_string(smb_msgbuf_t *mb, char *mbs, int repc) +{ + uint8_t *oembuf = NULL; + uint8_t *s; + int oemlen; + int rlen; + + /* + * Compute length of converted OEM string, + * NOT including null terminator + */ + if ((oemlen = smb_sbequiv_strlen(mbs)) == -1) + return (SMB_MSGBUF_DATA_ERROR); + + /* + * If repc not specified, put whole string + NULL, + * otherwise will truncate or pad as needed. + */ + if (repc <= 0) { + repc = oemlen; + if ((mb->flags & SMB_MSGBUF_NOTERM) == 0) + repc += sizeof (char); + } + if (smb_msgbuf_has_space(mb, repc) == 0) + return (SMB_MSGBUF_OVERFLOW); + + /* + * Convert into a temporary buffer + * Free oembuf in smb_msgbuf_term. 
+ */ + oembuf = smb_msgbuf_malloc(mb, oemlen + 1); + if (oembuf == NULL) + return (SMB_MSGBUF_UNDERFLOW); + rlen = smb_mbstooem(oembuf, mbs, oemlen); + if (rlen < 0) + return (SMB_MSGBUF_DATA_ERROR); + if (rlen > oemlen) + rlen = oemlen; + oembuf[rlen] = '\0'; + + /* + * Copy the converted string into the message, + * truncated or padded as required. + */ + s = oembuf; + while (repc > 0) { + *mb->scan++ = *s; + if (*s != '\0') + s++; + repc--; + } + + return (0); +} + +/* + * Marshal a UTF-8 string (str) into mbc, converting to UTF-16. + * Also write a null unless the repc count limits the length. + * When (repc > 0) the length we marshal must be exactly repc, + * and truncate or pad the mb data as necessary. + * See also: mbc_marshal_put_unicode_string + */ +static int +msgbuf_put_unicode_string(smb_msgbuf_t *mb, char *mbs, int repc) +{ + smb_wchar_t *wcsbuf = NULL; + smb_wchar_t *wp; + size_t wcslen, wcsbytes; + size_t rlen; + + /* align to word boundary */ + smb_msgbuf_word_align(mb); + + /* + * Compute length of converted UTF-16 string, + * NOT including null terminator (in bytes). + */ + wcsbytes = smb_wcequiv_strlen(mbs); + if (wcsbytes == (size_t)-1) + return (SMB_MSGBUF_DATA_ERROR); + + /* + * If repc not specified, put whole string + NULL, + * otherwise will truncate or pad as needed. + */ + if (repc <= 0) { + repc = (int)wcsbytes; + if ((mb->flags & SMB_MSGBUF_NOTERM) == 0) + repc += sizeof (smb_wchar_t); + } + if (smb_msgbuf_has_space(mb, repc) == 0) + return (SMB_MSGBUF_OVERFLOW); + + /* + * Convert into a temporary buffer + * Free wcsbuf in smb_msgbuf_term + */ + wcslen = wcsbytes / 2; + wcsbuf = smb_msgbuf_malloc(mb, wcsbytes + 2); + if (wcsbuf == NULL) + return (SMB_MSGBUF_UNDERFLOW); + rlen = smb_mbstowcs(wcsbuf, mbs, wcslen); + if (rlen == (size_t)-1) + return (SMB_MSGBUF_DATA_ERROR); + if (rlen > wcslen) + rlen = wcslen; + wcsbuf[rlen] = 0; + + /* + * Copy the converted string into the message, + * truncated or padded as required. 
Preserve + * little-endian order while copying. + */ + wp = wcsbuf; + while (repc > 1) { + smb_wchar_t wchar = LE_IN16(wp); + LE_OUT16(mb->scan, wchar); + mb->scan += 2; + if (wchar != 0) + wp++; + repc -= sizeof (smb_wchar_t); + } + if (repc > 0) + *mb->scan++ = '\0'; + + return (0); +} /* * smb_msgbuf_malloc diff --git a/usr/src/common/smbsrv/smb_string.c b/usr/src/common/smbsrv/smb_string.c index 3d2abc474b..7922d84916 100644 --- a/usr/src/common/smbsrv/smb_string.c +++ b/usr/src/common/smbsrv/smb_string.c @@ -174,8 +174,8 @@ smb_islower(int c) * If the specified character is lowercase, the uppercase value will * be returned. Otherwise the original value will be returned. */ -int -smb_toupper(int c) +uint32_t +smb_toupper(uint32_t c) { uint16_t mask = is_unicode ? 0xffff : 0xff; @@ -187,8 +187,8 @@ smb_toupper(int c) * If the specified character is uppercase, the lowercase value will * be returned. Otherwise the original value will be returned. */ -int -smb_tolower(int c) +uint32_t +smb_tolower(uint32_t c) { uint16_t mask = is_unicode ? 0xffff : 0xff; @@ -204,7 +204,7 @@ smb_tolower(int c) char * smb_strupr(char *s) { - smb_wchar_t c; + uint32_t c; char *p = s; while (*p) { @@ -235,7 +235,7 @@ smb_strupr(char *s) char * smb_strlwr(char *s) { - smb_wchar_t c; + uint32_t c; char *p = s; while (*p) { @@ -264,7 +264,7 @@ smb_strlwr(char *s) int smb_isstrlwr(const char *s) { - smb_wchar_t c; + uint32_t c; int n; const char *p = s; @@ -295,7 +295,7 @@ smb_isstrlwr(const char *s) int smb_isstrupr(const char *s) { - smb_wchar_t c; + uint32_t c; int n; const char *p = s; @@ -440,7 +440,7 @@ smb_unicode_init(void) * unc_server server or domain name with no leading/trailing '\' * unc_share share name with no leading/trailing '\' * unc_path relative path to the share with no leading/trailing '\' - * it is valid for unc_path to be NULL. + * it is valid for unc_path to be NULL. 
* * Upon successful return of this function, smb_unc_free() * MUST be called when returned 'unc' is no longer needed. diff --git a/usr/src/common/smbsrv/smb_utf8.c b/usr/src/common/smbsrv/smb_utf8.c index 3b84363dbd..8446fb0b9e 100644 --- a/usr/src/common/smbsrv/smb_utf8.c +++ b/usr/src/common/smbsrv/smb_utf8.c @@ -22,43 +22,25 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ /* - * Multibyte/wide-char conversion routines. Wide-char encoding provides - * a fixed size character encoding that maps to the Unicode 16-bit - * (UCS-2) character set standard. Multibyte or UCS transformation - * format (UTF) encoding is a variable length character encoding scheme - * that s compatible with existing ASCII characters and guarantees that - * the resultant strings do not contain embedded null characters. Both - * types of encoding provide a null terminator: single byte for UTF-8 - * and a wide-char null for Unicode. See RFC 2044. - * - * The table below illustrates the UTF-8 encoding scheme. The letter x - * indicates bits available for encoding the character value. - * - * UCS-2 UTF-8 octet sequence (binary) - * 0x0000-0x007F 0xxxxxxx - * 0x0080-0x07FF 110xxxxx 10xxxxxx - * 0x0800-0xFFFF 1110xxxx 10xxxxxx 10xxxxxx - * - * RFC 2044 - * UTF-8,a transformation format of UNICODE and ISO 10646 - * F. Yergeau - * Alis Technologies - * October 1996 + * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire + * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs). 
*/ #if defined(_KERNEL) || defined(_FAKE_KERNEL) #include <sys/types.h> #include <sys/sunddi.h> -#else +#else /* _KERNEL || _FAKE_KERNEL */ #include <stdio.h> #include <stdlib.h> -#include <assert.h> #include <strings.h> -#endif +#include <iconv.h> +#include <assert.h> +#endif /* _KERNEL || _FAKE_KERNEL */ +#include <sys/u8_textprep.h> #include <smbsrv/string.h> @@ -75,26 +57,37 @@ * multibyte character is encountered. */ size_t -smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars) +smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars) { - int len; - smb_wchar_t *start = wcstring; - - while (nwchars--) { - len = smb_mbtowc(wcstring, mbstring, MTS_MB_CHAR_MAX); - if (len < 0) { - *wcstring = 0; - return ((size_t)-1); - } + size_t mbslen, wcslen; + int err; - if (*mbstring == 0) - break; + /* NULL or empty input is allowed. */ + if (mbs == NULL || *mbs == '\0') { + if (wcs != NULL && nwchars > 0) + *wcs = 0; + return (0); + } - ++wcstring; - mbstring += len; + /* + * Traditional mbstowcs(3C) allows wcs==NULL to get the length. + * SMB never calls it that way, but let's future-proof. + */ + if (wcs == NULL) { + return ((size_t)-1); } - return (wcstring - start); + mbslen = strlen(mbs); + wcslen = nwchars; + err = uconv_u8tou16((const uchar_t *)mbs, &mbslen, + wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN); + if (err != 0) + return ((size_t)-1); + + if (wcslen < nwchars) + wcs[wcslen] = 0; + + return (wcslen); } @@ -113,49 +106,36 @@ smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars) * states. Otherwise it should be return 0. * * If mbchar is non-null, returns the number of bytes processed in - * mbchar. If mbchar is invalid, returns -1. + * mbchar. If mbchar is null, convert the null (wcharp=0) but + * return length zero. If mbchar is invalid, returns -1. 
*/ int /*ARGSUSED*/ -smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes) +smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes) { - unsigned char mbyte; - smb_wchar_t wide_char; - int count; - int bytes_left; + uint32_t wide_char; + int count, err; + size_t mblen; + size_t wclen; if (mbchar == NULL) return (0); /* no shift states */ - /* 0xxxxxxx -> 1 byte ASCII encoding */ - if (((mbyte = *mbchar++) & 0x80) == 0) { - if (wcharp) - *wcharp = (smb_wchar_t)mbyte; - - return (mbyte ? 1 : 0); - } - - /* 10xxxxxx -> invalid first byte */ - if ((mbyte & 0x40) == 0) + /* + * How many bytes in this symbol? + */ + count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err); + if (count < 0) return (-1); - wide_char = mbyte; - if ((mbyte & 0x20) == 0) { - wide_char &= 0x1f; - bytes_left = 1; - } else if ((mbyte & 0x10) == 0) { - wide_char &= 0x0f; - bytes_left = 2; - } else { + mblen = count; + wclen = 1; + err = uconv_u8tou32((const uchar_t *)mbchar, &mblen, + &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN); + if (err != 0) return (-1); - } - - count = 1; - while (bytes_left--) { - if (((mbyte = *mbchar++) & 0xc0) != 0x80) - return (-1); - - count++; - wide_char = (wide_char << 6) | (mbyte & 0x3f); + if (wclen == 0) { + wide_char = 0; + count = 0; } if (wcharp) @@ -173,25 +153,27 @@ smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes) * mbchar must be large enough to accommodate the multibyte character. * * Returns the numberof bytes written to mbchar. + * Note: handles null like any 1-byte char. 
*/ int -smb_wctomb(char *mbchar, smb_wchar_t wchar) +smb_wctomb(char *mbchar, uint32_t wchar) { - if ((wchar & ~0x7f) == 0) { - *mbchar = (char)wchar; - return (1); - } + char junk[MTS_MB_CUR_MAX+1]; + size_t mblen; + size_t wclen; + int err; - if ((wchar & ~0x7ff) == 0) { - *mbchar++ = (wchar >> 6) | 0xc0; - *mbchar = (wchar & 0x3f) | 0x80; - return (2); - } + if (mbchar == NULL) + mbchar = junk; - *mbchar++ = (wchar >> 12) | 0xe0; - *mbchar++ = ((wchar >> 6) & 0x3f) | 0x80; - *mbchar = (wchar & 0x3f) | 0x80; - return (3); + mblen = MTS_MB_CUR_MAX; + wclen = 1; + err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen, + UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL); + if (err != 0) + return (-1); + + return ((int)mblen); } @@ -205,46 +187,46 @@ smb_wctomb(char *mbchar, smb_wchar_t wchar) * terminated if there is room. * * Returns the number of bytes converted, not counting the terminating - * null byte. + * null byte. Returns -1 if an invalid WC sequence is encountered. */ size_t -smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes) +smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes) { - char *start = mbstring; - const smb_wchar_t *wcp = wcstring; - smb_wchar_t wide_char = 0; - char buf[4]; - size_t len; + size_t mbslen, wcslen; + int err; - if ((mbstring == NULL) || (wcstring == NULL)) + /* NULL or empty input is allowed. */ + if (wcs == NULL || *wcs == 0) { + if (mbs != NULL && nbytes > 0) + *mbs = '\0'; return (0); + } - while (nbytes > MTS_MB_CHAR_MAX) { - wide_char = *wcp++; - len = smb_wctomb(mbstring, wide_char); - - if (wide_char == 0) - /*LINTED E_PTRDIFF_OVERFLOW*/ - return (mbstring - start); - - mbstring += len; - nbytes -= len; + /* + * Traditional wcstombs(3C) allows mbs==NULL to get the length. + * SMB never calls it that way, but let's future-proof. 
+ */ + if (mbs == NULL) { + return ((size_t)-1); } - while (wide_char && nbytes) { - wide_char = *wcp++; - if ((len = smb_wctomb(buf, wide_char)) > nbytes) { - *mbstring = 0; - break; - } + /* + * Compute wcslen + */ + wcslen = 0; + while (wcs[wcslen] != 0) + wcslen++; - bcopy(buf, mbstring, len); - mbstring += len; - nbytes -= len; - } + mbslen = nbytes; + err = uconv_u16tou8(wcs, &wcslen, + (uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN); + if (err != 0) + return ((size_t)-1); + + if (mbslen < nbytes) + mbs[mbslen] = '\0'; - /*LINTED E_PTRDIFF_OVERFLOW*/ - return (mbstring - start); + return (mbslen); } @@ -256,7 +238,7 @@ smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes) size_t smb_wcequiv_strlen(const char *mbs) { - smb_wchar_t wide_char; + uint32_t wide_char; size_t bytes; size_t len = 0; @@ -264,9 +246,15 @@ smb_wcequiv_strlen(const char *mbs) bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX); if (bytes == ((size_t)-1)) return ((size_t)-1); + mbs += bytes; len += sizeof (smb_wchar_t); - mbs += bytes; + if (bytes > 3) { + /* + * Extended unicode, so TWO smb_wchar_t + */ + len += sizeof (smb_wchar_t); + } } return (len); @@ -275,25 +263,38 @@ smb_wcequiv_strlen(const char *mbs) /* * Returns the number of bytes that would be written if the multi- - * byte string mbs was converted to a single byte character string, - * not counting the terminating null character. + * byte string mbs was converted to an OEM character string, + * (smb_mbstooem) not counting the terminating null character. 
*/ size_t smb_sbequiv_strlen(const char *mbs) { - smb_wchar_t wide_char; size_t nbytes; size_t len = 0; while (*mbs) { - nbytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX); + nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX); if (nbytes == ((size_t)-1)) return ((size_t)-1); + if (nbytes == 0) + break; - if (wide_char & 0xFF00) - len += sizeof (smb_wchar_t); - else - ++len; + if (nbytes == 1) { + /* ASCII */ + len++; + } else if (nbytes < 8) { + /* Compute OEM length */ + char mbsbuf[8]; + uint8_t oembuf[8]; + int oemlen; + (void) strlcpy(mbsbuf, mbs, nbytes+1); + oemlen = smb_mbstooem(oembuf, mbsbuf, 8); + if (oemlen < 0) + return ((size_t)-1); + len += oemlen; + } else { + return ((size_t)-1); + } mbs += nbytes; } @@ -301,106 +302,174 @@ smb_sbequiv_strlen(const char *mbs) return (len); } +/* + * Convert OEM strings to/from internal (UTF-8) form. + * + * We rarely encounter these anymore because all modern + * SMB clients use Unicode (UTF-16). The few cases where + * this IS still called are normally using ASCII, i.e. + * tag names etc. so short-cut those cases. If we get + * something non-ASCII we have to call iconv. + * + * If we were to really support OEM code pages, we would + * need to have a way to set the OEM code page from some + * configuration value. For now it's always CP850. + * See also ./smb_oem.c + */ +static char smb_oem_codepage[32] = "CP850"; /* - * stombs + * smb_oemtombs * - * Convert a regular null terminated string 'string' to a UTF-8 encoded - * null terminated multi-byte string 'mbstring'. Only full converted - * UTF-8 characters will be written 'mbstring'. If a character will not - * fit within the remaining buffer space or 'mbstring' will overflow - * max_mblen, the conversion process will be terminated and 'mbstring' - * will be null terminated. + * Convert a null terminated OEM string 'string' to a UTF-8 string + * no longer than max_mblen (null terminated if space). 
* - * Returns the number of bytes written to 'mbstring', excluding the - * terminating null character. + * If the input string contains invalid OEM characters, a value + * of -1 will be returned. Otherwise returns the length of 'mbs', + * excluding the terminating null character. * * If either mbstring or string is a null pointer, -1 is returned. */ int -smb_stombs(char *mbstring, char *string, int max_mblen) +smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen) { - char *start = mbstring; - unsigned char *p = (unsigned char *)string; - int space_left = max_mblen; - int len; - smb_wchar_t wide_char; - char buf[4]; - - if (!mbstring || !string) + uchar_t *p; + int oemlen; + int rlen; + boolean_t need_iconv = B_FALSE; + + if (mbs == NULL || oems == NULL) return (-1); - while (*p && space_left > 2) { - wide_char = *p++; - len = smb_wctomb(mbstring, wide_char); - mbstring += len; - space_left -= len; + /* + * Check if the oems is all ASCII (and get the length + * while we're at it) so we know if we need to iconv. + * We usually can avoid the iconv calls. 
+ */ + oemlen = 0; + p = (uchar_t *)oems; + while (*p != '\0') { + oemlen++; + if (*p & 0x80) + need_iconv = B_TRUE; + p++; } - if (*p) { - wide_char = *p; - if ((len = smb_wctomb(buf, wide_char)) < 2) { - *mbstring = *buf; - mbstring += len; - space_left -= len; - } + if (need_iconv) { + int rc; + char *obuf = mbs; + size_t olen = max_mblen; + size_t ilen = oemlen; +#if defined(_KERNEL) || defined(_FAKE_KERNEL) + char *ibuf = (char *)oems; + kiconv_t ic; + int err; + + ic = kiconv_open("UTF-8", smb_oem_codepage); + if (ic == (kiconv_t)-1) + goto just_copy; + rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err); + (void) kiconv_close(ic); +#else /* _KERNEL || _FAKE_KERNEL */ + const char *ibuf = (char *)oems; + iconv_t ic; + ic = iconv_open("UTF-8", smb_oem_codepage); + if (ic == (iconv_t)-1) + goto just_copy; + rc = iconv(ic, &ibuf, &ilen, &obuf, &olen); + (void) iconv_close(ic); +#endif /* _KERNEL || _FAKE_KERNEL */ + if (rc < 0) + return (-1); + /* Return val. is output bytes. */ + rlen = (max_mblen - olen); + } else { + just_copy: + rlen = oemlen; + if (rlen > max_mblen) + rlen = max_mblen; + bcopy(oems, mbs, rlen); } + if (rlen < max_mblen) + mbs[rlen] = '\0'; - *mbstring = '\0'; - - /*LINTED E_PTRDIFF_OVERFLOW*/ - return (mbstring - start); + return (rlen); } - /* - * mbstos + * smb_mbstooem * - * Convert a null terminated multi-byte string 'mbstring' to a regular - * null terminated string 'string'. A 1-byte character in 'mbstring' - * maps to a 1-byte character in 'string'. A 2-byte character in - * 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null. - * Otherwise the upper byte null will be discarded to ensure that the - * output stream does not contain embedded null characters. + * Convert a null terminated multi-byte string 'mbs' to an OEM string + * no longer than max_oemlen (null terminated if space). * - * If the input stream contains invalid multi-byte characters, a value - * of -1 will be returned. 
Otherwise the length of 'string', excluding - * the terminating null character, is returned. + * If the input string contains invalid multi-byte characters, a value + * of -1 will be returned. Otherwise returns the length of 'oems', + * excluding the terminating null character. * * If either mbstring or string is a null pointer, -1 is returned. */ int -smb_mbstos(char *string, const char *mbstring) +smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen) { - smb_wchar_t wc; - unsigned char *start = (unsigned char *)string; - int len; + uchar_t *p; + int mbslen; + int rlen; + boolean_t need_iconv = B_FALSE; - if (string == NULL || mbstring == NULL) + if (oems == NULL || mbs == NULL) return (-1); - while (*mbstring) { - if ((len = smb_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) { - *string = 0; - return (-1); - } - - if (wc & 0xFF00) { - /*LINTED E_BAD_PTR_CAST_ALIGN*/ - *((smb_wchar_t *)string) = wc; - string += sizeof (smb_wchar_t); - } - else - { - *string = (unsigned char)wc; - string++; - } - - mbstring += len; + /* + * Check if the mbs is all ASCII (and get the length + * while we're at it) so we know if we need to iconv. + * We usually can avoid the iconv calls. 
+ */ + mbslen = 0; + p = (uchar_t *)mbs; + while (*p != '\0') { + mbslen++; + if (*p & 0x80) + need_iconv = B_TRUE; + p++; } - *string = 0; + if (need_iconv) { + int rc; + char *obuf = (char *)oems; + size_t olen = max_oemlen; + size_t ilen = mbslen; +#if defined(_KERNEL) || defined(_FAKE_KERNEL) + char *ibuf = (char *)mbs; + kiconv_t ic; + int err; + + ic = kiconv_open(smb_oem_codepage, "UTF-8"); + if (ic == (kiconv_t)-1) + goto just_copy; + rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err); + (void) kiconv_close(ic); +#else /* _KERNEL || _FAKE_KERNEL */ + const char *ibuf = mbs; + iconv_t ic; + ic = iconv_open(smb_oem_codepage, "UTF-8"); + if (ic == (iconv_t)-1) + goto just_copy; + rc = iconv(ic, &ibuf, &ilen, &obuf, &olen); + (void) iconv_close(ic); +#endif /* _KERNEL || _FAKE_KERNEL */ + if (rc < 0) + return (-1); + /* Return val. is output bytes. */ + rlen = (max_oemlen - olen); + } else { + just_copy: + rlen = mbslen; + if (rlen > max_oemlen) + rlen = max_oemlen; + bcopy(mbs, oems, rlen); + } + if (rlen < max_oemlen) + oems[rlen] = '\0'; - /*LINTED E_PTRDIFF_OVERFLOW*/ - return ((unsigned char *)string - start); + return (rlen); } |