summaryrefslogtreecommitdiff
path: root/usr/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/common')
-rw-r--r--  usr/src/common/smbsrv/smb_match.c    2
-rw-r--r--  usr/src/common/smbsrv/smb_msgbuf.c   452
-rw-r--r--  usr/src/common/smbsrv/smb_string.c   18
-rw-r--r--  usr/src/common/smbsrv/smb_utf8.c     467
4 files changed, 590 insertions, 349 deletions
diff --git a/usr/src/common/smbsrv/smb_match.c b/usr/src/common/smbsrv/smb_match.c
index e687e3cc9f..b35833ff29 100644
--- a/usr/src/common/smbsrv/smb_match.c
+++ b/usr/src/common/smbsrv/smb_match.c
@@ -142,7 +142,7 @@ smb_match_private(const char *pat, const char *str, struct match_priv *priv)
const char *limit;
char pc; /* current pattern char */
int rc;
- smb_wchar_t wcpat, wcstr; /* current wchar in pat, str */
+ uint32_t wcpat, wcstr; /* current wchar in pat, str */
int nbpat, nbstr; /* multi-byte length of it */
if (priv->depth >= SMB_MATCH_DEPTH_MAX)
diff --git a/usr/src/common/smbsrv/smb_msgbuf.c b/usr/src/common/smbsrv/smb_msgbuf.c
index 54cb75e066..b11cd39a50 100644
--- a/usr/src/common/smbsrv/smb_msgbuf.c
+++ b/usr/src/common/smbsrv/smb_msgbuf.c
@@ -22,7 +22,7 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -53,6 +53,12 @@ static int buf_encode(smb_msgbuf_t *, char *, va_list ap);
static void *smb_msgbuf_malloc(smb_msgbuf_t *, size_t);
static int smb_msgbuf_chkerc(char *text, int erc);
+static int msgbuf_get_oem_string(smb_msgbuf_t *, char **, int);
+static int msgbuf_get_unicode_string(smb_msgbuf_t *, char **, int);
+static int msgbuf_put_oem_string(smb_msgbuf_t *, char *, int);
+static int msgbuf_put_unicode_string(smb_msgbuf_t *, char *, int);
+
+
/*
* Returns the offset or number of bytes used within the buffer.
*/
@@ -177,7 +183,7 @@ smb_msgbuf_term(smb_msgbuf_t *mb)
* Decode a smb_msgbuf buffer as indicated by the format string into
* the variable arg list. This is similar to a scanf operation.
*
- * On success, returns the number of bytes encoded. Otherwise
+ * On success, returns the number of bytes decoded. Otherwise
* returns a -ve error code.
*/
int
@@ -213,15 +219,12 @@ smb_msgbuf_decode(smb_msgbuf_t *mb, char *fmt, ...)
static int
buf_decode(smb_msgbuf_t *mb, char *fmt, va_list ap)
{
- uint32_t ival;
uint8_t c;
uint8_t *bvalp;
uint16_t *wvalp;
uint32_t *lvalp;
uint64_t *llvalp;
- char *cvalp;
char **cvalpp;
- smb_wchar_t wchar;
boolean_t repc_specified;
int repc;
int rc;
@@ -324,75 +327,23 @@ buf_decode(smb_msgbuf_t *mb, char *fmt, va_list ap)
goto unicode_translation;
/*FALLTHROUGH*/
- case 's': /* get string */
- if (!repc_specified)
- repc = strlen((const char *)mb->scan) + 1;
- if (smb_msgbuf_has_space(mb, repc) == 0)
- return (SMB_MSGBUF_UNDERFLOW);
- if ((cvalp = smb_msgbuf_malloc(mb, repc * 2)) == 0)
- return (SMB_MSGBUF_UNDERFLOW);
+ case 's': /* get OEM string */
cvalpp = va_arg(ap, char **);
- *cvalpp = cvalp;
- /* Translate OEM to mbs */
- while (repc > 0) {
- wchar = *mb->scan++;
- repc--;
- if (wchar == 0)
- break;
- ival = smb_wctomb(cvalp, wchar);
- cvalp += ival;
- }
- *cvalp = '\0';
- if (repc > 0)
- mb->scan += repc;
+ if (!repc_specified)
+ repc = 0;
+ rc = msgbuf_get_oem_string(mb, cvalpp, repc);
+ if (rc != 0)
+ return (rc);
break;
- case 'U': /* get unicode string */
+ case 'U': /* get UTF-16 string */
unicode_translation:
- /*
- * Unicode strings are always word aligned.
- * The malloc'd area is larger than the
- * original string because the UTF-8 chars
- * may be longer than the wide-chars.
- */
- smb_msgbuf_word_align(mb);
- if (!repc_specified) {
- /*
- * Count bytes, including the null.
- */
- uint8_t *tmp_scan = mb->scan;
- repc = 2; /* the null */
- while ((wchar = LE_IN16(tmp_scan)) != 0) {
- tmp_scan += 2;
- repc += 2;
- }
- }
- if (smb_msgbuf_has_space(mb, repc) == 0)
- return (SMB_MSGBUF_UNDERFLOW);
- /*
- * Get space for translated string
- * Allocates worst-case size.
- */
- if ((cvalp = smb_msgbuf_malloc(mb, repc * 2)) == 0)
- return (SMB_MSGBUF_UNDERFLOW);
cvalpp = va_arg(ap, char **);
- *cvalpp = cvalp;
- /*
- * Translate unicode to mbs, stopping after
- * null or repc limit.
- */
- while (repc >= 2) {
- wchar = LE_IN16(mb->scan);
- mb->scan += 2;
- repc -= 2;
- if (wchar == 0)
- break;
- ival = smb_wctomb(cvalp, wchar);
- cvalp += ival;
- }
- *cvalp = '\0';
- if (repc > 0)
- mb->scan += repc;
+ if (!repc_specified)
+ repc = 0;
+ rc = msgbuf_get_unicode_string(mb, cvalpp, repc);
+ if (rc != 0)
+ return (rc);
break;
case 'M':
@@ -416,6 +367,151 @@ unicode_translation:
return (SMB_MSGBUF_SUCCESS);
}
+/*
+ * msgbuf_get_oem_string
+ *
+ * Decode an OEM string, returning its UTF-8 form in strpp,
+ * allocated using smb_msgbuf_malloc (automatically freed).
+ * If max_bytes != 0, consume at most max_bytes of the mb;
+ * otherwise scan up to the null terminator (limit 0xffff).
+ *
+ * Returns 0 on success (and only then sets *strpp), or
+ * SMB_MSGBUF_UNDERFLOW if the data runs past mb->end, an
+ * allocation fails, or smb_oemtombs rejects the string.
+ * See also: mbc_marshal_get_oem_string
+ */
+static int
+msgbuf_get_oem_string(smb_msgbuf_t *mb, char **strpp, int max_bytes)
+{
+char *mbs;
+uint8_t *oembuf = NULL;
+int oemlen; // len of OEM string, w/o null
+int datalen; // OtW data len
+int mbsmax; // max len of ret str
+int rlen;
+
+if (max_bytes == 0)
+max_bytes = 0xffff;
+
+/*
+ * Determine the OtW data length and OEM string length
+ * Note: oemlen is the string length (w/o null) and
+ * datalen is how much we move mb->scan
+ */
+datalen = 0;
+oemlen = 0;
+for (;;) {
+if (datalen >= max_bytes)
+break;
+/* in-line smb_msgbuf_has_space */
+if ((mb->scan + datalen) >= mb->end)
+return (SMB_MSGBUF_UNDERFLOW);
+datalen++;
+if (mb->scan[datalen - 1] == 0)
+break;
+oemlen++;
+}
+
+/*
+ * Get datalen bytes into a temp buffer
+ * sized with room to add a null.
+ * Free oembuf in smb_msgbuf_term
+ */
+oembuf = smb_msgbuf_malloc(mb, datalen + 1);
+if (oembuf == NULL)
+return (SMB_MSGBUF_UNDERFLOW);
+bcopy(mb->scan, oembuf, datalen);
+mb->scan += datalen;
+oembuf[oemlen] = '\0';
+
+/*
+ * Get the buffer we'll return and convert to UTF-8.
+ * One OEM byte can expand to more than two UTF-8 bytes
+ * (e.g. the CP850 box-drawing characters need three), so
+ * size for the per-character worst case, MTS_MB_CUR_MAX.
+ * An undersized buffer makes smb_oemtombs fail, which
+ * would reject a perfectly valid string.
+ */
+mbsmax = oemlen * MTS_MB_CUR_MAX;
+mbs = smb_msgbuf_malloc(mb, mbsmax + 1);
+if (mbs == NULL)
+return (SMB_MSGBUF_UNDERFLOW);
+rlen = smb_oemtombs(mbs, oembuf, mbsmax);
+if (rlen < 0)
+return (SMB_MSGBUF_UNDERFLOW);
+if (rlen > mbsmax)
+rlen = mbsmax;
+/* smb_oemtombs only terminates when there is room; do it here */
+mbs[rlen] = '\0';
+*strpp = mbs;
+return (0);
+}
+
+/*
+ * msgbuf_get_unicode_string
+ *
+ * Decode a UTF-16 string, returning its UTF-8 form in strpp,
+ * allocated using smb_msgbuf_malloc (automatically freed).
+ * If max_bytes != 0, consume at most max_bytes of the mb;
+ * otherwise scan up to the 16-bit null (limit 0xffff).
+ * mb->scan is word aligned before anything is read.
+ *
+ * Returns 0 on success (and only then sets *strpp), or
+ * SMB_MSGBUF_UNDERFLOW if the data runs past mb->end, an
+ * allocation fails, or smb_wcstombs rejects the string.
+ * See also: mbc_marshal_get_unicode_string
+ */
+static int
+msgbuf_get_unicode_string(smb_msgbuf_t *mb, char **strpp, int max_bytes)
+{
+char *mbs;
+uint16_t *wcsbuf = NULL;
+int wcslen; // wchar count
+int datalen; // OtW data len
+size_t mbsmax; // max len of ret str
+size_t rlen;
+
+if (max_bytes == 0)
+max_bytes = 0xffff;
+
+/*
+ * Unicode strings are always word aligned.
+ */
+smb_msgbuf_word_align(mb);
+
+/*
+ * Determine the OtW data length and (WC) string length
+ * Note: wcslen counts 16-bit wide_chars (w/o null),
+ * and datalen is how much we move mb->scan
+ *
+ * NOTE(review): with an odd max_bytes this loop can consume
+ * max_bytes + 1 bytes; confirm callers pass even lengths.
+ */
+datalen = 0;
+wcslen = 0;
+for (;;) {
+if (datalen >= max_bytes)
+break;
+/*
+ * in-line smb_msgbuf_has_space: each step reads TWO
+ * bytes, so both must lie before mb->end. Checking
+ * for one byte only would read (and later copy) one
+ * byte past the end when a single byte remains.
+ */
+if ((mb->end - mb->scan) < (datalen + 2))
+return (SMB_MSGBUF_UNDERFLOW);
+datalen += 2;
+if (mb->scan[datalen - 2] == 0 &&
+mb->scan[datalen - 1] == 0)
+break;
+wcslen++;
+}
+
+/*
+ * Get datalen bytes into a temp buffer
+ * sized with room to add a (WC) null.
+ * Note: wcsbuf has little-endian order
+ */
+wcsbuf = smb_msgbuf_malloc(mb, datalen + 2);
+if (wcsbuf == NULL)
+return (SMB_MSGBUF_UNDERFLOW);
+bcopy(mb->scan, wcsbuf, datalen);
+mb->scan += datalen;
+wcsbuf[wcslen] = 0;
+
+/*
+ * Get the buffer we'll return and convert to UTF-8.
+ * May take as much as 4X the number of wide chars.
+ */
+mbsmax = wcslen * MTS_MB_CUR_MAX;
+mbs = smb_msgbuf_malloc(mb, mbsmax + 1);
+if (mbs == NULL)
+return (SMB_MSGBUF_UNDERFLOW);
+rlen = smb_wcstombs(mbs, wcsbuf, mbsmax);
+if (rlen == (size_t)-1)
+return (SMB_MSGBUF_UNDERFLOW);
+if (rlen > mbsmax)
+rlen = mbsmax;
+/* smb_wcstombs only terminates when there is room; do it here */
+mbs[rlen] = '\0';
+*strpp = mbs;
+return (0);
+}
/*
* smb_msgbuf_encode
@@ -466,8 +562,6 @@ buf_encode(smb_msgbuf_t *mb, char *fmt, va_list ap)
uint8_t *bvalp;
char *cvalp;
uint8_t c;
- smb_wchar_t wchar;
- int count;
boolean_t repc_specified;
int repc;
int rc;
@@ -571,80 +665,23 @@ buf_encode(smb_msgbuf_t *mb, char *fmt, va_list ap)
goto unicode_translation;
/* FALLTHROUGH */
- case 's': /* put string */
+ case 's': /* put OEM string */
cvalp = va_arg(ap, char *);
- if (!repc_specified) {
- repc = smb_sbequiv_strlen(cvalp);
- if (repc == -1)
- return (SMB_MSGBUF_OVERFLOW);
- if (!(mb->flags & SMB_MSGBUF_NOTERM))
- repc++;
- }
- if (smb_msgbuf_has_space(mb, repc) == 0)
- return (SMB_MSGBUF_OVERFLOW);
- while (repc > 0) {
- count = smb_mbtowc(&wchar, cvalp,
- MTS_MB_CHAR_MAX);
- if (count < 0)
- return (SMB_MSGBUF_DATA_ERROR);
- cvalp += count;
- if (wchar == 0)
- break;
- *mb->scan++ = (uint8_t)wchar;
- repc--;
- if (wchar & 0xff00) {
- *mb->scan++ = wchar >> 8;
- repc--;
- }
- }
- if (*cvalp == '\0' && repc > 0 &&
- (mb->flags & SMB_MSGBUF_NOTERM) == 0) {
- *mb->scan++ = 0;
- repc--;
- }
- while (repc > 0) {
- *mb->scan++ = 0;
- repc--;
- }
+ if (!repc_specified)
+ repc = 0;
+ rc = msgbuf_put_oem_string(mb, cvalp, repc);
+ if (rc != 0)
+ return (rc);
break;
- case 'U': /* put unicode string */
+ case 'U': /* put UTF-16 string */
unicode_translation:
- /*
- * Unicode strings are always word aligned.
- */
- smb_msgbuf_word_align(mb);
cvalp = va_arg(ap, char *);
- if (!repc_specified) {
- repc = smb_wcequiv_strlen(cvalp);
- if (!(mb->flags & SMB_MSGBUF_NOTERM))
- repc += 2;
- }
- if (!smb_msgbuf_has_space(mb, repc))
- return (SMB_MSGBUF_OVERFLOW);
- while (repc >= 2) {
- count = smb_mbtowc(&wchar, cvalp,
- MTS_MB_CHAR_MAX);
- if (count < 0)
- return (SMB_MSGBUF_DATA_ERROR);
- cvalp += count;
- if (wchar == 0)
- break;
-
- LE_OUT16(mb->scan, wchar);
- mb->scan += 2;
- repc -= 2;
- }
- if (*cvalp == '\0' && repc >= 2 &&
- (mb->flags & SMB_MSGBUF_NOTERM) == 0) {
- LE_OUT16(mb->scan, 0);
- mb->scan += 2;
- repc -= 2;
- }
- while (repc > 0) {
- *mb->scan++ = 0;
- repc--;
- }
+ if (!repc_specified)
+ repc = 0;
+ rc = msgbuf_put_unicode_string(mb, cvalp, repc);
+ if (rc != 0)
+ return (rc);
break;
case 'M':
@@ -665,6 +702,141 @@ unicode_translation:
return (SMB_MSGBUF_SUCCESS);
}
+/*
+ * msgbuf_put_oem_string
+ *
+ * Encode the UTF-8 string mbs into the message buffer mb using
+ * the OEM codeset.
+ *
+ * With repc <= 0 the whole converted string is written, followed
+ * by a null unless SMB_MSGBUF_NOTERM is set in mb->flags.
+ * With repc > 0 exactly repc bytes are written: the converted
+ * string is truncated, or padded with nulls, to that length.
+ *
+ * Returns 0 on success, SMB_MSGBUF_DATA_ERROR if mbs cannot be
+ * converted, SMB_MSGBUF_OVERFLOW if mb lacks room for the data,
+ * or SMB_MSGBUF_UNDERFLOW if the temporary buffer can't be had.
+ * See also: mbc_marshal_put_oem_string
+ */
+static int
+msgbuf_put_oem_string(smb_msgbuf_t *mb, char *mbs, int repc)
+{
+uint8_t *cvt = NULL;
+uint8_t *src;
+int srclen;
+int cvtlen;
+
+/* Length of the OEM form of mbs, without the terminator. */
+srclen = smb_sbequiv_strlen(mbs);
+if (srclen == -1)
+return (SMB_MSGBUF_DATA_ERROR);
+
+/* No explicit count: whole string, plus null unless NOTERM. */
+if (repc <= 0) {
+repc = srclen;
+if (!(mb->flags & SMB_MSGBUF_NOTERM))
+repc += sizeof (char);
+}
+if (!smb_msgbuf_has_space(mb, repc))
+return (SMB_MSGBUF_OVERFLOW);
+
+/*
+ * Convert into scratch space obtained from the msgbuf
+ * (released by smb_msgbuf_term), then force termination.
+ */
+cvt = smb_msgbuf_malloc(mb, srclen + 1);
+if (cvt == NULL)
+return (SMB_MSGBUF_UNDERFLOW);
+cvtlen = smb_mbstooem(cvt, mbs, srclen);
+if (cvtlen < 0)
+return (SMB_MSGBUF_DATA_ERROR);
+cvt[(cvtlen > srclen) ? srclen : cvtlen] = '\0';
+
+/*
+ * Emit exactly repc bytes. Once the terminator is reached
+ * src stops advancing, so the remainder is null padding.
+ */
+for (src = cvt; repc > 0; repc--) {
+*mb->scan++ = *src;
+if (*src != '\0')
+src++;
+}
+
+return (0);
+}
+
+/*
+ * msgbuf_put_unicode_string
+ *
+ * Encode the UTF-8 string mbs into the message buffer mb as
+ * little-endian UTF-16, after word-aligning mb->scan.
+ *
+ * With repc <= 0 the whole converted string is written, followed
+ * by a 16-bit null unless SMB_MSGBUF_NOTERM is set in mb->flags.
+ * With repc > 0 exactly repc bytes are written: the converted
+ * string is truncated, or padded with nulls, to that length
+ * (an odd final byte is written as a single null).
+ *
+ * Returns 0 on success, SMB_MSGBUF_DATA_ERROR if mbs cannot be
+ * converted, SMB_MSGBUF_OVERFLOW if mb lacks room for the data,
+ * or SMB_MSGBUF_UNDERFLOW if the temporary buffer can't be had.
+ * See also: mbc_marshal_put_unicode_string
+ */
+static int
+msgbuf_put_unicode_string(smb_msgbuf_t *mb, char *mbs, int repc)
+{
+smb_wchar_t *cvt = NULL;
+smb_wchar_t *src;
+size_t nbytes;
+size_t nwchars;
+size_t ncvt;
+
+/* UTF-16 data starts on a word boundary. */
+smb_msgbuf_word_align(mb);
+
+/* Size in bytes of the UTF-16 form, without the terminator. */
+nbytes = smb_wcequiv_strlen(mbs);
+if (nbytes == (size_t)-1)
+return (SMB_MSGBUF_DATA_ERROR);
+
+/* No explicit count: whole string, plus null unless NOTERM. */
+if (repc <= 0) {
+repc = (int)nbytes;
+if (!(mb->flags & SMB_MSGBUF_NOTERM))
+repc += sizeof (smb_wchar_t);
+}
+if (!smb_msgbuf_has_space(mb, repc))
+return (SMB_MSGBUF_OVERFLOW);
+
+/*
+ * Convert into scratch space obtained from the msgbuf
+ * (released by smb_msgbuf_term), then force termination.
+ */
+nwchars = nbytes / 2;
+cvt = smb_msgbuf_malloc(mb, nbytes + 2);
+if (cvt == NULL)
+return (SMB_MSGBUF_UNDERFLOW);
+ncvt = smb_mbstowcs(cvt, mbs, nwchars);
+if (ncvt == (size_t)-1)
+return (SMB_MSGBUF_DATA_ERROR);
+cvt[(ncvt > nwchars) ? nwchars : ncvt] = 0;
+
+/*
+ * Emit repc bytes, two at a time, keeping little-endian
+ * order. Once the terminator is reached src stops
+ * advancing, so the remainder is null padding.
+ */
+for (src = cvt; repc > 1; repc -= sizeof (smb_wchar_t)) {
+smb_wchar_t wc = LE_IN16(src);
+LE_OUT16(mb->scan, wc);
+mb->scan += 2;
+if (wc != 0)
+src++;
+}
+/* An odd count leaves one trailing pad byte. */
+if (repc > 0)
+*mb->scan++ = '\0';
+
+return (0);
+}
/*
* smb_msgbuf_malloc
diff --git a/usr/src/common/smbsrv/smb_string.c b/usr/src/common/smbsrv/smb_string.c
index 3d2abc474b..7922d84916 100644
--- a/usr/src/common/smbsrv/smb_string.c
+++ b/usr/src/common/smbsrv/smb_string.c
@@ -174,8 +174,8 @@ smb_islower(int c)
* If the specified character is lowercase, the uppercase value will
* be returned. Otherwise the original value will be returned.
*/
-int
-smb_toupper(int c)
+uint32_t
+smb_toupper(uint32_t c)
{
uint16_t mask = is_unicode ? 0xffff : 0xff;
@@ -187,8 +187,8 @@ smb_toupper(int c)
* If the specified character is uppercase, the lowercase value will
* be returned. Otherwise the original value will be returned.
*/
-int
-smb_tolower(int c)
+uint32_t
+smb_tolower(uint32_t c)
{
uint16_t mask = is_unicode ? 0xffff : 0xff;
@@ -204,7 +204,7 @@ smb_tolower(int c)
char *
smb_strupr(char *s)
{
- smb_wchar_t c;
+ uint32_t c;
char *p = s;
while (*p) {
@@ -235,7 +235,7 @@ smb_strupr(char *s)
char *
smb_strlwr(char *s)
{
- smb_wchar_t c;
+ uint32_t c;
char *p = s;
while (*p) {
@@ -264,7 +264,7 @@ smb_strlwr(char *s)
int
smb_isstrlwr(const char *s)
{
- smb_wchar_t c;
+ uint32_t c;
int n;
const char *p = s;
@@ -295,7 +295,7 @@ smb_isstrlwr(const char *s)
int
smb_isstrupr(const char *s)
{
- smb_wchar_t c;
+ uint32_t c;
int n;
const char *p = s;
@@ -440,7 +440,7 @@ smb_unicode_init(void)
* unc_server server or domain name with no leading/trailing '\'
* unc_share share name with no leading/trailing '\'
* unc_path relative path to the share with no leading/trailing '\'
- * it is valid for unc_path to be NULL.
+ * it is valid for unc_path to be NULL.
*
* Upon successful return of this function, smb_unc_free()
* MUST be called when returned 'unc' is no longer needed.
diff --git a/usr/src/common/smbsrv/smb_utf8.c b/usr/src/common/smbsrv/smb_utf8.c
index 3b84363dbd..8446fb0b9e 100644
--- a/usr/src/common/smbsrv/smb_utf8.c
+++ b/usr/src/common/smbsrv/smb_utf8.c
@@ -22,43 +22,25 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
- * Multibyte/wide-char conversion routines. Wide-char encoding provides
- * a fixed size character encoding that maps to the Unicode 16-bit
- * (UCS-2) character set standard. Multibyte or UCS transformation
- * format (UTF) encoding is a variable length character encoding scheme
- * that s compatible with existing ASCII characters and guarantees that
- * the resultant strings do not contain embedded null characters. Both
- * types of encoding provide a null terminator: single byte for UTF-8
- * and a wide-char null for Unicode. See RFC 2044.
- *
- * The table below illustrates the UTF-8 encoding scheme. The letter x
- * indicates bits available for encoding the character value.
- *
- * UCS-2 UTF-8 octet sequence (binary)
- * 0x0000-0x007F 0xxxxxxx
- * 0x0080-0x07FF 110xxxxx 10xxxxxx
- * 0x0800-0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * RFC 2044
- * UTF-8,a transformation format of UNICODE and ISO 10646
- * F. Yergeau
- * Alis Technologies
- * October 1996
+ * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire
+ * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs).
*/
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/types.h>
#include <sys/sunddi.h>
-#else
+#else /* _KERNEL || _FAKE_KERNEL */
#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
#include <strings.h>
-#endif
+#include <iconv.h>
+#include <assert.h>
+#endif /* _KERNEL || _FAKE_KERNEL */
+#include <sys/u8_textprep.h>
#include <smbsrv/string.h>
@@ -75,26 +57,37 @@
* multibyte character is encountered.
*/
size_t
-smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars)
+smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars)
{
- int len;
- smb_wchar_t *start = wcstring;
-
- while (nwchars--) {
- len = smb_mbtowc(wcstring, mbstring, MTS_MB_CHAR_MAX);
- if (len < 0) {
- *wcstring = 0;
- return ((size_t)-1);
- }
+ size_t mbslen, wcslen;
+ int err;
- if (*mbstring == 0)
- break;
+ /* NULL or empty input is allowed. */
+ if (mbs == NULL || *mbs == '\0') {
+ if (wcs != NULL && nwchars > 0)
+ *wcs = 0;
+ return (0);
+ }
- ++wcstring;
- mbstring += len;
+ /*
+ * Traditional mbstowcs(3C) allows wcs==NULL to get the length.
+ * SMB never calls it that way, so reject it (-1) rather than crash.
+ */
+ if (wcs == NULL) {
+ return ((size_t)-1);
}
- return (wcstring - start);
+ mbslen = strlen(mbs);
+ wcslen = nwchars;
+ err = uconv_u8tou16((const uchar_t *)mbs, &mbslen,
+ wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN);
+ if (err != 0)
+ return ((size_t)-1);
+
+ if (wcslen < nwchars)
+ wcs[wcslen] = 0;
+
+ return (wcslen);
}
@@ -113,49 +106,36 @@ smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars)
* states. Otherwise it should be return 0.
*
* If mbchar is non-null, returns the number of bytes processed in
- * mbchar. If mbchar is invalid, returns -1.
+ * mbchar. If mbchar is null, convert the null (wcharp=0) but
+ * return length zero. If mbchar is invalid, returns -1.
*/
int /*ARGSUSED*/
-smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes)
+smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes)
{
- unsigned char mbyte;
- smb_wchar_t wide_char;
- int count;
- int bytes_left;
+ uint32_t wide_char;
+ int count, err;
+ size_t mblen;
+ size_t wclen;
if (mbchar == NULL)
return (0); /* no shift states */
- /* 0xxxxxxx -> 1 byte ASCII encoding */
- if (((mbyte = *mbchar++) & 0x80) == 0) {
- if (wcharp)
- *wcharp = (smb_wchar_t)mbyte;
-
- return (mbyte ? 1 : 0);
- }
-
- /* 10xxxxxx -> invalid first byte */
- if ((mbyte & 0x40) == 0)
+ /*
+ * How many bytes in this symbol?
+ */
+ count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err);
+ if (count < 0)
return (-1);
- wide_char = mbyte;
- if ((mbyte & 0x20) == 0) {
- wide_char &= 0x1f;
- bytes_left = 1;
- } else if ((mbyte & 0x10) == 0) {
- wide_char &= 0x0f;
- bytes_left = 2;
- } else {
+ mblen = count;
+ wclen = 1;
+ err = uconv_u8tou32((const uchar_t *)mbchar, &mblen,
+ &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN);
+ if (err != 0)
return (-1);
- }
-
- count = 1;
- while (bytes_left--) {
- if (((mbyte = *mbchar++) & 0xc0) != 0x80)
- return (-1);
-
- count++;
- wide_char = (wide_char << 6) | (mbyte & 0x3f);
+ if (wclen == 0) {
+ wide_char = 0;
+ count = 0;
}
if (wcharp)
@@ -173,25 +153,27 @@ smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes)
* mbchar must be large enough to accommodate the multibyte character.
*
* Returns the numberof bytes written to mbchar.
+ * Note: handles null like any 1-byte char.
*/
int
-smb_wctomb(char *mbchar, smb_wchar_t wchar)
+smb_wctomb(char *mbchar, uint32_t wchar)
{
- if ((wchar & ~0x7f) == 0) {
- *mbchar = (char)wchar;
- return (1);
- }
+ char junk[MTS_MB_CUR_MAX+1];
+ size_t mblen;
+ size_t wclen;
+ int err;
- if ((wchar & ~0x7ff) == 0) {
- *mbchar++ = (wchar >> 6) | 0xc0;
- *mbchar = (wchar & 0x3f) | 0x80;
- return (2);
- }
+ if (mbchar == NULL)
+ mbchar = junk;
- *mbchar++ = (wchar >> 12) | 0xe0;
- *mbchar++ = ((wchar >> 6) & 0x3f) | 0x80;
- *mbchar = (wchar & 0x3f) | 0x80;
- return (3);
+ mblen = MTS_MB_CUR_MAX;
+ wclen = 1;
+ err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen,
+ UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL);
+ if (err != 0)
+ return (-1);
+
+ return ((int)mblen);
}
@@ -205,46 +187,46 @@ smb_wctomb(char *mbchar, smb_wchar_t wchar)
* terminated if there is room.
*
* Returns the number of bytes converted, not counting the terminating
- * null byte.
+ * null byte. Returns -1 if an invalid WC sequence is encountered.
*/
size_t
-smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes)
+smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes)
{
- char *start = mbstring;
- const smb_wchar_t *wcp = wcstring;
- smb_wchar_t wide_char = 0;
- char buf[4];
- size_t len;
+ size_t mbslen, wcslen;
+ int err;
- if ((mbstring == NULL) || (wcstring == NULL))
+ /* NULL or empty input is allowed. */
+ if (wcs == NULL || *wcs == 0) {
+ if (mbs != NULL && nbytes > 0)
+ *mbs = '\0';
return (0);
+ }
- while (nbytes > MTS_MB_CHAR_MAX) {
- wide_char = *wcp++;
- len = smb_wctomb(mbstring, wide_char);
-
- if (wide_char == 0)
- /*LINTED E_PTRDIFF_OVERFLOW*/
- return (mbstring - start);
-
- mbstring += len;
- nbytes -= len;
+ /*
+ * Traditional wcstombs(3C) allows mbs==NULL to get the length.
+ * SMB never calls it that way, so reject it (-1) rather than crash.
+ */
+ if (mbs == NULL) {
+ return ((size_t)-1);
}
- while (wide_char && nbytes) {
- wide_char = *wcp++;
- if ((len = smb_wctomb(buf, wide_char)) > nbytes) {
- *mbstring = 0;
- break;
- }
+ /*
+ * Compute wcslen
+ */
+ wcslen = 0;
+ while (wcs[wcslen] != 0)
+ wcslen++;
- bcopy(buf, mbstring, len);
- mbstring += len;
- nbytes -= len;
- }
+ mbslen = nbytes;
+ err = uconv_u16tou8(wcs, &wcslen,
+ (uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN);
+ if (err != 0)
+ return ((size_t)-1);
+
+ if (mbslen < nbytes)
+ mbs[mbslen] = '\0';
- /*LINTED E_PTRDIFF_OVERFLOW*/
- return (mbstring - start);
+ return (mbslen);
}
@@ -256,7 +238,7 @@ smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes)
size_t
smb_wcequiv_strlen(const char *mbs)
{
- smb_wchar_t wide_char;
+ uint32_t wide_char;
size_t bytes;
size_t len = 0;
@@ -264,9 +246,15 @@ smb_wcequiv_strlen(const char *mbs)
bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
if (bytes == ((size_t)-1))
return ((size_t)-1);
+ mbs += bytes;
len += sizeof (smb_wchar_t);
- mbs += bytes;
+ if (bytes > 3) {
+ /*
+ * Extended unicode, so TWO smb_wchar_t
+ */
+ len += sizeof (smb_wchar_t);
+ }
}
return (len);
@@ -275,25 +263,38 @@ smb_wcequiv_strlen(const char *mbs)
/*
* Returns the number of bytes that would be written if the multi-
- * byte string mbs was converted to a single byte character string,
- * not counting the terminating null character.
+ * byte string mbs was converted to an OEM character string,
+ * (smb_mbstooem) not counting the terminating null character.
*/
size_t
smb_sbequiv_strlen(const char *mbs)
{
- smb_wchar_t wide_char;
size_t nbytes;
size_t len = 0;
while (*mbs) {
- nbytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
+ nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX);
if (nbytes == ((size_t)-1))
return ((size_t)-1);
+ if (nbytes == 0)
+ break;
- if (wide_char & 0xFF00)
- len += sizeof (smb_wchar_t);
- else
- ++len;
+ if (nbytes == 1) {
+ /* ASCII */
+ len++;
+ } else if (nbytes < 8) {
+ /* Compute OEM length */
+ char mbsbuf[8];
+ uint8_t oembuf[8];
+ int oemlen;
+ (void) strlcpy(mbsbuf, mbs, nbytes+1);
+ oemlen = smb_mbstooem(oembuf, mbsbuf, 8);
+ if (oemlen < 0)
+ return ((size_t)-1);
+ len += oemlen;
+ } else {
+ return ((size_t)-1);
+ }
mbs += nbytes;
}
@@ -301,106 +302,174 @@ smb_sbequiv_strlen(const char *mbs)
return (len);
}
+/*
+ * Convert OEM strings to/from internal (UTF-8) form.
+ *
+ * We rarely encounter these anymore because all modern
+ * SMB clients use Unicode (UTF-16). The few cases where
+ * this IS still called are normally using ASCII, i.e.
+ * tag names etc. so short-cut those cases. If we get
+ * something non-ASCII we have to call iconv.
+ *
+ * If we were to really support OEM code pages, we would
+ * need to have a way to set the OEM code page from some
+ * configuration value. For now it's always CP850.
+ * See also ./smb_oem.c
+ */
+static char smb_oem_codepage[32] = "CP850";
/*
- * stombs
+ * smb_oemtombs
*
- * Convert a regular null terminated string 'string' to a UTF-8 encoded
- * null terminated multi-byte string 'mbstring'. Only full converted
- * UTF-8 characters will be written 'mbstring'. If a character will not
- * fit within the remaining buffer space or 'mbstring' will overflow
- * max_mblen, the conversion process will be terminated and 'mbstring'
- * will be null terminated.
+ * Convert a null terminated OEM string 'oems' to a UTF-8 string
+ * no longer than max_mblen (null terminated if space).
*
- * Returns the number of bytes written to 'mbstring', excluding the
- * terminating null character.
+ * If the input string contains invalid OEM characters, a value
+ * of -1 will be returned. Otherwise returns the length of 'mbs',
+ * excluding the terminating null character.
*
* If either mbstring or string is a null pointer, -1 is returned.
*/
int
-smb_stombs(char *mbstring, char *string, int max_mblen)
+smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen)
{
- char *start = mbstring;
- unsigned char *p = (unsigned char *)string;
- int space_left = max_mblen;
- int len;
- smb_wchar_t wide_char;
- char buf[4];
-
- if (!mbstring || !string)
+ uchar_t *p;
+ int oemlen;
+ int rlen;
+ boolean_t need_iconv = B_FALSE;
+
+ if (mbs == NULL || oems == NULL)
return (-1);
- while (*p && space_left > 2) {
- wide_char = *p++;
- len = smb_wctomb(mbstring, wide_char);
- mbstring += len;
- space_left -= len;
+ /*
+ * Check if the oems is all ASCII (and get the length
+ * while we're at it) so we know if we need to iconv.
+ * We usually can avoid the iconv calls.
+ */
+ oemlen = 0;
+ p = (uchar_t *)oems;
+ while (*p != '\0') {
+ oemlen++;
+ if (*p & 0x80)
+ need_iconv = B_TRUE;
+ p++;
}
- if (*p) {
- wide_char = *p;
- if ((len = smb_wctomb(buf, wide_char)) < 2) {
- *mbstring = *buf;
- mbstring += len;
- space_left -= len;
- }
+ if (need_iconv) {
+ int rc;
+ char *obuf = mbs;
+ size_t olen = max_mblen;
+ size_t ilen = oemlen;
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
+ char *ibuf = (char *)oems;
+ kiconv_t ic;
+ int err;
+
+ ic = kiconv_open("UTF-8", smb_oem_codepage);
+ if (ic == (kiconv_t)-1)
+ goto just_copy;
+ rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
+ (void) kiconv_close(ic);
+#else /* _KERNEL || _FAKE_KERNEL */
+ const char *ibuf = (char *)oems;
+ iconv_t ic;
+ ic = iconv_open("UTF-8", smb_oem_codepage);
+ if (ic == (iconv_t)-1)
+ goto just_copy;
+ rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
+ (void) iconv_close(ic);
+#endif /* _KERNEL || _FAKE_KERNEL */
+ if (rc < 0)
+ return (-1);
+ /* Return val. is output bytes. */
+ rlen = (max_mblen - olen);
+ } else {
+ just_copy:
+ rlen = oemlen;
+ if (rlen > max_mblen)
+ rlen = max_mblen;
+ bcopy(oems, mbs, rlen);
}
+ if (rlen < max_mblen)
+ mbs[rlen] = '\0';
- *mbstring = '\0';
-
- /*LINTED E_PTRDIFF_OVERFLOW*/
- return (mbstring - start);
+ return (rlen);
}
-
/*
- * mbstos
+ * smb_mbstooem
*
- * Convert a null terminated multi-byte string 'mbstring' to a regular
- * null terminated string 'string'. A 1-byte character in 'mbstring'
- * maps to a 1-byte character in 'string'. A 2-byte character in
- * 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null.
- * Otherwise the upper byte null will be discarded to ensure that the
- * output stream does not contain embedded null characters.
+ * Convert a null terminated multi-byte string 'mbs' to an OEM string
+ * no longer than max_oemlen (null terminated if space).
*
- * If the input stream contains invalid multi-byte characters, a value
- * of -1 will be returned. Otherwise the length of 'string', excluding
- * the terminating null character, is returned.
+ * If the input string contains invalid multi-byte characters, a value
+ * of -1 will be returned. Otherwise returns the length of 'oems',
+ * excluding the terminating null character.
*
* If either mbstring or string is a null pointer, -1 is returned.
*/
int
-smb_mbstos(char *string, const char *mbstring)
+smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen)
{
- smb_wchar_t wc;
- unsigned char *start = (unsigned char *)string;
- int len;
+ uchar_t *p;
+ int mbslen;
+ int rlen;
+ boolean_t need_iconv = B_FALSE;
- if (string == NULL || mbstring == NULL)
+ if (oems == NULL || mbs == NULL)
return (-1);
- while (*mbstring) {
- if ((len = smb_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) {
- *string = 0;
- return (-1);
- }
-
- if (wc & 0xFF00) {
- /*LINTED E_BAD_PTR_CAST_ALIGN*/
- *((smb_wchar_t *)string) = wc;
- string += sizeof (smb_wchar_t);
- }
- else
- {
- *string = (unsigned char)wc;
- string++;
- }
-
- mbstring += len;
+ /*
+ * Check if the mbs is all ASCII (and get the length
+ * while we're at it) so we know if we need to iconv.
+ * We usually can avoid the iconv calls.
+ */
+ mbslen = 0;
+ p = (uchar_t *)mbs;
+ while (*p != '\0') {
+ mbslen++;
+ if (*p & 0x80)
+ need_iconv = B_TRUE;
+ p++;
}
- *string = 0;
+ if (need_iconv) {
+ int rc;
+ char *obuf = (char *)oems;
+ size_t olen = max_oemlen;
+ size_t ilen = mbslen;
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
+ char *ibuf = (char *)mbs;
+ kiconv_t ic;
+ int err;
+
+ ic = kiconv_open(smb_oem_codepage, "UTF-8");
+ if (ic == (kiconv_t)-1)
+ goto just_copy;
+ rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
+ (void) kiconv_close(ic);
+#else /* _KERNEL || _FAKE_KERNEL */
+ const char *ibuf = mbs;
+ iconv_t ic;
+ ic = iconv_open(smb_oem_codepage, "UTF-8");
+ if (ic == (iconv_t)-1)
+ goto just_copy;
+ rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
+ (void) iconv_close(ic);
+#endif /* _KERNEL || _FAKE_KERNEL */
+ if (rc < 0)
+ return (-1);
+ /* Return val. is output bytes. */
+ rlen = (max_oemlen - olen);
+ } else {
+ just_copy:
+ rlen = mbslen;
+ if (rlen > max_oemlen)
+ rlen = max_oemlen;
+ bcopy(mbs, oems, rlen);
+ }
+ if (rlen < max_oemlen)
+ oems[rlen] = '\0';
- /*LINTED E_PTRDIFF_OVERFLOW*/
- return ((unsigned char *)string - start);
+ return (rlen);
}