4 files changed, 590 insertions, 349 deletions
diff --git a/usr/src/common/smbsrv/smb_match.c b/usr/src/common/smbsrv/smb_match.c
index e687e3cc9f..b35833ff29 100644
--- a/usr/src/common/smbsrv/smb_match.c
+++ b/usr/src/common/smbsrv/smb_match.c
@@ -142,7 +142,7 @@ smb_match_private(const char *pat, const char *str, struct match_priv *priv)
 	const char	*limit;
 	char		pc;		/* current pattern char */
 	int		rc;
-	smb_wchar_t	wcpat, wcstr;	/* current wchar in pat, str */
+	uint32_t	wcpat, wcstr;	/* current wchar in pat, str */
 	int		nbpat, nbstr;	/* multi-byte length of it */
 
 	if (priv->depth >= SMB_MATCH_DEPTH_MAX)
diff --git a/usr/src/common/smbsrv/smb_msgbuf.c b/usr/src/common/smbsrv/smb_msgbuf.c
index 54cb75e066..b11cd39a50 100644
--- a/usr/src/common/smbsrv/smb_msgbuf.c
+++ b/usr/src/common/smbsrv/smb_msgbuf.c
@@ -22,7 +22,7 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -53,6 +53,12 @@ static int buf_encode(smb_msgbuf_t *, char *, va_list ap);
 static void *smb_msgbuf_malloc(smb_msgbuf_t *, size_t);
 static int smb_msgbuf_chkerc(char *text, int erc);
 
+static int msgbuf_get_oem_string(smb_msgbuf_t *, char **, int);
+static int msgbuf_get_unicode_string(smb_msgbuf_t *, char **, int);
+static int msgbuf_put_oem_string(smb_msgbuf_t *, char *, int);
+static int msgbuf_put_unicode_string(smb_msgbuf_t *, char *, int);
+
+
 /*
  * Returns the offset or number of bytes used within the buffer.
  */
@@ -177,7 +183,7 @@ smb_msgbuf_term(smb_msgbuf_t *mb)
  * Decode a smb_msgbuf buffer as indicated by the format string into
  * the variable arg list. This is similar to a scanf operation.
  *
- * On success, returns the number of bytes encoded. Otherwise
+ * On success, returns the number of bytes decoded. Otherwise
  * returns a -ve error code.
  */
 int
@@ -213,15 +219,12 @@ smb_msgbuf_decode(smb_msgbuf_t *mb, char *fmt, ...)
 static int
 buf_decode(smb_msgbuf_t *mb, char *fmt, va_list ap)
 {
-	uint32_t ival;
 	uint8_t c;
 	uint8_t *bvalp;
 	uint16_t *wvalp;
 	uint32_t *lvalp;
 	uint64_t *llvalp;
-	char *cvalp;
 	char **cvalpp;
-	smb_wchar_t wchar;
 	boolean_t repc_specified;
 	int repc;
 	int rc;
@@ -324,75 +327,23 @@ buf_decode(smb_msgbuf_t *mb, char *fmt, va_list ap)
 				goto unicode_translation;
 			/*FALLTHROUGH*/
 
-		case 's': /* get string */
-			if (!repc_specified)
-				repc = strlen((const char *)mb->scan) + 1;
-			if (smb_msgbuf_has_space(mb, repc) == 0)
-				return (SMB_MSGBUF_UNDERFLOW);
-			if ((cvalp = smb_msgbuf_malloc(mb, repc * 2)) == 0)
-				return (SMB_MSGBUF_UNDERFLOW);
+		case 's': /* get OEM string */
 			cvalpp = va_arg(ap, char **);
-			*cvalpp = cvalp;
-			/* Translate OEM to mbs */
-			while (repc > 0) {
-				wchar = *mb->scan++;
-				repc--;
-				if (wchar == 0)
-					break;
-				ival = smb_wctomb(cvalp, wchar);
-				cvalp += ival;
-			}
-			*cvalp = '\0';
-			if (repc > 0)
-				mb->scan += repc;
+			if (!repc_specified)
+				repc = 0;
+			rc = msgbuf_get_oem_string(mb, cvalpp, repc);
+			if (rc != 0)
+				return (rc);
 			break;
 
-		case 'U': /* get unicode string */
+		case 'U': /* get UTF-16 string */
 unicode_translation:
-			/*
-			 * Unicode strings are always word aligned.
-			 * The malloc'd area is larger than the
-			 * original string because the UTF-8 chars
-			 * may be longer than the wide-chars.
-			 */
-			smb_msgbuf_word_align(mb);
-			if (!repc_specified) {
-				/*
-				 * Count bytes, including the null.
-				 */
-				uint8_t *tmp_scan = mb->scan;
-				repc = 2; /* the null */
-				while ((wchar = LE_IN16(tmp_scan)) != 0) {
-					tmp_scan += 2;
-					repc += 2;
-				}
-			}
-			if (smb_msgbuf_has_space(mb, repc) == 0)
-				return (SMB_MSGBUF_UNDERFLOW);
-			/*
-			 * Get space for translated string
-			 * Allocates worst-case size.
-			 */
-			if ((cvalp = smb_msgbuf_malloc(mb, repc * 2)) == 0)
-				return (SMB_MSGBUF_UNDERFLOW);
 			cvalpp = va_arg(ap, char **);
-			*cvalpp = cvalp;
-			/*
-			 * Translate unicode to mbs, stopping after
-			 * null or repc limit.
-			 */
-			while (repc >= 2) {
-				wchar = LE_IN16(mb->scan);
-				mb->scan += 2;
-				repc -= 2;
-				if (wchar == 0)
-					break;
-				ival = smb_wctomb(cvalp, wchar);
-				cvalp += ival;
-			}
-			*cvalp = '\0';
-			if (repc > 0)
-				mb->scan += repc;
+			if (!repc_specified)
+				repc = 0;
+			rc = msgbuf_get_unicode_string(mb, cvalpp, repc);
+			if (rc != 0)
+				return (rc);
 			break;
 
 		case 'M':
@@ -416,6 +367,151 @@ unicode_translation:
 	return (SMB_MSGBUF_SUCCESS);
 }
 
+/*
+ * msgbuf_get_oem_string
+ *
+ * Decode an OEM string, returning its UTF-8 form in strpp,
+ * allocated using smb_msgbuf_malloc (automatically freed).
+ * If max_bytes != 0, consume at most max_bytes of the mb.
+ * Returns 0 or SMB_MSGBUF_UNDERFLOW. See: mbc_marshal_get_oem_string
+ */
+static int
+msgbuf_get_oem_string(smb_msgbuf_t *mb, char **strpp, int max_bytes)
+{
+	char		*mbs;
+	uint8_t		*oembuf = NULL;
+	int		oemlen;		// len of OEM string, w/o null
+	int		datalen;	// OtW data len
+	int		mbsmax;		// max len of ret str
+	int		rlen;
+
+	if (max_bytes == 0)
+		max_bytes = 0xffff;
+
+	/*
+	 * Determine the OtW data length and OEM string length
+	 * Note: oemlen is the string length (w/o null) and
+	 * datalen is how much we move mb->scan
+	 */
+	datalen = 0;
+	oemlen = 0;
+	for (;;) {
+		if (datalen >= max_bytes)
+			break;
+		/* in-line smb_msgbuf_has_space */
+		if ((mb->scan + datalen) >= mb->end)
+			return (SMB_MSGBUF_UNDERFLOW);
+		datalen++;
+		if (mb->scan[datalen - 1] == 0)
+			break;
+		oemlen++;
+	}
+
+	/*
+	 * Get datalen bytes into a temp buffer
+	 * sized with room to add a null.
+	 * Free oembuf in smb_msgbuf_term
+	 */
+	oembuf = smb_msgbuf_malloc(mb, datalen + 1);
+	if (oembuf == NULL)
+		return (SMB_MSGBUF_UNDERFLOW);
+	bcopy(mb->scan, oembuf, datalen);
+	mb->scan += datalen;
+	oembuf[oemlen] = '\0';
+
+	/*
+	 * Get the buffer we'll return and convert to UTF-8.
+	 * An OEM byte can need 3 UTF-8 bytes (e.g. CP850 box drawing).
+	 */
+	mbsmax = oemlen * 3;
+	mbs = smb_msgbuf_malloc(mb, mbsmax + 1);
+	if (mbs == NULL)
+		return (SMB_MSGBUF_UNDERFLOW);
+	rlen = smb_oemtombs(mbs, oembuf, mbsmax);
+	if (rlen < 0)
+		return (SMB_MSGBUF_UNDERFLOW);
+	if (rlen > mbsmax)
+		rlen = mbsmax;
+	mbs[rlen] = '\0';
+	*strpp = mbs;
+	return (0);
+}
+
+/*
+ * msgbuf_get_unicode_string
+ *
+ * Decode a UTF-16 string, returning its UTF-8 form in strpp,
+ * allocated using smb_msgbuf_malloc (automatically freed).
+ * If max_bytes != 0, consume at most max_bytes of the mb.
+ * Returns 0 or SMB_MSGBUF_UNDERFLOW. See: mbc_marshal_get_unicode_string
+ */
+static int
+msgbuf_get_unicode_string(smb_msgbuf_t *mb, char **strpp, int max_bytes)
+{
+	char		*mbs;
+	uint16_t	*wcsbuf = NULL;
+	int		wcslen;		// wchar count
+	int		datalen;	// OtW data len
+	size_t		mbsmax;		// max len of ret str
+	size_t		rlen;
+
+	if (max_bytes == 0)
+		max_bytes = 0xffff;
+
+	/*
+	 * Unicode strings are always word aligned.
+	 */
+	smb_msgbuf_word_align(mb);
+
+	/*
+	 * Determine the OtW data length and (WC) string length
+	 * Note: wcslen counts 16-bit wide_chars (w/o null),
+	 * and datalen is how much we move mb->scan
+	 */
+	datalen = 0;
+	wcslen = 0;
+	for (;;) {
+		if (datalen >= max_bytes)
+			break;
+		/* in-line smb_msgbuf_has_space: need BOTH bytes of the wchar */
+		if ((mb->scan + datalen + 1) >= mb->end)
+			return (SMB_MSGBUF_UNDERFLOW);
+		datalen += 2;
+		if (mb->scan[datalen - 2] == 0 &&
+		    mb->scan[datalen - 1] == 0)
+			break;
+		wcslen++;
+	}
+
+	/*
+	 * Get datalen bytes into a temp buffer
+	 * sized with room to add a (WC) null.
+	 * Note: wcsbuf has little-endian order
+	 */
+	wcsbuf = smb_msgbuf_malloc(mb, datalen + 2);
+	if (wcsbuf == NULL)
+		return (SMB_MSGBUF_UNDERFLOW);
+	bcopy(mb->scan, wcsbuf, datalen);
+	mb->scan += datalen;
+	wcsbuf[wcslen] = 0;
+
+	/*
+	 * Get the buffer we'll return and convert to UTF-8.
+	 * May take as much as 4X the number of wide chars.
+	 */
+	mbsmax = wcslen * MTS_MB_CUR_MAX;
+	mbs = smb_msgbuf_malloc(mb, mbsmax + 1);
+	if (mbs == NULL)
+		return (SMB_MSGBUF_UNDERFLOW);
+	rlen = smb_wcstombs(mbs, wcsbuf, mbsmax);
+	if (rlen == (size_t)-1)
+		return (SMB_MSGBUF_UNDERFLOW);
+	if (rlen > mbsmax)
+		rlen = mbsmax;
+	mbs[rlen] = '\0';
+	*strpp = mbs;
+	return (0);
+}
 
 /*
  * smb_msgbuf_encode
@@ -466,8 +562,6 @@ buf_encode(smb_msgbuf_t *mb, char *fmt, va_list ap)
 	uint8_t *bvalp;
 	char *cvalp;
 	uint8_t c;
-	smb_wchar_t wchar;
-	int count;
 	boolean_t repc_specified;
 	int repc;
 	int rc;
@@ -571,80 +665,23 @@ buf_encode(smb_msgbuf_t *mb, char *fmt, va_list ap)
 				goto unicode_translation;
 			/* FALLTHROUGH */
 
-		case 's': /* put string */
+		case 's': /* put OEM string */
 			cvalp = va_arg(ap, char *);
-			if (!repc_specified) {
-				repc = smb_sbequiv_strlen(cvalp);
-				if (repc == -1)
-					return (SMB_MSGBUF_OVERFLOW);
-				if (!(mb->flags & SMB_MSGBUF_NOTERM))
-					repc++;
-			}
-			if (smb_msgbuf_has_space(mb, repc) == 0)
-				return (SMB_MSGBUF_OVERFLOW);
-			while (repc > 0) {
-				count = smb_mbtowc(&wchar, cvalp,
-				    MTS_MB_CHAR_MAX);
-				if (count < 0)
-					return (SMB_MSGBUF_DATA_ERROR);
-				cvalp += count;
-				if (wchar == 0)
-					break;
-				*mb->scan++ = (uint8_t)wchar;
-				repc--;
-				if (wchar & 0xff00) {
-					*mb->scan++ = wchar >> 8;
-					repc--;
-				}
-			}
-			if (*cvalp == '\0' && repc > 0 &&
-			    (mb->flags & SMB_MSGBUF_NOTERM) == 0) {
-				*mb->scan++ = 0;
-				repc--;
-			}
-			while (repc > 0) {
-				*mb->scan++ = 0;
-				repc--;
-			}
+			if (!repc_specified)
+				repc = 0;
+			rc = msgbuf_put_oem_string(mb, cvalp, repc);
+			if (rc != 0)
+				return (rc);
 			break;
 
-		case 'U': /* put unicode string */
+		case 'U': /* put UTF-16 string */
 unicode_translation:
-			/*
-			 * Unicode strings are always word aligned.
-			 */
-			smb_msgbuf_word_align(mb);
 			cvalp = va_arg(ap, char *);
-			if (!repc_specified) {
-				repc = smb_wcequiv_strlen(cvalp);
-				if (!(mb->flags & SMB_MSGBUF_NOTERM))
-					repc += 2;
-			}
-			if (!smb_msgbuf_has_space(mb, repc))
-				return (SMB_MSGBUF_OVERFLOW);
-			while (repc >= 2) {
-				count = smb_mbtowc(&wchar, cvalp,
-				    MTS_MB_CHAR_MAX);
-				if (count < 0)
-					return (SMB_MSGBUF_DATA_ERROR);
-				cvalp += count;
-				if (wchar == 0)
-					break;
-
-				LE_OUT16(mb->scan, wchar);
-				mb->scan += 2;
-				repc -= 2;
-			}
-			if (*cvalp == '\0' && repc >= 2 &&
-			    (mb->flags & SMB_MSGBUF_NOTERM) == 0) {
-				LE_OUT16(mb->scan, 0);
-				mb->scan += 2;
-				repc -= 2;
-			}
-			while (repc > 0) {
-				*mb->scan++ = 0;
-				repc--;
-			}
+			if (!repc_specified)
+				repc = 0;
+			rc = msgbuf_put_unicode_string(mb, cvalp, repc);
+			if (rc != 0)
+				return (rc);
 			break;
 
 		case 'M':
@@ -665,6 +702,141 @@ unicode_translation:
 	return (SMB_MSGBUF_SUCCESS);
 }
 
+/*
+ * Marshal a UTF-8 string (mbs) into mb, converting to OEM codeset.
+ * With repc <= 0, put the whole string plus a null terminator
+ * (no terminator if SMB_MSGBUF_NOTERM is set).  With repc > 0,
+ * put exactly repc bytes, truncating or null-padding as needed.
+ * Returns 0 or SMB_MSGBUF_* error. See: mbc_marshal_put_oem_string
+ */
+static int
+msgbuf_put_oem_string(smb_msgbuf_t *mb, char *mbs, int repc)
+{
+	uint8_t		*oembuf = NULL;
+	uint8_t		*s;
+	int		oemlen;
+	int		rlen;
+
+	/*
+	 * Compute length of converted OEM string,
+	 * NOT including null terminator
+	 */
+	if ((oemlen = smb_sbequiv_strlen(mbs)) == -1)
+		return (SMB_MSGBUF_DATA_ERROR);
+
+	/*
+	 * If repc not specified, put whole string + NULL,
+	 * otherwise will truncate or pad as needed.
+	 */
+	if (repc <= 0) {
+		repc = oemlen;
+		if ((mb->flags & SMB_MSGBUF_NOTERM) == 0)
+			repc += sizeof (char);
+	}
+	if (smb_msgbuf_has_space(mb, repc) == 0)
+		return (SMB_MSGBUF_OVERFLOW);
+
+	/*
+	 * Convert into a temporary buffer
+	 * Free oembuf in smb_msgbuf_term.
+	 */
+	oembuf = smb_msgbuf_malloc(mb, oemlen + 1);
+	if (oembuf == NULL)
+		return (SMB_MSGBUF_UNDERFLOW);
+	rlen = smb_mbstooem(oembuf, mbs, oemlen);
+	if (rlen < 0)
+		return (SMB_MSGBUF_DATA_ERROR);
+	if (rlen > oemlen)
+		rlen = oemlen;
+	oembuf[rlen] = '\0';
+
+	/*
+	 * Copy exactly repc bytes into the message; once the null
+	 * is reached 's' stops advancing, so the rest is null padding.
+	 */
+	s = oembuf;
+	while (repc > 0) {
+		*mb->scan++ = *s;
+		if (*s != '\0')
+			s++;
+		repc--;
+	}
+
+	return (0);
+}
+
+/*
+ * Marshal a UTF-8 string (mbs) into mb, converting to UTF-16.
+ * With repc <= 0, put the whole string plus a UTF-16 null
+ * (no terminator if SMB_MSGBUF_NOTERM is set).  With repc > 0,
+ * put exactly repc bytes, truncating or null-padding as needed.
+ * Returns 0 or SMB_MSGBUF_* error. See: mbc_marshal_put_unicode_string
+ */
+static int
+msgbuf_put_unicode_string(smb_msgbuf_t *mb, char *mbs, int repc)
+{
+	smb_wchar_t	*wcsbuf = NULL;
+	smb_wchar_t	*wp;
+	size_t		wcslen, wcsbytes;
+	size_t		rlen;
+
+	/* align to word boundary */
+	smb_msgbuf_word_align(mb);
+
+	/*
+	 * Compute length of converted UTF-16 string,
+	 * NOT including null terminator (in bytes).
+	 */
+	wcsbytes = smb_wcequiv_strlen(mbs);
+	if (wcsbytes == (size_t)-1)
+		return (SMB_MSGBUF_DATA_ERROR);
+
+	/*
+	 * If repc not specified, put whole string + NULL,
+	 * otherwise will truncate or pad as needed.
+	 */
+	if (repc <= 0) {
+		repc = (int)wcsbytes;
+		if ((mb->flags & SMB_MSGBUF_NOTERM) == 0)
+			repc += sizeof (smb_wchar_t);
+	}
+	if (smb_msgbuf_has_space(mb, repc) == 0)
+		return (SMB_MSGBUF_OVERFLOW);
+
+	/*
+	 * Convert into a temporary buffer
+	 * Free wcsbuf in smb_msgbuf_term
+	 */
+	wcslen = wcsbytes / 2;
+	wcsbuf = smb_msgbuf_malloc(mb, wcsbytes + 2);
+	if (wcsbuf == NULL)
+		return (SMB_MSGBUF_UNDERFLOW);
+	rlen = smb_mbstowcs(wcsbuf, mbs, wcslen);
+	if (rlen == (size_t)-1)
+		return (SMB_MSGBUF_DATA_ERROR);
+	if (rlen > wcslen)
+		rlen = wcslen;
+	wcsbuf[rlen] = 0;
+
+	/*
+	 * Copy the converted string into the message,
+	 * truncated or padded as required.  Preserve
+	 * little-endian order while copying.
+	 */
+	wp = wcsbuf;
+	while (repc > 1) {
+		smb_wchar_t wchar = LE_IN16(wp);
+		LE_OUT16(mb->scan, wchar);
+		mb->scan += 2;
+		if (wchar != 0)
+			wp++;
+		repc -= sizeof (smb_wchar_t);
+	}
+	if (repc > 0)
+		*mb->scan++ = '\0';
+
+	return (0);
+}
 
 /*
  * smb_msgbuf_malloc
diff --git a/usr/src/common/smbsrv/smb_string.c b/usr/src/common/smbsrv/smb_string.c
index 3d2abc474b..7922d84916 100644
--- a/usr/src/common/smbsrv/smb_string.c
+++ b/usr/src/common/smbsrv/smb_string.c
@@ -174,8 +174,8 @@ smb_islower(int c)
  * If the specified character is lowercase, the uppercase value will
  * be returned. Otherwise the original value will be returned.
  */
-int
-smb_toupper(int c)
+uint32_t
+smb_toupper(uint32_t c)
 {
 	uint16_t mask = is_unicode ? 0xffff : 0xff;
 
@@ -187,8 +187,8 @@ smb_toupper(int c)
  * If the specified character is uppercase, the lowercase value will
  * be returned. Otherwise the original value will be returned.
  */
-int
-smb_tolower(int c)
+uint32_t
+smb_tolower(uint32_t c)
 {
 	uint16_t mask = is_unicode ? 0xffff : 0xff;
 
@@ -204,7 +204,7 @@ smb_tolower(int c)
 char *
 smb_strupr(char *s)
 {
-	smb_wchar_t c;
+	uint32_t c;
 	char *p = s;
 
 	while (*p) {
@@ -235,7 +235,7 @@ smb_strupr(char *s)
 char *
 smb_strlwr(char *s)
 {
-	smb_wchar_t c;
+	uint32_t c;
 	char *p = s;
 
 	while (*p) {
@@ -264,7 +264,7 @@ smb_strlwr(char *s)
 int
 smb_isstrlwr(const char *s)
 {
-	smb_wchar_t c;
+	uint32_t c;
 	int n;
 	const char *p = s;
 
@@ -295,7 +295,7 @@ smb_isstrlwr(const char *s)
 int
 smb_isstrupr(const char *s)
 {
-	smb_wchar_t c;
+	uint32_t c;
 	int n;
 	const char *p = s;
 
@@ -440,7 +440,7 @@ smb_unicode_init(void)
  * unc_server	server or domain name with no leading/trailing '\'
  * unc_share	share name with no leading/trailing '\'
  * unc_path	relative path to the share with no leading/trailing '\'
- * 		it is valid for unc_path to be NULL.
+ *		it is valid for unc_path to be NULL.
  *
  * Upon successful return of this function, smb_unc_free()
  * MUST be called when returned 'unc' is no longer needed.
diff --git a/usr/src/common/smbsrv/smb_utf8.c b/usr/src/common/smbsrv/smb_utf8.c
index 3b84363dbd..8446fb0b9e 100644
--- a/usr/src/common/smbsrv/smb_utf8.c
+++ b/usr/src/common/smbsrv/smb_utf8.c
@@ -22,43 +22,25 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
- * Multibyte/wide-char conversion routines. Wide-char encoding provides
- * a fixed size character encoding that maps to the Unicode 16-bit
- * (UCS-2) character set standard. Multibyte or UCS transformation
- * format (UTF) encoding is a variable length character encoding scheme
- * that s compatible with existing ASCII characters and guarantees that
- * the resultant strings do not contain embedded null characters. Both
- * types of encoding provide a null terminator: single byte for UTF-8
- * and a wide-char null for Unicode. See RFC 2044.
- *
- * The table below illustrates the UTF-8 encoding scheme. The letter x
- * indicates bits available for encoding the character value.
- *
- *	UCS-2			UTF-8 octet sequence (binary)
- *	0x0000-0x007F	0xxxxxxx
- *	0x0080-0x07FF	110xxxxx 10xxxxxx
- *	0x0800-0xFFFF	1110xxxx 10xxxxxx 10xxxxxx
- *
- * RFC 2044
- * UTF-8,a transformation format of UNICODE and ISO 10646
- * F. Yergeau
- * Alis Technologies
- * October 1996
+ * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire
+ * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs).
  */
 
 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
 #include <sys/types.h>
 #include <sys/sunddi.h>
-#else
+#else	/* _KERNEL || _FAKE_KERNEL */
 #include <stdio.h>
 #include <stdlib.h>
-#include <assert.h>
 #include <strings.h>
-#endif
+#include <iconv.h>
+#include <assert.h>
+#endif	/* _KERNEL || _FAKE_KERNEL */
+#include <sys/u8_textprep.h>
 #include <smbsrv/string.h>
 
 
@@ -75,26 +57,37 @@
  * multibyte character is encountered.
  */
 size_t
-smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars)
+smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars)
 {
-	int len;
-	smb_wchar_t	*start = wcstring;
-
-	while (nwchars--) {
-		len = smb_mbtowc(wcstring, mbstring, MTS_MB_CHAR_MAX);
-		if (len < 0) {
-			*wcstring = 0;
-			return ((size_t)-1);
-		}
+	size_t mbslen, wcslen;
+	int err;
 
-		if (*mbstring == 0)
-			break;
+	/* NULL or empty input is allowed. */
+	if (mbs == NULL || *mbs == '\0') {
+		if (wcs != NULL && nwchars > 0)
+			*wcs = 0;
+		return (0);
+	}
 
-		++wcstring;
-		mbstring += len;
+	/*
+	 * Traditional mbstowcs(3C) allows wcs==NULL to get the length.
+	 * SMB never calls it that way, but let's future-proof.
+	 */
+	if (wcs == NULL) {
+		return ((size_t)-1);
 	}
 
-	return (wcstring - start);
+	mbslen = strlen(mbs);
+	wcslen = nwchars;
+	err = uconv_u8tou16((const uchar_t *)mbs, &mbslen,
+	    wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN);
+	if (err != 0)
+		return ((size_t)-1);
+
+	if (wcslen < nwchars)
+		wcs[wcslen] = 0;
+
+	return (wcslen);
 }
 
 
@@ -113,49 +106,36 @@ smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars)
  * states.  Otherwise it should be return 0.
  *
  * If mbchar is non-null, returns the number of bytes processed in
- * mbchar.  If mbchar is invalid, returns -1.
+ * mbchar.  If mbchar points at a null byte, store a null wide char
+ * (*wcharp = 0) but return length zero.  If invalid, returns -1.
  */
 int /*ARGSUSED*/
-smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes)
+smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes)
 {
-	unsigned char mbyte;
-	smb_wchar_t wide_char;
-	int count;
-	int bytes_left;
+	uint32_t wide_char;
+	int count, err;
+	size_t mblen;
+	size_t wclen;
 
 	if (mbchar == NULL)
 		return (0); /* no shift states */
 
-	/* 0xxxxxxx -> 1 byte ASCII encoding */
-	if (((mbyte = *mbchar++) & 0x80) == 0) {
-		if (wcharp)
-			*wcharp = (smb_wchar_t)mbyte;
-
-		return (mbyte ? 1 : 0);
-	}
-
-	/* 10xxxxxx -> invalid first byte */
-	if ((mbyte & 0x40) == 0)
+	/*
+	 * How many bytes in this symbol?
+	 */
+	count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err);
+	if (count < 0)
 		return (-1);
 
-	wide_char = mbyte;
-	if ((mbyte & 0x20) == 0) {
-		wide_char &= 0x1f;
-		bytes_left = 1;
-	} else if ((mbyte & 0x10) == 0) {
-		wide_char &= 0x0f;
-		bytes_left = 2;
-	} else {
+	mblen = count;
+	wclen = 1;
+	err = uconv_u8tou32((const uchar_t *)mbchar, &mblen,
+	    &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN);
+	if (err != 0)
 		return (-1);
-	}
-
-	count = 1;
-	while (bytes_left--) {
-		if (((mbyte = *mbchar++) & 0xc0) != 0x80)
-			return (-1);
-
-		count++;
-		wide_char = (wide_char << 6) | (mbyte & 0x3f);
+	if (wclen == 0) {
+		wide_char = 0;
+		count = 0;
 	}
 
 	if (wcharp)
@@ -173,25 +153,27 @@ smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes)
  * mbchar must be large enough to accommodate the multibyte character.
  *
  * Returns the numberof bytes written to mbchar.
+ * Note: handles null like any 1-byte char.
  */
 int
-smb_wctomb(char *mbchar, smb_wchar_t wchar)
+smb_wctomb(char *mbchar, uint32_t wchar)
 {
-	if ((wchar & ~0x7f) == 0) {
-		*mbchar = (char)wchar;
-		return (1);
-	}
+	char junk[MTS_MB_CUR_MAX+1];
+	size_t mblen;
+	size_t wclen;
+	int err;
 
-	if ((wchar & ~0x7ff) == 0) {
-		*mbchar++ = (wchar >> 6) | 0xc0;
-		*mbchar = (wchar & 0x3f) | 0x80;
-		return (2);
-	}
+	if (mbchar == NULL)
+		mbchar = junk;
 
-	*mbchar++ = (wchar >> 12) | 0xe0;
-	*mbchar++ = ((wchar >> 6) & 0x3f) | 0x80;
-	*mbchar = (wchar & 0x3f) | 0x80;
-	return (3);
+	mblen = MTS_MB_CUR_MAX;
+	wclen = 1;
+	err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen,
+	    UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL);
+	if (err != 0)
+		return (-1);
+
+	return ((int)mblen);
 }
 
 
@@ -205,46 +187,46 @@ smb_wctomb(char *mbchar, smb_wchar_t wchar)
  * terminated if there is room.
  *
  * Returns the number of bytes converted, not counting the terminating
- * null byte.
+ * null byte. Returns -1 if an invalid WC sequence is encountered.
  */
 size_t
-smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes)
+smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes)
 {
-	char *start = mbstring;
-	const smb_wchar_t *wcp = wcstring;
-	smb_wchar_t wide_char = 0;
-	char buf[4];
-	size_t len;
+	size_t mbslen, wcslen;
+	int err;
 
-	if ((mbstring == NULL) || (wcstring == NULL))
+	/* NULL or empty input is allowed. */
+	if (wcs == NULL || *wcs == 0) {
+		if (mbs != NULL && nbytes > 0)
+			*mbs = '\0';
 		return (0);
+	}
 
-	while (nbytes > MTS_MB_CHAR_MAX) {
-		wide_char = *wcp++;
-		len = smb_wctomb(mbstring, wide_char);
-
-		if (wide_char == 0)
-			/*LINTED E_PTRDIFF_OVERFLOW*/
-			return (mbstring - start);
-
-		mbstring += len;
-		nbytes -= len;
+	/*
+	 * Traditional wcstombs(3C) allows mbs==NULL to get the length.
+	 * SMB never calls it that way, but let's future-proof.
+	 */
+	if (mbs == NULL) {
+		return ((size_t)-1);
 	}
 
-	while (wide_char && nbytes) {
-		wide_char = *wcp++;
-		if ((len = smb_wctomb(buf, wide_char)) > nbytes) {
-			*mbstring = 0;
-			break;
-		}
+	/*
+	 * Compute wcslen
+	 */
+	wcslen = 0;
+	while (wcs[wcslen] != 0)
+		wcslen++;
 
-		bcopy(buf, mbstring, len);
-		mbstring += len;
-		nbytes -= len;
-	}
+	mbslen = nbytes;
+	err = uconv_u16tou8(wcs, &wcslen,
+	    (uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN);
+	if (err != 0)
+		return ((size_t)-1);
+
+	if (mbslen < nbytes)
+		mbs[mbslen] = '\0';
 
-	/*LINTED E_PTRDIFF_OVERFLOW*/
-	return (mbstring - start);
+	return (mbslen);
 }
 
 
@@ -256,7 +238,7 @@ smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes)
 size_t
 smb_wcequiv_strlen(const char *mbs)
 {
-	smb_wchar_t	wide_char;
+	uint32_t	wide_char;
 	size_t bytes;
 	size_t len = 0;
 
@@ -264,9 +246,15 @@ smb_wcequiv_strlen(const char *mbs)
 		bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
 		if (bytes == ((size_t)-1))
 			return ((size_t)-1);
+		mbs += bytes;
 
 		len += sizeof (smb_wchar_t);
-		mbs += bytes;
+		if (bytes > 3) {
+			/*
+			 * Extended unicode, so TWO smb_wchar_t
+			 */
+			len += sizeof (smb_wchar_t);
+		}
 	}
 
 	return (len);
@@ -275,25 +263,38 @@ smb_wcequiv_strlen(const char *mbs)
 
 /*
  * Returns the number of bytes that would be written if the multi-
- * byte string mbs was converted to a single byte character string,
- * not counting the terminating null character.
+ * byte string mbs was converted to an OEM character string,
+ * (smb_mbstooem) not counting the terminating null character.
  */
 size_t
 smb_sbequiv_strlen(const char *mbs)
 {
-	smb_wchar_t	wide_char;
 	size_t nbytes;
 	size_t len = 0;
 
 	while (*mbs) {
-		nbytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
+		nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX);
 		if (nbytes == ((size_t)-1))
 			return ((size_t)-1);
+		if (nbytes == 0)
+			break;
 
-		if (wide_char & 0xFF00)
-			len += sizeof (smb_wchar_t);
-		else
-			++len;
+		if (nbytes == 1) {
+			/* ASCII */
+			len++;
+		} else if (nbytes < 8) {
+			/* Compute OEM length */
+			char mbsbuf[8];
+			uint8_t oembuf[8];
+			int oemlen;
+			(void) strlcpy(mbsbuf, mbs, nbytes+1);
+			oemlen = smb_mbstooem(oembuf, mbsbuf, 8);
+			if (oemlen < 0)
+				return ((size_t)-1);
+			len += oemlen;
+		} else {
+			return ((size_t)-1);
+		}
 
 		mbs += nbytes;
 	}
@@ -301,106 +302,174 @@ smb_sbequiv_strlen(const char *mbs)
 	return (len);
 }
 
+/*
+ * Convert OEM strings to/from internal (UTF-8) form.
+ *
+ * We rarely encounter these anymore because all modern
+ * SMB clients use Unicode (UTF-16). The few cases where
+ * this IS still called are normally using ASCII, i.e.
+ * tag names etc. so short-cut those cases.  If we get
+ * something non-ASCII we have to call iconv.
+ *
+ * If we were to really support OEM code pages, we would
+ * need to have a way to set the OEM code page from some
+ * configuration value.  For now it's always CP850.
+ * See also ./smb_oem.c
+ */
+static char smb_oem_codepage[32] = "CP850";
 
 /*
- * stombs
+ * smb_oemtombs
  *
- * Convert a regular null terminated string 'string' to a UTF-8 encoded
- * null terminated multi-byte string 'mbstring'. Only full converted
- * UTF-8 characters will be written 'mbstring'. If a character will not
- * fit within the remaining buffer space or 'mbstring' will overflow
- * max_mblen, the conversion process will be terminated and 'mbstring'
- * will be null terminated.
+ * Convert a null terminated OEM string 'oems' to a UTF-8 string 'mbs'
+ * no longer than max_mblen (null terminated if space).
  *
- * Returns the number of bytes written to 'mbstring', excluding the
- * terminating null character.
+ * If the input string contains invalid OEM characters, a value
+ * of -1 will be returned. Otherwise returns the length of 'mbs',
+ * excluding the terminating null character.
  *
  * If either mbstring or string is a null pointer, -1 is returned.
  */
 int
-smb_stombs(char *mbstring, char *string, int max_mblen)
+smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen)
 {
-	char *start = mbstring;
-	unsigned char *p = (unsigned char *)string;
-	int space_left = max_mblen;
-	int	len;
-	smb_wchar_t	wide_char;
-	char buf[4];
-
-	if (!mbstring || !string)
+	uchar_t *p;
+	int	oemlen;
+	int	rlen;
+	boolean_t need_iconv = B_FALSE;
+
+	if (mbs == NULL || oems == NULL)
 		return (-1);
 
-	while (*p && space_left > 2) {
-		wide_char = *p++;
-		len = smb_wctomb(mbstring, wide_char);
-		mbstring += len;
-		space_left -= len;
+	/*
+	 * Check if the oems is all ASCII (and get the length
+	 * while we're at it) so we know if we need to iconv.
+	 * We usually can avoid the iconv calls.
+	 */
+	oemlen = 0;
+	p = (uchar_t *)oems;
+	while (*p != '\0') {
+		oemlen++;
+		if (*p & 0x80)
+			need_iconv = B_TRUE;
+		p++;
 	}
 
-	if (*p) {
-		wide_char = *p;
-		if ((len = smb_wctomb(buf, wide_char)) < 2) {
-			*mbstring = *buf;
-			mbstring += len;
-			space_left -= len;
-		}
+	if (need_iconv) {
+		int	rc;
+		char	*obuf = mbs;
+		size_t	olen = max_mblen;
+		size_t	ilen = oemlen;
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
+		char *ibuf = (char *)oems;
+		kiconv_t ic;
+		int	err;
+
+		ic = kiconv_open("UTF-8", smb_oem_codepage);
+		if (ic == (kiconv_t)-1)
+			goto just_copy;
+		rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
+		(void) kiconv_close(ic);
+#else	/* _KERNEL || _FAKE_KERNEL */
+		const char *ibuf = (char *)oems;
+		iconv_t	ic;
+		ic = iconv_open("UTF-8", smb_oem_codepage);
+		if (ic == (iconv_t)-1)
+			goto just_copy;
+		rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
+		(void) iconv_close(ic);
+#endif	/* _KERNEL || _FAKE_KERNEL */
+		if (rc < 0)
+			return (-1);
+		/* Return val. is output bytes. */
+		rlen = (max_mblen - olen);
+	} else {
+	just_copy:
+		rlen = oemlen;
+		if (rlen > max_mblen)
+			rlen = max_mblen;
+		bcopy(oems, mbs, rlen);
 	}
+	if (rlen < max_mblen)
+		mbs[rlen] = '\0';
 
-	*mbstring = '\0';
-
-	/*LINTED E_PTRDIFF_OVERFLOW*/
-	return (mbstring - start);
+	return (rlen);
 }
 
-
 /*
- * mbstos
+ * smb_mbstooem
  *
- * Convert a null terminated multi-byte string 'mbstring' to a regular
- * null terminated string 'string'.  A 1-byte character in 'mbstring'
- * maps to a 1-byte character in 'string'. A 2-byte character in
- * 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null.
- * Otherwise the upper byte null will be discarded to ensure that the
- * output stream does not contain embedded null characters.
+ * Convert a null terminated multi-byte string 'mbs' to an OEM string
+ * no longer than max_oemlen (null terminated if space).
  *
- * If the input stream contains invalid multi-byte characters, a value
- * of -1 will be returned. Otherwise the length of 'string', excluding
- * the terminating null character, is returned.
+ * If the input string contains invalid multi-byte characters, a value
+ * of -1 will be returned. Otherwise returns the length of 'oems',
+ * excluding the terminating null character.
  *
  * If either mbstring or string is a null pointer, -1 is returned.
  */
 int
-smb_mbstos(char *string, const char *mbstring)
+smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen)
 {
-	smb_wchar_t wc;
-	unsigned char *start = (unsigned char *)string;
-	int len;
+	uchar_t *p;
+	int	mbslen;
+	int	rlen;
+	boolean_t need_iconv = B_FALSE;
 
-	if (string == NULL || mbstring == NULL)
+	if (oems == NULL || mbs == NULL)
 		return (-1);
 
-	while (*mbstring) {
-		if ((len = smb_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) {
-			*string = 0;
-			return (-1);
-		}
-
-		if (wc & 0xFF00) {
-			/*LINTED E_BAD_PTR_CAST_ALIGN*/
-			*((smb_wchar_t *)string) = wc;
-			string += sizeof (smb_wchar_t);
-		}
-		else
-		{
-			*string = (unsigned char)wc;
-			string++;
-		}
-
-		mbstring += len;
+	/*
+	 * Check if the mbs is all ASCII (and get the length
+	 * while we're at it) so we know if we need to iconv.
+	 * We usually can avoid the iconv calls.
+	 */
+	mbslen = 0;
+	p = (uchar_t *)mbs;
+	while (*p != '\0') {
+		mbslen++;
+		if (*p & 0x80)
+			need_iconv = B_TRUE;
+		p++;
 	}
 
-	*string = 0;
+	if (need_iconv) {
+		int	rc;
+		char	*obuf = (char *)oems;
+		size_t	olen = max_oemlen;
+		size_t	ilen = mbslen;
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
+		char *ibuf = (char *)mbs;
+		kiconv_t ic;
+		int	err;
+
+		ic = kiconv_open(smb_oem_codepage, "UTF-8");
+		if (ic == (kiconv_t)-1)
+			goto just_copy;
+		rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
+		(void) kiconv_close(ic);
+#else	/* _KERNEL || _FAKE_KERNEL */
+		const char *ibuf = mbs;
+		iconv_t	ic;
+		ic = iconv_open(smb_oem_codepage, "UTF-8");
+		if (ic == (iconv_t)-1)
+			goto just_copy;
+		rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
+		(void) iconv_close(ic);
+#endif	/* _KERNEL || _FAKE_KERNEL */
+		if (rc < 0)
+			return (-1);
+		/* Return val. is output bytes. */
+		rlen = (max_oemlen - olen);
+	} else {
+	just_copy:
+		rlen = mbslen;
+		if (rlen > max_oemlen)
+			rlen = max_oemlen;
+		bcopy(mbs, oems, rlen);
+	}
+	if (rlen < max_oemlen)
+		oems[rlen] = '\0';
 
-	/*LINTED E_PTRDIFF_OVERFLOW*/
-	return ((unsigned char *)string - start);
+	return (rlen);
 }