1 files changed, 418 insertions, 0 deletions
diff --git a/usr/src/common/smbsrv/smb_utf8.c b/usr/src/common/smbsrv/smb_utf8.c
new file mode 100644
index 0000000000..704f01877e
--- /dev/null
+++ b/usr/src/common/smbsrv/smb_utf8.c
@@ -0,0 +1,418 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Multibyte/wide-char conversion routines. Wide-char encoding provides
+ * a fixed size character encoding that maps to the Unicode 16-bit
+ * (UCS-2) character set standard. Multibyte or UCS transformation
+ * format (UTF) encoding is a variable length character encoding scheme
+ * that is compatible with existing ASCII characters and guarantees that
+ * the resultant strings do not contain embedded null characters. Both
+ * types of encoding provide a null terminator: single byte for UTF-8
+ * and a wide-char null for Unicode. See RFC 2044.
+ *
+ * The table below illustrates the UTF-8 encoding scheme. The letter x
+ * indicates bits available for encoding the character value.
+ *
+ *	UCS-2			UTF-8 octet sequence (binary)
+ *	0x0000-0x007F	0xxxxxxx
+ *	0x0080-0x07FF	110xxxxx 10xxxxxx
+ *	0x0800-0xFFFF	1110xxxx 10xxxxxx 10xxxxxx
+ *
+ * RFC 2044
+ * UTF-8, a transformation format of Unicode and ISO 10646
+ * F. Yergeau
+ * Alis Technologies
+ * October 1996
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#include <sys/sunddi.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <strings.h>
+#endif
+#include <smbsrv/smb_i18n.h>
+#include <smbsrv/string.h>
+
+/*
+ * Diagnostic state consulted by mts_mbtowc() whenever it rejects an
+ * invalid multibyte sequence: on rejection, mbtowc_announce is set to 1
+ * if mbtowc_verbose is non-zero or if mbtowc_announce is still 0.
+ * Nothing in this file emits a message based on these values. Neither
+ * variable is static, so both are visible outside this file.
+ */
+int mbtowc_verbose = 0;
+int mbtowc_announce = 0;
+
+/*
+ * mbstowcs
+ *
+ * Convert the null terminated multibyte string mbstring into wide
+ * characters stored at wcstring. At most nwchars wide characters are
+ * written. That limit includes the terminating null wide character,
+ * so the terminator is only stored when the input ends before the
+ * limit is reached.
+ *
+ * Returns the number of wide characters converted, not counting any
+ * terminating null wide character. If mts_mbtowc() rejects a sequence,
+ * a null wide character is stored at the current output position and
+ * (size_t)-1 is returned.
+ */
+size_t
+mts_mbstowcs(mts_wchar_t *wcstring, const char *mbstring, size_t nwchars)
+{
+	mts_wchar_t *out = wcstring;
+	size_t room;
+	int consumed;
+
+	for (room = nwchars; room != 0; room--) {
+		/* This also stores the terminator when *mbstring is 0. */
+		consumed = mts_mbtowc(out, mbstring, MTS_MB_CHAR_MAX);
+		if (consumed < 0) {
+			*out = 0;
+			return ((size_t)-1);
+		}
+
+		/* End of input: the terminator is not counted. */
+		if (*mbstring == 0)
+			break;
+
+		out++;
+		mbstring += consumed;
+	}
+
+	return (out - wcstring);
+}
+
+
+/*
+ * Common exit path for an invalid multibyte sequence: latch
+ * mbtowc_announce to 1 when mbtowc_verbose is set or when nothing has
+ * been announced yet, then hand back the -1 error value.
+ */
+static int
+mts_mbtowc_invalid(void)
+{
+	if (mbtowc_verbose || mbtowc_announce == 0)
+		mbtowc_announce = 1;
+
+	return (-1);
+}
+
+/*
+ * mbtowc
+ *
+ * Decode the multibyte character at mbchar into a wide character and,
+ * if wcharp is not NULL, store the result there. Only the 1, 2 and 3
+ * byte forms are accepted. The nbytes argument is not consulted by
+ * this implementation; decoding is bounded by the lead byte and by
+ * the requirement that every trailing byte has the form 10xxxxxx, so
+ * a sequence cut short by a null terminator is rejected at that null.
+ * Overlong encodings are not detected.
+ *
+ * If mbchar is NULL, returns zero to indicate that shift states are
+ * not supported. If mbchar points at a null byte, a null wide
+ * character is stored and zero is returned. Otherwise returns the
+ * number of bytes consumed, or -1 if the sequence is invalid.
+ */
+int /*ARGSUSED*/
+mts_mbtowc(mts_wchar_t *wcharp, const char *mbchar, size_t nbytes)
+{
+	const unsigned char *src = (const unsigned char *)mbchar;
+	unsigned char lead;
+	unsigned char trail;
+	mts_wchar_t value;
+	int trailing;
+	int i;
+
+	if (mbchar == 0)
+		return (0); /* shift states not supported */
+
+	lead = src[0];
+
+	/* 0xxxxxxx -> 1 byte ASCII encoding */
+	if ((lead & 0x80) == 0) {
+		if (wcharp)
+			*wcharp = (mts_wchar_t)lead;
+
+		return (lead ? 1 : 0);
+	}
+
+	if ((lead & 0xe0) == 0xc0) {
+		/* 110xxxxx -> one trailing byte */
+		value = lead & 0x1f;
+		trailing = 1;
+	} else if ((lead & 0xf0) == 0xe0) {
+		/* 1110xxxx -> two trailing bytes */
+		value = lead & 0x0f;
+		trailing = 2;
+	} else {
+		/* 10xxxxxx or 1111xxxx -> invalid first byte */
+		return (mts_mbtowc_invalid());
+	}
+
+	for (i = 1; i <= trailing; i++) {
+		trail = src[i];
+		if ((trail & 0xc0) != 0x80)
+			return (mts_mbtowc_invalid());
+
+		value = (value << 6) | (trail & 0x3f);
+	}
+
+	if (wcharp)
+		*wcharp = value;
+
+	return (trailing + 1);
+}
+
+
+/*
+ * wctomb
+ *
+ * Encode the wide character wchar as a multibyte character at mbchar.
+ * The caller must provide room for the longest form, which is three
+ * bytes. No terminator is appended. A null wide character is written
+ * as a single null byte and reported as one byte.
+ *
+ * Returns the number of bytes written to mbchar (1, 2 or 3).
+ */
+int
+mts_wctomb(char *mbchar, mts_wchar_t wchar)
+{
+#ifdef UTF8_DEBUG
+	char *start = mbchar;
+#endif
+	int written;
+
+	if ((wchar & ~0x7f) == 0) {
+		/* 0xxxxxxx */
+		mbchar[0] = (char)wchar;
+		written = 1;
+	} else if ((wchar & ~0x7ff) == 0) {
+		/* 110xxxxx 10xxxxxx */
+		mbchar[0] = (wchar >> 6) | 0xc0;
+		mbchar[1] = (wchar & 0x3f) | 0x80;
+		written = 2;
+	} else {
+		/* 1110xxxx 10xxxxxx 10xxxxxx */
+		mbchar[0] = (wchar >> 12) | 0xe0;
+		mbchar[1] = ((wchar >> 6) & 0x3f) | 0x80;
+		mbchar[2] = (wchar & 0x3f) | 0x80;
+		written = 3;
+	}
+
+	return (written);
+}
+
+
+/*
+ * wcstombs
+ *
+ * The wcstombs() function converts a wide character string wcstring
+ * into a multibyte character string mbstring. Up to nbytes bytes are
+ * stored in mbstring, including the terminating null byte. A multibyte
+ * character that does not fit in the remaining space is not stored;
+ * a null byte is written in its place and conversion stops. If the
+ * output fills the buffer exactly, no null terminator is written.
+ *
+ * Returns the number of bytes converted, not counting the terminating
+ * null byte. Returns 0 if either mbstring or wcstring is NULL.
+ *
+ * Every character is staged through a local buffer so that the same
+ * bounds check applies for any nbytes. This means the result never
+ * depends on an uninitialized local when nbytes is small, and the
+ * terminating null byte is never included in the returned count.
+ */
+size_t
+mts_wcstombs(char *mbstring, const mts_wchar_t *wcstring, size_t nbytes)
+{
+	char *start = mbstring;
+	const mts_wchar_t *wcp = wcstring;
+	mts_wchar_t wide_char;
+	char buf[4];
+	size_t len;
+
+	if ((mbstring == 0) || (wcstring == 0))
+		return (0);
+
+	while (nbytes != 0) {
+		wide_char = *wcp++;
+		len = mts_wctomb(buf, wide_char);
+
+		if (len > nbytes) {
+			/* Partial character: terminate instead. */
+			*mbstring = 0;
+			break;
+		}
+
+		bcopy(buf, mbstring, len);
+
+		/* The terminator has been stored but is not counted. */
+		if (wide_char == 0)
+			break;
+
+		mbstring += len;
+		nbytes -= len;
+	}
+
+	/*LINTED E_PTRDIFF_OVERFLOW*/
+	return (mbstring - start);
+}
+
+
+/*
+ * Returns the number of bytes needed to hold the multibyte string mbs
+ * once converted to a wide character string, i.e. sizeof (mts_wchar_t)
+ * for each multibyte character, not counting the terminating null wide
+ * character. Returns (size_t)-1 if mts_mbtowc() rejects a sequence.
+ */
+size_t
+mts_wcequiv_strlen(const char *mbs)
+{
+	mts_wchar_t wc;
+	size_t total;
+	size_t step;
+
+	for (total = 0; *mbs != 0; mbs += step) {
+		step = mts_mbtowc(&wc, mbs, MTS_MB_CHAR_MAX);
+		if (step == ((size_t)-1))
+			return ((size_t)-1);
+
+		total += sizeof (mts_wchar_t);
+	}
+
+	return (total);
+}
+
+
+/*
+ * Returns the number of bytes needed to hold the multibyte string mbs
+ * once converted to a single byte character string, not counting the
+ * terminating null character. A character whose wide value has a
+ * non-zero upper byte counts as sizeof (mts_wchar_t) bytes; any other
+ * character counts as one byte. Returns (size_t)-1 if mts_mbtowc()
+ * rejects a sequence.
+ */
+size_t
+mts_sbequiv_strlen(const char *mbs)
+{
+	mts_wchar_t wc;
+	size_t total;
+	size_t step;
+
+	for (total = 0; *mbs != 0; mbs += step) {
+		step = mts_mbtowc(&wc, mbs, MTS_MB_CHAR_MAX);
+		if (step == ((size_t)-1))
+			return ((size_t)-1);
+
+		total += (wc & 0xFF00) ? sizeof (mts_wchar_t) : 1;
+	}
+
+	return (total);
+}
+
+
+/*
+ * stombs
+ *
+ * Convert a regular null terminated string 'string' to a UTF-8 encoded
+ * null terminated multi-byte string 'mbstring'. Each input byte is
+ * treated as an unsigned character value and encoded with
+ * mts_wctomb(). Only fully converted UTF-8 characters are written to
+ * 'mbstring'. max_mblen is treated as the size of the 'mbstring'
+ * buffer including the terminating null: as soon as a character plus
+ * the terminator would not fit in the remaining space, conversion
+ * stops and 'mbstring' is null terminated.
+ *
+ * Returns the number of bytes written to 'mbstring', excluding the
+ * terminating null character.
+ *
+ * If either mbstring or string is a null pointer, -1 is returned.
+ * If max_mblen is not positive there is no room even for the
+ * terminator, so nothing is written and 0 is returned.
+ */
+int
+mts_stombs(char *mbstring, char *string, int max_mblen)
+{
+	char *start = mbstring;
+	unsigned char *p = (unsigned char *)string;
+	int space_left = max_mblen;
+	int len;
+	char buf[4];
+
+	if (!mbstring || !string)
+		return (-1);
+
+	if (max_mblen <= 0)
+		return (0);
+
+	while (*p) {
+		len = mts_wctomb(buf, (mts_wchar_t)*p);
+
+		/* Always keep one byte in reserve for the terminator. */
+		if (len >= space_left)
+			break;
+
+		bcopy(buf, mbstring, (size_t)len);
+		mbstring += len;
+		space_left -= len;
+		p++;
+	}
+
+	*mbstring = '\0';
+
+	/*LINTED E_PTRDIFF_OVERFLOW*/
+	return (mbstring - start);
+}
+
+
+/*
+ * mbstos
+ *
+ * Convert a null terminated multi-byte string 'mbstring' to a regular
+ * null terminated string 'string'. Each multi-byte character is first
+ * decoded to a wide character. If the upper byte of that wide
+ * character is non-null, all sizeof (mts_wchar_t) bytes of it are
+ * stored in host byte order. Otherwise only the lower byte is stored,
+ * so that a null upper byte never appears in the output stream.
+ *
+ * The output is not bounded by this function; the caller must supply
+ * a buffer large enough for the converted string and its terminator.
+ *
+ * If the input stream contains invalid multi-byte characters, 'string'
+ * is null terminated at the current position and -1 is returned.
+ * Otherwise the length of 'string', excluding the terminating null
+ * character, is returned.
+ *
+ * If either mbstring or string is a null pointer, -1 is returned.
+ */
+int
+mts_mbstos(char *string, const char *mbstring)
+{
+	mts_wchar_t wc;
+	unsigned char *start = (unsigned char *)string;
+	int len;
+
+	if (string == 0 || mbstring == 0)
+		return (-1);
+
+	while (*mbstring) {
+		if ((len = mts_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) {
+			*string = 0;
+			return (-1);
+		}
+
+		if (wc & 0xFF00) {
+			/*
+			 * 'string' may sit on an odd address after a
+			 * single byte character, so copy the wide
+			 * character bytewise rather than storing through
+			 * a cast pointer, which would be a misaligned
+			 * access.
+			 */
+			bcopy(&wc, string, sizeof (wc));
+			string += sizeof (mts_wchar_t);
+		} else {
+			*string = (unsigned char)wc;
+			string++;
+		}
+
+		mbstring += len;
+	}
+
+	*string = 0;
+
+	/*LINTED E_PTRDIFF_OVERFLOW*/
+	return ((unsigned char *)string - start);
+}