1 files changed, 318 insertions, 8 deletions
diff --git a/usr/src/common/smbsrv/smb_string.c b/usr/src/common/smbsrv/smb_string.c
index 8133f72ceb..292f58d4fa 100644
--- a/usr/src/common/smbsrv/smb_string.c
+++ b/usr/src/common/smbsrv/smb_string.c
@@ -19,27 +19,35 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-/*
- * Implementation of some of the string functions.
- */
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef _KERNEL
 #include <sys/types.h>
 #include <sys/sunddi.h>
 #else
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
 #endif
+#include <sys/u8_textprep.h>
+#include <smbsrv/alloc.h>
 #include <smbsrv/string.h>
-#include <smbsrv/ctype.h>
+#include <smbsrv/cp_usascii.h>
+#include <smbsrv/cp_unicode.h>
+
+#define	UNICODE_N_ENTRIES	(sizeof (a_unicode) / sizeof (a_unicode[0]))
+
+/*
+ * Global pointer to the current codepage: defaults to ASCII,
+ * and a flag indicating whether the codepage is Unicode or ASCII.
+ */
+static smb_codepage_t *current_codepage = usascii_codepage;
+static boolean_t is_unicode = B_FALSE;
 
+static smb_codepage_t *smb_unicode_init(void);
 
 /*
  * strsubst
@@ -100,3 +108,305 @@ strcanon(char *buf, const char *class)
 	*q = '\0';
 	return (buf);
 }
+
+void
+smb_codepage_init(void)
+{
+	smb_codepage_t *cp;
+
+	if (is_unicode)
+		return;
+
+	if ((cp = smb_unicode_init()) != NULL) {
+		current_codepage = cp;
+		is_unicode = B_TRUE;
+	} else {
+		current_codepage = usascii_codepage;
+		is_unicode = B_FALSE;
+	}
+}
+
+/*
+ * Determine whether or not a character is an uppercase character.
+ * This function operates on the current codepage table. Returns
+ * non-zero if the character is uppercase. Otherwise returns zero.
+ */
+int
+smb_isupper(int c)
+{
+	uint16_t mask = is_unicode ? 0xffff : 0xff;
+
+	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
+}
+
+/*
+ * Determine whether or not a character is an lowercase character.
+ * This function operates on the current codepage table. Returns
+ * non-zero if the character is lowercase. Otherwise returns zero.
+ */
+int
+smb_islower(int c)
+{
+	uint16_t mask = is_unicode ? 0xffff : 0xff;
+
+	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
+}
+
+/*
+ * Convert individual characters to their uppercase equivalent value.
+ * If the specified character is lowercase, the uppercase value will
+ * be returned. Otherwise the original value will be returned.
+ */
+int
+smb_toupper(int c)
+{
+	uint16_t mask = is_unicode ? 0xffff : 0xff;
+
+	return (current_codepage[c & mask].upper);
+}
+
+/*
+ * Convert individual characters to their lowercase equivalent value.
+ * If the specified character is uppercase, the lowercase value will
+ * be returned. Otherwise the original value will be returned.
+ */
+int
+smb_tolower(int c)
+{
+	uint16_t mask = is_unicode ? 0xffff : 0xff;
+
+	return (current_codepage[c & mask].lower);
+}
+
+/*
+ * Convert a string to uppercase using the appropriate codepage. The
+ * string is converted in place. A pointer to the string is returned.
+ * There is an assumption here that uppercase and lowercase values
+ * always result encode to the same length.
+ */
+char *
+smb_strupr(char *s)
+{
+	smb_wchar_t c;
+	char *p = s;
+
+	while (*p) {
+		if (smb_isascii(*p)) {
+			*p = smb_toupper(*p);
+			p++;
+		} else {
+			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
+				return (0);
+
+			if (c == 0)
+				break;
+
+			c = smb_toupper(c);
+			p += smb_wctomb(p, c);
+		}
+	}
+
+	return (s);
+}
+
+/*
+ * Convert a string to lowercase using the appropriate codepage. The
+ * string is converted in place. A pointer to the string is returned.
+ * There is an assumption here that uppercase and lowercase values
+ * always result encode to the same length.
+ */
+char *
+smb_strlwr(char *s)
+{
+	smb_wchar_t c;
+	char *p = s;
+
+	while (*p) {
+		if (smb_isascii(*p)) {
+			*p = smb_tolower(*p);
+			p++;
+		} else {
+			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
+				return (0);
+
+			if (c == 0)
+				break;
+
+			c = smb_tolower(c);
+			p += smb_wctomb(p, c);
+		}
+	}
+
+	return (s);
+}
+
+/*
+ * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
+ * -1 is returned if "s" is not a valid multi-byte string.
+ */
+int
+smb_isstrlwr(const char *s)
+{
+	smb_wchar_t c;
+	int n;
+	const char *p = s;
+
+	while (*p) {
+		if (smb_isascii(*p) && smb_isupper(*p))
+			return (0);
+		else {
+			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
+				return (-1);
+
+			if (c == 0)
+				break;
+
+			if (smb_isupper(c))
+				return (0);
+
+			p += n;
+		}
+	}
+
+	return (1);
+}
+
+/*
+ * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
+ * -1 is returned if "s" is not a valid multi-byte string.
+ */
+int
+smb_isstrupr(const char *s)
+{
+	smb_wchar_t c;
+	int n;
+	const char *p = s;
+
+	while (*p) {
+		if (smb_isascii(*p) && smb_islower(*p))
+			return (0);
+		else {
+			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
+				return (-1);
+
+			if (c == 0)
+				break;
+
+			if (smb_islower(c))
+				return (0);
+
+			p += n;
+		}
+	}
+
+	return (1);
+}
+
+/*
+ * Compare the null-terminated strings s1 and s2 and return an integer
+ * greater than, equal to or less than 0 dependent on whether s1 is
+ * lexicographically greater than, equal to or less than s2 after
+ * translation of each character to lowercase.  The original strings
+ * are not modified.
+ *
+ * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
+ * are compared until a null terminator is encountered.
+ *
+ * Out:    0 if strings are equal
+ *       < 0 if first string < second string
+ *       > 0 if first string > second string
+ */
+int
+smb_strcasecmp(const char *s1, const char *s2, size_t n)
+{
+	int	err = 0;
+	int	rc;
+
+	rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
+	if (err != 0)
+		return (-1);
+	return (rc);
+}
+
+/*
+ * First build a codepage based on cp_unicode.h.  Then build the unicode
+ * codepage from this interim codepage by copying the entries over while
+ * fixing them and filling in the gaps.
+ */
+static smb_codepage_t *
+smb_unicode_init(void)
+{
+	smb_codepage_t	*unicode;
+	uint32_t	a = 0;
+	uint32_t	b = 0;
+
+	unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
+	if (unicode == NULL)
+		return (NULL);
+
+	while (b != 0xffff) {
+		/*
+		 * If there is a gap in the standard,
+		 * fill in the gap with no-case entries.
+		 */
+		if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
+			unicode[b].ctype = CODEPAGE_ISNONE;
+			unicode[b].upper = (smb_wchar_t)b;
+			unicode[b].lower = (smb_wchar_t)b;
+			b++;
+			continue;
+		}
+
+		/*
+		 * Copy the entry and fixup as required.
+		 */
+		switch (a_unicode[a].ctype) {
+		case CODEPAGE_ISNONE:
+			/*
+			 * Replace 0xffff in upper/lower fields with its val.
+			 */
+			unicode[b].ctype = CODEPAGE_ISNONE;
+			unicode[b].upper = (smb_wchar_t)b;
+			unicode[b].lower = (smb_wchar_t)b;
+			break;
+		case CODEPAGE_ISUPPER:
+			/*
+			 * Some characters may have case yet not have
+			 * case conversion.  Treat them as no-case.
+			 */
+			if (a_unicode[a].lower == 0xffff) {
+				unicode[b].ctype = CODEPAGE_ISNONE;
+				unicode[b].upper = (smb_wchar_t)b;
+				unicode[b].lower = (smb_wchar_t)b;
+			} else {
+				unicode[b].ctype = CODEPAGE_ISUPPER;
+				unicode[b].upper = (smb_wchar_t)b;
+				unicode[b].lower = a_unicode[a].lower;
+			}
+			break;
+		case CODEPAGE_ISLOWER:
+			/*
+			 * Some characters may have case yet not have
+			 * case conversion.  Treat them as no-case.
+			 */
+			if (a_unicode[a].upper == 0xffff) {
+				unicode[b].ctype = CODEPAGE_ISNONE;
+				unicode[b].upper = (smb_wchar_t)b;
+				unicode[b].lower = (smb_wchar_t)b;
+			} else {
+				unicode[b].ctype = CODEPAGE_ISLOWER;
+				unicode[b].upper = a_unicode[a].upper;
+				unicode[b].lower = (smb_wchar_t)b;
+			}
+			break;
+		default:
+			MEM_FREE("unicode", unicode);
+			return (NULL);
+		}
+
+		a++;
+		b++;
+	};
+
+	return (unicode);
+}