diff options
Diffstat (limited to 'usr/src/common/smbsrv/smb_string.c')
-rw-r--r-- | usr/src/common/smbsrv/smb_string.c | 326 |
1 files changed, 318 insertions, 8 deletions
diff --git a/usr/src/common/smbsrv/smb_string.c b/usr/src/common/smbsrv/smb_string.c index 8133f72ceb..292f58d4fa 100644 --- a/usr/src/common/smbsrv/smb_string.c +++ b/usr/src/common/smbsrv/smb_string.c @@ -19,27 +19,35 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* - * Implementation of some of the string functions. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef _KERNEL #include <sys/types.h> #include <sys/sunddi.h> #else +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <strings.h> #endif +#include <sys/u8_textprep.h> +#include <smbsrv/alloc.h> #include <smbsrv/string.h> -#include <smbsrv/ctype.h> +#include <smbsrv/cp_usascii.h> +#include <smbsrv/cp_unicode.h> + +#define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0])) + +/* + * Global pointer to the current codepage: defaults to ASCII, + * and a flag indicating whether the codepage is Unicode or ASCII. + */ +static smb_codepage_t *current_codepage = usascii_codepage; +static boolean_t is_unicode = B_FALSE; +static smb_codepage_t *smb_unicode_init(void); /* * strsubst @@ -100,3 +108,305 @@ strcanon(char *buf, const char *class) *q = '\0'; return (buf); } + +void +smb_codepage_init(void) +{ + smb_codepage_t *cp; + + if (is_unicode) + return; + + if ((cp = smb_unicode_init()) != NULL) { + current_codepage = cp; + is_unicode = B_TRUE; + } else { + current_codepage = usascii_codepage; + is_unicode = B_FALSE; + } +} + +/* + * Determine whether or not a character is an uppercase character. + * This function operates on the current codepage table. Returns + * non-zero if the character is uppercase. Otherwise returns zero. + */ +int +smb_isupper(int c) +{ + uint16_t mask = is_unicode ? 
0xffff : 0xff; + + return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); +} + +/* + * Determine whether or not a character is a lowercase character. + * This function operates on the current codepage table. Returns + * non-zero if the character is lowercase. Otherwise returns zero. + */ +int +smb_islower(int c) +{ + uint16_t mask = is_unicode ? 0xffff : 0xff; + + return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); +} + +/* + * Convert individual characters to their uppercase equivalent value. + * If the specified character is lowercase, the uppercase value will + * be returned. Otherwise the original value will be returned. + */ +int +smb_toupper(int c) +{ + uint16_t mask = is_unicode ? 0xffff : 0xff; + + return (current_codepage[c & mask].upper); +} + +/* + * Convert individual characters to their lowercase equivalent value. + * If the specified character is uppercase, the lowercase value will + * be returned. Otherwise the original value will be returned. + */ +int +smb_tolower(int c) +{ + uint16_t mask = is_unicode ? 0xffff : 0xff; + + return (current_codepage[c & mask].lower); +} + +/* + * Convert a string to uppercase using the appropriate codepage. The + * string is converted in place. Returns s, or NULL if smb_mbtowc fails. + * There is an assumption here that uppercase and lowercase values + * always encode to the same length. + */ +char * +smb_strupr(char *s) +{ + smb_wchar_t c; + char *p = s; + + while (*p) { + if (smb_isascii(*p)) { + *p = smb_toupper(*p); + p++; + } else { + if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) + return (0); + + if (c == 0) + break; + + c = smb_toupper(c); + p += smb_wctomb(p, c); + } + } + + return (s); +} + +/* + * Convert a string to lowercase using the appropriate codepage. The + * string is converted in place. Returns s, or NULL if smb_mbtowc fails. + * There is an assumption here that uppercase and lowercase values + * always encode to the same length. 
+ */ +char * +smb_strlwr(char *s) +{ + smb_wchar_t c; + char *p = s; + + while (*p) { + if (smb_isascii(*p)) { + *p = smb_tolower(*p); + p++; + } else { + if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) + return (0); + + if (c == 0) + break; + + c = smb_tolower(c); + p += smb_wctomb(p, c); + } + } + + return (s); +} + +/* + * Returns 1 if string contains NO uppercase chars, 0 otherwise. However, + * -1 is returned if "s" is not a valid multi-byte string. + */ +int +smb_isstrlwr(const char *s) +{ + smb_wchar_t c; + int n; + const char *p = s; + + while (*p) { + if (smb_isascii(*p) && smb_isupper(*p)) + return (0); + else { + if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) + return (-1); + + if (c == 0) + break; + + if (smb_isupper(c)) + return (0); + + p += n; + } + } + + return (1); +} + +/* + * Returns 1 if string contains NO lowercase chars, 0 otherwise. However, + * -1 is returned if "s" is not a valid multi-byte string. + */ +int +smb_isstrupr(const char *s) +{ + smb_wchar_t c; + int n; + const char *p = s; + + while (*p) { + if (smb_isascii(*p) && smb_islower(*p)) + return (0); + else { + if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) + return (-1); + + if (c == 0) + break; + + if (smb_islower(c)) + return (0); + + p += n; + } + } + + return (1); +} + +/* + * Compare the null-terminated strings s1 and s2 and return an integer + * greater than, equal to or less than 0 depending on whether s1 is + * lexicographically greater than, equal to or less than s2 after + * translation of each character to lowercase. The original strings + * are not modified. + * + * If n is non-zero, at most n bytes are compared. Otherwise, the strings + * are compared until a null terminator is encountered. 
+ * + * Out: 0 if strings are equal + * < 0 if first string < second string, or -1 on u8_strcmp error + * > 0 if first string > second string + */ +int +smb_strcasecmp(const char *s1, const char *s2, size_t n) +{ + int err = 0; + int rc; + + rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err); + if (err != 0) + return (-1); + return (rc); +} + +/* + * First build a codepage based on cp_unicode.h. Then build the unicode + * codepage from this interim codepage by copying the entries over while + * fixing them and filling in the gaps. Returns NULL on failure. + */ +static smb_codepage_t * +smb_unicode_init(void) +{ + smb_codepage_t *unicode; + uint32_t a = 0; + uint32_t b = 0; + + unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16); + if (unicode == NULL) + return (NULL); + + while (b != 0xffff) { + /* + * If there is a gap in the standard, + * fill in the gap with no-case entries. + */ + if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) { + unicode[b].ctype = CODEPAGE_ISNONE; + unicode[b].upper = (smb_wchar_t)b; + unicode[b].lower = (smb_wchar_t)b; + b++; + continue; + } + + /* + * Copy the entry and fix up as required. + */ + switch (a_unicode[a].ctype) { + case CODEPAGE_ISNONE: + /* + * Replace 0xffff in upper/lower fields with its val. + */ + unicode[b].ctype = CODEPAGE_ISNONE; + unicode[b].upper = (smb_wchar_t)b; + unicode[b].lower = (smb_wchar_t)b; + break; + case CODEPAGE_ISUPPER: + /* + * Some characters may have case yet not have + * case conversion. Treat them as no-case. + */ + if (a_unicode[a].lower == 0xffff) { + unicode[b].ctype = CODEPAGE_ISNONE; + unicode[b].upper = (smb_wchar_t)b; + unicode[b].lower = (smb_wchar_t)b; + } else { + unicode[b].ctype = CODEPAGE_ISUPPER; + unicode[b].upper = (smb_wchar_t)b; + unicode[b].lower = a_unicode[a].lower; + } + break; + case CODEPAGE_ISLOWER: + /* + * Some characters may have case yet not have + * case conversion. Treat them as no-case. 
+ */ + if (a_unicode[a].upper == 0xffff) { + unicode[b].ctype = CODEPAGE_ISNONE; + unicode[b].upper = (smb_wchar_t)b; + unicode[b].lower = (smb_wchar_t)b; + } else { + unicode[b].ctype = CODEPAGE_ISLOWER; + unicode[b].upper = a_unicode[a].upper; + unicode[b].lower = (smb_wchar_t)b; + } + break; + default: + MEM_FREE("unicode", unicode); + return (NULL); + } + + a++; + b++; + }; + + return (unicode); +} |