diff options
Diffstat (limited to 'usr/src')
105 files changed, 14426 insertions, 796 deletions
diff --git a/usr/src/head/regex.h b/usr/src/head/regex.h index 5402ef64cd..d664b67e71 100644 --- a/usr/src/head/regex.h +++ b/usr/src/head/regex.h @@ -28,12 +28,14 @@ * Copyright 1989, 1994 by Mortice Kern Systems Inc. * All rights reserved. */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ #ifndef _REGEX_H #define _REGEX_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/feature_tests.h> #include <sys/types.h> @@ -69,18 +71,24 @@ typedef long wchar_t; typedef ssize_t regoff_t; /* regcomp flags */ +#define REG_BASIC 0x00 #define REG_EXTENDED 0x01 /* Use Extended Regular Expressions */ #define REG_NEWLINE 0x08 /* Treat \n as regular character */ #define REG_ICASE 0x04 /* Ignore case in match */ #define REG_NOSUB 0x02 /* Don't set subexpression */ #define REG_EGREP 0x1000 /* running as egrep(1) */ -/* non-standard flags */ +/* non-standard flags - note that most of these are not supported */ #define REG_DELIM 0x10 /* string[0] is delimiter */ #define REG_DEBUG 0x20 /* Debug recomp and regexec */ #define REG_ANCHOR 0x40 /* Implicit ^ and $ */ #define REG_WORDS 0x80 /* \< and \> match word boundries */ +/* FreeBSD additions */ +#define REG_DUMP 0x2000 +#define REG_PEND 0x4000 +#define REG_NOSPEC 0x8000 + /* internal flags */ #define REG_MUST 0x100 /* check for regmust substring */ @@ -119,11 +127,14 @@ typedef struct { /* regcomp() data saved for regexec() */ size_t re_nsub; /* # of subexpressions in RE pattern */ /* - * Internal use only + * Internal use only. Note that any changes to this structure + * have to preserve sizing, as it is baked into applications. 
*/ - void *re_comp; /* compiled RE; freed by regfree() */ - int re_cflags; /* saved cflags for regexec() */ - size_t re_erroff; /* RE pattern error offset */ + struct re_guts *re_g; + int re_magic; + const char *re_endp; + + /* here for compat */ size_t re_len; /* # wchar_t chars in compiled pattern */ struct _regex_ext_t *re_sc; /* for binary compatibility */ } regex_t; diff --git a/usr/src/lib/libc/Makefile b/usr/src/lib/libc/Makefile index 2bbe37202f..b5bede5d55 100644 --- a/usr/src/lib/libc/Makefile +++ b/usr/src/lib/libc/Makefile @@ -21,6 +21,8 @@ # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. # +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. # LIBBASENAME= libc @@ -125,28 +127,11 @@ $(ROOTVARIANTDIR64)/$(VAR1_DYNLIB64) := FILEMODE= 755 .KEEP_STATE: -all: check_i18n .WAIT lib32 $(BUILD64) .WAIT lib64 .WAIT etc - -check_i18n: - @if [ ! -f $(ROOTFS_LIBDIR)/libc_i18n.a -o \ - ! -f $(ROOTFS_LIBDIR64)/libc_i18n.a ]; then \ - if [ "$$CLOSED_IS_PRESENT" = no ]; then \ - $(ECHO) "Warning: closed binaries not" \ - "properly unpacked." >&2;\ - $(ECHO) "Missing libc_i18n.a" >&2; \ - exit 1; \ - else \ - $(ECHO) "Warning: libc_i18n.a must be built" \ - "before libc can be built." 
>&2; \ - exit 1; \ - fi \ - fi +all: lib32 $(BUILD64) .WAIT lib64 .WAIT etc etc: $($(MACH)_ETC) lib32: $(MACH_COMP) $(MACHLIBS) $(MACH)/$(LIB_PIC) $(VARIANTLIBS) - mkdir -p $(MACH)/i18n_pics - cd $(MACH)/i18n_pics ; $(AR) -x $(ROOTFS_LIBDIR)/libc_i18n.a @if $(ELFDUMP) -r $(MACH)/$(DYNLIB) | $(GREP) -w environ ; then \ $(ECHO) "Error: Invalid reference to environ" ; \ $(ECHO) "Error: See comments in port/gen/env_data.c" ; \ @@ -154,8 +139,6 @@ lib32: $(MACH_COMP) $(MACHLIBS) $(MACH)/$(LIB_PIC) $(VARIANTLIBS) fi lib64: $(MACH_COMP) $(MACHLIBS64) $(MACH64)/$(LIB_PIC) $(VARIANTLIBS64) - mkdir -p $(MACH64)/i18n_pics - cd $(MACH64)/i18n_pics ; $(AR) -x $(ROOTFS_LIBDIR64)/libc_i18n.a @if $(ELFDUMP) -r $(MACH64)/$(DYNLIB) | $(GREP) -w environ ; then \ $(ECHO) "Error: Invalid reference to environ" ; \ $(ECHO) "Error: See comments in port/gen/env_data.c" ; \ @@ -246,8 +229,6 @@ $(SUBDIRS): FRC @cd $@; pwd; VERSION='$(VERSION)' $(MAKE) $(TARGET) clean clobber: $(SUBDIRS) - $(RM) -r $(MACH)/i18n_pics - $(BUILD64)$(RM) -r $(MACH64)/i18n_pics lint: $(SUBDIRS) diff --git a/usr/src/lib/libc/Makefile.targ b/usr/src/lib/libc/Makefile.targ index c4beadabd3..cb01074143 100644 --- a/usr/src/lib/libc/Makefile.targ +++ b/usr/src/lib/libc/Makefile.targ @@ -172,6 +172,11 @@ pics/%.o %.o: $(LIBCBASE)/../port/gen/%.c $(COMPILE.c) -o $@ $< $(POST_PROCESS_O) +# locale rules +pics/%.o %.o: $(LIBCBASE)/../port/locale/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + # print rules pics/%.o: $(LIBCBASE)/../port/print/%.c $(COMPILE.c) -o $@ $< diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 2176c1da01..3cec41db51 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -22,6 +22,9 @@ # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. # +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. +# LIBCBASE= . 
LIBCDIR= $(SRC)/lib/libc @@ -645,11 +648,6 @@ PORTSTDIO= \ wscanf.o PORTI18N= \ - __fgetwc_xpg5.o \ - __fgetws_xpg5.o \ - __fputwc_xpg5.o \ - __fputws_xpg5.o \ - __ungetwc_xpg5.o \ getwchar.o \ putwchar.o \ putws.o \ @@ -690,6 +688,7 @@ PORTI18N= \ gettext_gnu.o \ gettext_real.o \ gettext_util.o \ + isdigit.o \ plural_parser.o \ wdresolve.o \ _ctype.o \ @@ -700,6 +699,76 @@ PORTI18N_COND= \ wcstol_longlong.o \ wcstoul_longlong.o +PORTLOCALE= \ + ascii.o \ + big5.o \ + btowc.o \ + collate.o \ + collcmp.o \ + euc.o \ + fnmatch.o \ + fgetwc.o \ + fgetws.o \ + fix_grouping.o \ + fputwc.o \ + fputws.o \ + fwide.o \ + gb18030.o \ + gb2312.o \ + gbk.o \ + getdate.o \ + iswctype.o \ + ldpart.o \ + lmessages.o \ + lnumeric.o \ + lmonetary.o \ + localeconv.o \ + mbftowc.o \ + mblen.o \ + mbrlen.o \ + mbrtowc.o \ + mbsinit.o \ + mbsnrtowcs.o \ + mbsrtowcs.o \ + mbstowcs.o \ + mbtowc.o \ + mskanji.o \ + nl_langinfo.o \ + none.o \ + regcomp.o \ + regfree.o \ + regerror.o \ + regexec.o \ + rune.o \ + runetype.o \ + setlocale.o \ + setrunelocale.o \ + strcoll.o \ + strfmon.o \ + strftime.o \ + strptime.o \ + strxfrm.o \ + table.o \ + timelocal.o \ + tolower.o \ + towlower.o \ + ungetwc.o \ + utf8.o \ + wcrtomb.o \ + wcscoll.o \ + wcsftime.o \ + wcsnrtombs.o \ + wcsrtombs.o \ + wcswidth.o \ + wcstombs.o \ + wcsxfrm.o \ + wctob.o \ + wctomb.o \ + wctrans.o \ + wctype.o \ + wcwidth.o \ + wscol.o + AIOOBJS= \ aio.o \ aio_alloc.o \ @@ -843,6 +912,7 @@ MOSTOBJS= \ $(PORTGEN64) \ $(PORTI18N) \ $(PORTI18N_COND) \ + $(PORTLOCALE) \ $(PORTPRINT) \ $(PORTPRINT_W) \ $(PORTREGEX) \ @@ -885,12 +955,6 @@ CRTSRCS= ../../common/amd64 include ../../Makefile.lib include ../../Makefile.lib.64 -# NOTE: libc_i18n.a will be part of libc.so.1. Therefore, the compilation -# conditions such as the settings of CFLAGS and CPPFLAGS for the libc_i18n stuff -# need to be compatible with the ones for the libc stuff. 
Whenever the changes -# that affect the compilation conditions of libc happened, those for libc_i18n -# also need to be updated. - CFLAGS64 += $(CTF_FLAGS) # This is necessary to avoid problems with calling _ex_unwind(). @@ -908,7 +972,6 @@ CFLAGS64 += $(XSTRCONST) ALTPICS= $(TRACEOBJS:%=pics/%) -$(DYNLIB) := PICS += $(ROOTFS_LIBDIR64)/libc_i18n.a $(DYNLIB) := BUILD.SO = $(LD) -o $@ -G $(DYNFLAGS) $(PICS) $(ALTPICS) $(EXTPICS) MAPFILES = $(LIBCDIR)/port/mapfile-vers @@ -967,6 +1030,7 @@ SRCS= \ $(PORTFP:%.o=$(LIBCDIR)/port/fp/%.c) \ $(PORTGEN:%.o=$(LIBCDIR)/port/gen/%.c) \ $(PORTI18N:%.o=$(LIBCDIR)/port/i18n/%.c) \ + $(PORTLOCALE:%.o=$(LIBCDIR)/port/locale/%.c) \ $(PORTPRINT:%.o=$(LIBCDIR)/port/print/%.c) \ $(PORTREGEX:%.o=$(LIBCDIR)/port/regex/%.c) \ $(PORTSTDIO:%.o=$(LIBCDIR)/port/stdio/%.c) \ diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 50d06a4e4f..6cb439b99c 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -22,6 +22,9 @@ # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. # +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. 
+# LIBCDIR= $(SRC)/lib/libc LIB_PIC= libc_pic.a @@ -687,11 +690,6 @@ PORTSTDIO= \ wscanf.o PORTI18N= \ - __fgetwc_xpg5.o \ - __fgetws_xpg5.o \ - __fputwc_xpg5.o \ - __fputws_xpg5.o \ - __ungetwc_xpg5.o \ getwchar.o \ putwchar.o \ putws.o \ @@ -732,6 +730,7 @@ PORTI18N= \ gettext_gnu.o \ gettext_real.o \ gettext_util.o \ + isdigit.o \ plural_parser.o \ wdresolve.o \ _ctype.o \ @@ -742,6 +741,76 @@ PORTI18N_COND= \ wcstol_longlong.o \ wcstoul_longlong.o +PORTLOCALE= \ + ascii.o \ + big5.o \ + btowc.o \ + collate.o \ + collcmp.o \ + euc.o \ + fnmatch.o \ + fgetwc.o \ + fgetws.o \ + fix_grouping.o \ + fputwc.o \ + fputws.o \ + fwide.o \ + gb18030.o \ + gb2312.o \ + gbk.o \ + getdate.o \ + iswctype.o \ + ldpart.o \ + lmessages.o \ + lnumeric.o \ + lmonetary.o \ + localeconv.o \ + mbftowc.o \ + mblen.o \ + mbrlen.o \ + mbrtowc.o \ + mbsinit.o \ + mbsnrtowcs.o \ + mbsrtowcs.o \ + mbstowcs.o \ + mbtowc.o \ + mskanji.o \ + nl_langinfo.o \ + none.o \ + regcomp.o \ + regfree.o \ + regerror.o \ + regexec.o \ + rune.o \ + runetype.o \ + setlocale.o \ + setrunelocale.o \ + strcoll.o \ + strfmon.o \ + strftime.o \ + strptime.o \ + strxfrm.o \ + table.o \ + timelocal.o \ + tolower.o \ + towlower.o \ + ungetwc.o \ + utf8.o \ + wcrtomb.o \ + wcscoll.o \ + wcsftime.o \ + wcsnrtombs.o \ + wcsrtombs.o \ + wcswidth.o \ + wcstombs.o \ + wcsxfrm.o \ + wctob.o \ + wctomb.o \ + wctrans.o \ + wctype.o \ + wcwidth.o \ + wscol.o + AIOOBJS= \ aio.o \ aio_alloc.o \ @@ -878,6 +947,7 @@ MOSTOBJS= \ $(PORTGEN64) \ $(PORTI18N) \ $(PORTI18N_COND) \ + $(PORTLOCALE) \ $(PORTPRINT) \ $(PORTPRINT_C89) \ $(PORTPRINT_W) \ @@ -923,12 +993,6 @@ LDPASS_OFF= $(POUND_SIGN) # include common library definitions include ../../Makefile.lib -# NOTE: libc_i18n.a will be part of libc.so.1. Therefore, the compilation -# conditions such as the settings of CFLAGS and CPPFLAGS for the libc_i18n stuff -# need to be compatible with the ones for the libc stuff. 
Whenever the changes -# that affect the compilation conditions of libc happened, those for libc_i18n -# also need to be updated. - # we need to override the default SONAME here because we might # be building a variant object (still libc.so.1, but different filename) SONAME = libc.so.1 @@ -951,7 +1015,6 @@ CFLAGS += $(XSTRCONST) ALTPICS= $(TRACEOBJS:%=pics/%) -$(DYNLIB) := PICS += $(ROOTFS_LIBDIR)/libc_i18n.a $(DYNLIB) := BUILD.SO = $(LD) -o $@ -G $(DYNFLAGS) $(PICS) $(ALTPICS) \ $(EXTPICS) $(LDLIBS) @@ -1025,6 +1088,7 @@ SRCS= \ $(PORTFP:%.o=$(LIBCDIR)/port/fp/%.c) \ $(PORTGEN:%.o=$(LIBCDIR)/port/gen/%.c) \ $(PORTI18N:%.o=$(LIBCDIR)/port/i18n/%.c) \ + $(PORTLOCALE:%.o=$(LIBCDIR)/port/locale/%.c) \ $(PORTPRINT:%.o=$(LIBCDIR)/port/print/%.c) \ $(PORTREGEX:%.o=$(LIBCDIR)/port/regex/%.c) \ $(PORTSTDIO:%.o=$(LIBCDIR)/port/stdio/%.c) \ diff --git a/usr/src/lib/libc/inc/mbstatet.h b/usr/src/lib/libc/inc/mbstatet.h index c27bd0125b..baacffc00e 100644 --- a/usr/src/lib/libc/inc/mbstatet.h +++ b/usr/src/lib/libc/inc/mbstatet.h @@ -24,17 +24,22 @@ * Use is subject to license terms. */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ #ifndef _MBSTATET_H #define _MBSTATET_H -#pragma ident "%Z%%M% %I% %E% SMI" - +/* + * This structure is totally opaque. The value mirrors wchar_impl.h. + */ typedef struct __mbstate_t { - void *__lc_locale; /* pointer to _LC_locale_t */ - void *__state; /* currently unused state flag */ - char __consumed[8]; /* 8 bytes */ - char __nconsumed; - char __fill[7]; +#if defined(_LP64) + long __filler[4]; +#else + int __filler[6]; +#endif } __mbstate_t; #endif /* _MBSTATET_H */ diff --git a/usr/src/lib/libc/inc/mse.h b/usr/src/lib/libc/inc/mse.h index 7644c200dc..836407dce1 100644 --- a/usr/src/lib/libc/inc/mse.h +++ b/usr/src/lib/libc/inc/mse.h @@ -23,12 +23,15 @@ * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ #ifndef _MSE_H #define _MSE_H -#pragma ident "%Z%%M% %I% %E% SMI" - +#include "lint.h" #include <stdio.h> #include <wchar.h> #include <string.h> @@ -89,38 +92,10 @@ extern void _setorientation(FILE *, _IOP_orientation_t); * otherwise alter the orientation of a stream. */ -/* - * libc_i18n provides the following functions: - */ -extern int _set_orientation_wide(FILE *, void **, void (*(*))(void), int); -extern void *__mbst_get_lc_and_fp(const mbstate_t *, - void (*(*))(void), int); -/* - * Above two functions take either FP_WCTOMB or FP_FGETWC for the integer - * argument. - */ -#define FP_WCTOMB 0 -#define FP_FGETWC 1 - #define _SET_ORIENTATION_BYTE(iop) \ { \ if (GET_NO_MODE(iop)) \ _setorientation(iop, _BYTE_MODE); \ } -/* The following is specified in the argument of _get_internal_mbstate() */ -#define _MBRLEN 0 -#define _MBRTOWC 1 -#define _WCRTOMB 2 -#define _MBSRTOWCS 3 -#define _WCSRTOMBS 4 -#define _MAX_MB_FUNC _WCSRTOMBS - -extern void _clear_internal_mbstate(void); -extern mbstate_t *_get_internal_mbstate(int); - -#define MBSTATE_INITIAL(ps) MBSTATE_RESTART(ps) -#define MBSTATE_RESTART(ps) \ - (void) memset((void *)ps, 0, sizeof (mbstate_t)) - #endif /* _MSE_H */ diff --git a/usr/src/lib/libc/port/i18n/__fgetwc_xpg5.c b/usr/src/lib/libc/port/i18n/__fgetwc_xpg5.c deleted file mode 100644 index 67fcd186db..0000000000 --- a/usr/src/lib/libc/port/i18n/__fgetwc_xpg5.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "lint.h" -#include "file64.h" -#include "mse_int.h" -#include "mtlib.h" -#include <stdio.h> -#include <widec.h> -#include <euc.h> -#include <errno.h> -#include "stdiom.h" -#include "mse.h" - -wint_t -__fgetwc_xpg5(FILE *iop) -{ - wint_t result; - void *lc; - wint_t (*fp_fgetwc)(void *, FILE *); - rmutex_t *lk; - - FLOCKFILE(lk, iop); - - if (_set_orientation_wide(iop, &lc, - (void (*(*))(void))&fp_fgetwc, FP_FGETWC) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (WEOF); - } - - result = fp_fgetwc(lc, iop); - FUNLOCKFILE(lk); - return (result); -} - -wint_t -__getwc_xpg5(FILE *iop) -{ - return (__fgetwc_xpg5(iop)); -} diff --git a/usr/src/lib/libc/port/i18n/__fgetws_xpg5.c b/usr/src/lib/libc/port/i18n/__fgetws_xpg5.c deleted file mode 100644 index a6bfc6bedf..0000000000 --- a/usr/src/lib/libc/port/i18n/__fgetws_xpg5.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* Copyright (c) 1986 AT&T */ -/* All Rights Reserved */ - - -/* This module is created for NLS on Sep.03.86 */ - -/* - * Fgetws reads multibyte characters from the "iop", converts - * them to process codes, and places them in the wchar_t - * array pointed to by "ptr". Fgetws reads until n-1 process - * codes are transferred to "ptr", or EOF. - */ - -#include "lint.h" -#include "file64.h" -#include "mse_int.h" -#include <stdlib.h> -#include <stdio.h> -#include <widec.h> -#include <errno.h> -#include "mtlib.h" -#include "stdiom.h" -#include "libc.h" -#include "mse.h" - -wchar_t * -__fgetws_xpg5(wchar_t *ptr, int size, FILE *iop) -{ - wchar_t *ptr0 = ptr; - int c; - void *lc; - wint_t (*fp_fgetwc)(void *, FILE *); - rmutex_t *lk; - - FLOCKFILE(lk, iop); - - if (_set_orientation_wide(iop, &lc, - (void (*(*))(void))&fp_fgetwc, FP_FGETWC) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (NULL); - } - - for (size--; size > 0; size--) { - if ((c = fp_fgetwc(lc, iop)) == EOF) { - if (ptr == ptr0) { - FUNLOCKFILE(lk); - return (NULL); - } - break; - } - *ptr++ = c; - if (c == '\n') - break; - } - *ptr = 0; - FUNLOCKFILE(lk); - return (ptr0); -} diff --git a/usr/src/lib/libc/port/i18n/__fputwc_xpg5.c b/usr/src/lib/libc/port/i18n/__fputwc_xpg5.c deleted file mode 100644 index 3b19300a93..0000000000 --- a/usr/src/lib/libc/port/i18n/__fputwc_xpg5.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject 
to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* Copyright (c) 1986 AT&T */ -/* All Rights Reserved */ - - -/* - * Fputwc transforms the wide character c into the multibyte character, - * and writes it onto the output stream "iop". 
- */ - -#include "lint.h" -#include "file64.h" -#include "mse_int.h" -#include "mtlib.h" -#include <stdlib.h> -#include <stdio.h> -#include <wchar.h> -#include <limits.h> -#include <errno.h> -#include "stdiom.h" -#include "mse.h" - -wint_t -__fputwc_xpg5(wint_t wc, FILE *iop) -{ - char mbs[MB_LEN_MAX]; - unsigned char *p; - int n; - void *lc; - int (*fp_wctomb)(void *, char *, wchar_t); - rmutex_t *lk; - - FLOCKFILE(lk, iop); - - if (_set_orientation_wide(iop, &lc, - (void (*(*))(void))&fp_wctomb, FP_WCTOMB) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (WEOF); - } - - if (wc == WEOF) { - FUNLOCKFILE(lk); - return (WEOF); - } - n = fp_wctomb(lc, mbs, (wchar_t)wc); - if (n <= 0) { - FUNLOCKFILE(lk); - return (WEOF); - } - p = (unsigned char *)mbs; - while (n--) { - /* Can wide I/O functions call byte I/O functions */ - /* because a steam bound to WIDE should not be used */ - /* by byte I/O functions ? */ - /* Anyway, I assume PUTC() macro has appropriate */ - /* definition here. */ - if (PUTC((*p++), iop) == EOF) { - FUNLOCKFILE(lk); - return (WEOF); - } - } - FUNLOCKFILE(lk); - return (wc); -} - -wint_t -__putwc_xpg5(wint_t wc, FILE *iop) -{ - return (__fputwc_xpg5(wc, iop)); -} diff --git a/usr/src/lib/libc/port/i18n/__fputws_xpg5.c b/usr/src/lib/libc/port/i18n/__fputws_xpg5.c deleted file mode 100644 index 46f39c0419..0000000000 --- a/usr/src/lib/libc/port/i18n/__fputws_xpg5.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1986 AT&T */ -/* All Rights Reserved */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * fputws transforms the process code string pointed to by "ptr" - * into a byte string, and writes the string to the named - * output "iop". - * - * Use an intermediate buffer to transform a string from wchar_t to - * multibyte char. In order to not overflow the intermediate buffer, - * impose a limit on the length of string to output to PC_MAX process - * codes. If the input string exceeds PC_MAX process codes, process - * the string in a series of smaller buffers. 
- */ - -#include "lint.h" -#include "file64.h" -#include "mse_int.h" -#include <stdio.h> -#include <stdlib.h> -#include <limits.h> -#include <widec.h> -#include <macros.h> -#include <errno.h> -#include "libc.h" -#include "stdiom.h" -#include "mse.h" - -#define PC_MAX 256 -#define MBBUFLEN (PC_MAX * MB_LEN_MAX) - -int -__fputws_xpg5(const wchar_t *ptr, FILE *iop) -{ - int pcsize, ret; - ssize_t pclen, pccnt; - int nbytes, i; - char mbbuf[MBBUFLEN], *mp; - void *lc; - int (*fp_wctomb)(void *, char *, wchar_t); - rmutex_t *lk; - - FLOCKFILE(lk, iop); - - if (_set_orientation_wide(iop, &lc, - (void (*(*))(void))&fp_wctomb, FP_WCTOMB) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } - - pclen = pccnt = wcslen(ptr); - while (pclen > 0) { - pcsize = (int)min(pclen, PC_MAX - 1); - nbytes = 0; - for (i = 0, mp = mbbuf; i < pcsize; i++, mp += ret) { - if ((ret = fp_wctomb(lc, mp, *ptr++)) == -1) { - FUNLOCKFILE(lk); - return (EOF); - } - nbytes += ret; - } - *mp = '\0'; - /* - * In terms of locking, since libc is using rmutex_t - * for locking iop, we can call fputs() with iop that - * has been already locked. - * But again, - * can wide I/O functions call byte I/O functions - * because a steam bound to WIDE should not be used - * by byte I/O functions ? - */ - if (fputs(mbbuf, iop) != nbytes) { - FUNLOCKFILE(lk); - return (EOF); - } - pclen -= pcsize; - } - FUNLOCKFILE(lk); - if (pccnt <= INT_MAX) - return ((int)pccnt); - else - return (EOF); -} diff --git a/usr/src/lib/libc/port/i18n/_ctype.c b/usr/src/lib/libc/port/i18n/_ctype.c index 5d016393a2..4217636e7c 100644 --- a/usr/src/lib/libc/port/i18n/_ctype.c +++ b/usr/src/lib/libc/port/i18n/_ctype.c @@ -24,11 +24,14 @@ * Use is subject to license terms. */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _ctype = __ctype #include "lint.h" @@ -37,6 +40,79 @@ #include <ctype.h> #include <sys/types.h> +unsigned char __ctype_C[SZ_TOTAL] = +{ + 0, /* EOF */ + _C, _C, _C, _C, _C, _C, _C, _C, + _C, _S|_C, _S|_C, _S|_C, _S|_C, _S|_C, _C, _C, + _C, _C, _C, _C, _C, _C, _C, _C, + _C, _C, _C, _C, _C, _C, _C, _C, + _S|_B, _P, _P, _P, _P, _P, _P, _P, + _P, _P, _P, _P, _P, _P, _P, _P, + _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, + _N|_X, _N|_X, _P, _P, _P, _P, _P, _P, + _P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U, + _U, _U, _U, _U, _U, _U, _U, _U, + _U, _U, _U, _U, _U, _U, _U, _U, + _U, _U, _U, _P, _P, _P, _P, _P, + _P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L, + _L, _L, _L, _L, _L, _L, _L, _L, + _L, _L, _L, _L, _L, _L, _L, _L, + _L, _L, _L, _P, _P, _P, _P, _C, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + +/* tolower() and toupper() conversion table */ 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 123, 124, 125, 126, 127, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* CSWIDTH information */ + 1, 0, 0, 1, 0, 0, 1, +}; + unsigned char __ctype[SZ_TOTAL] = { 0, /* EOF */ diff --git a/usr/src/lib/libc/port/i18n/getwchar.c b/usr/src/lib/libc/port/i18n/getwchar.c index 3370def351..8daa0c1c3f 100644 --- a/usr/src/lib/libc/port/i18n/getwchar.c +++ b/usr/src/lib/libc/port/i18n/getwchar.c @@ -23,8 +23,10 @@ * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ /* Copyright (c) 1986 AT&T */ /* All Rights Reserved */ @@ -48,7 +50,7 @@ wint_t getwchar(void) { - return (_getwc(stdin)); + return (getwc(stdin)); } wint_t diff --git a/usr/src/lib/libc/port/i18n/isdigit.c b/usr/src/lib/libc/port/i18n/isdigit.c new file mode 100644 index 0000000000..ca82bdb1d4 --- /dev/null +++ b/usr/src/lib/libc/port/i18n/isdigit.c @@ -0,0 +1,159 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * This file contains the implementation of various functional forms + * of the ctype tests, specifically those required by ISO C. These are defined + * in the "C" (POSIX) locale. + */ + +#include "lint.h" +#include <ctype.h> + +/* + * We are supplying functional forms, so make sure to suppress any macros + * we might have imported. + */ + +#ifdef isblank +#undef isblank +#endif + +int +isblank(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISBLANK)); +} + +#ifdef isupper +#undef isupper +#endif + +int +isupper(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISUPPER)); +} + +#ifdef islower +#undef islower +#endif + +int +islower(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISLOWER)); +} + +#ifdef isdigit +#undef isdigit +#endif + +int +isdigit(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISDIGIT)); +} + +#ifdef isxdigit +#undef isxdigit +#endif + +int +isxdigit(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISXDIGIT)); +} + +#ifdef isalpha +#undef isalpha +#endif + +int +isalpha(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISALPHA)); +} + +#ifdef isalnum +#undef isalnum +#endif + +int +isalnum(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISALNUM)); +} + +#ifdef isspace +#undef isspace +#endif + +int +isspace(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISSPACE)); +} + +#ifdef iscntrl +#undef iscntrl +#endif + +int +iscntrl(int c) +{ + return (((unsigned)c > 255) ? 
0 : (__ctype_mask[c] & _ISCNTRL)); +} + +#ifdef isgraph +#undef isgraph +#endif + +int +isgraph(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISGRAPH)); +} + +#ifdef ispunct +#undef ispunct +#endif + +int +ispunct(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISPUNCT)); +} + +#ifdef isprint +#undef isprint +#endif + +int +isprint(int c) +{ + return (((unsigned)c > 255) ? 0 : (__ctype_mask[c] & _ISPRINT)); +} diff --git a/usr/src/lib/libc/port/locale/_ctype.h b/usr/src/lib/libc/port/locale/_ctype.h new file mode 100644 index 0000000000..0b286ee6e1 --- /dev/null +++ b/usr/src/lib/libc/port/locale/_ctype.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef __CTYPE_H_ +#define __CTYPE_H_ + +/* + * Please take careful note. It turns out that the _ISxxx macros + * occupy the lower order byte, except for _ISGRAPH, _ISALPHA, and _ISPRINT. + * Those occupy 0x2000, 0x4000, and 0x8000. Now, noting *very* carefully, + * it turns out that this leaves some gaps in the extended bits, which + * are occupied by _E1 = phonogram, _E2 = ideogram, and _E3 = English. + * The *other* _Ex bits are reserved. We don't think these higher order + * bits are baked into applications (because they haven't been used before), + * so we believe it is safe to reuse them as we see fit. + * + * This makes it possible to define a single space which overlaps both the + * wctype types, and the ctype types. 
+ */ + +#define _CTYPE_A _ISALPHA /* Alpha */ +#define _CTYPE_C _ISCNTRL /* Control */ +#define _CTYPE_D _ISDIGIT /* Digit */ +#define _CTYPE_G _ISGRAPH /* Graph */ +#define _CTYPE_L _ISLOWER /* Lower */ +#define _CTYPE_P _ISPUNCT /* Punct */ +#define _CTYPE_S _ISSPACE /* Space */ +#define _CTYPE_U _ISUPPER /* Upper */ +#define _CTYPE_X _ISXDIGIT /* X digit */ +#define _CTYPE_B _ISBLANK /* Blank */ +#define _CTYPE_R _ISPRINT /* Print */ + +#define _CTYPE_Q _E1 /* Phonogram */ +#define _CTYPE_I _E2 /* Ideogram */ +#define _CTYPE_E _E3 /* English (Solaris) */ +#define _CTYPE_N _E4 /* Number */ +#define _CTYPE_T _E5 /* Special */ + +/* These high order bits were never used for anything at all. */ +#define _CTYPE_SW0 0x20000000U /* 0 width character */ +#define _CTYPE_SW1 0x40000000U /* 1 width character */ +#define _CTYPE_SW2 0x80000000U /* 2 width character */ +#define _CTYPE_SW3 0xc0000000U /* 3 width character */ +#define _CTYPE_SWM 0xe0000000U /* Mask for screen width data */ +#define _CTYPE_SWS 30 /* Bits to shift to get width */ + +unsigned long ___runetype(int); + +#endif /* !__CTYPE_H_ */ diff --git a/usr/src/lib/libc/port/locale/ascii.c b/usr/src/lib/libc/port/locale/ascii.c new file mode 100644 index 0000000000..aae1f1ed69 --- /dev/null +++ b/usr/src/lib/libc/port/locale/ascii.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <sys/types.h> +#include <errno.h> +#include <limits.h> +#include "runetype.h" +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <note.h> +#include "mblocal.h" + +static size_t _ascii_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _ascii_mbsinit(const mbstate_t *); +static size_t _ascii_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, + const char **_RESTRICT_KYWD src, size_t nms, size_t len, + mbstate_t *_RESTRICT_KYWD ps); +static size_t _ascii_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); +static size_t _ascii_wcsnrtombs(char *_RESTRICT_KYWD, + const wchar_t **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD); + +int +_ascii_init(_RuneLocale *rl) +{ + + __mbrtowc = _ascii_mbrtowc; + __mbsinit = _ascii_mbsinit; + __mbsnrtowcs = _ascii_mbsnrtowcs; + __wcrtomb = _ascii_wcrtomb; + __wcsnrtombs = _ascii_wcsnrtombs; + _CurrentRuneLocale = rl; + __ctype[520] = 1; + return (0); +} + +static int +_ascii_mbsinit(const mbstate_t *ps) +{ + _NOTE(ARGUNUSED(ps)); + /* + * Encoding is not state dependent - we are always in the + * initial state. + */ + return (1); +} + +static size_t +_ascii_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _NOTE(ARGUNUSED(ps)); + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (0); + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + if (pwc != NULL) + *pwc = (unsigned char)*s; + return (*s == '\0' ? 
0 : 1); +} + +static size_t +_ascii_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, + mbstate_t *_RESTRICT_KYWD ps) +{ + _NOTE(ARGUNUSED(ps)); + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc < 0 || wc > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + *s = (unsigned char)wc; + return (1); +} + +static size_t +_ascii_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + const char *s; + size_t nchr; + + _NOTE(ARGUNUSED(ps)); + + if (dst == NULL) { + for (s = *src; nms > 0 && *s != '\0'; s++, nms--) { + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + } + return (s - *src); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nms-- > 0) { + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = (unsigned char)*s++) == L'\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} + +static size_t +_ascii_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + const wchar_t *s; + size_t nchr; + + _NOTE(ARGUNUSED(ps)); + + if (dst == NULL) { + for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { + if (*s < 0 || *s > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + } + return (s - *src); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nwc-- > 0) { + if (*s < 0 || *s > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = *s++) == '\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} diff --git a/usr/src/lib/libc/port/locale/big5.c b/usr/src/lib/libc/port/locale/big5.c new file mode 100644 index 0000000000..4a55598037 --- /dev/null +++ b/usr/src/lib/libc/port/locale/big5.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <sys/types.h> +#include <errno.h> +#include "runetype.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" + +static size_t _BIG5_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *RESTRICT_KYWD); +static int _BIG5_mbsinit(const mbstate_t *); +static size_t _BIG5_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); + +typedef struct { + wchar_t ch; +} _BIG5State; + +int +_BIG5_init(_RuneLocale *rl) +{ + + __mbrtowc = _BIG5_mbrtowc; + __wcrtomb = _BIG5_wcrtomb; + __mbsinit = _BIG5_mbsinit; + _CurrentRuneLocale = rl; + __ctype[520] = 2; + return (0); +} + +static int +_BIG5_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _BIG5State *)ps)->ch == 0); +} + +static int +_big5_check(uint_t c) +{ + + c &= 0xff; + return ((c >= 0xa1 && c <= 0xfe) ? 2 : 1); +} + +static size_t +_BIG5_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _BIG5State *bs; + wchar_t wc; + size_t len; + + bs = (_BIG5State *)ps; + + if ((bs->ch & ~0xFF) != 0) { + /* Bad conversion state. */ + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + if (bs->ch != 0) { + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (bs->ch << 8) | (*s & 0xFF); + if (pwc != NULL) + *pwc = wc; + bs->ch = 0; + return (1); + } + + len = (size_t)_big5_check(*s); + wc = *s++ & 0xff; + if (len == 2) { + if (n < 2) { + /* Incomplete multibyte sequence */ + bs->ch = wc; + return ((size_t)-2); + } + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (wc << 8) | (*s++ & 0xff); + if (pwc != NULL) + *pwc = wc; + return (2); + } else { + if (pwc != NULL) + *pwc = wc; + return (wc == L'\0' ? 
0 : 1); + } +} + +static size_t +_BIG5_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) +{ + _BIG5State *bs; + + bs = (_BIG5State *)ps; + + if (bs->ch != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc & 0x8000) { + *s++ = (wc >> 8) & 0xff; + *s = wc & 0xff; + return (2); + } + *s = wc & 0xff; + return (1); +} diff --git a/usr/src/lib/libc/port/locale/btowc.c b/usr/src/lib/libc/port/locale/btowc.c new file mode 100644 index 0000000000..56d2536968 --- /dev/null +++ b/usr/src/lib/libc/port/locale/btowc.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2002, 2003 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <stdio.h> +#include <wchar.h> +#include "mblocal.h" + +wint_t +btowc(int c) +{ + static const mbstate_t initial = { 0 }; + mbstate_t mbs = initial; + char cc; + wchar_t wc; + + if (c == EOF) + return (WEOF); + /* + * We expect mbrtowc() to return 0 or 1, hence the check for n > 1 + * which detects error return values as well as "impossible" byte + * counts. + */ + cc = (char)c; + if (__mbrtowc(&wc, &cc, 1, &mbs) > 1) + return (WEOF); + return (wc); +} diff --git a/usr/src/lib/libc/port/locale/cname.h b/usr/src/lib/libc/port/locale/cname.h new file mode 100644 index 0000000000..070683a989 --- /dev/null +++ b/usr/src/lib/libc/port/locale/cname.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* character-name table */ +static struct cname { + char *name; + char code; +} cnames[] = { + {"NUL", '\0'}, + {"SOH", '\001'}, + {"STX", '\002'}, + {"ETX", '\003'}, + {"EOT", '\004'}, + {"ENQ", '\005'}, + {"ACK", '\006'}, + {"BEL", '\007'}, + {"alert", '\007'}, + {"BS", '\010'}, + {"backspace", '\b'}, + {"HT", '\011'}, + {"tab", '\t'}, + {"LF", '\012'}, + {"newline", '\n'}, + {"VT", '\013'}, + {"vertical-tab", '\v'}, + {"FF", '\014'}, + {"form-feed", '\f'}, + {"CR", '\015'}, + {"carriage-return", '\r'}, + {"SO", '\016'}, + {"SI", '\017'}, + {"DLE", '\020'}, + {"DC1", '\021'}, + {"DC2", '\022'}, + {"DC3", '\023'}, + {"DC4", '\024'}, + {"NAK", '\025'}, + {"SYN", '\026'}, + {"ETB", '\027'}, + {"CAN", '\030'}, + {"EM", '\031'}, + {"SUB", '\032'}, + {"ESC", '\033'}, + {"IS4", '\034'}, + {"FS", '\034'}, + {"IS3", '\035'}, + {"GS", '\035'}, + {"IS2", '\036'}, + {"RS", '\036'}, + {"IS1", '\037'}, + {"US", '\037'}, + {"space", ' '}, + {"exclamation-mark", '!'}, + {"quotation-mark", '"'}, + {"number-sign", '#'}, + {"dollar-sign", '$'}, + {"percent-sign", '%'}, + {"ampersand", '&'}, + {"apostrophe", '\''}, + {"left-parenthesis", '('}, + {"right-parenthesis", ')'}, + {"asterisk", '*'}, + {"plus-sign", '+'}, + {"comma", ','}, + {"hyphen", '-'}, + {"hyphen-minus", '-'}, + {"period", '.'}, + {"full-stop", '.'}, + {"slash", '/'}, + {"solidus", '/'}, + {"zero", '0'}, + {"one", '1'}, + {"two", '2'}, + {"three", '3'}, + {"four", '4'}, + {"five", '5'}, + {"six", '6'}, + {"seven", '7'}, + {"eight", '8'}, + {"nine", '9'}, + {"colon", ':'}, + {"semicolon", ';'}, + {"less-than-sign", '<'}, + {"equals-sign", '='}, + {"greater-than-sign", '>'}, + {"question-mark", '?'}, + {"commercial-at", '@'}, + {"left-square-bracket", '['}, + {"backslash", '\\'}, + {"reverse-solidus", '\\'}, + {"right-square-bracket", ']'}, + {"circumflex", '^'}, + {"circumflex-accent", '^'}, + {"underscore", '_'}, + {"low-line", '_'}, + {"grave-accent", '`'}, + {"left-brace", '{'}, + {"left-curly-bracket", 
'{'}, + {"vertical-line", '|'}, + {"right-brace", '}'}, + {"right-curly-bracket", '}'}, + {"tilde", '~'}, + {"DEL", '\177'}, + {NULL, 0} +}; diff --git a/usr/src/lib/libc/port/locale/collate.c b/usr/src/lib/libc/port/locale/collate.c new file mode 100644 index 0000000000..7783cf91bb --- /dev/null +++ b/usr/src/lib/libc/port/locale/collate.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include "file64.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sysexits.h> +#include <netinet/in.h> + +#include "collate.h" +#include "setlocale.h" +#include "ldpart.h" + +int __collate_load_error = 1; +int __collate_substitute_nontrivial; + +char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; +struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; +struct __collate_st_chain_pri *__collate_chain_pri_table; + +int +__collate_load_tables(const char *encoding) +{ + FILE *fp; + int i, saverr, chains; + uint32_t u32; + char strbuf[STR_LEN], buf[PATH_MAX]; + void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table; + static char collate_encoding[ENCODING_LEN + 1]; + + /* 'encoding' must be already checked. */ + if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { + __collate_load_error = 1; + return (_LDP_CACHE); + } + + /* + * If the locale name is the same as our cache, use the cache. + */ + if (strcmp(encoding, collate_encoding) == 0) { + __collate_load_error = 0; + return (_LDP_CACHE); + } + + /* + * Slurp the locale file into the cache. + */ + + /* 'PathLocale' must be already set & checked. 
*/ + /* Range checking not needed, encoding has fixed size */ + (void) strcpy(buf, _PathLocale); + (void) strcat(buf, "/"); + (void) strcat(buf, encoding); + (void) strcat(buf, "/LC_COLLATE"); + if ((fp = fopen(buf, "r")) == NULL) + return (_LDP_ERROR); + + if (fread(strbuf, sizeof (strbuf), 1, fp) != 1) { + saverr = errno; + (void) fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + chains = -1; + if (strcmp(strbuf, COLLATE_VERSION) == 0) + chains = 0; + else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0) + chains = 1; + if (chains < 0) { + (void) fclose(fp); + errno = EINVAL; + return (_LDP_ERROR); + } + if (chains) { + if (fread(&u32, sizeof (u32), 1, fp) != 1) { + saverr = errno; + (void) fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + if ((chains = (int)ntohl(u32)) < 1) { + (void) fclose(fp); + errno = EINVAL; + return (_LDP_ERROR); + } + } else + chains = TABLE_SIZE; + + if ((TMP_substitute_table = + malloc(sizeof (__collate_substitute_table))) == NULL) { + saverr = errno; + (void) fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + if ((TMP_char_pri_table = + malloc(sizeof (__collate_char_pri_table))) == NULL) { + saverr = errno; + free(TMP_substitute_table); + (void) fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + if ((TMP_chain_pri_table = + malloc(sizeof (*__collate_chain_pri_table) * chains)) == NULL) { + saverr = errno; + free(TMP_substitute_table); + free(TMP_char_pri_table); + (void) fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + +#define FREAD(a, b, c, d) \ +{ \ + if (fread(a, b, c, d) != c) { \ + saverr = errno; \ + free(TMP_substitute_table); \ + free(TMP_char_pri_table); \ + free(TMP_chain_pri_table); \ + (void) fclose(d); \ + errno = saverr; \ + return (_LDP_ERROR); \ + } \ +} + + FREAD(TMP_substitute_table, sizeof (__collate_substitute_table), 1, fp); + FREAD(TMP_char_pri_table, sizeof (__collate_char_pri_table), 1, fp); + FREAD(TMP_chain_pri_table, + sizeof (*__collate_chain_pri_table), chains, fp); 
+ (void) fclose(fp); + + (void) strcpy(collate_encoding, encoding); + if (__collate_substitute_table_ptr != NULL) + free(__collate_substitute_table_ptr); + __collate_substitute_table_ptr = TMP_substitute_table; + if (__collate_char_pri_table_ptr != NULL) + free(__collate_char_pri_table_ptr); + __collate_char_pri_table_ptr = TMP_char_pri_table; + for (i = 0; i < UCHAR_MAX + 1; i++) { + __collate_char_pri_table[i].prim = + ntohl(__collate_char_pri_table[i].prim); + __collate_char_pri_table[i].sec = + ntohl(__collate_char_pri_table[i].sec); + } + if (__collate_chain_pri_table != NULL) + free(__collate_chain_pri_table); + __collate_chain_pri_table = TMP_chain_pri_table; + for (i = 0; i < chains; i++) { + __collate_chain_pri_table[i].prim = + ntohl(__collate_chain_pri_table[i].prim); + __collate_chain_pri_table[i].sec = + ntohl(__collate_chain_pri_table[i].sec); + } + __collate_substitute_nontrivial = 0; + for (i = 0; i < UCHAR_MAX + 1; i++) { + if (__collate_substitute_table[i][0] != i || + __collate_substitute_table[i][1] != 0) { + __collate_substitute_nontrivial = 1; + break; + } + } + __collate_load_error = 0; + + return (_LDP_LOADED); +} + +char * +__collate_substitute(const char *str) +{ + int dest_len, len, nlen; + int delta; + char *dest_str = NULL; + uchar_t *s = (uchar_t *)str; + + if (s == NULL || *s == '\0') { + return (strdup("")); + } + + delta = strlen(str); + delta += delta / 8; + dest_str = malloc(dest_len = delta); + if (dest_str == NULL) + return (NULL); + len = 0; + while (*s) { + nlen = len + strlen(__collate_substitute_table[*s]); + if (dest_len <= nlen) { + char *new_str; + new_str = realloc(dest_str, dest_len = nlen + delta); + if (new_str == NULL) { + free(dest_str); + return (NULL); + } + dest_str = new_str; + } + (void) strcpy(dest_str + len, + (char *)__collate_substitute_table[*s++]); + len = nlen; + } + return (dest_str); +} + +void +__collate_lookup(const char *t, int *len, int *prim, int *sec) +{ + struct __collate_st_chain_pri *p2; + + 
*len = 1; + *prim = *sec = 0; + for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) { + if (*t == p2->str[0] && + strncmp(t, (char *)p2->str, strlen((char *)p2->str)) == 0) { + *len = strlen((char *)p2->str); + *prim = p2->prim; + *sec = p2->sec; + return; + } + } + *prim = __collate_char_pri_table[(uchar_t)*t].prim; + *sec = __collate_char_pri_table[(uchar_t)*t].sec; +} diff --git a/usr/src/lib/libc/port/locale/collate.h b/usr/src/lib/libc/port/locale/collate.h new file mode 100644 index 0000000000..149ba6866a --- /dev/null +++ b/usr/src/lib/libc/port/locale/collate.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _COLLATE_H_ +#define _COLLATE_H_ + +#include <sys/types.h> +#include <limits.h> + +#define STR_LEN 10 +#define TABLE_SIZE 100 +#define COLLATE_VERSION "1.0\n" +#define COLLATE_VERSION1_2 "1.2\n" + +struct __collate_st_char_pri { + int prim, sec; +}; +struct __collate_st_chain_pri { + char str[STR_LEN]; + int prim, sec; +}; + +extern int __collate_load_error; +extern int __collate_substitute_nontrivial; + +#define __collate_substitute_table (*__collate_substitute_table_ptr) +extern char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; +#define __collate_char_pri_table (*__collate_char_pri_table_ptr) +extern struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; +extern struct __collate_st_chain_pri *__collate_chain_pri_table; + +char *__collate_substitute(const char *); +int __collate_load_tables(const char *); +void __collate_lookup(const char *, int *, int *, int *); +int __collate_range_cmp(int, int); + +#endif /* !_COLLATE_H_ */ diff --git a/usr/src/lib/libc/port/locale/collcmp.c b/usr/src/lib/libc/port/locale/collcmp.c new file mode 100644 index 0000000000..b73d24dd11 --- /dev/null +++ b/usr/src/lib/libc/port/locale/collcmp.c @@ -0,0 +1,43 @@ +/* + * Copyright (C) 1996 by Andrey A. Chernov, Moscow, Russia. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "lint.h" +#include <string.h> +#include "collate.h" + +/* + * Compare two characters using collate + */ + +int +__collate_range_cmp(int c1, int c2) +{ + static char s1[2], s2[2]; + + s1[0] = c1; + s2[0] = c2; + return (strcoll(s1, s2)); +} diff --git a/usr/src/lib/libc/port/locale/engine.c b/usr/src/lib/libc/port/locale/engine.c new file mode 100644 index 0000000000..6fcf81ad15 --- /dev/null +++ b/usr/src/lib/libc/port/locale/engine.c @@ -0,0 +1,1157 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The matching engine and friends. This file is #included by regexec.c + * after suitable #defines of a variety of macros used herein, so that + * different state representations can be used without duplicating masses + * of code. 
+ */ + +#ifdef SNAMES +#define matcher smatcher +#define fast sfast +#define slow sslow +#define dissect sdissect +#define backref sbackref +#define step sstep +#define print sprint +#define at sat +#define match smat +#endif +#ifdef LNAMES +#define matcher lmatcher +#define fast lfast +#define slow lslow +#define dissect ldissect +#define backref lbackref +#define step lstep +#define print lprint +#define at lat +#define match lmat +#endif +#ifdef MNAMES +#define matcher mmatcher +#define fast mfast +#define slow mslow +#define dissect mdissect +#define backref mbackref +#define step mstep +#define print mprint +#define at mat +#define match mmat +#endif + +/* another structure passed up and down to avoid zillions of parameters */ +struct match { + struct re_guts *g; + int eflags; + regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ + const char *offp; /* offsets work from here */ + const char *beginp; /* start of string -- virtual NUL precedes */ + const char *endp; /* end of string -- virtual NUL here */ + const char *coldp; /* can be no match starting before here */ + const char **lastpos; /* [nplus+1] */ + STATEVARS; + states st; /* current states */ + states fresh; /* states for a fresh start */ + states tmp; /* temporary */ + states empty; /* empty set of states */ + mbstate_t mbs; /* multibyte conversion state */ +}; + +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === engine.c === */ +static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int); +static const char *dissect(struct match *, const char *, const char *, + sopno, sopno); +static const char *backref(struct match *, const char *, const char *, sopno, + sopno, sopno, int); +static const char *fast(struct match *, const char *, const char *, sopno, + sopno); +static const char *slow(struct match *, const char *, const char *, sopno, + sopno); +static states step(struct re_guts *, sopno, sopno, states, wint_t, states); 
+#define MAX_RECURSION 100 +#define BOL (OUT-1) +#define EOL (BOL-1) +#define BOLEOL (BOL-2) +#define NOTHING (BOL-3) +#define BOW (BOL-4) +#define EOW (BOL-5) +#define BADCHAR (BOL-6) +#define NONCHAR(c) ((c) <= OUT) +#ifdef REDEBUG +static void print(struct match *, const char *, states, int, FILE *); +#endif +#ifdef REDEBUG +static void at(struct match *, const char *, const char *, const char *, + sopno, sopno); +#endif +#ifdef REDEBUG +static const char *pchar(int ch); +#endif + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ + +#ifdef REDEBUG +#define SP(t, s, c) print(m, t, s, c, stdout) +#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) +#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); } +#else +#define SP(t, s, c) /* nothing */ +#define AT(t, p1, p2, s1, s2) /* nothing */ +#define NOTE(s) /* nothing */ +#endif + +/* + * matcher - the actual matching engine + */ +static int /* 0 success, REG_NOMATCH failure */ +matcher(struct re_guts *g, + const char *string, + size_t nmatch, + regmatch_t pmatch[], + int eflags) +{ + const char *endp; + int i; + struct match mv; + struct match *m = &mv; + const char *dp; + const sopno gf = g->firststate+1; /* +1 for OEND */ + const sopno gl = g->laststate; + const char *start; + const char *stop; + /* Boyer-Moore algorithms variables */ + const char *pp; + int cj, mj; + const char *mustfirst; + const char *mustlast; + int *matchjump; + int *charjump; + + /* simplify the situation where possible */ + if (g->cflags&REG_NOSUB) + nmatch = 0; +#ifdef REG_STARTEND + if (eflags&REG_STARTEND) { + start = string + pmatch[0].rm_so; + stop = string + pmatch[0].rm_eo; + } else { + start = string; + stop = start + strlen(start); + } +#else + start = string; + stop = start + strlen(start); +#endif + if (stop < start) + return (REG_EFATAL); + + /* prescreening; this does wonders for this rather slow code */ + if (g->must != NULL) { + if (g->charjump != NULL && g->matchjump != NULL) { 
+ mustfirst = g->must; + mustlast = g->must + g->mlen - 1; + charjump = g->charjump; + matchjump = g->matchjump; + pp = mustlast; + for (dp = start+g->mlen-1; dp < stop; ) { + /* Fast skip non-matches */ + while (dp < stop && charjump[(int)*dp]) + dp += charjump[(int)*dp]; + + if (dp >= stop) + break; + + /* Greedy matcher */ + /* + * We depend on not being used for + * for strings of length 1 + */ + while (*--dp == *--pp && pp != mustfirst) + ; + + if (*dp == *pp) + break; + + /* Jump to next possible match */ + mj = matchjump[pp - mustfirst]; + cj = charjump[(int)*dp]; + dp += (cj < mj ? mj : cj); + pp = mustlast; + } + if (pp != mustfirst) + return (REG_NOMATCH); + } else { + for (dp = start; dp < stop; dp++) + if (*dp == g->must[0] && + stop - dp >= g->mlen && + memcmp(dp, g->must, (size_t)g->mlen) == 0) + break; + if (dp == stop) /* we didn't find g->must */ + return (REG_NOMATCH); + } + } + + /* match struct setup */ + m->g = g; + m->eflags = eflags; + m->pmatch = NULL; + m->lastpos = NULL; + m->offp = string; + m->beginp = start; + m->endp = stop; + STATESETUP(m, 4); + SETUP(m->st); + SETUP(m->fresh); + SETUP(m->tmp); + SETUP(m->empty); + CLEAR(m->empty); + ZAPSTATE(&m->mbs); + + /* Adjust start according to moffset, to speed things up */ + if (g->moffset > -1) + start = ((dp - g->moffset) < start) ? start : dp - g->moffset; + + SP("mloop", m->st, *start); + + /* this loop does only one repetition except for backrefs */ + for (;;) { + endp = fast(m, start, stop, gf, gl); + if (endp == NULL) { /* a miss */ + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return (REG_NOMATCH); + } + if (nmatch == 0 && !g->backrefs) + break; /* no further info needed */ + + /* where? 
*/ + assert(m->coldp != NULL); + for (;;) { + NOTE("finding start"); + endp = slow(m, m->coldp, stop, gf, gl); + if (endp != NULL) + break; + assert(m->coldp < m->endp); + m->coldp += XMBRTOWC(NULL, m->coldp, + m->endp - m->coldp, &m->mbs, 0); + } + if (nmatch == 1 && !g->backrefs) + break; /* no further info needed */ + + /* oh my, he wants the subexpressions... */ + if (m->pmatch == NULL) + m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * + sizeof (regmatch_t)); + if (m->pmatch == NULL) { + STATETEARDOWN(m); + return (REG_ESPACE); + } + for (i = 1; i <= m->g->nsub; i++) + m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; + /* NB: FreeBSD has REG_BACKR, we do not */ + if (!g->backrefs /* && !(m->eflags&REG_BACKR) */) { + NOTE("dissecting"); + dp = dissect(m, m->coldp, endp, gf, gl); + } else { + if (g->nplus > 0 && m->lastpos == NULL) + m->lastpos = malloc((g->nplus+1) * + sizeof (const char *)); + if (g->nplus > 0 && m->lastpos == NULL) { + free(m->pmatch); + STATETEARDOWN(m); + return (REG_ESPACE); + } + NOTE("backref dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + if (dp != NULL) + break; + + /* uh-oh... 
we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + /* recycle starting later */ + start = m->coldp + XMBRTOWC(NULL, m->coldp, + stop - m->coldp, &m->mbs, 0); + assert(start <= stop); + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return (0); +} + +/* + * dissect - figure out what matched what, no back references + */ +static const char * +dissect(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + const char *sp; /* start of string matched by it */ + const char *stp; /* string matched by it cannot pass here */ + const char *rest; /* start of rest of string */ + const char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const 
char *ssp; /* start of string matched by subsubRE */ + const char *sep; /* end of string matched by subsubRE */ + const char *oldssp; /* previous ssp */ + const char *dp; + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(0); + break; + case OCHAR: + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); + break; + case OBACK_: + case O_BACK: + assert(0); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? */ + if (slow(m, sp, rest, ssub, esub) != NULL) { + dp = dissect(m, sp, rest, ssub, esub); + assert(dp == rest); +#if defined(__lint) + (void) dp; +#endif + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! 
*/ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + ssp = sp; + oldssp = ssp; + for (;;) { /* find last match of innards */ + sep = slow(m, ssp, rest, ssub, esub); + if (sep == NULL || sep == ssp) + break; /* failed or matched null */ + oldssp = ssp; /* on to next try */ + ssp = sep; + } + if (sep == NULL) { + /* last successful match */ + sep = ssp; + ssp = oldssp; + } + assert(sep == rest); /* must exhaust substring */ + assert(slow(m, ssp, sep, ssub, esub) == rest); + dp = dissect(m, ssp, sep, ssub, esub); + assert(dp == sep); + sp = rest; + break; + case OCH_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = ss + OPND(m->g->strip[ss]) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + if (slow(m, sp, rest, ssub, esub) == rest) + break; /* it matched all of it */ + /* that one missed, try next one */ + assert(OP(m->g->strip[esub]) == OOR1); + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + dp = dissect(m, sp, rest, ssub, esub); + assert(dp == rest); + sp = rest; + break; + case O_PLUS: + case O_QUEST: + case OOR1: + case OOR2: + case O_CH: + assert(0); + break; + case OLPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_so = sp - m->offp; + break; + case ORPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_eo = sp - m->offp; + break; + default: /* uh oh */ 
+ assert(0); + break; + } + } + + assert(sp == stop); + return (sp); +} + +/* + * backref - figure out what matched what, figuring in back references + */ +static const char * +backref(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst, sopno lev, /* PLUS nesting level */ + int rec) +{ + int i; + sopno ss; /* start sop of current subRE */ + const char *sp; /* start of string matched by it */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* start of string matched by subsubRE */ + const char *dp; + size_t len; + int hard; + sop s; + regoff_t offsave; + cset *cs; + wint_t wc; + + AT("back", start, stop, startst, stopst); + sp = start; + + /* get as far as we can with easy stuff */ + hard = 0; + for (ss = startst; !hard && ss < stopst; ss++) + switch (OP(s = m->g->strip[ss])) { + case OCHAR: + if (sp == stop) + return (NULL); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + if (wc != OPND(s)) + return (NULL); + break; + case OANY: + if (sp == stop) + return (NULL); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + if (wc == BADCHAR) + return (NULL); + break; + case OANYOF: + if (sp == stop) + return (NULL); + cs = &m->g->sets[OPND(s)]; + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + if (wc == BADCHAR || !CHIN(cs, wc)) + return (NULL); + break; + case OBOL: + /* only look at sp[-1] when a preceding byte exists */ + if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) || + (sp > m->offp && sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags&REG_NEWLINE))) { + break; + } + else + return (NULL); + break; + case OEOL: + if ((sp == m->endp && !(m->eflags&REG_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags&REG_NEWLINE))) { + break; + } + else + return (NULL); + break; + case OBOW: + /* only look at sp[-1] when a preceding byte exists */ + if (((sp == m->beginp && !(m->eflags&REG_NOTBOL)) || + (sp > m->offp && sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags&REG_NEWLINE)) || + (sp > m->beginp && !ISWORD(*(sp-1)))) && + (sp < m->endp && ISWORD(*sp))) { + break; + } else + return (NULL); + break; + case OEOW: + if 
(((sp == m->endp && !(m->eflags®_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags®_NEWLINE)) || + (sp < m->endp && !ISWORD(*sp))) && + (sp > m->beginp && ISWORD(*(sp-1)))) { + break; + } else + return (NULL); + break; + case O_QUEST: + break; + case OOR1: /* matches null but needs to skip */ + ss++; + s = m->g->strip[ss]; + do { + assert(OP(s) == OOR2); + ss += OPND(s); + } while (OP(s = m->g->strip[ss]) != O_CH); + /* note that the ss++ gets us past the O_CH */ + break; + default: /* have to make a choice */ + hard = 1; + break; + } + if (!hard) { /* that was it! */ + if (sp != stop) + return (NULL); + return (sp); + } + ss--; /* adjust for the for's final increment */ + + /* the hard stuff */ + AT("hard", sp, stop, ss, stopst); + s = m->g->strip[ss]; + switch (OP(s)) { + case OBACK_: /* the vilest depths */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + if (m->pmatch[i].rm_eo == -1) + return (NULL); + assert(m->pmatch[i].rm_so != -1); + len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; + if (len == 0 && rec++ > MAX_RECURSION) + return (NULL); + assert(stop - m->beginp >= len); + if (sp > stop - len) + return (NULL); /* not enough left to match */ + ssp = m->offp + m->pmatch[i].rm_so; + if (memcmp(sp, ssp, len) != 0) + return (NULL); + while (m->g->strip[ss] != SOP(O_BACK, i)) + ss++; + return (backref(m, sp+len, stop, ss+1, stopst, lev, rec)); + break; + case OQUEST_: /* to null or not */ + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return (dp); /* not */ + return (backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); + break; + case OPLUS_: + assert(m->lastpos != NULL); + assert(lev+1 <= m->g->nplus); + m->lastpos[lev+1] = sp; + return (backref(m, sp, stop, ss+1, stopst, lev+1, rec)); + break; + case O_PLUS: + if (sp == m->lastpos[lev]) /* last pass matched null */ + return (backref(m, sp, stop, ss+1, stopst, lev-1, rec)); + /* try another pass */ + m->lastpos[lev] = sp; + dp = backref(m, sp, stop, ss-OPND(s)+1, 
stopst, lev, rec); + if (dp == NULL) + return (backref(m, sp, stop, ss+1, stopst, lev-1, rec)); + else + return (dp); + break; + case OCH_: /* find the right one, if any */ + ssub = ss + 1; + esub = ss + OPND(s) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + dp = backref(m, sp, stop, ssub, esub, lev, rec); + if (dp != NULL) + return (dp); + /* that one missed, try next one */ + if (OP(m->g->strip[esub]) == O_CH) + return (NULL); /* there is none */ + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + break; + case OLPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_so; + m->pmatch[i].rm_so = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return (dp); + m->pmatch[i].rm_so = offsave; + return (NULL); + break; + case ORPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_eo; + m->pmatch[i].rm_eo = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return (dp); + m->pmatch[i].rm_eo = offsave; + return (NULL); + break; + default: /* uh oh */ + assert(0); + break; + } + + /* "can't happen" */ + assert(0); + return (NULL); +} + +/* + * fast - step through the string at top speed + */ +static const char * +fast(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + states st = m->st; + states fresh = m->fresh; + states tmp = m->tmp; + const char *p = start; + wint_t c; + wint_t lastc; /* previous c */ + wint_t flagch; + int i; + const char *coldp; /* last p after which no match was underway */ + size_t clen; + + CLEAR(st); + SET1(st, startst); + SP("fast", st, *p); + st = step(m->g, startst, stopst, st, NOTHING, st); + 
ASSIGN(fresh, st); + SP("start", st, *p); + coldp = NULL; + if (start == m->beginp) + c = OUT; + else { + /* + * XXX Wrong if the previous character was multi-byte. + * Newline never is (in encodings supported by FreeBSD), + * so this only breaks the ISWORD tests below. + */ + c = (uch)*(start - 1); + } + for (;;) { + /* next character */ + lastc = c; + if (p == m->endp) { + clen = 0; + c = OUT; + } else + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + if (EQ(st, fresh)) + coldp = p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) || + (lastc == OUT && !(m->eflags&REG_NOTBOL))) { + flagch = BOL; + i = m->g->nbol; + } + if ((c == '\n' && m->g->cflags&REG_NEWLINE) || + (c == OUT && !(m->eflags&REG_NOTEOL))) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, + flagch, st); + SP("boleol", st, c); + } + + /* how about a word boundary? */ + if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c))) { + flagch = BOW; + } + if ((lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c)))) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("boweow", st, c); + } + + /* are we done? 
*/ + if (ISSET(st, stopst) || p == stop || clen > stop - p) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, fresh); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("aft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p += clen; + } + + assert(coldp != NULL); + m->coldp = coldp; + if (ISSET(st, stopst)) + return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); + else + return (NULL); +} + +/* + * slow - step through the string more deliberately + */ +static const char * +slow(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + states st = m->st; + states empty = m->empty; + states tmp = m->tmp; + const char *p = start; + wint_t c; + wint_t lastc; /* previous c */ + wint_t flagch; + int i; + const char *matchp; /* last p at which a match ended */ + size_t clen; + + AT("slow", start, stop, startst, stopst); + CLEAR(st); + SET1(st, startst); + SP("sstart", st, *p); + st = step(m->g, startst, stopst, st, NOTHING, st); + matchp = NULL; + if (start == m->beginp) + c = OUT; + else { + /* + * XXX Wrong if the previous character was multi-byte. + * Newline never is (in encodings supported by FreeBSD), + * so this only breaks the ISWORD tests below. + */ + c = (uch)*(start - 1); + } + for (;;) { + /* next character */ + lastc = c; + if (p == m->endp) { + c = OUT; + clen = 0; + } else + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) || + (lastc == OUT && !(m->eflags&REG_NOTBOL))) { + flagch = BOL; + i = m->g->nbol; + } + if ((c == '\n' && m->g->cflags&REG_NEWLINE) || + (c == OUT && !(m->eflags&REG_NOTEOL))) { + flagch = (flagch == BOL) ? 
BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, + flagch, st); + SP("sboleol", st, c); + } + + /* how about a word boundary? */ + if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c))) { + flagch = BOW; + } + if ((lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c)))) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboweow", st, c); + } + + /* are we done? */ + if (ISSET(st, stopst)) + matchp = p; + if (EQ(st, empty) || p == stop || clen > stop - p) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, empty); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("saft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p += clen; + } + + return (matchp); +} + + +/* + * step - map set of states reachable before char to set reachable after + */ +static states +step(struct re_guts *g, + sopno start, /* start state within strip */ + sopno stop, /* state after stop state within strip */ + states bef, /* states reachable before */ + wint_t ch, /* character or NONCHAR code */ + states aft) /* states already known reachable after */ +{ + cset *cs; + sop s; + sopno pc; + onestate here; /* note, macros know this name */ + sopno look; + int i; + + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { + s = g->strip[pc]; + switch (OP(s)) { + case OEND: + assert(pc == stop-1); + break; + case OCHAR: + /* only characters can match */ + assert(!NONCHAR(ch) || ch != OPND(s)); + if (ch == OPND(s)) + FWD(aft, bef, 1); + break; + case OBOL: + if (ch == BOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OEOL: + if (ch == EOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OBOW: + if (ch == BOW) + FWD(aft, bef, 1); + break; + case OEOW: + if (ch == EOW) + FWD(aft, bef, 1); + 
break; + case OANY: + if (!NONCHAR(ch)) + FWD(aft, bef, 1); + break; + case OANYOF: + cs = &g->sets[OPND(s)]; + if (!NONCHAR(ch) && CHIN(cs, ch)) + FWD(aft, bef, 1); + break; + case OBACK_: /* ignored here */ + case O_BACK: + FWD(aft, aft, 1); + break; + case OPLUS_: /* forward, this is just an empty */ + FWD(aft, aft, 1); + break; + case O_PLUS: /* both forward and back */ + FWD(aft, aft, 1); + i = ISSETBACK(aft, OPND(s)); + BACK(aft, aft, OPND(s)); + if (!i && ISSETBACK(aft, OPND(s))) { + /* oho, must reconsider loop body */ + pc -= OPND(s) + 1; + INIT(here, pc); + } + break; + case OQUEST_: /* two branches, both forward */ + FWD(aft, aft, 1); + FWD(aft, aft, OPND(s)); + break; + case O_QUEST: /* just an empty */ + FWD(aft, aft, 1); + break; + case OLPAREN: /* not significant here */ + case ORPAREN: + FWD(aft, aft, 1); + break; + case OCH_: /* mark the first two branches */ + FWD(aft, aft, 1); + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + break; + case OOR1: /* done a branch, find the O_CH */ + if (ISSTATEIN(aft, here)) { + for (look = 1; + OP(s = g->strip[pc+look]) != O_CH; + look += OPND(s)) + assert(OP(s) == OOR2); + FWD(aft, aft, look + 1); + } + break; + case OOR2: /* propagate OCH_'s marking */ + FWD(aft, aft, 1); + if (OP(g->strip[pc+OPND(s)]) != O_CH) { + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + } + break; + case O_CH: /* just empty */ + FWD(aft, aft, 1); + break; + default: /* ooooops... */ + assert(0); + break; + } + } + + return (aft); +} + +#ifdef REDEBUG +/* + * print - print a set of states + */ +static void +print(struct match *m, const char *caption, states st, int ch, FILE *d) +{ + struct re_guts *g = m->g; + int i; + int first = 1; + + if (!(m->eflags&REG_TRACE)) + return; + + (void) fprintf(d, "%s", caption); + if (ch != '\0') + (void) fprintf(d, " %s", pchar(ch)); + for (i = 0; i < g->nstates; i++) + if (ISSET(st, i)) { + (void) fprintf(d, "%s%d", (first) ? 
"\t" : ", ", i); + first = 0; + } + (void) fprintf(d, "\n"); +} + +/* + * at - print current situation + */ +static void +at(struct match *m, const char *title, const char *start, const char *stop, + sopno startst, sopno stopst) +{ + if (!(m->eflags&REG_TRACE)) + return; + + (void) printf("%s %s-", title, pchar(*start)); + (void) printf("%s ", pchar(*stop)); + (void) printf("%ld-%ld\n", (long)startst, (long)stopst); +} + +#ifndef PCHARDONE +#define PCHARDONE /* never again */ +/* + * pchar - make a character printable + * + * Is this identical to regchar() over in debug.c? Well, yes. But a + * duplicate here avoids having a debugging-capable regexec.o tied to + * a matching debug.o, and this is convenient. It all disappears in + * the non-debug compilation anyway, so it doesn't matter much. + * + * The result lives in a static buffer that is overwritten by the next + * call. The buffer holds a backslash plus the 11 octal digits of a + * 32-bit int, and the formatting is bounded so it cannot overrun. + */ +static const char * +pchar(int ch) +{ + static char pbuf[16]; + + if (isprint((uch)ch) || ch == ' ') + (void) snprintf(pbuf, sizeof (pbuf), "%c", ch); + else + (void) snprintf(pbuf, sizeof (pbuf), "\\%o", ch); + return (pbuf); +} +#endif +#endif + +#undef matcher +#undef fast +#undef slow +#undef dissect +#undef backref +#undef step +#undef print +#undef at +#undef match diff --git a/usr/src/lib/libc/port/locale/euc.c b/usr/src/lib/libc/port/locale/euc.c new file mode 100644 index 0000000000..70b1a65113 --- /dev/null +++ b/usr/src/lib/libc/port/locale/euc.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <sys/types.h> +#include "runetype.h" +#include "mblocal.h" + +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) + +static size_t _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _EUC_mbsinit(const mbstate_t *); +static size_t _EUC_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); + +typedef struct { + int count[4]; + wchar_t bits[4]; + wchar_t mask; +} _EucInfo; + +typedef struct { + wchar_t ch; + int set; + int want; +} _EucState; + +int +_EUC_init(_RuneLocale *rl) +{ + _EucInfo *ei; + int x, new__mb_cur_max; + char *v, *e; + + if (rl->__variable == NULL) + return (EINVAL); + + v = (char *)rl->__variable; + + while (*v == ' ' || *v == '\t') + ++v; + + if ((ei = malloc(sizeof (_EucInfo))) == NULL) + return (errno == 0 ? ENOMEM : errno); + + new__mb_cur_max = 0; + for (x = 0; x < 4; ++x) { + ei->count[x] = (int)strtol(v, &e, 0); + if (v == e || !(v = e)) { + free(ei); + return (EINVAL); + } + if (new__mb_cur_max < ei->count[x]) + new__mb_cur_max = ei->count[x]; + while (*v == ' ' || *v == '\t') + ++v; + ei->bits[x] = (int)strtol(v, &e, 0); + if (v == e || !(v = e)) { + free(ei); + return (EINVAL); + } + while (*v == ' ' || *v == '\t') + ++v; + } + ei->mask = (int)strtol(v, &e, 0); + if (v == e || !(v = e)) { + free(ei); + return (EINVAL); + } + rl->__variable = ei; + rl->__variable_len = sizeof (_EucInfo); + _CurrentRuneLocale = rl; + __ctype[520] = new__mb_cur_max; + __mbrtowc = _EUC_mbrtowc; + __wcrtomb = _EUC_wcrtomb; + __mbsinit = _EUC_mbsinit; + return (0); +} + +static int +_EUC_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _EucState *)ps)->want == 0); +} + +#define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) + +#define _SS2 0x008e +#define _SS3 0x008f + +#define GR_BITS 0x80808080 /* XXX: to be fixed */ + +static int +_euc_set(uint_t c) +{ + + c &= 0xff; + return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 
2 : 1 : 0); +} + +static size_t +_EUC_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _EucState *es; + int i, set, want; + wchar_t wc; + const char *os; + + es = (_EucState *)ps; + + if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || + es->set > 3) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + os = s; + + if (es->want == 0) { + want = CEI->count[set = _euc_set(*s)]; + if (set == 2 || set == 3) { + --want; + if (--n == 0) { + /* Incomplete multibyte sequence */ + es->set = set; + es->want = want; + es->ch = 0; + return ((size_t)-2); + } + ++s; + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + } + wc = (unsigned char)*s++; + } else { + set = es->set; + want = es->want; + wc = es->ch; + } + for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (wc << 8) | (unsigned char)*s++; + } + if (i < want) { + /* Incomplete multibyte sequence */ + es->set = set; + es->want = want - i; + es->ch = wc; + return ((size_t)-2); + } + wc = (wc & ~CEI->mask) | CEI->bits[set]; + if (pwc != NULL) + *pwc = wc; + es->want = 0; + return (wc == L'\0' ? 0 : s - os); +} + +static size_t +_EUC_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) +{ + _EucState *es; + wchar_t m, nm; + int i, len; + + es = (_EucState *)ps; + + if (es->want != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + + m = wc & CEI->mask; + nm = wc & ~m; + + if (m == CEI->bits[1]) { +CodeSet1: + /* Codeset 1: The first byte must have 0x80 in it. 
*/ + i = len = CEI->count[1]; + while (i-- > 0) { + *(unsigned char *)s = (nm >> (i << 3)) | 0x80; + s++; + } + } else { + if (m == CEI->bits[0]) + i = len = CEI->count[0]; + else if (m == CEI->bits[2]) { + i = len = CEI->count[2]; + *(unsigned char *)s = _SS2; + s++; + --i; + /* SS2 designates G2 into GR */ + nm |= GR_BITS; + } else if (m == CEI->bits[3]) { + i = len = CEI->count[3]; + *(unsigned char *)s = _SS3; + s++; + --i; + /* SS3 designates G3 into GR */ + nm |= GR_BITS; + } else + goto CodeSet1; /* Bletch */ + while (i-- > 0) + *s++ = (nm >> (i << 3)) & 0xff; + } + return (len); +} diff --git a/usr/src/lib/libc/port/locale/fgetwc.c b/usr/src/lib/libc/port/locale/fgetwc.c new file mode 100644 index 0000000000..f3d0fd8728 --- /dev/null +++ b/usr/src/lib/libc/port/locale/fgetwc.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "mse_int.h" +#include "file64.h" +#include "mtlib.h" +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" +#include "stdiom.h" + +/* + * Non-MT-safe version. + */ +wint_t +_fgetwc_unlocked(FILE *fp) +{ + wchar_t wc; + size_t nconv; + int c; + mbstate_t *statep; + + if ((c = GETC(fp)) == EOF) + return (WEOF); + + if (MB_CUR_MAX == 1) { + /* Fast path for single-byte encodings. */ + return ((wint_t)c); + } + if ((statep = _getmbstate(fp)) == NULL) { + fp->_flag = _IOERR; + errno = EBADF; + return (WEOF); + } + do { + char x = (char)c; + nconv = __mbrtowc(&wc, &x, 1, statep); + if (nconv == (size_t)-1) { + break; + } else if (nconv == (size_t)-2) { + /* Incompletely decoded, consume another char */ + continue; + } else if (nconv == 0) { + /* + * Assume that the only valid representation of + * the null wide character is a single null byte. + */ + return (L'\0'); + } else { + return (wc); + } + } while ((c = GETC(fp)) != EOF); + + /* + * If we got here it means we got truncated in a character, or + * the character did not decode properly. Note that in the case + * of a botched decoding, we don't UNGETC the bad bytes. Should + * we? 
+ */ + fp->_flag |= _IOERR; + errno = EILSEQ; + return (WEOF); +} + + +/* + * MT safe version + */ +wint_t +fgetwc(FILE *fp) +{ + wint_t r; + rmutex_t *l; + + FLOCKFILE(l, fp); + r = _fgetwc_unlocked(fp); + FUNLOCKFILE(l); + + return (r); +} + +#undef getwc +wint_t +getwc(FILE *fp) +{ + return (getwc(fp)); +} + +/* + * XPG5 version. + */ +wint_t +__fgetwc_xpg5(FILE *fp) +{ + wint_t r; + rmutex_t *l; + + FLOCKFILE(l, fp); + if (GET_NO_MODE(fp)) + _setorientation(fp, _WC_MODE); + r = _fgetwc_unlocked(fp); + FUNLOCKFILE(l); + + return (r); +} + +#undef __getwc_xpg5 +wint_t +__getwc_xpg5(FILE *fp) +{ + return (__fgetwc_xpg5(fp)); +} diff --git a/usr/src/lib/libc/port/locale/fgetws.c b/usr/src/lib/libc/port/locale/fgetws.c new file mode 100644 index 0000000000..bba61bbd83 --- /dev/null +++ b/usr/src/lib/libc/port/locale/fgetws.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "mse_int.h" +#include "file64.h" +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" +#include "mse.h" +#include "stdiom.h" +#include "libc.h" + +/* + * FreeBSD implementation here included a full version that tried to be more + * efficient with memory strings. However, for simplicity's sake, we are + * going to just use fgetwc(). We also do the stream orientation thing for + * XPG5 if we need to. + */ + +wchar_t * +_fgetws_impl(wchar_t *_RESTRICT_KYWD ws, int n, FILE *_RESTRICT_KYWD fp, + int orient) +{ + wint_t wc; + wchar_t *wsp; + rmutex_t *lk; + + FLOCKFILE(lk, fp); + if (orient && GET_NO_MODE(fp)) + _setorientation(fp, _WC_MODE); + + if (n <= 0) { + errno = EINVAL; + FUNLOCKFILE(lk); + return (NULL); + } + + wsp = ws; + while (n--) { + wc = _fgetwc_unlocked(fp); + if (wc == EOF) { + /* + * This can happen because of an EOF on + * the stream, or because of a decoding error. + * Its up to the caller to check errno. 
+ */ + if (wsp == ws) { + /* EOF with no data read */ + FUNLOCKFILE(lk); + return (NULL); + } + break; + } + *wsp++ = wc; + + if (wc == L'\n') + break; + } + *wsp = 0; + FUNLOCKFILE(lk); + return (ws); +} + +wchar_t * +fgetws(wchar_t *_RESTRICT_KYWD ws, int n, FILE *_RESTRICT_KYWD fp) +{ + return (_fgetws_impl(ws, n, fp, 0)); +} + +wchar_t * +__fgetws_xpg5(wchar_t *ws, int n, FILE *fp) +{ + return (_fgetws_impl(ws, n, fp, 1)); +} + +wchar_t * +getws(wchar_t *ws) +{ + wint_t wc; + wchar_t *wsp; + rmutex_t *lk; + + FLOCKFILE(lk, stdin); + + wsp = ws; + for (;;) { + wc = _fgetwc_unlocked(stdin); + if (wc == EOF) { + /* + * This can happen because of an EOF on + * the stream, or because of a decoding error. + * Its up to the caller to check errno. + */ + if (wsp == ws) { + /* EOF with no data read */ + FUNLOCKFILE(lk); + return (NULL); + } + break; + } + *wsp++ = wc; + + if (wc == L'\n') + break; + } + *wsp = 0; + FUNLOCKFILE(lk); + return (ws); +} diff --git a/usr/src/lib/libc/port/locale/fix_grouping.c b/usr/src/lib/libc/port/locale/fix_grouping.c new file mode 100644 index 0000000000..3bb84060d6 --- /dev/null +++ b/usr/src/lib/libc/port/locale/fix_grouping.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
/* Result used for "no grouping": a single CHAR_MAX element. */
static const char nogrouping[] = { CHAR_MAX, '\0' };

/*
 * Internal helper used to convert grouping sequences from string
 * representation into POSIX specified form, i.e.
 *
 * "3;3;-1" -> "\003\003\177\000"
 *
 * The conversion is done in place: the buffer behind str is overwritten
 * with the binary form, which is never longer than the text it replaces.
 * Each number may have one or two digits.
 *
 * Returns str on success.  Returns the static "no grouping" string when
 * str is NULL or empty, when it contains a character that is not part of
 * a number, ';' or "-1", or when the first number is 0.  A 0 in any later
 * position ends the list at that point.
 */
const char *
__fix_locale_grouping_str(const char *str)
{
	char *in, *out;
	char value;

	if (str == NULL || str[0] == '\0')
		return (nogrouping);

	in = out = (char *)str;
	while (*in != '\0') {
		/* input string examples: "3;3", "3;2;-1" */
		if (*in == ';') {
			in++;
			continue;
		}

		if (in[0] == '-' && in[1] == '1') {
			*out++ = CHAR_MAX;
			in += 2;
			continue;
		}

		if (!isdigit((unsigned char)in[0])) {
			/* broken grouping string */
			return (nogrouping);
		}

		/* assume all numbers <= 99 */
		value = *in++ - '0';
		if (isdigit((unsigned char)*in)) {
			value *= 10;
			value += *in++ - '0';
		}

		*out = value;
		/* NOTE: assume all input started with "0" as 'no grouping' */
		if (value == '\0')
			return ((out == (char *)str) ? nogrouping : str);
		out++;
	}
	*out = '\0';
	return (str);
}
+ */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. + * Compares a filename or pathname to a pattern. + */ + +/* + * Some notes on multibyte character support: + * 1. Patterns with illegal byte sequences match nothing. + * 2. Illegal byte sequences in the "string" argument are handled by treating + * them as single-byte characters with a value of the first byte of the + * sequence cast to wchar_t. + * 3. Multibyte conversion state objects (mbstate_t) are passed around and + * used for most, but not all, conversions. Further work will be required + * to support state-dependent encodings. + */ + +#include "lint.h" +#include <fnmatch.h> +#include <limits.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> + +#include "collate.h" + +#define EOS '\0' + +#define RANGE_MATCH 1 +#define RANGE_NOMATCH 0 +#define RANGE_ERROR (-1) + +static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); +static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, + mbstate_t); + +int +fnmatch(pattern, string, flags) + const char *pattern, *string; + int flags; +{ + static const mbstate_t initial = { 0 }; + + return (fnmatch1(pattern, string, string, flags, initial, initial)); +} + +static int +fnmatch1(const char *pattern, const char *string, const char *stringstart, + int flags, mbstate_t patmbs, mbstate_t strmbs) +{ + char *newp; + char c; + wchar_t pc, sc; + size_t pclen, sclen; + + for (;;) { + pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); + if (pclen == (size_t)-1 || pclen == (size_t)-2) + return (FNM_NOMATCH); + pattern += pclen; + sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); + if (sclen == (size_t)-1 || sclen == (size_t)-2) { + sc = (unsigned char)*string; + sclen = 1; + (void) memset(&strmbs, 0, sizeof (strmbs)); + } + switch (pc) { + case EOS: + /* + * Removed FNM_LEADING_DIR, as it is 
not present + * on Solaris. + */ + return (sc == EOS ? 0 : FNM_NOMATCH); + case '?': + if (sc == EOS) + return (FNM_NOMATCH); + if (sc == '/' && (flags & FNM_PATHNAME)) + return (FNM_NOMATCH); + if (sc == '.' && (flags & FNM_PERIOD) && + (string == stringstart || + ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) + return (FNM_NOMATCH); + string += sclen; + break; + case '*': + c = *pattern; + /* Collapse multiple stars. */ + while (c == '*') + c = *++pattern; + + if (sc == '.' && (flags & FNM_PERIOD) && + (string == stringstart || + ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) + return (FNM_NOMATCH); + + /* Optimize for pattern with * at end or before /. */ + if (c == EOS) + if (flags & FNM_PATHNAME) + return (strchr(string, '/') == NULL ? + 0 : FNM_NOMATCH); + else + return (0); + else if (c == '/' && flags & FNM_PATHNAME) { + if ((string = strchr(string, '/')) == NULL) + return (FNM_NOMATCH); + break; + } + + /* General case, use recursion. */ + while (sc != EOS) { + if (!fnmatch1(pattern, string, stringstart, + flags, patmbs, strmbs)) + return (0); + sclen = mbrtowc(&sc, string, MB_LEN_MAX, + &strmbs); + if (sclen == (size_t)-1 || + sclen == (size_t)-2) { + sc = (unsigned char)*string; + sclen = 1; + (void) memset(&strmbs, 0, + sizeof (strmbs)); + } + if (sc == '/' && flags & FNM_PATHNAME) + break; + string += sclen; + } + return (FNM_NOMATCH); + case '[': + if (sc == EOS) + return (FNM_NOMATCH); + if (sc == '/' && (flags & FNM_PATHNAME)) + return (FNM_NOMATCH); + if (sc == '.' 
&& (flags & FNM_PERIOD) && + (string == stringstart || + ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) + return (FNM_NOMATCH); + + switch (rangematch(pattern, sc, flags, &newp, + &patmbs)) { + case RANGE_ERROR: + goto norm; + case RANGE_MATCH: + pattern = newp; + break; + case RANGE_NOMATCH: + return (FNM_NOMATCH); + } + string += sclen; + break; + case '\\': + if (!(flags & FNM_NOESCAPE)) { + pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, + &patmbs); + if (pclen == (size_t)-1 || pclen == (size_t)-2) + return (FNM_NOMATCH); + if (pclen == 0) + pc = '\\'; + pattern += pclen; + } + /* FALLTHROUGH */ + default: + norm: + if (pc == sc) + string += sclen; + + else if ((flags & FNM_IGNORECASE) && + (towlower(pc) == towlower(sc))) + string += sclen; + else + return (FNM_NOMATCH); + + break; + } + } + /* NOTREACHED */ +} + +static int +rangematch(pattern, test, flags, newp, patmbs) + const char *pattern; + wchar_t test; + int flags; + char **newp; + mbstate_t *patmbs; +{ + int negate, ok; + wchar_t c, c2; + size_t pclen; + const char *origpat; + + /* + * A bracket expression starting with an unquoted circumflex + * character produces unspecified results (IEEE 1003.2-1992, + * 3.13.2). This implementation treats it like '!', for + * consistency with the regular expression syntax. + * J.T. Conklin (conklin@ngai.kaleida.com) + */ + if ((negate = (*pattern == '!' || *pattern == '^')) != 0) + ++pattern; + + if (flags & FNM_IGNORECASE) + test = towlower(test); + + /* + * A right bracket shall lose its special meaning and represent + * itself in a bracket expression if it occurs first in the list. 
+ * -- POSIX.2 2.8.3.2 + */ + ok = 0; + origpat = pattern; + for (;;) { + if (*pattern == ']' && pattern > origpat) { + pattern++; + break; + } else if (*pattern == '\0') { + return (RANGE_ERROR); + } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { + return (RANGE_NOMATCH); + } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) + pattern++; + pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); + if (pclen == (size_t)-1 || pclen == (size_t)-2) + return (RANGE_NOMATCH); + pattern += pclen; + + if (flags & FNM_IGNORECASE) + c = towlower(c); + + if (*pattern == '-' && *(pattern + 1) != EOS && + *(pattern + 1) != ']') { + if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) + if (*pattern != EOS) + pattern++; + pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); + if (pclen == (size_t)-1 || pclen == (size_t)-2) + return (RANGE_NOMATCH); + pattern += pclen; + if (c2 == EOS) + return (RANGE_ERROR); + + if (flags & FNM_IGNORECASE) + c2 = towlower(c2); + + if (__collate_load_error ? + c <= test && test <= c2 : + __collate_range_cmp(c, test) <= 0 && + __collate_range_cmp(test, c2) <= 0) + ok = 1; + } else if (c == test) + ok = 1; + } + + *newp = (char *)pattern; + return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); +} diff --git a/usr/src/lib/libc/port/locale/fputwc.c b/usr/src/lib/libc/port/locale/fputwc.c new file mode 100644 index 0000000000..c02d7e251f --- /dev/null +++ b/usr/src/lib/libc/port/locale/fputwc.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "file64.h" +#include "mtlib.h" +#include "mse_int.h" +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <wchar.h> +#include <synch.h> +#include "mblocal.h" +#include "stdiom.h" +#include "mse.h" + +#pragma weak _putwc = putwc + +/* + * FreeBSD had both a MT safe and non-MT safe version. For whatever reason, + * we don't need the non-MT safe version. We do this because it's faster, + * since we don't have to lock the file while doing the potentially expensive + * conversion from wide to mb. + * + * Solaris also has XPG5 and legacy semantics. The new standard requires + * that the stream orientation change, but legacy calls don't do that. + * + * Note that we had the source for the XPG5 version of this, but it relied + * on closed implementation bits that we lack, so we supply replacements + * here. 
+ */ +static wint_t +__fputwc_impl(wchar_t wc, FILE *fp, int orient) +{ + char buf[MB_LEN_MAX]; + size_t i, len; + rmutex_t *mx; + + /* If we are given WEOF, then we have to stop */ + if (wc == WEOF) + return (WEOF); + + if (MB_CUR_MAX == 1 && wc > 0 && wc <= UCHAR_MAX) { + /* + * Assume single-byte locale with no special encoding. + * A more careful test would be to check + * _CurrentRuneLocale->encoding. + */ + *buf = (unsigned char)wc; + len = 1; + } else { + /* + * FreeBSD used restartable wcrtomb. I think we can use + * the simpler wctomb form here. We should have a complete + * decode. + */ + if ((len = wctomb(buf, wc)) == (size_t)-1) { + fp->_flag |= _IOERR; + errno = EILSEQ; + return (WEOF); + } + } + + FLOCKFILE(mx, fp); + /* + * This is used for XPG 5 semantics, which requires the stream + * orientation to be changed when the function is called. + */ + if (orient && GET_NO_MODE(fp)) { + _setorientation(fp, _WC_MODE); + } + for (i = 0; i < len; i++) { + if (PUTC((unsigned char)buf[i], fp) == EOF) { + FUNLOCKFILE(mx); + return (WEOF); + } + } + FUNLOCKFILE(mx); + return ((wint_t)wc); +} + +wint_t +fputwc(wchar_t wc, FILE *fp) +{ + return (__fputwc_impl(wc, fp, 0)); +} + +/* + * Trivial functional form of the typical macro. + */ +#undef __putwc +wint_t +putwc(wchar_t wc, FILE *fp) +{ + return (__fputwc_impl(wc, fp, 0)); +} + +wint_t +__fputwc_xpg5(wint_t wc, FILE *fp) +{ + return (__fputwc_impl(wc, fp, 1)); +} + +#undef __putwc_xpg5 +wint_t +__putwc_xpg5(wint_t wc, FILE *fp) +{ + return (__fputwc_impl(wc, fp, 1)); +} diff --git a/usr/src/lib/libc/port/locale/fputws.c b/usr/src/lib/libc/port/locale/fputws.c new file mode 100644 index 0000000000..f6e7038ff8 --- /dev/null +++ b/usr/src/lib/libc/port/locale/fputws.c @@ -0,0 +1,98 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "lint.h" +#include "mse_int.h" +#include "file64.h" +#include "mtlib.h" +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" +#include "stdiom.h" + +static int +_fputws_impl(const wchar_t *_RESTRICT_KYWD ws, FILE *_RESTRICT_KYWD fp, + int orient) +{ + int nchars; + int nwritten; + char buf[BUFSIZ]; + rmutex_t *lk; + + /* + * The FreeBSD implementation here was a bit more complex, because + * it repeated much of what is in fputs. For simplicity's sake, we + * juse wctomb to convert the wide string to a mbs, and then use + * fputs to print the mbs. 
+ */ + + nchars = wcslen(ws); + nwritten = 0; + + FLOCKFILE(lk, fp); + if (orient && GET_NO_MODE(fp)) + _setorientation(fp, _WC_MODE); + + while (nchars > 0) { + int nbytes = 0; + char *ptr = buf; + while ((nbytes < (BUFSIZ - (MB_LEN_MAX * 2))) && nchars) { + int n; + if ((n = wctomb(ptr, *ws)) < 0) { + FUNLOCKFILE(lk); + fp->_flag |= _IOERR; + errno = EILSEQ; + return (EOF); + } + ws++; + ptr += n; + nbytes += n; + nchars--; + } + *ptr = '\0'; + if (fputs(buf, fp) < nbytes) { + FUNLOCKFILE(lk); + return (EOF); + } + nwritten += nbytes; + } + FUNLOCKFILE(lk); + return (nwritten); +} + +int +fputws(const wchar_t *_RESTRICT_KYWD ws, FILE *_RESTRICT_KYWD fp) +{ + return (_fputws_impl(ws, fp, 0)); +} + +int +__fputws_xpg5(const wchar_t *ws, FILE *fp) +{ + return (_fputws_impl(ws, fp, 1)); +} diff --git a/usr/src/lib/libc/port/locale/fwide.c b/usr/src/lib/libc/port/locale/fwide.c new file mode 100644 index 0000000000..6e326cff17 --- /dev/null +++ b/usr/src/lib/libc/port/locale/fwide.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "mse_int.h" +#include "file64.h" +#include "mse.h" +#include <errno.h> +#include <stdio.h> +#include <wchar.h> +#include "mblocal.h" + +int +fwide(FILE *fp, int mode) +{ + int m; + rmutex_t *lk; + + FLOCKFILE(lk, fp); + /* Only change the orientation if the stream is not oriented yet. */ + m = _getorientation(fp); + if (mode != 0 && m == _NO_MODE) + _setorientation(fp, (mode > 0) ? _WC_MODE : _BYTE_MODE); + FUNLOCKFILE(lk); + + switch (m) { + case _WC_MODE: + mode = 1; + break; + case _NO_MODE: + mode = 0; + break; + case _BYTE_MODE: + mode = -1; + break; + } + + return (mode); +} diff --git a/usr/src/lib/libc/port/locale/gb18030.c b/usr/src/lib/libc/port/locale/gb18030.c new file mode 100644 index 0000000000..f789568573 --- /dev/null +++ b/usr/src/lib/libc/port/locale/gb18030.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * PRC National Standard GB 18030-2000 encoding of Chinese text. + * + * See gb18030(5) for details. 
+ */ + +#include "lint.h" +#include <sys/types.h> +#include <errno.h> +#include "runetype.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" + + +static size_t _GB18030_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _GB18030_mbsinit(const mbstate_t *); +static size_t _GB18030_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); + +typedef struct { + int count; + uchar_t bytes[4]; +} _GB18030State; + +int +_GB18030_init(_RuneLocale *rl) +{ + + __mbrtowc = _GB18030_mbrtowc; + __wcrtomb = _GB18030_wcrtomb; + __mbsinit = _GB18030_mbsinit; + _CurrentRuneLocale = rl; + __ctype[520] = 4; + + return (0); +} + +static int +_GB18030_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _GB18030State *)ps)->count == 0); +} + +static size_t +_GB18030_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _GB18030State *gs; + wchar_t wch; + int ch, len, ocount; + size_t ncopy; + + gs = (_GB18030State *)ps; + + if (gs->count < 0 || gs->count > sizeof (gs->bytes)) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof (gs->bytes) - gs->count); + (void) memcpy(gs->bytes + gs->count, s, ncopy); + ocount = gs->count; + gs->count += ncopy; + s = (char *)gs->bytes; + n = gs->count; + + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + /* + * Single byte: [00-7f] + * Two byte: [81-fe][40-7e,80-fe] + * Four byte: [81-fe][30-39][81-fe][30-39] + */ + ch = (unsigned char)*s++; + if (ch <= 0x7f) { + len = 1; + wch = ch; + } else if (ch >= 0x81 && ch <= 0xfe) { + wch = ch; + if (n < 2) + return ((size_t)-2); + ch = (unsigned char)*s++; + if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) { + wch = (wch << 8) | ch; + len = 2; + } else if (ch >= 0x30 && ch <= 0x39) { + /* + * 
Strip high bit off the wide character we will + * eventually output so that it is positive when + * cast to wint_t on 32-bit twos-complement machines. + */ + wch = ((wch & 0x7f) << 8) | ch; + if (n < 3) + return ((size_t)-2); + ch = (unsigned char)*s++; + if (ch < 0x81 || ch > 0xfe) + goto ilseq; + wch = (wch << 8) | ch; + if (n < 4) + return ((size_t)-2); + ch = (unsigned char)*s++; + if (ch < 0x30 || ch > 0x39) + goto ilseq; + wch = (wch << 8) | ch; + len = 4; + } else + goto ilseq; + } else + goto ilseq; + + if (pwc != NULL) + *pwc = wch; + gs->count = 0; + return (wch == L'\0' ? 0 : len - ocount); +ilseq: + errno = EILSEQ; + return ((size_t)-1); +} + +static size_t +_GB18030_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, + mbstate_t *_RESTRICT_KYWD ps) +{ + _GB18030State *gs; + size_t len; + int c; + + gs = (_GB18030State *)ps; + + if (gs->count != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if ((wc & ~0x7fffffff) != 0) + goto ilseq; + if (wc & 0x7f000000) { + /* Replace high bit that mbrtowc() removed. 
*/ + wc |= 0x80000000; + c = (wc >> 24) & 0xff; + if (c < 0x81 || c > 0xfe) + goto ilseq; + *s++ = c; + c = (wc >> 16) & 0xff; + if (c < 0x30 || c > 0x39) + goto ilseq; + *s++ = c; + c = (wc >> 8) & 0xff; + if (c < 0x81 || c > 0xfe) + goto ilseq; + *s++ = c; + c = wc & 0xff; + if (c < 0x30 || c > 0x39) + goto ilseq; + *s++ = c; + len = 4; + } else if (wc & 0x00ff0000) + goto ilseq; + else if (wc & 0x0000ff00) { + c = (wc >> 8) & 0xff; + if (c < 0x81 || c > 0xfe) + goto ilseq; + *s++ = c; + c = wc & 0xff; + if (c < 0x40 || c == 0x7f || c == 0xff) + goto ilseq; + *s++ = c; + len = 2; + } else if (wc <= 0x7f) { + *s++ = wc; + len = 1; + } else + goto ilseq; + + return (len); +ilseq: + errno = EILSEQ; + return ((size_t)-1); +} diff --git a/usr/src/lib/libc/port/locale/gb2312.c b/usr/src/lib/libc/port/locale/gb2312.c new file mode 100644 index 0000000000..e0eecdd3b4 --- /dev/null +++ b/usr/src/lib/libc/port/locale/gb2312.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 2003 David Xu <davidxu@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <sys/types.h> +#include <errno.h> +#include "runetype.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" + +static size_t _GB2312_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _GB2312_mbsinit(const mbstate_t *); +static size_t _GB2312_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); + +typedef struct { + int count; + uchar_t bytes[2]; +} _GB2312State; + +int +_GB2312_init(_RuneLocale *rl) +{ + + _CurrentRuneLocale = rl; + __mbrtowc = _GB2312_mbrtowc; + __wcrtomb = _GB2312_wcrtomb; + __mbsinit = _GB2312_mbsinit; + __ctype[520] = 2; + return (0); +} + +static int +_GB2312_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _GB2312State *)ps)->count == 0); +} + +static int +_GB2312_check(const char *str, size_t n) +{ + const uchar_t *s = (const uchar_t *)str; + + if (n == 0) + /* Incomplete multibyte sequence */ + return (-2); + if (s[0] >= 0xa1 && s[0] <= 0xfe) { + if (n < 2) + /* Incomplete multibyte sequence */ + return (-2); + if (s[1] < 0xa1 || s[1] > 0xfe) + /* Invalid multibyte sequence */ + return (-1); + return (2); + } else if (s[0] & 0x80) { + /* Invalid multibyte sequence */ + return (-1); + } + return (1); +} + +static size_t +_GB2312_mbrtowc(wchar_t 
*_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _GB2312State *gs; + wchar_t wc; + int i, len, ocount; + size_t ncopy; + + gs = (_GB2312State *)ps; + + if (gs->count < 0 || gs->count > sizeof (gs->bytes)) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof (gs->bytes) - gs->count); + (void) memcpy(gs->bytes + gs->count, s, ncopy); + ocount = gs->count; + gs->count += ncopy; + s = (char *)gs->bytes; + n = gs->count; + + if ((len = _GB2312_check(s, n)) < 0) + return ((size_t)len); + wc = 0; + i = len; + while (i-- > 0) + wc = (wc << 8) | (unsigned char)*s++; + if (pwc != NULL) + *pwc = wc; + gs->count = 0; + return (wc == L'\0' ? 0 : len - ocount); +} + +static size_t +_GB2312_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, + mbstate_t *_RESTRICT_KYWD ps) +{ + _GB2312State *gs; + + gs = (_GB2312State *)ps; + + if (gs->count != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc & 0x8000) { + *s++ = (wc >> 8) & 0xff; + *s = wc & 0xff; + return (2); + } + *s = wc & 0xff; + return (1); +} diff --git a/usr/src/lib/libc/port/locale/gbk.c b/usr/src/lib/libc/port/locale/gbk.c new file mode 100644 index 0000000000..fceff4f288 --- /dev/null +++ b/usr/src/lib/libc/port/locale/gbk.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <sys/types.h> +#include <errno.h> +#include "runetype.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" + +static size_t _GBK_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _GBK_mbsinit(const mbstate_t *); +static size_t _GBK_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); + +typedef struct { + wchar_t ch; +} _GBKState; + +int +_GBK_init(_RuneLocale *rl) +{ + + __mbrtowc = _GBK_mbrtowc; + __wcrtomb = _GBK_wcrtomb; + __mbsinit = _GBK_mbsinit; + _CurrentRuneLocale = rl; + __ctype[520] = 2; + return (0); +} + +static int +_GBK_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _GBKState *)ps)->ch == 0); +} + +static int +_gbk_check(uint_t c) +{ + + c &= 0xff; + return ((c >= 0x81 && c <= 0xfe) ? 2 : 1); +} + +static size_t +_GBK_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _GBKState *gs; + wchar_t wc; + size_t len; + + gs = (_GBKState *)ps; + + if ((gs->ch & ~0xFF) != 0) { + /* Bad conversion state. */ + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + if (gs->ch != 0) { + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (gs->ch << 8) | (*s & 0xFF); + if (pwc != NULL) + *pwc = wc; + gs->ch = 0; + return (1); + } + + len = (size_t)_gbk_check(*s); + wc = *s++ & 0xff; + if (len == 2) { + if (n < 2) { + /* Incomplete multibyte sequence */ + gs->ch = wc; + return ((size_t)-2); + } + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (wc << 8) | (*s++ & 0xff); + if (pwc != NULL) + *pwc = wc; + return (2); + } else { + if (pwc != NULL) + *pwc = wc; + return (wc == L'\0' ? 
0 : 1); + } +} + +static size_t +_GBK_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) +{ + _GBKState *gs; + + gs = (_GBKState *)ps; + + if (gs->ch != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc & 0x8000) { + *s++ = (wc >> 8) & 0xff; + *s = wc & 0xff; + return (2); + } + *s = wc & 0xff; + return (1); +} diff --git a/usr/src/lib/libc/port/locale/getdate.c b/usr/src/lib/libc/port/locale/getdate.c new file mode 100644 index 0000000000..a2bb0685bf --- /dev/null +++ b/usr/src/lib/libc/port/locale/getdate.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Brian Ginsbach. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "file64.h" +#include <sys/types.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <sys/stat.h> + +#define TMSENTINEL (-1) + + +/* + * getdate_err is set to one of the following values on error. + * + * 1 The DATEMSK environment variable is null or undefined. + * 2 The template file cannot be opened for reading. + * 3 Failed to get file status information. + * 4 Template file is not a regular file. + * 5 Encountered an error while reading the template file. + * 6 Cannot allocate memory. + * 7 Input string does not match any line in the template. + * 8 Input string is invalid (for example, February 31) or could not + * be represented in a time_t. + * + * Note that on Solaris, getdate_err is possibly a function, to account + * for reentrancy. See the code for getdate_err.c for details. 
+ */ + + +#pragma weak _getdate = getdate +struct tm * +getdate(const char *str) +{ + char *datemsk, *line, *rp; + FILE *fp; + struct stat sb; + static struct tm rtm, tmnow; + struct tm *tmp, *rtmp = &rtm; + time_t now; + char buf[514]; + + if (((datemsk = getenv("DATEMSK")) == NULL) || *datemsk == '\0') { + getdate_err = 1; + return (NULL); + } + + if (stat(datemsk, &sb) < 0) { + getdate_err = 3; + return (NULL); + } + + if ((sb.st_mode & S_IFMT) != S_IFREG) { + getdate_err = 4; + return (NULL); + } + + if ((fp = fopen(datemsk, "r")) == NULL) { + getdate_err = 2; + return (NULL); + } + + /* loop through datemsk file */ + errno = 0; + rp = NULL; + + /* + * The NetBSD implementation supports a rich flexible file format + * with embedded escapes, etc. We don't need any of that. Solaris + * just reads the template file and (undocumented!) requires that + * each line not exceed 512 bytes, using a fixed buffer. We could + * improve on that, but this may grow the stack unreasonably, so + * we keep it to the same 512 limit. Some day we can be smarter. + * (Note FreeBSD doesn't even have getdate(), and IMO nobody sane + * should be using this crufty API. strptime is better.) + */ + + (void) memset(buf, 0, sizeof (buf)); + while ((line = fgets(buf, sizeof (buf), fp)) != NULL) { + /* + * If the buffer consumed the entire string, then + * the input line was too long. We just check to + * see if the 2nd to last byte is set. If it isn't, + * then we hit a null byte first, and the line is + * short enough. 
+ */ + if (buf[sizeof (buf) - 2] != 0) { + getdate_err = 5; + (void) fclose(fp); + return (NULL); + } + /* initialize tmp with sentinels */ + rtm.tm_sec = rtm.tm_min = rtm.tm_hour = TMSENTINEL; + rtm.tm_mday = rtm.tm_mon = rtm.tm_year = TMSENTINEL; + rtm.tm_wday = rtm.tm_yday = rtm.tm_isdst = TMSENTINEL; + rp = strptime(str, line, rtmp); + if (rp != NULL) + break; + errno = 0; + } + if (errno != 0 || ferror(fp)) { + if (errno == ENOMEM) + getdate_err = 6; + else + getdate_err = 5; + (void) fclose(fp); + return (NULL); + } + if (feof(fp) || (rp != NULL && *rp != '\0')) { + getdate_err = 7; + return (NULL); + } + (void) fclose(fp); + + (void) time(&now); + tmp = localtime(&now); + tmnow = *tmp; + + /* + * This implementation does not accept setting the broken-down time + * to anything other than the localtime(). It is not possible to + * change the scanned timezone with %Z. + * + * Note IRIX and Solaris accept only the current zone for %Z. + * XXX Is there any implementation that matches the standard? + * XXX (Or am I reading the standard wrong?) + * + * Note: Neither XPG 6 (POSIX 2004) nor XPG 7 (POSIX 2008) + * requires strptime(3) support for %Z. + */ + + /* + * Given only a weekday find the first matching weekday starting + * with the current weekday and moving into the future. + */ + if (rtm.tm_wday != TMSENTINEL && rtm.tm_year == TMSENTINEL && + rtm.tm_mon == TMSENTINEL && rtm.tm_mday == TMSENTINEL) { + rtm.tm_year = tmnow.tm_year; + rtm.tm_mon = tmnow.tm_mon; + rtm.tm_mday = tmnow.tm_mday + + (rtm.tm_wday - tmnow.tm_wday + 7) % 7; + } + + /* + * Given only a month (and no year) find the first matching month + * starting with the current month and moving into the future. + */ + if (rtm.tm_mon != TMSENTINEL) { + if (rtm.tm_year == TMSENTINEL) { + rtm.tm_year = tmnow.tm_year + + ((rtm.tm_mon < tmnow.tm_mon)? 1 : 0); + } + if (rtm.tm_mday == TMSENTINEL) { + /* assume the first of the month */ + rtm.tm_mday = 1; + /* + * XXX This isn't documented! 
Just observed behavior. + * + * Given the weekday find the first matching weekday + * starting with the weekday of the first day of the + * the month and moving into the future. + */ + if (rtm.tm_wday != TMSENTINEL) { + struct tm tm; + + (void) memset(&tm, 0, sizeof (struct tm)); + tm.tm_year = rtm.tm_year; + tm.tm_mon = rtm.tm_mon; + tm.tm_mday = 1; + (void) mktime(&tm); + rtm.tm_mday += + (rtm.tm_wday - tm.tm_wday + 7) % 7; + } + } + } + + /* + * Given no time of day assume the current time of day. + */ + if (rtm.tm_hour == TMSENTINEL && + rtm.tm_min == TMSENTINEL && rtm.tm_sec == TMSENTINEL) { + rtm.tm_hour = tmnow.tm_hour; + rtm.tm_min = tmnow.tm_min; + rtm.tm_sec = tmnow.tm_sec; + } + /* + * Given an hour and no date, find the first matching hour starting + * with the current hour and moving into the future + */ + if (rtm.tm_hour != TMSENTINEL && + rtm.tm_year == TMSENTINEL && rtm.tm_mon == TMSENTINEL && + rtm.tm_mday == TMSENTINEL) { + rtm.tm_year = tmnow.tm_year; + rtm.tm_mon = tmnow.tm_mon; + rtm.tm_mday = tmnow.tm_mday; + if (rtm.tm_hour < tmnow.tm_hour) + rtm.tm_hour += 24; + } + + /* + * Set to 'sane' values; mktime(3) does funny things otherwise. + * No hours, no minutes, no seconds, no service. + */ + if (rtm.tm_hour == TMSENTINEL) + rtm.tm_hour = 0; + if (rtm.tm_min == TMSENTINEL) + rtm.tm_min = 0; + if (rtm.tm_sec == TMSENTINEL) + rtm.tm_sec = 0; + + /* + * Given only a year the values of month, day of month, day of year, + * week day and is daylight (summer) time are unspecified. + * (Specified on the Solaris man page not POSIX.) + */ + if (rtm.tm_year != TMSENTINEL && + rtm.tm_mon == TMSENTINEL && rtm.tm_mday == TMSENTINEL) { + rtm.tm_mon = 0; + rtm.tm_mday = 1; + /* + * XXX More undocumented functionality but observed. + * + * Given the weekday find the first matching weekday + * starting with the weekday of the first day of the + * month and moving into the future. 
+ */ + if (rtm.tm_wday != TMSENTINEL) { + struct tm tm; + + (void) memset(&tm, 0, sizeof (struct tm)); + tm.tm_year = rtm.tm_year; + tm.tm_mon = rtm.tm_mon; + tm.tm_mday = 1; + (void) mktime(&tm); + rtm.tm_mday += (rtm.tm_wday - tm.tm_wday + 7) % 7; + } + } + + /* + * Given only the century but no year within, the current year + * is assumed. (Specified on the Solaris man page not POSIX.) + * + * Warning ugly end case + * + * This is more work since strptime(3) doesn't "do the right thing". + */ + if (rtm.tm_year != TMSENTINEL && (rtm.tm_year - 1900) >= 0) { + rtm.tm_year -= 1900; + rtm.tm_year += (tmnow.tm_year % 100); + } + + /* + * mktime() will normalize all values and also check that the + * value will fit into a time_t. + * + * This is only for POSIX correctness. A date >= 1900 is + * really ok, but using a time_t limits things. + */ + if (mktime(rtmp) < 0) { + getdate_err = 8; + return (NULL); + } + + return (rtmp); +} diff --git a/usr/src/lib/libc/port/locale/iswctype.c b/usr/src/lib/libc/port/locale/iswctype.c new file mode 100644 index 0000000000..e1be429f46 --- /dev/null +++ b/usr/src/lib/libc/port/locale/iswctype.c @@ -0,0 +1,216 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <wctype.h> +#include "runefile.h" +#include "runetype.h" +#include "_ctype.h" + +/* + * We removed: iswascii, iswrune, iswhexnumber, and iswnumber, as + * these are not present on Solaris. Note that the standard requires + * iswascii to be a macro, so it is defined in our headers. + * + * We renamed (per Solaris) iswideogram, iswspecial, iswspecial to the + * equivalent values without "w". We added a new isnumber, that looks + * for non-ASCII numbers. 
+ */ + +static int +__istype(wint_t c, unsigned long f) +{ + unsigned long rt; + + /* Fast path for single byte locales */ + if (c < 0 || c >= _CACHED_RUNES) + rt = ___runetype(c); + else + rt = _CurrentRuneLocale->__runetype[c]; + return (!!(rt & f)); +} + +static int +__isctype(wint_t c, unsigned long f) +{ + unsigned long rt; + + /* Fast path for single byte locales */ + if (c < 0 || c >= _CACHED_RUNES) + return (0); + else + rt = _CurrentRuneLocale->__runetype[c]; + return (!!(rt & f)); +} + +#undef iswctype +int +iswctype(wint_t wc, wctype_t class) +{ + return (__istype(wc, (unsigned long)class)); +} + +#undef _iswctype +unsigned +_iswctype(wchar_t wc, int class) +{ + return (__istype((wint_t)wc, (unsigned long)class)); +} + +#undef iswalnum +int +iswalnum(wint_t wc) +{ + return (__istype(wc, _CTYPE_A|_CTYPE_D)); +} + +#undef iswalpha +int +iswalpha(wint_t wc) +{ + return (__istype(wc, _CTYPE_A)); +} + +#undef iswblank +int +iswblank(wint_t wc) +{ + return (__istype(wc, _CTYPE_B)); +} + +#undef iswcntrl +int +iswcntrl(wint_t wc) +{ + return (__istype(wc, _CTYPE_C)); +} + +#undef iswdigit +int +iswdigit(wint_t wc) +{ + return (__isctype(wc, _CTYPE_D)); +} + +#undef iswgraph +int +iswgraph(wint_t wc) +{ + return (__istype(wc, _CTYPE_G)); +} + +#undef isideogram +int +isideogram(wint_t wc) +{ + return (__istype(wc, _CTYPE_I)); +} + +#undef iswlower +int +iswlower(wint_t wc) +{ + return (__istype(wc, _CTYPE_L)); +} + +#undef isphonogram +int +isphonogram(wint_t wc) +{ + return (__istype(wc, _CTYPE_Q)); +} + +#undef iswprint +int +iswprint(wint_t wc) +{ + return (__istype(wc, _CTYPE_R)); +} + +#undef iswpunct +int +iswpunct(wint_t wc) +{ + return (__istype(wc, _CTYPE_P)); +} + +#undef iswspace +int +iswspace(wint_t wc) +{ + return (__istype(wc, _CTYPE_S)); +} + +#undef iswupper +int +iswupper(wint_t wc) +{ + return (__istype(wc, _CTYPE_U)); +} + +#undef iswxdigit +int +iswxdigit(wint_t wc) +{ + return (__isctype(wc, _CTYPE_X)); +} + +#undef isenglish +int 
+isenglish(wint_t wc) +{ + return (__istype(wc, _CTYPE_E)); +} + +#undef isspecial +int +isspecial(wint_t wc) +{ + return (__istype(wc, _CTYPE_T)); +} + +#undef isnumber +int +isnumber(wint_t wc) +{ + return (__istype(wc, _CTYPE_N)); +} diff --git a/usr/src/lib/libc/port/locale/ldpart.c b/usr/src/lib/libc/port/locale/ldpart.c new file mode 100644 index 0000000000..01bbd3cf4f --- /dev/null +++ b/usr/src/lib/libc/port/locale/ldpart.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2000, 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "ldpart.h" +#include "setlocale.h" + +static int split_lines(char *, const char *); + +int +__part_load_locale(const char *name, int *using_locale, + char **locale_buf, const char *category_filename, + int locale_buf_size_max, int locale_buf_size_min, + const char **dst_localebuf) +{ + int saverr, fd, i, num_lines; + char *lbuf, *p; + const char *plim; + char filename[PATH_MAX]; + struct stat st; + size_t namesize, bufsize; + + /* 'name' must be already checked. */ + if (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0) { + *using_locale = 0; + return (_LDP_CACHE); + } + + /* + * If the locale name is the same as our cache, use the cache. + */ + if (*locale_buf != NULL && strcmp(name, *locale_buf) == 0) { + *using_locale = 1; + return (_LDP_CACHE); + } + + /* + * Slurp the locale file into the cache. + */ + namesize = strlen(name) + 1; + + /* 'PathLocale' must be already set & checked. */ + + /* Range checking not needed, 'name' size is limited */ + (void) strcpy(filename, _PathLocale); + (void) strcat(filename, "/"); + (void) strcat(filename, name); + (void) strcat(filename, "/"); + (void) strcat(filename, category_filename); + if ((fd = open(filename, O_RDONLY)) < 0) + return (_LDP_ERROR); + if (fstat(fd, &st) != 0) + goto bad_locale; + if (st.st_size <= 0) { + errno = EINVAL; + goto bad_locale; + } + bufsize = namesize + st.st_size; + if ((lbuf = malloc(bufsize)) == NULL) { + errno = ENOMEM; + goto bad_locale; + } + (void) strcpy(lbuf, name); + p = lbuf + namesize; + plim = p + st.st_size; + if (read(fd, p, (size_t)st.st_size) != st.st_size) + goto bad_lbuf; + /* + * Parse the locale file into localebuf. 
+ */ + if (plim[-1] != '\n') { + errno = EINVAL; + goto bad_lbuf; + } + num_lines = split_lines(p, plim); + if (num_lines >= locale_buf_size_max) + num_lines = locale_buf_size_max; + else if (num_lines >= locale_buf_size_min) + num_lines = locale_buf_size_min; + else { + errno = EINVAL; + goto bad_lbuf; + } + (void) close(fd); + /* + * Record the successful parse in the cache. + */ + if (*locale_buf != NULL) + free(*locale_buf); + *locale_buf = lbuf; + for (p = *locale_buf, i = 0; i < num_lines; i++) + dst_localebuf[i] = (p += strlen(p) + 1); + for (i = num_lines; i < locale_buf_size_max; i++) + dst_localebuf[i] = NULL; + *using_locale = 1; + + return (_LDP_LOADED); + +bad_lbuf: + saverr = errno; + free(lbuf); + errno = saverr; +bad_locale: + saverr = errno; + (void) close(fd); + errno = saverr; + + return (_LDP_ERROR); +} + +static int +split_lines(char *p, const char *plim) +{ + int i; + + i = 0; + while (p < plim) { + if (*p == '\n') { + *p = '\0'; + i++; + } + p++; + } + return (i); +} diff --git a/usr/src/lib/libc/port/locale/ldpart.h b/usr/src/lib/libc/port/locale/ldpart.h new file mode 100644 index 0000000000..040640237e --- /dev/null +++ b/usr/src/lib/libc/port/locale/ldpart.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LDPART_H_ +#define _LDPART_H_ + +#define _LDP_LOADED 0 +#define _LDP_ERROR (-1) +#define _LDP_CACHE 1 + +int __part_load_locale(const char *, int *, char **, const char *, + int, int, const char **); + +#endif /* !_LDPART_H_ */ diff --git a/usr/src/lib/libc/port/locale/lmessages.c b/usr/src/lib/libc/port/locale/lmessages.c new file mode 100644 index 0000000000..75076b1d01 --- /dev/null +++ b/usr/src/lib/libc/port/locale/lmessages.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <stddef.h> +#include "ldpart.h" +#include "lmessages.h" + +#define LCMESSAGES_SIZE_FULL (sizeof (struct lc_messages_T) / sizeof (char *)) +#define LCMESSAGES_SIZE_MIN \ + (offsetof(struct lc_messages_T, yesstr) / sizeof (char *)) + +static char empty[] = ""; + +static const struct lc_messages_T _C_messages_locale = { + "^[yY]", /* yesexpr */ + "^[nN]", /* noexpr */ + "yes", /* yesstr */ + "no" /* nostr */ +}; + +static struct lc_messages_T _messages_locale; +static int _messages_using_locale; +static char *_messages_locale_buf; + +int +__messages_load_locale(const char *name) +{ + int ret; + + ret = __part_load_locale(name, &_messages_using_locale, + &_messages_locale_buf, "LC_MESSAGES", + LCMESSAGES_SIZE_FULL, LCMESSAGES_SIZE_MIN, + (const char **)&_messages_locale); + if (ret == _LDP_LOADED) { + if (_messages_locale.yesstr == NULL) + _messages_locale.yesstr = empty; + if (_messages_locale.nostr == NULL) + _messages_locale.nostr = empty; + } + return (ret); +} + +struct lc_messages_T * +__get_current_messages_locale(void) +{ + return 
(_messages_using_locale ? &_messages_locale : + (struct lc_messages_T *)&_C_messages_locale); +} diff --git a/usr/src/lib/libc/port/locale/lmessages.h b/usr/src/lib/libc/port/locale/lmessages.h new file mode 100644 index 0000000000..e08198b810 --- /dev/null +++ b/usr/src/lib/libc/port/locale/lmessages.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2000, 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
#ifndef	_LMESSAGES_H_
#define	_LMESSAGES_H_

/*
 * LC_MESSAGES data.  lmessages.c passes this structure to the loader
 * cast to an array of strings and derives slot counts from the member
 * offsets, so the member order must not change.
 */
struct lc_messages_T {
	const char	*yesexpr;	/* "^[yY]" in the built-in table */
	const char	*noexpr;	/* "^[nN]" in the built-in table */
	const char	*yesstr;	/* "yes" in the built-in table */
	const char	*nostr;		/* "no" in the built-in table */
};

/* Load LC_MESSAGES for the named locale; returns the loader's status. */
int	__messages_load_locale(const char *);

/* Return the LC_MESSAGES table currently in effect. */
struct lc_messages_T *__get_current_messages_locale(void);

#endif /* !_LMESSAGES_H_ */
+ * Use is subject to license terms. + */ + +#include "lint.h" +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> +#include "ldpart.h" +#include "lmonetary.h" + +extern int __mlocale_changed; +extern const char *__fix_locale_grouping_str(const char *); + +#define LCMONETARY_SIZE_FULL (sizeof (struct lc_monetary_T) / sizeof (char *)) +#define LCMONETARY_SIZE_MIN \ + (offsetof(struct lc_monetary_T, int_p_cs_precedes) / sizeof (char *)) + +static char empty[] = ""; +static char numempty[] = { CHAR_MAX, '\0' }; + +static const struct lc_monetary_T _C_monetary_locale = { + empty, /* int_curr_symbol */ + empty, /* currency_symbol */ + empty, /* mon_decimal_point */ + empty, /* mon_thousands_sep */ + numempty, /* mon_grouping */ + empty, /* positive_sign */ + empty, /* negative_sign */ + numempty, /* int_frac_digits */ + numempty, /* frac_digits */ + numempty, /* p_cs_precedes */ + numempty, /* p_sep_by_space */ + numempty, /* n_cs_precedes */ + numempty, /* n_sep_by_space */ + numempty, /* p_sign_posn */ + numempty, /* n_sign_posn */ + numempty, /* int_p_cs_precedes */ + numempty, /* int_n_cs_precedes */ + numempty, /* int_p_sep_by_space */ + numempty, /* int_n_sep_by_space */ + numempty, /* int_p_sign_posn */ + numempty /* int_n_sign_posn */ +}; + +static struct lc_monetary_T _monetary_locale; +static int _monetary_using_locale; +static char *_monetary_locale_buf; + +static char +cnv(const char *str) +{ + int i = strtol(str, NULL, 10); + + if (i == -1) + i = CHAR_MAX; + return ((char)i); +} + +int +__monetary_load_locale(const char *name) +{ + int ret; + + ret = __part_load_locale(name, &_monetary_using_locale, + &_monetary_locale_buf, "LC_MONETARY", + LCMONETARY_SIZE_FULL, LCMONETARY_SIZE_MIN, + (const char **)&_monetary_locale); + if (ret != _LDP_ERROR) + __mlocale_changed = 1; + if (ret == _LDP_LOADED) { + _monetary_locale.mon_grouping = + __fix_locale_grouping_str(_monetary_locale.mon_grouping); + +#define M_ASSIGN_CHAR(NAME) \ + (((char 
*)_monetary_locale.NAME)[0] = \ + cnv(_monetary_locale.NAME)) + + M_ASSIGN_CHAR(int_frac_digits); + M_ASSIGN_CHAR(frac_digits); + M_ASSIGN_CHAR(p_cs_precedes); + M_ASSIGN_CHAR(p_sep_by_space); + M_ASSIGN_CHAR(n_cs_precedes); + M_ASSIGN_CHAR(n_sep_by_space); + M_ASSIGN_CHAR(p_sign_posn); + M_ASSIGN_CHAR(n_sign_posn); + + /* + * The six additional C99 international monetary formatting + * parameters default to the national parameters when + * reading FreeBSD LC_MONETARY data files. + */ +#define M_ASSIGN_ICHAR(NAME) \ + if (_monetary_locale.int_##NAME == NULL) \ + _monetary_locale.int_##NAME = \ + _monetary_locale.NAME; \ + else \ + M_ASSIGN_CHAR(int_##NAME); + + M_ASSIGN_ICHAR(p_cs_precedes); + M_ASSIGN_ICHAR(n_cs_precedes); + M_ASSIGN_ICHAR(p_sep_by_space); + M_ASSIGN_ICHAR(n_sep_by_space); + M_ASSIGN_ICHAR(p_sign_posn); + M_ASSIGN_ICHAR(n_sign_posn); + } + return (ret); +} + +struct lc_monetary_T * +__get_current_monetary_locale(void) +{ + return (_monetary_using_locale ? &_monetary_locale : + (struct lc_monetary_T *)&_C_monetary_locale); +} diff --git a/usr/src/lib/libc/port/locale/lmonetary.h b/usr/src/lib/libc/port/locale/lmonetary.h new file mode 100644 index 0000000000..ac7624fd30 --- /dev/null +++ b/usr/src/lib/libc/port/locale/lmonetary.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2000, 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
#ifndef	_LMONETARY_H_
#define	_LMONETARY_H_

/*
 * LC_MONETARY data.  lmonetary.c passes this structure to the loader
 * cast to an array of strings and derives slot counts from the member
 * offsets, so the member order must not change.
 *
 * After a successful load, the first byte of every member from
 * int_frac_digits onward holds a numeric value rather than text;
 * localeconv() reads only that byte.
 */
struct lc_monetary_T {
	/* string-valued members */
	const char	*int_curr_symbol;
	const char	*currency_symbol;
	const char	*mon_decimal_point;
	const char	*mon_thousands_sep;
	const char	*mon_grouping;
	const char	*positive_sign;
	const char	*negative_sign;

	/* national numeric members */
	const char	*int_frac_digits;
	const char	*frac_digits;
	const char	*p_cs_precedes;
	const char	*p_sep_by_space;
	const char	*n_cs_precedes;
	const char	*n_sep_by_space;
	const char	*p_sign_posn;
	const char	*n_sign_posn;

	/* international numeric members; may be absent from the data */
	const char	*int_p_cs_precedes;
	const char	*int_n_cs_precedes;
	const char	*int_p_sep_by_space;
	const char	*int_n_sep_by_space;
	const char	*int_p_sign_posn;
	const char	*int_n_sign_posn;
};

/* Load LC_MONETARY for the named locale; returns the loader's status. */
int	__monetary_load_locale(const char *);

/* Return the LC_MONETARY table currently in effect. */
struct lc_monetary_T *__get_current_monetary_locale(void);

#endif /* !_LMONETARY_H_ */
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <limits.h> +#include "ldpart.h" +#include "lnumeric.h" +#include "../i18n/_locale.h" + +extern int __nlocale_changed; +extern const char *__fix_locale_grouping_str(const char *); + +#define LCNUMERIC_SIZE (sizeof (struct lc_numeric_T) / sizeof (char *)) + +static char numempty[] = { CHAR_MAX, '\0' }; + +static const struct lc_numeric_T _C_numeric_locale = { + ".", /* decimal_point */ + "", /* thousands_sep */ + numempty /* grouping */ +}; + +static struct lc_numeric_T _numeric_locale; +static int _numeric_using_locale; +static char *_numeric_locale_buf; + +int +__numeric_load_locale(const char *name) +{ + int ret; + + ret = __part_load_locale(name, &_numeric_using_locale, + &_numeric_locale_buf, "LC_NUMERIC", LCNUMERIC_SIZE, LCNUMERIC_SIZE, + (const char **)&_numeric_locale); + if (ret != _LDP_ERROR) + __nlocale_changed = 1; + if (ret == _LDP_LOADED) { + /* Can't be empty according to C99 */ + if (*_numeric_locale.decimal_point == '\0') + _numeric_locale.decimal_point = + _C_numeric_locale.decimal_point; + _numeric_locale.grouping = + __fix_locale_grouping_str(_numeric_locale.grouping); + + /* This is Solaris legacy, required for _doprnt */ + _numeric[0] = *_numeric_locale.decimal_point; + _numeric[1] = *_numeric_locale.grouping; + } + return (ret); +} + +struct lc_numeric_T * +__get_current_numeric_locale(void) +{ + return (_numeric_using_locale ? &_numeric_locale : + (struct lc_numeric_T *)&_C_numeric_locale); +} diff --git a/usr/src/lib/libc/port/locale/lnumeric.h b/usr/src/lib/libc/port/locale/lnumeric.h new file mode 100644 index 0000000000..74cac4dfa2 --- /dev/null +++ b/usr/src/lib/libc/port/locale/lnumeric.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000, 2001 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
#ifndef	_LNUMERIC_H_
#define	_LNUMERIC_H_

/*
 * LC_NUMERIC data.  lnumeric.c passes this structure to the loader cast
 * to an array of strings, so the member order must not change.
 */
struct lc_numeric_T {
	const char	*decimal_point;	/* "." in the built-in table */
	const char	*thousands_sep;	/* "" in the built-in table */
	const char	*grouping;	/* { CHAR_MAX } in the built-in table */
};

/* Load LC_NUMERIC for the named locale; returns the loader's status. */
int	__numeric_load_locale(const char *);

/* Return the LC_NUMERIC table currently in effect. */
struct lc_numeric_T *__get_current_numeric_locale(void);

#endif /* !_LNUMERIC_H_ */
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LCONV_C99 +#define _LCONV_C99 /* so we get all the extensions */ +#endif + +#include "lint.h" +#include <locale.h> +#include "lmonetary.h" +#include "lnumeric.h" + +/* + * The localeconv() function constructs a struct lconv from the current + * monetary and numeric locales. + * + * Because localeconv() may be called many times (especially by library + * routines like printf() & strtod()), the approprate members of the + * lconv structure are computed only when the monetary or numeric + * locale has been changed. 
+ */ +int __mlocale_changed = 1; +int __nlocale_changed = 1; + +/* + * Return the current locale conversion. + */ +struct lconv * +localeconv(void) +{ + static struct lconv ret; + + if (__mlocale_changed) { + /* LC_MONETARY part */ + struct lc_monetary_T *mptr; + +#define M_ASSIGN_STR(NAME) (ret.NAME = (char *)mptr->NAME) +#define M_ASSIGN_CHAR(NAME) (ret.NAME = mptr->NAME[0]) + + mptr = __get_current_monetary_locale(); + M_ASSIGN_STR(int_curr_symbol); + M_ASSIGN_STR(currency_symbol); + M_ASSIGN_STR(mon_decimal_point); + M_ASSIGN_STR(mon_thousands_sep); + M_ASSIGN_STR(mon_grouping); + M_ASSIGN_STR(positive_sign); + M_ASSIGN_STR(negative_sign); + M_ASSIGN_CHAR(int_frac_digits); + M_ASSIGN_CHAR(frac_digits); + M_ASSIGN_CHAR(p_cs_precedes); + M_ASSIGN_CHAR(p_sep_by_space); + M_ASSIGN_CHAR(n_cs_precedes); + M_ASSIGN_CHAR(n_sep_by_space); + M_ASSIGN_CHAR(p_sign_posn); + M_ASSIGN_CHAR(n_sign_posn); + M_ASSIGN_CHAR(int_p_cs_precedes); + M_ASSIGN_CHAR(int_n_cs_precedes); + M_ASSIGN_CHAR(int_p_sep_by_space); + M_ASSIGN_CHAR(int_n_sep_by_space); + M_ASSIGN_CHAR(int_p_sign_posn); + M_ASSIGN_CHAR(int_n_sign_posn); + __mlocale_changed = 0; + } + + if (__nlocale_changed) { + /* LC_NUMERIC part */ + struct lc_numeric_T *nptr; + +#define N_ASSIGN_STR(NAME) (ret.NAME = (char *)nptr->NAME) + + nptr = __get_current_numeric_locale(); + N_ASSIGN_STR(decimal_point); + N_ASSIGN_STR(thousands_sep); + N_ASSIGN_STR(grouping); + __nlocale_changed = 0; + } + + return (&ret); +} diff --git a/usr/src/lib/libc/port/locale/mblen.c b/usr/src/lib/libc/port/locale/mblen.c new file mode 100644 index 0000000000..5ab2608236 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mblen.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" + +int +mblen(const char *s, size_t n) +{ + static const mbstate_t initial = { 0 }; + static mbstate_t mbs; + size_t rval; + + if (s == NULL) { + /* No support for state dependent encodings. */ + mbs = initial; + return (0); + } + rval = __mbrtowc(NULL, s, n, &mbs); + if (rval == (size_t)-1 || rval == (size_t)-2) + return (-1); + return ((int)rval); +} diff --git a/usr/src/lib/libc/port/locale/mblocal.h b/usr/src/lib/libc/port/locale/mblocal.h new file mode 100644 index 0000000000..ca5095d068 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mblocal.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MBLOCAL_H_ +#define _MBLOCAL_H_ + +#include "runetype.h" + +/* + * Rune initialization function prototypes. + */ +int _none_init(_RuneLocale *); +int _ascii_init(_RuneLocale *); +int _UTF8_init(_RuneLocale *); +int _EUC_init(_RuneLocale *); +int _GB18030_init(_RuneLocale *); +int _GB2312_init(_RuneLocale *); +int _GBK_init(_RuneLocale *); +int _BIG5_init(_RuneLocale *); +int _MSKanji_init(_RuneLocale *); + +/* + * Conversion function pointers for current encoding. 
+ */ +extern size_t (*__mbrtowc)(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); +extern int (*__mbsinit)(const mbstate_t *); +extern size_t (*__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, + const char **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); + +extern size_t (*__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); + +extern size_t (*__wcsnrtombs)(char *_RESTRICT_KYWD, + const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); + +size_t __mbsnrtowcs_std(wchar_t *_RESTRICT_KYWD, const char **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD); +size_t __wcsnrtombs_std(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD); + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +#endif /* _MBLOCAL_H_ */ diff --git a/usr/src/lib/libc/port/locale/mbrlen.c b/usr/src/lib/libc/port/locale/mbrlen.c new file mode 100644 index 0000000000..6a65ce40b2 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbrlen.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <wchar.h> +#include "mblocal.h" + +size_t +mbrlen(const char *_RESTRICT_KYWD s, size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + static mbstate_t mbs; + + if (ps == NULL) + ps = &mbs; + return (__mbrtowc(NULL, s, n, ps)); +} diff --git a/usr/src/lib/libc/port/locale/mbrtowc.c b/usr/src/lib/libc/port/locale/mbrtowc.c new file mode 100644 index 0000000000..165f30efc2 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbrtowc.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <wchar.h> +#include "mblocal.h" + +size_t +mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + static mbstate_t mbs; + + if (ps == NULL) + ps = &mbs; + return (__mbrtowc(pwc, s, n, ps)); +} diff --git a/usr/src/lib/libc/port/locale/mbsinit.c b/usr/src/lib/libc/port/locale/mbsinit.c new file mode 100644 index 0000000000..d514d0ee51 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbsinit.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <wchar.h> +#include "mblocal.h" + +int +mbsinit(const mbstate_t *ps) +{ + + return (__mbsinit(ps)); +} diff --git a/usr/src/lib/libc/port/locale/mbsnrtowcs.c b/usr/src/lib/libc/port/locale/mbsnrtowcs.c new file mode 100644 index 0000000000..7ca21ac915 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbsnrtowcs.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" + +size_t +mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + static mbstate_t mbs; + + if (ps == NULL) + ps = &mbs; + return (__mbsnrtowcs(dst, src, nms, len, ps)); +} + +size_t +__mbsnrtowcs_std(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + const char *s; + size_t nchr; + wchar_t wc; + size_t nb; + + s = *src; + nchr = 0; + + if (dst == NULL) { + for (;;) { + if ((nb = __mbrtowc(&wc, s, nms, ps)) == (size_t)-1) + /* Invalid sequence - mbrtowc() sets errno. 
*/ + return ((size_t)-1); + else if (nb == 0 || nb == (size_t)-2) + return (nchr); + s += nb; + nms -= nb; + nchr++; + } + /*NOTREACHED*/ + } + + while (len-- > 0) { + if ((nb = __mbrtowc(dst, s, nms, ps)) == (size_t)-1) { + *src = s; + return ((size_t)-1); + } else if (nb == (size_t)-2) { + *src = s + nms; + return (nchr); + } else if (nb == 0) { + *src = NULL; + return (nchr); + } + s += nb; + nms -= nb; + nchr++; + dst++; + } + *src = s; + return (nchr); +} diff --git a/usr/src/lib/libc/port/locale/mbsrtowcs.c b/usr/src/lib/libc/port/locale/mbsrtowcs.c new file mode 100644 index 0000000000..b1eff34f6a --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbsrtowcs.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" + +size_t +mbsrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, + size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + static mbstate_t mbs; + + if (ps == NULL) + ps = &mbs; + return (__mbsnrtowcs(dst, src, ULONG_MAX, len, ps)); +} diff --git a/usr/src/lib/libc/port/locale/mbstowcs.c b/usr/src/lib/libc/port/locale/mbstowcs.c new file mode 100644 index 0000000000..29fbac2ce0 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbstowcs.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <limits.h> +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" +/* + * mbstowcs - convert the multibyte string s into pwcs through the + * __mbsnrtowcs hook, passing ULONG_MAX as the source byte limit and n as + * the len argument. Every call starts from a zero-initialised local + * mbstate_t and works on a local copy of the source pointer, so no + * conversion state survives between calls and the caller's s is never + * advanced. + */ +size_t +mbstowcs(wchar_t *_RESTRICT_KYWD pwcs, const char *_RESTRICT_KYWD s, size_t n) +{ + static const mbstate_t initial = { 0 }; + mbstate_t mbs; + const char *sp; + + mbs = initial; + sp = s; + return (__mbsnrtowcs(pwcs, &sp, ULONG_MAX, n, &mbs)); +} diff --git a/usr/src/lib/libc/port/locale/mbtowc.c b/usr/src/lib/libc/port/locale/mbtowc.c new file mode 100644 index 0000000000..c87a181b45 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbtowc.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <stdlib.h> +#include <wchar.h> +#include "mblocal.h" +/* + * mbtowc - convert one multibyte character of at most n bytes at s by + * calling the __mbrtowc hook with a static mbstate_t private to this + * function. A NULL s resets that state to all zeroes and returns 0. + * Both (size_t)-1 and (size_t)-2 from the hook are reported as -1; any + * other result is returned as an int. + * NOTE(review): the static state is not reset on the -1/-2 paths, so + * whatever the hook left in it is seen by the next call - confirm that + * this is intended. + */ +int +mbtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, size_t n) +{ + static const mbstate_t initial = { 0 }; + static mbstate_t mbs; + size_t rval; + + if (s == NULL) { + /* No support for state dependent encodings. */ + mbs = initial; + return (0); + } + rval = __mbrtowc(pwc, s, n, &mbs); + if (rval == (size_t)-1 || rval == (size_t)-2) + return (-1); + return ((int)rval); +} diff --git a/usr/src/lib/libc/port/locale/mskanji.c b/usr/src/lib/libc/port/locale/mskanji.c new file mode 100644 index 0000000000..0109d3c26b --- /dev/null +++ b/usr/src/lib/libc/port/locale/mskanji.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
+ * + * ja_JP.SJIS locale table for BSD4.4/rune + * version 1.0 + * (C) Sin'ichiro MIYATANI / Phase One, Inc + * May 12, 1995 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Phase One, Inc. + * 4. The name of Phase One, Inc. may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <sys/types.h> +#include <errno.h> +#include "runetype.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" + +static size_t _MSKanji_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _MSKanji_mbsinit(const mbstate_t *); +static size_t _MSKanji_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); +/* + * Conversion state overlaid on the caller's mbstate_t: ch holds a lead + * byte saved by _MSKanji_mbrtowc() while it waits for the trail byte, or + * 0 when nothing is pending. + */ +typedef struct { + wchar_t ch; +} _MSKanjiState; +/* + * _MSKanji_init - point the __mbrtowc, __wcrtomb and __mbsinit hooks at + * the MSKanji routines below and make rl the current rune locale. + * Always returns 0. + * NOTE(review): __ctype[520] = 2 is presumably the "maximum bytes per + * character" slot of the ctype table - confirm against the __ctype layout. + */ +int +_MSKanji_init(_RuneLocale *rl) +{ + + __mbrtowc = _MSKanji_mbrtowc; + __wcrtomb = _MSKanji_wcrtomb; + __mbsinit = _MSKanji_mbsinit; + _CurrentRuneLocale = rl; + __ctype[520] = 2; + return (0); +} +/* + * _MSKanji_mbsinit - nonzero when ps is NULL or no lead byte is pending + * in the state. + */ +static int +_MSKanji_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _MSKanjiState *)ps)->ch == 0); +} +/* + * _MSKanji_mbrtowc - decode one character from at most n bytes at s. + * Bytes 0x81-0x9f and 0xe0-0xfc are lead bytes of a two byte character + * whose value is (lead << 8) | trail; every other byte is a character on + * its own. If a lead byte is the only byte available it is saved in the + * state and (size_t)-2 is returned; the next call combines it with *s and + * returns 1. A NUL trail byte fails with EILSEQ, a state with bits set + * above 0xFF fails with EINVAL. A NULL s is treated as the string "" with + * n == 1 and no output pointer. + */ +static size_t +_MSKanji_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _MSKanjiState *ms; + wchar_t wc; + + ms = (_MSKanjiState *)ps; + + if ((ms->ch & ~0xFF) != 0) { + /* Bad conversion state. */ + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + if (ms->ch != 0) { + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (ms->ch << 8) | (*s & 0xFF); + if (pwc != NULL) + *pwc = wc; + ms->ch = 0; + return (1); + } + wc = *s++ & 0xff; + if ((wc > 0x80 && wc < 0xa0) || (wc >= 0xe0 && wc < 0xfd)) { + if (n < 2) { + /* Incomplete multibyte sequence */ + ms->ch = wc; + return ((size_t)-2); + } + if (*s == '\0') { + errno = EILSEQ; + return ((size_t)-1); + } + wc = (wc << 8) | (*s++ & 0xff); + if (pwc != NULL) + *pwc = wc; + return (2); + } else { + if (pwc != NULL) + *pwc = wc; + return (wc == L'\0' ? 
0 : 1); + } +} +/* + * _MSKanji_wcrtomb - encode wc into s: two bytes, high byte first, when + * wc > 0x100, otherwise a single byte. Returns the number of bytes + * written. A pending lead byte in the state fails with EINVAL; a NULL s + * returns 1 without writing anything. + * NOTE(review): wc == 0x100 takes the one byte path and stores 0x00; + * the threshold was presumably meant to be "> 0xFF" - confirm. + */ +static size_t +_MSKanji_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, + mbstate_t *_RESTRICT_KYWD ps) +{ + _MSKanjiState *ms; + int len, i; + + ms = (_MSKanjiState *)ps; + + if (ms->ch != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + len = (wc > 0x100) ? 2 : 1; + for (i = len; i-- > 0; ) + *s++ = wc >> (i << 3); + return (len); +} diff --git a/usr/src/lib/libc/port/locale/nl_langinfo.c b/usr/src/lib/libc/port/locale/nl_langinfo.c new file mode 100644 index 0000000000..2c296b45e5 --- /dev/null +++ b/usr/src/lib/libc/port/locale/nl_langinfo.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2001, 2003 Alexey Zelkin <phantom@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <langinfo.h> +#include <limits.h> +#include <locale.h> +#include <stdlib.h> +#include <string.h> + +#include "lnumeric.h" +#include "lmessages.h" +#include "lmonetary.h" +#include "timelocal.h" + +#define _REL(BASE) ((int)item-BASE) + +#define MONETARY (__get_current_monetary_locale()) +#define TIME (__get_current_time_locale()) +#define MESSAGES (__get_current_messages_locale()) +#define NUMERIC (__get_current_numeric_locale()) + +#pragma weak _nl_langinfo = nl_langinfo + +/* + * nl_langinfo - return the string for langinfo item "item" in the + * current locale. Items that are unknown or not yet implemented yield + * "". The CRNCYSTR result lives in a static heap buffer (csym) that is + * resized and overwritten by each successful CRNCYSTR call, so a pointer + * returned for that item is only valid until the next such call. + */ +char * +nl_langinfo(nl_item item) +{ + char *ret, *s, *cs; + static char *csym = NULL; + + switch (item) { + case CODESET: + ret = ""; + /* + * The codeset is the suffix of a locale, for most it + * will be UTF-8, as in "en.UTF-8". Short form locales are + * not supported. Note also that although FreeBSD uses + * US-ASCII, Solaris historically has reported "646" for the + * C locale. 
+ */ + if ((s = setlocale(LC_CTYPE, NULL)) != NULL) { + if ((cs = strchr(s, '.')) != NULL) + ret = cs + 1; + else if (strcmp(s, "C") == 0 || strcmp(s, "POSIX") == 0) + ret = "646"; + } + break; + case D_T_FMT: + ret = (char *)TIME->c_fmt; + break; + case D_FMT: + ret = (char *)TIME->x_fmt; + break; + case T_FMT: + ret = (char *)TIME->X_fmt; + break; + case T_FMT_AMPM: + ret = (char *)TIME->ampm_fmt; + break; + case AM_STR: + ret = (char *)TIME->am; + break; + case PM_STR: + ret = (char *)TIME->pm; + break; + case DAY_1: case DAY_2: case DAY_3: + case DAY_4: case DAY_5: case DAY_6: case DAY_7: + ret = (char *)TIME->weekday[_REL(DAY_1)]; + break; + case ABDAY_1: case ABDAY_2: case ABDAY_3: + case ABDAY_4: case ABDAY_5: case ABDAY_6: case ABDAY_7: + ret = (char *)TIME->wday[_REL(ABDAY_1)]; + break; + case MON_1: case MON_2: case MON_3: case MON_4: + case MON_5: case MON_6: case MON_7: case MON_8: + case MON_9: case MON_10: case MON_11: case MON_12: + ret = (char *)TIME->month[_REL(MON_1)]; + break; + case ABMON_1: case ABMON_2: case ABMON_3: case ABMON_4: + case ABMON_5: case ABMON_6: case ABMON_7: case ABMON_8: + case ABMON_9: case ABMON_10: case ABMON_11: case ABMON_12: + ret = (char *)TIME->mon[_REL(ABMON_1)]; + break; + case ERA: + /* XXX: need to be implemented */ + ret = ""; + break; + case ERA_D_FMT: + /* XXX: need to be implemented */ + ret = ""; + break; + case ERA_D_T_FMT: + /* XXX: need to be implemented */ + ret = ""; + break; + case ERA_T_FMT: + /* XXX: need to be implemented */ + ret = ""; + break; + case ALT_DIGITS: + /* XXX: need to be implemented */ + ret = ""; + break; + case RADIXCHAR: + ret = (char *)NUMERIC->decimal_point; + break; + case THOUSEP: + ret = (char *)NUMERIC->thousands_sep; + break; + case YESEXPR: + ret = (char *)MESSAGES->yesexpr; + break; + case NOEXPR: + ret = (char *)MESSAGES->noexpr; + break; + /* + * YESSTR and NOSTR items marked with LEGACY are available, but not + * recommended by SUSv2 to be used in portable applications 
since + * they're subject to removal in future specification editions. + */ + case YESSTR: /* LEGACY */ + ret = (char *)MESSAGES->yesstr; + break; + case NOSTR: /* LEGACY */ + ret = (char *)MESSAGES->nostr; + break; + /* + * SUSv2 special formatted currency string + */ + case CRNCYSTR: + ret = ""; + cs = (char *)MONETARY->currency_symbol; + if (*cs != '\0') { + char pos = localeconv()->p_cs_precedes; + + if (pos == localeconv()->n_cs_precedes) { + char psn = '\0'; + + if (pos == CHAR_MAX) { + if (strcmp(cs, + MONETARY->mon_decimal_point) == 0) + psn = '.'; + } else + psn = pos ? '-' : '+'; + if (psn != '\0') { + size_t clen = strlen(cs); + char *newc; + + /* one byte for the prefix, one for the NUL */ + newc = realloc(csym, clen + 2); + if (newc != NULL) { + /* + * realloc() has already released (or + * reused) the old buffer; calling + * free(csym) here would be a double + * free or would free newc itself. + */ + csym = newc; + *csym = psn; + (void) strcpy(csym + 1, cs); + ret = csym; + } + } + } + } + break; + case _DATE_FMT: /* Solaris specific extension */ + ret = (char *)TIME->date_fmt; + break; + /* + * Note that FreeBSD also had a private D_MD_ORDER, but that appears + * to have been specific to FreeBSD, so we have not included it here. + */ + default: + ret = ""; + } + return (ret); +} diff --git a/usr/src/lib/libc/port/locale/none.c b/usr/src/lib/libc/port/locale/none.c new file mode 100644 index 0000000000..f9427d1270 --- /dev/null +++ b/usr/src/lib/libc/port/locale/none.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <note.h> +#include "runetype.h" +#include "mblocal.h" +#include "../i18n/_locale.h" + +static size_t _none_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); + +static int _none_mbsinit(const mbstate_t *); +static size_t _none_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, + const char **_RESTRICT_KYWD src, size_t nms, size_t len, + mbstate_t *_RESTRICT_KYWD); +static size_t _none_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); +static size_t _none_wcsnrtombs(char *_RESTRICT_KYWD, + const wchar_t **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD); + +/* setup defaults */ + +extern unsigned char __ctype_C[]; +/* + * _none_init - set up the single byte (C/POSIX) conversion: copy + * __ctype_C over __ctype, point the ctype mask and case mapping tables at + * the tables in rl, install the _none_* routines in the five conversion + * hooks and make rl the current rune locale. Always returns 0. + */ +int +_none_init(_RuneLocale *rl) +{ + /* + * We need to populate the ctype stuff. This means the + * tolower table, the type masks, etc. + * There are 257 entries for the type array, 257 entries for the + * tolower/toupper array, and 7 bytes for CSWIDTH array. + * + * We have to set this stuff up because for POSIX/C we short + * circuit most of the logic in setrunelocale that would handle it. + */ + (void) memcpy(__ctype, __ctype_C, SZ_TOTAL); + + __ctype_mask = rl->__runetype; + __trans_upper = rl->__mapupper; + __trans_lower = rl->__maplower; + + __mbrtowc = _none_mbrtowc; + __mbsinit = _none_mbsinit; + __mbsnrtowcs = _none_mbsnrtowcs; + __wcrtomb = _none_wcrtomb; + __wcsnrtombs = _none_wcsnrtombs; + _CurrentRuneLocale = rl; + return (0); +} +/* + * _none_mbsinit - the argument is ignored; always returns 1. + */ +static int +_none_mbsinit(const mbstate_t *unused) +{ + _NOTE(ARGUNUSED(unused)); + + /* + * Encoding is not state dependent - we are always in the + * initial state. 
+ */ + return (1); +} +/* + * _none_mbrtowc - every byte is one character. Stores (unsigned char)*s + * in *pwc when pwc is not NULL and returns 0 for a NUL byte, 1 otherwise. + * Returns 0 when s is NULL and (size_t)-2 when n is 0. The state argument + * is ignored. + */ +static size_t +_none_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD unused) +{ + _NOTE(ARGUNUSED(unused)); + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (0); + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + if (pwc != NULL) + *pwc = (unsigned char)*s; + return (*s == '\0' ? 0 : 1); +} +/* + * _none_wcrtomb - store wc in *s as a single byte and return 1. Values + * outside 0..UCHAR_MAX fail with EILSEQ and (size_t)-1. A NULL s returns + * 1 without storing anything. + */ +static size_t +_none_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, + mbstate_t *_RESTRICT_KYWD unused) +{ + _NOTE(ARGUNUSED(unused)); + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc < 0 || wc > UCHAR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + *s = (unsigned char)wc; + return (1); +} +/* + * _none_mbsnrtowcs - widen at most nms bytes from *src into at most len + * wide characters at dst. With dst == NULL nothing is stored and *src is + * left alone: the result is the number of bytes before the first NUL + * within nms bytes, or nms when there is none. Otherwise, when the NUL + * is copied *src is set to NULL and the count excludes it; when a limit + * stops the copy *src is advanced past the bytes consumed. + */ +static size_t +_none_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD unused) +{ + const char *s; + size_t nchr; + + _NOTE(ARGUNUSED(unused)); + + if (dst == NULL) { + s = memchr(*src, '\0', nms); + return (s != NULL ? 
s - *src : nms); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nms-- > 0) { + if ((*dst++ = (unsigned char)*s++) == L'\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} +/* + * _none_wcsnrtombs - narrow at most nwc wide characters from *src into at + * most len bytes at dst. Any character outside 0..UCHAR_MAX fails with + * EILSEQ and (size_t)-1; *src is not updated on that path. With + * dst == NULL the characters are only validated and counted. Otherwise, + * when the NUL is copied *src is set to NULL and the count excludes it; + * when a limit stops the copy *src is advanced past the input consumed. + */ +static size_t +_none_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD unused) +{ + const wchar_t *s; + size_t nchr; + + _NOTE(ARGUNUSED(unused)); + + if (dst == NULL) { + for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { + if (*s < 0 || *s > UCHAR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + } + return (s - *src); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nwc-- > 0) { + if (*s < 0 || *s > UCHAR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = *s++) == '\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} + +/* setup defaults */ + +size_t (*__mbrtowc)(wchar_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD) = _none_mbrtowc; + +int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; + +size_t (*__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, const char **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD) = _none_mbsnrtowcs; + +size_t (*__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, mbstate_t *_RESTRICT_KYWD) = + _none_wcrtomb; + +size_t (*__wcsnrtombs)(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD) = _none_wcsnrtombs; diff --git a/usr/src/lib/libc/port/locale/regcomp.c b/usr/src/lib/libc/port/locale/regcomp.c new file mode 100644 index 0000000000..324609b19c --- /dev/null +++ b/usr/src/lib/libc/port/locale/regcomp.c @@ -0,0 +1,1746 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include "file64.h" +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <regex.h> +#include <wchar.h> +#include <wctype.h> + +#include "runetype.h" +#include "collate.h" + +#include "utils.h" +#include "regex2.h" + +#include "cname.h" +#include "mblocal.h" + +/* + * parse structure, passed up and down to avoid global variables and + * other clumsinesses + */ +struct parse { + char *next; /* next character in RE */ + char *end; /* end of string (-> NUL normally) */ + int error; /* has an error been seen? */ + sop *strip; /* malloced strip */ + sopno ssize; /* malloced strip size (allocated) */ + sopno slen; /* malloced strip length (used) */ + int ncsalloc; /* number of csets allocated */ + struct re_guts *g; +#define NPAREN 10 /* we need to remember () 1-9 for back refs */ + sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ + sopno pend[NPAREN]; /* -> ) ([0] unused) */ +}; + +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === regcomp.c === */ +static void p_ere(struct parse *p, wint_t stop); +static void p_ere_exp(struct parse *p); +static void p_str(struct parse *p); +static void p_bre(struct parse *p, wint_t end1, wint_t end2); +static int p_simp_re(struct parse *p, int starordinary); +static int p_count(struct parse *p); +static void p_bracket(struct parse *p); +static void p_b_term(struct parse *p, cset *cs); +static void p_b_cclass(struct parse *p, cset *cs); +static void p_b_eclass(struct parse *p, cset *cs); +static wint_t p_b_symbol(struct parse *p); +static wint_t p_b_coll_elem(struct parse *p, wint_t endc); +static wint_t othercase(wint_t ch); +static void bothcases(struct parse *p, wint_t ch); +static void ordinary(struct parse *p, wint_t ch); +static void nonnewline(struct parse *p); +static void repeat(struct parse *p, sopno start, int from, int to); +static int seterr(struct parse *p, 
int e); +static cset *allocset(struct parse *p); +static void freeset(struct parse *p, cset *cs); +static void CHadd(struct parse *p, cset *cs, wint_t ch); +static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max); +static void CHaddtype(struct parse *p, cset *cs, wctype_t wct); +static wint_t singleton(cset *cs); +static sopno dupl(struct parse *p, sopno start, sopno finish); +static void doemit(struct parse *p, sop op, size_t opnd); +static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); +static void dofwd(struct parse *p, sopno pos, sop value); +static void enlarge(struct parse *p, sopno size); +static void stripsnug(struct parse *p, struct re_guts *g); +static void findmust(struct parse *p, struct re_guts *g); +static int altoffset(sop *scan, int offset); +static void computejumps(struct parse *p, struct re_guts *g); +static void computematchjumps(struct parse *p, struct re_guts *g); +static sopno pluscount(struct parse *p, struct re_guts *g); +static wint_t wgetnext(struct parse *p); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ + +static char nuls[10]; /* place to point scanner in event of error */ + +/* + * macros for use with parse structure + * BEWARE: these know that the parse structure is named `p' !!! + */ +#define PEEK() (*p->next) +#define PEEK2() (*(p->next+1)) +#define MORE() (p->next < p->end) +#define MORE2() (p->next+1 < p->end) +#define SEE(c) (MORE() && PEEK() == (c)) +#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) +#define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) +#define NEXT() (p->next++) +#define NEXT2() (p->next += 2) +#define NEXTn(n) (p->next += (n)) +#define GETNEXT() (*p->next++) +#define WGETNEXT() wgetnext(p) +#define SETERROR(e) ((void)seterr(p, (e))) +#define REQUIRE(co, e) ((co) || seterr(p, e)) +#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) +#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) +#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) +#define HERE() (p->slen) +#define THERE() (p->slen - 1) +#define THERETHERE() (p->slen - 2) +#define DROP(n) (p->slen -= (n)) + +#ifndef NDEBUG +static int never = 0; /* for use in asserts; shuts lint up */ +#else +#define never 0 /* some <assert.h>s have bugs too */ +#endif + +/* + * regcomp - interface for parser and compilation + */ +int /* 0 success, otherwise REG_something */ +regcomp(regex_t *_RESTRICT_KYWD preg, + const char *_RESTRICT_KYWD pattern, + int cflags) +{ + struct parse pa; + struct re_guts *g; + struct parse *p = &pa; + int i; + size_t len; +#ifdef REDEBUG +#define GOODFLAGS(f) (f) +#else +#define GOODFLAGS(f) ((f)&~REG_DUMP) +#endif + + /* We had REG_INVARG, but we don't have that on Solaris. 
*/ + cflags = GOODFLAGS(cflags); + if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) + return (REG_EFATAL); + + if (cflags&REG_PEND) { + if (preg->re_endp < pattern) + return (REG_EFATAL); + len = preg->re_endp - pattern; + } else + len = strlen((char *)pattern); + + /* do the mallocs early so failure handling is easy */ + g = (struct re_guts *)malloc(sizeof (struct re_guts)); + if (g == NULL) + return (REG_ESPACE); + p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + p->strip = (sop *)malloc(p->ssize * sizeof (sop)); + p->slen = 0; + if (p->strip == NULL) { + free((char *)g); + return (REG_ESPACE); + } + + /* set things up */ + p->g = g; + p->next = (char *)pattern; /* convenience; we do not modify it */ + p->end = p->next + len; + p->error = 0; + p->ncsalloc = 0; + for (i = 0; i < NPAREN; i++) { + p->pbegin[i] = 0; + p->pend[i] = 0; + } + g->sets = NULL; + g->ncsets = 0; + g->cflags = cflags; + g->iflags = 0; + g->nbol = 0; + g->neol = 0; + g->must = NULL; + g->moffset = -1; + g->charjump = NULL; + g->matchjump = NULL; + g->mlen = 0; + g->nsub = 0; + g->backrefs = 0; + + /* do it */ + EMIT(OEND, 0); + g->firststate = THERE(); + if (cflags&REG_EXTENDED) + p_ere(p, OUT); + else if (cflags&REG_NOSPEC) + p_str(p); + else + p_bre(p, OUT, OUT); + EMIT(OEND, 0); + g->laststate = THERE(); + + /* tidy up loose ends and fill things in */ + stripsnug(p, g); + findmust(p, g); + /* + * only use Boyer-Moore algorithm if the pattern is bigger + * than three characters + */ + if (g->mlen > 3) { + computejumps(p, g); + computematchjumps(p, g); + if (g->matchjump == NULL && g->charjump != NULL) { + free(g->charjump); + g->charjump = NULL; + } + } + g->nplus = pluscount(p, g); + g->magic = MAGIC2; + preg->re_nsub = g->nsub; + preg->re_g = g; + preg->re_magic = MAGIC1; +#ifndef REDEBUG + /* not debugging, so can't rely on the assert() in regexec() */ + if (g->iflags&BAD) + SETERROR(REG_EFATAL); +#endif + + /* win or lose, we're done */ + if (p->error != 0) /* lose */ + regfree(preg); +
return (p->error); +} + +/* + * p_ere - ERE parser top level, concatenation and alternation + */ +static void +p_ere(struct parse *p, + wint_t stop) /* character this ERE should end at */ +{ + char c; + sopno prevback; + sopno prevfwd; + sopno conc; + int first = 1; /* is this the first alternative? */ + + for (;;) { + /* do a bunch of concatenated expressions */ + conc = HERE(); + while (MORE() && (c = PEEK()) != '|' && c != stop) + p_ere_exp(p); + /* require nonempty */ + (void) REQUIRE(HERE() != conc, REG_BADPAT); + + if (!EAT('|')) + break; /* NOTE BREAK OUT */ + + if (first) { + INSERT(OCH_, conc); /* offset is wrong */ + prevfwd = conc; + prevback = conc; + first = 0; + } + ASTERN(OOR1, prevback); + prevback = THERE(); + AHEAD(prevfwd); /* fix previous offset */ + prevfwd = HERE(); + EMIT(OOR2, 0); /* offset is very wrong */ + } + + if (!first) { /* tail-end fixups */ + AHEAD(prevfwd); + ASTERN(O_CH, prevback); + } + + assert(!MORE() || SEE(stop)); +} + +/* + * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + */ +static void +p_ere_exp(struct parse *p) +{ + char c; + wint_t wc; + sopno pos; + int count; + int count2; + sopno subno; + int wascaret = 0; + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + + pos = HERE(); + switch (c) { + case '(': + (void) REQUIRE(MORE(), REG_EPAREN); + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + if (!SEE(')')) + p_ere(p, ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + (void) MUSTEAT(')', REG_EPAREN); + break; +#ifndef POSIX_MISTAKE + case ')': /* happens only if no current unmatched ( */ + /* + * You may ask, why the ifndef? Because I didn't notice + * this until slightly too late for 1003.2, and none of the + * other 1003.2 regular-expression reviewers noticed it at + * all. 
So an unmatched ) is legal POSIX, at least until + * we can get it fixed. + */ + SETERROR(REG_EPAREN); + break; +#endif + case '^': + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + wascaret = 1; + break; + case '$': + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + break; + case '|': + SETERROR(REG_BADPAT); + break; + case '*': + case '+': + case '?': + SETERROR(REG_BADRPT); + break; + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case '\\': + (void) REQUIRE(MORE(), REG_EESCAPE); + wc = WGETNEXT(); + ordinary(p, wc); + break; + case '{': /* okay as ordinary except if digit follows */ + (void) REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); + /* FALLTHROUGH */ + default: + p->next--; + wc = WGETNEXT(); + ordinary(p, wc); + break; + } + + if (!MORE()) + return; + c = PEEK(); + /* we call { a repetition if followed by a digit */ + if (!(c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())))) + return; /* no repetition, we're done */ + NEXT(); + + (void) REQUIRE(!wascaret, REG_BADRPT); + switch (c) { + case '*': /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + break; + case '+': + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + break; + case '?': + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, pos); /* offset slightly wrong */ + ASTERN(OOR1, pos); /* this one's right */ + AHEAD(pos); /* fix the OCH_ */ + EMIT(OOR2, 0); /* offset very wrong...
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit((uch)PEEK())) { + count2 = p_count(p); + (void) REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + (void) REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!(c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())))) + return; + SETERROR(REG_BADRPT); +} + +/* + * p_str - string (no metacharacters) "parser" + */ +static void +p_str(struct parse *p) +{ + (void) REQUIRE(MORE(), REG_BADPAT); + while (MORE()) + ordinary(p, WGETNEXT()); +} + +/* + * p_bre - BRE parser top level, anchoring and concatenation + * Giving end1 as OUT essentially eliminates the end1/end2 check. + * + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. + * The amount of lookahead needed to avoid this kludge is excessive. + */ +static void +p_bre(struct parse *p, + wint_t end1, /* first terminating character */ + wint_t end2) /* second terminating character */ +{ + sopno start = HERE(); + int first = 1; /* first subexpression? 
*/ + int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + (void) REQUIRE(HERE() != start, REG_BADPAT); /* require nonempty */ +} + +/* + * p_simp_re - parse a simple RE, an atom possibly followed by a repetition + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? */ +{ + int c; + int count; + int count2; + sopno pos; + int i; + wint_t wc; + sopno subno; +#define BACKSL (1<<CHAR_BIT) + + pos = HERE(); /* repetition op, if any, covers from here */ + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + if (c == '\\') { + (void) REQUIRE(MORE(), REG_EESCAPE); + c = BACKSL | GETNEXT(); + } + switch (c) { + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case BACKSL|'{': + SETERROR(REG_BADRPT); + break; + case BACKSL|'(': + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + /* the MORE here is an error heuristic */ + if (MORE() && !SEETWO('\\', ')')) + p_bre(p, '\\', ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + (void) REQUIRE(EATTWO('\\', ')'), REG_EPAREN); + break; + case BACKSL|')': /* should not get here -- must be user */ + case BACKSL|'}': + SETERROR(REG_EPAREN); + break; + case BACKSL|'1': + case BACKSL|'2': + case BACKSL|'3': + case BACKSL|'4': + case BACKSL|'5': + case BACKSL|'6': + case BACKSL|'7': + case BACKSL|'8': + case BACKSL|'9': + i = (c&~BACKSL) - '0'; + assert(i < NPAREN); + if (p->pend[i] != 0) { + assert(i <= p->g->nsub); + EMIT(OBACK_, i); +
assert(p->pbegin[i] != 0); + assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); + assert(OP(p->strip[p->pend[i]]) == ORPAREN); + (void) dupl(p, p->pbegin[i]+1, p->pend[i]); + EMIT(O_BACK, i); + } else + SETERROR(REG_ESUBREG); + p->g->backrefs = 1; + break; + case '*': + (void) REQUIRE(starordinary, REG_BADRPT); + /* FALLTHROUGH */ + default: + p->next--; + wc = WGETNEXT(); + ordinary(p, wc); + break; + } + + if (EAT('*')) { /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + } else if (EATTWO('\\', '{')) { + count = p_count(p); + if (EAT(',')) { + if (MORE() && isdigit((uch)PEEK())) { + count2 = p_count(p); + (void) REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EATTWO('\\', '}')) { /* error heuristics */ + while (MORE() && !SEETWO('\\', '}')) + NEXT(); + (void) REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + } else if (c == '$') /* $ (but not \$) ends it */ + return (1); + + return (0); +} + +/* + * p_count - parse a repetition count + */ +static int /* the value */ +p_count(struct parse *p) +{ + int count = 0; + int ndigits = 0; + + while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { + count = count*10 + (GETNEXT() - '0'); + ndigits++; + } + + (void) REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); + return (count); +} + +/* + * p_bracket - parse a bracketed character list + */ +static void +p_bracket(struct parse *p) +{ + cset *cs; + wint_t ch; + + /* Dept of Truly Sickening Special-Case Kludges */ + if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } + + if ((cs = allocset(p)) == NULL) + return; + + if 
(p->g->cflags&REG_ICASE) + cs->icase = 1; + if (EAT('^')) + cs->invert = 1; + if (EAT(']')) + CHadd(p, cs, ']'); + else if (EAT('-')) + CHadd(p, cs, '-'); + while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) + p_b_term(p, cs); + if (EAT('-')) + CHadd(p, cs, '-'); + (void) MUSTEAT(']', REG_EBRACK); + + if (p->error != 0) /* don't mess things up further */ + return; + + if (cs->invert && p->g->cflags&REG_NEWLINE) + cs->bmp['\n' >> 3] |= 1 << ('\n' & 7); + + if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */ + ordinary(p, ch); + freeset(p, cs); + } else + EMIT(OANYOF, (int)(cs - p->g->sets)); +} + +/* + * p_b_term - parse one term of a bracketed character list + */ +static void +p_b_term(struct parse *p, cset *cs) +{ + char c; + wint_t start, finish; + wint_t i; + + /* classify what we've got */ + switch ((MORE()) ? PEEK() : '\0') { + case '[': + c = (MORE2()) ? PEEK2() : '\0'; + break; + case '-': + SETERROR(REG_ERANGE); + return; /* NOTE RETURN */ + break; + default: + c = '\0'; + break; + } + + switch (c) { + case ':': /* character class */ + NEXT2(); + (void) REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + (void) REQUIRE(c != '-' && c != ']', REG_ECTYPE); + p_b_cclass(p, cs); + (void) REQUIRE(MORE(), REG_EBRACK); + (void) REQUIRE(EATTWO(':', ']'), REG_ECTYPE); + break; + case '=': /* equivalence class */ + NEXT2(); + (void) REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + (void) REQUIRE(c != '-' && c != ']', REG_ECOLLATE); + p_b_eclass(p, cs); + (void) REQUIRE(MORE(), REG_EBRACK); + (void) REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); + break; + default: /* symbol, ordinary character, or range */ + start = p_b_symbol(p); + if (SEE('-') && MORE2() && PEEK2() != ']') { + /* range */ + NEXT(); + if (EAT('-')) + finish = '-'; + else + finish = p_b_symbol(p); + } else + finish = start; + if (start == finish) + CHadd(p, cs, start); + else { + if (__collate_load_error) { + (void) REQUIRE((uch)start <= (uch)finish, + REG_ERANGE); + CHaddrange(p, cs, start, finish); + } else
{ + (void) REQUIRE(__collate_range_cmp(start, + finish) <= 0, REG_ERANGE); + for (i = 0; i <= UCHAR_MAX; i++) { + if (__collate_range_cmp(start, i) <= + 0 && + __collate_range_cmp(i, finish) <= + 0) + CHadd(p, cs, i); + } + } + } + break; + } +} + +/* + * p_b_cclass - parse a character-class name and deal with it + */ +static void +p_b_cclass(struct parse *p, cset *cs) +{ + char *sp = p->next; + size_t len; + wctype_t wct; + char clname[16]; + + while (MORE() && isalpha((uch)PEEK())) + NEXT(); + len = p->next - sp; + if (len >= sizeof (clname) - 1) { + SETERROR(REG_ECTYPE); + return; + } + (void) memcpy(clname, sp, len); + clname[len] = '\0'; + if ((wct = wctype(clname)) == 0) { + SETERROR(REG_ECTYPE); + return; + } + CHaddtype(p, cs, wct); +} + +/* + * p_b_eclass - parse an equivalence-class name and deal with it + * + * This implementation is incomplete. xxx + */ +static void +p_b_eclass(struct parse *p, cset *cs) +{ + wint_t c; + + c = p_b_coll_elem(p, '='); + CHadd(p, cs, c); +} + +/* + * p_b_symbol - parse a character or [..]ed multicharacter collating symbol + */ +static wint_t /* value of symbol */ +p_b_symbol(struct parse *p) +{ + wint_t value; + + (void) REQUIRE(MORE(), REG_EBRACK); + if (!EATTWO('[', '.')) + return (WGETNEXT()); + + /* collating symbol */ + value = p_b_coll_elem(p, '.'); + (void) REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); + return (value); +} + +/* + * p_b_coll_elem - parse a collating-element name and look it up + */ +static wint_t /* value of collating element */ +p_b_coll_elem(struct parse *p, + wint_t endc) /* name ended by endc,']' */ +{ + char *sp = p->next; + struct cname *cp; + int len; + mbstate_t mbs; + wchar_t wc; + size_t clen; + + while (MORE() && !SEETWO(endc, ']')) + NEXT(); + if (!MORE()) { + SETERROR(REG_EBRACK); + return (0); + } + len = p->next - sp; + for (cp = cnames; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + return (cp->code); /* known name */ + (void) memset(&mbs, 0, 
sizeof (mbs)); + if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len) + return (wc); /* single character */ + else if (clen == (size_t)-1 || clen == (size_t)-2) + SETERROR(REG_ECHAR); + else + SETERROR(REG_ECOLLATE); /* neither */ + return (0); +} + +/* + * othercase - return the case counterpart of an alphabetic + */ +static wint_t /* if no counterpart, return ch */ +othercase(wint_t ch) +{ + assert(iswalpha(ch)); + if (iswupper(ch)) + return (towlower(ch)); + else if (iswlower(ch)) + return (towupper(ch)); + else /* peculiar, but could happen */ + return (ch); +} + +/* + * bothcases - emit a dualcase version of a two-case character + * + * Boy, is this implementation ever a kludge... + */ +static void +bothcases(struct parse *p, wint_t ch) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[3 + MB_LEN_MAX]; + size_t n; + mbstate_t mbs; + + assert(othercase(ch) != ch); /* p_bracket() would recurse */ + p->next = bracket; + (void) memset(&mbs, 0, sizeof (mbs)); + n = wcrtomb(bracket, ch, &mbs); + assert(n != (size_t)-1); + bracket[n] = ']'; + bracket[n + 1] = '\0'; + p->end = bracket+n+1; + p_bracket(p); + assert(p->next == p->end); + p->next = oldnext; + p->end = oldend; +} + +/* + * ordinary - emit an ordinary character + */ +static void +ordinary(struct parse *p, wint_t ch) +{ + cset *cs; + + if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch) + bothcases(p, ch); + else if ((ch & OPDMASK) == ch) + EMIT(OCHAR, ch); + else { + /* + * Kludge: character is too big to fit into an OCHAR operand. + * Emit a singleton set. + */ + if ((cs = allocset(p)) == NULL) + return; + CHadd(p, cs, ch); + EMIT(OANYOF, (int)(cs - p->g->sets)); + } +} + +/* + * nonnewline - emit REG_NEWLINE version of OANY + * + * Boy, is this implementation ever a kludge...
+ */ +static void +nonnewline(struct parse *p) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[4]; + + p->next = bracket; + p->end = bracket+3; + bracket[0] = '^'; + bracket[1] = '\n'; + bracket[2] = ']'; + bracket[3] = '\0'; + p_bracket(p); + assert(p->next == bracket+3); + p->next = oldnext; + p->end = oldend; +} + +/* + * repeat - generate code for a bounded repetition, recursively if needed + */ +static void +repeat(struct parse *p, + sopno start, /* operand from here to end of strip */ + int from, /* repeated from this number */ + int to) /* to this number of times (maybe INFINITY) */ +{ + sopno finish = HERE(); +#define N 2 +#define INF 3 +#define REP(f, t) ((f)*8 + (t)) +#define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) + sopno copy; + + if (p->error != 0) /* head off possible runaway recursion */ + return; + + assert(from <= to); + + switch (REP(MAP(from), MAP(to))) { + case REP(0, 0): /* must be user doing this */ + DROP(finish-start); /* drop the operand */ + break; + case REP(0, 1): /* as x{1,1}? */ + case REP(0, N): /* as x{1,n}? */ + case REP(0, INF): /* as x{1,}? */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); /* offset is wrong... */ + repeat(p, start+1, 1, to); + ASTERN(OOR1, start); + AHEAD(start); /* ... fix it */ + EMIT(OOR2, 0); + AHEAD(THERE()); + ASTERN(O_CH, THERETHERE()); + break; + case REP(1, 1): /* trivial case */ + /* done */ + break; + case REP(1, N): /* as x?x{1,n-1} */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); + ASTERN(OOR1, start); + AHEAD(start); + EMIT(OOR2, 0); /* offset very wrong... 
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + copy = dupl(p, start+1, finish+1); + assert(copy == finish+4); + repeat(p, copy, 1, to-1); + break; + case REP(1, INF): /* as x+ */ + INSERT(OPLUS_, start); + ASTERN(O_PLUS, start); + break; + case REP(N, N): /* as xx{m-1,n-1} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to-1); + break; + case REP(N, INF): /* as xx{n-1,INF} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to); + break; + default: /* "can't happen" */ + SETERROR(REG_EFATAL); /* just in case */ + break; + } +} + +/* + * wgetnext - helper function for WGETNEXT() macro. Gets the next wide + * character from the parse struct, signals a REG_ECHAR error if the + * character can't be converted. Returns the wide character (0 on error). + */ +static wint_t +wgetnext(struct parse *p) +{ + mbstate_t mbs; + wchar_t wc; + size_t n; + + (void) memset(&mbs, 0, sizeof (mbs)); + n = mbrtowc(&wc, p->next, p->end - p->next, &mbs); + if (n == (size_t)-1 || n == (size_t)-2) { + SETERROR(REG_ECHAR); + return (0); + } + if (n == 0) + n = 1; + p->next += n; + return (wc); +} + +/* + * seterr - set an error condition + */ +static int /* useless but makes type checking happy */ +seterr(struct parse *p, int e) +{ + if (p->error == 0) /* keep earliest error condition */ + p->error = e; + p->next = nuls; /* try to bring things to a halt */ + p->end = nuls; + return (0); /* make the return value well-defined */ +} + +/* + * allocset - allocate a set of characters for [] + */ +static cset * +allocset(struct parse *p) +{ + cset *cs, *ncs; + + ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof (*ncs)); + if (ncs == NULL) { + SETERROR(REG_ESPACE); + return (NULL); + } + p->g->sets = ncs; + cs = &p->g->sets[p->g->ncsets++]; + (void) memset(cs, 0, sizeof (*cs)); + + return (cs); +} + +/* + * freeset - free a now-unused set + */ +static void +freeset(struct parse *p, cset *cs) +{ + cset *top = &p->g->sets[p->g->ncsets]; + +
free(cs->wides); + free(cs->ranges); + free(cs->types); + (void) memset(cs, 0, sizeof (*cs)); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + * singleton - Determine whether a set contains only one character, + * returning it if so, otherwise returning OUT. + */ +static wint_t +singleton(cset *cs) +{ + wint_t i, s, n; + + for (i = n = 0; i < NC; i++) + if (CHIN(cs, i)) { + n++; + s = i; + } + if (n == 1) + return (s); + if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 && + cs->icase == 0) + return (cs->wides[0]); + /* Don't bother handling the other cases. */ + return (OUT); +} + +/* + * CHadd - add character to character set. + */ +static void +CHadd(struct parse *p, cset *cs, wint_t ch) +{ + wint_t nch, *newwides; + assert(ch >= 0); + if (ch < NC) + cs->bmp[ch >> 3] |= 1 << (ch & 7); + else { + newwides = realloc(cs->wides, (cs->nwides + 1) * + sizeof (*cs->wides)); + if (newwides == NULL) { + SETERROR(REG_ESPACE); + return; + } + cs->wides = newwides; + cs->wides[cs->nwides++] = ch; + } + if (cs->icase) { + if ((nch = towlower(ch)) < NC) + cs->bmp[nch >> 3] |= 1 << (nch & 7); + if ((nch = towupper(ch)) < NC) + cs->bmp[nch >> 3] |= 1 << (nch & 7); + } +} + +/* + * CHaddrange - add all characters in the range [min,max] to a character set. + */ +static void +CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max) +{ + crange *newranges; + + /* characters below NC go into the bitmap one at a time */ + for (; min < NC && min <= max; min++) + CHadd(p, cs, min); + if (min >= max) + return; + /* whatever is left of [min,max] is recorded as a single range entry */ + newranges = realloc(cs->ranges, (cs->nranges + 1) * + sizeof (*cs->ranges)); + if (newranges == NULL) { + SETERROR(REG_ESPACE); + return; + } + cs->ranges = newranges; + cs->ranges[cs->nranges].min = min; + cs->ranges[cs->nranges].max = max; + cs->nranges++; +} + +/* + * CHaddtype - add all characters of a certain type to a character set.
+ */ +static void +CHaddtype(struct parse *p, cset *cs, wctype_t wct) +{ + wint_t i; + wctype_t *newtypes; + + for (i = 0; i < NC; i++) + if (iswctype(i, wct)) + CHadd(p, cs, i); + newtypes = realloc(cs->types, (cs->ntypes + 1) * + sizeof (*cs->types)); + if (newtypes == NULL) { + SETERROR(REG_ESPACE); + return; + } + cs->types = newtypes; + cs->types[cs->ntypes++] = wct; +} + +/* + * dupl - emit a duplicate of a bunch of sops + */ +static sopno /* start of duplicate */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ +{ + sopno ret = HERE(); + sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return (ret); + enlarge(p, p->ssize + len); /* this many unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memcpy((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof (sop)); + p->slen += len; + return (ret); +} + +/* + * doemit - emit a strip operator + * + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. 
+ */ +static void +doemit(struct parse *p, sop op, size_t opnd) +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* deal with oversize operands ("can't happen", more or less) */ + assert(opnd < 1<<OPSHIFT); + + /* deal with undersized strip */ + if (p->slen >= p->ssize) + enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ + assert(p->slen < p->ssize); + + /* finally, it's all reduced to the easy case */ + p->strip[p->slen++] = SOP(op, opnd); +} + +/* + * doinsert - insert a sop into the strip + */ +static void +doinsert(struct parse *p, sop op, size_t opnd, sopno pos) +{ + sopno sn; + sop s; + int i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + sn = HERE(); + EMIT(op, opnd); /* do checks, ensure space */ + assert(HERE() == sn+1); + s = p->strip[sn]; + + /* adjust paren pointers */ + assert(pos > 0); + for (i = 1; i < NPAREN; i++) { + if (p->pbegin[i] >= pos) { + p->pbegin[i]++; + } + if (p->pend[i] >= pos) { + p->pend[i]++; + } + } + + memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], + (HERE()-pos-1)*sizeof (sop)); + p->strip[pos] = s; +} + +/* + * dofwd - complete a forward reference + */ +static void +dofwd(struct parse *p, sopno pos, sop value) +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + assert(value < 1<<OPSHIFT); + p->strip[pos] = OP(p->strip[pos]) | value; +} + +/* + * enlarge - enlarge the strip + */ +static void +enlarge(struct parse *p, sopno size) +{ + sop *sp; + + if (p->ssize >= size) + return; + + sp = (sop *)realloc(p->strip, size*sizeof (sop)); + if (sp == NULL) { + SETERROR(REG_ESPACE); + return; + } + p->strip = sp; + p->ssize = size; +} + +/* + * stripsnug - compact the strip + */ +static void +stripsnug(struct parse *p, struct re_guts *g) +{ + g->nstates = p->slen; + g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof (sop)); + if (g->strip == NULL) { + SETERROR(REG_ESPACE); + g->strip = p->strip; + } +} + +/* + * findmust - fill 
in must and mlen with longest mandatory literal string + * + * This algorithm could do fancy things like analyzing the operands of | + * for common subsequences. Someday. This code is simple and finds most + * of the interesting cases. + * + * Note that must and mlen got initialized during setup. + */ +static void +findmust(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop *start; + sop *newstart; + sopno newlen; + sop s; + char *cp; + int offset; + char buf[MB_LEN_MAX]; + size_t clen; + mbstate_t mbs; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* + * It's not generally safe to do a ``char'' substring search on + * multibyte character strings, but it's safe for at least + * UTF-8 (see RFC 3629). + */ + if (MB_CUR_MAX > 1 && + strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0) + return; + + /* find the longest OCHAR sequence in strip */ + newlen = 0; + offset = 0; + g->moffset = 0; + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OCHAR: /* sequence member */ + if (newlen == 0) { /* new sequence */ + (void) memset(&mbs, 0, sizeof (mbs)); + newstart = scan - 1; + } + clen = wcrtomb(buf, OPND(s), &mbs); + if (clen == (size_t)-1) + goto toohard; + newlen += clen; + break; + case OPLUS_: /* things that don't break one */ + case OLPAREN: + case ORPAREN: + break; + case OQUEST_: /* things that must be skipped */ + case OCH_: + offset = altoffset(scan, offset); + scan--; + do { + scan += OPND(s); + s = *scan; + /* assert() interferes w debug printouts */ + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) { + g->iflags |= BAD; + return; + } + } while (OP(s) != O_QUEST && OP(s) != O_CH); + /* FALLTHROUGH */ + case OBOW: /* things that break a sequence */ + case OEOW: + case OBOL: + case OEOL: + case O_QUEST: + case O_CH: + case OEND: + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) { + g->moffset += offset; + offset = newlen; + } else + g->moffset 
= offset; + } else { + if (offset > -1) + offset += newlen; + } + newlen = 0; + break; + case OANY: + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) { + g->moffset += offset; + offset = newlen; + } else + g->moffset = offset; + } else { + if (offset > -1) + offset += newlen; + } + if (offset > -1) + offset++; + newlen = 0; + break; + case OANYOF: /* may or may not invalidate offset */ + /* First, everything as OANY */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) { + g->moffset += offset; + offset = newlen; + } else + g->moffset = offset; + } else { + if (offset > -1) + offset += newlen; + } + if (offset > -1) + offset++; + newlen = 0; + break; + toohard: + default: + /* + * Anything here makes it impossible or too hard + * to calculate the offset -- so we give up; + * save the last known good offset, in case the + * must sequence doesn't occur later. + */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) + g->moffset += offset; + else + g->moffset = offset; + } + offset = -1; + newlen = 0; + break; + } + } while (OP(s) != OEND); + + if (g->mlen == 0) { /* there isn't one */ + g->moffset = -1; + return; + } + + /* turn it into a character string */ + g->must = malloc((size_t)g->mlen + 1); + if (g->must == NULL) { /* argh; just forget it */ + g->mlen = 0; + g->moffset = -1; + return; + } + cp = g->must; + scan = start; + (void) memset(&mbs, 0, sizeof (mbs)); + while (cp < g->must + g->mlen) { + while (OP(s = *scan++) != OCHAR) + continue; + clen = wcrtomb(cp, OPND(s), &mbs); + assert(clen != (size_t)-1); + cp += clen; + } + assert(cp == g->must + g->mlen); + *cp++ = '\0'; /* just on general principles */ +} + +/* + * altoffset - choose biggest offset among multiple choices + * + * Compute, recursively if necessary, the largest offset among multiple + * re paths. 
+ */ +static int +altoffset(sop *scan, int offset) +{ + int largest; + int try; + sop s; + + /* If we gave up already on offsets, return */ + if (offset == -1) + return (-1); + + largest = 0; + try = 0; + s = *scan++; + while (OP(s) != O_QUEST && OP(s) != O_CH) { + switch (OP(s)) { + case OOR1: + if (try > largest) + largest = try; + try = 0; + break; + case OQUEST_: + case OCH_: + try = altoffset(scan, try); + if (try == -1) + return (-1); + scan--; + do { + scan += OPND(s); + s = *scan; + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) + return (-1); + } while (OP(s) != O_QUEST && OP(s) != O_CH); + /* + * We must skip to the next position, or we'll + * leave altoffset() too early. + */ + scan++; + break; + case OANYOF: + case OCHAR: + case OANY: + try++; + /*FALLTHRU*/ + case OBOW: + case OEOW: + case OLPAREN: + case ORPAREN: + case OOR2: + break; + default: + try = -1; + break; + } + if (try == -1) + return (-1); + s = *scan++; + } + + if (try > largest) + largest = try; + + return (largest+offset); +} + +/* + * computejumps - compute char jumps for BM scan + * + * This algorithm assumes g->must exists and is has size greater than + * zero. It's based on the algorithm found on Computer Algorithms by + * Sara Baase. + * + * A char jump is the number of characters one needs to jump based on + * the value of the character from the text that was mismatched. + */ +static void +computejumps(struct parse *p, struct re_guts *g) +{ + int ch; + int mindex; + + /* Avoid making errors worse */ + if (p->error != 0) + return; + + g->charjump = (int *)malloc((NC + 1) * sizeof (int)); + if (g->charjump == NULL) /* Not a fatal error */ + return; + /* Adjust for signed chars, if necessary */ + g->charjump = &g->charjump[-(CHAR_MIN)]; + + /* + * If the character does not exist in the pattern, the jump + * is equal to the number of characters in the pattern. 
+ */ + for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++) + g->charjump[ch] = g->mlen; + + /* + * If the character does exist, compute the jump that would + * take us to the last character in the pattern equal to it + * (notice that we match right to left, so that last character + * is the first one that would be matched). + */ + for (mindex = 0; mindex < g->mlen; mindex++) + g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1; +} + +/* + * computematchjumps - compute match jumps for BM scan + * + * This algorithm assumes g->must exists and is has size greater than + * zero. It's based on the algorithm found on Computer Algorithms by + * Sara Baase. + * + * A match jump is the number of characters one needs to advance based + * on the already-matched suffix. + * Notice that all values here are minus (g->mlen-1), because of the way + * the search algorithm works. + */ +static void +computematchjumps(struct parse *p, struct re_guts *g) +{ + int mindex; /* General "must" iterator */ + int suffix; /* Keeps track of matching suffix */ + int ssuffix; /* Keeps track of suffixes' suffix */ + int *pmatches; + /* + * pmatches[k] points to the next i + * such that i+1...mlen is a substring + * of k+1...k+mlen-i-1 + */ + + /* Avoid making errors worse */ + if (p->error != 0) + return; + + pmatches = (int *)malloc(g->mlen * sizeof (unsigned int)); + if (pmatches == NULL) { + g->matchjump = NULL; + return; + } + + g->matchjump = (int *)malloc(g->mlen * sizeof (unsigned int)); + if (g->matchjump == NULL) /* Not a fatal error */ + return; + + /* Set maximum possible jump for each character in the pattern */ + for (mindex = 0; mindex < g->mlen; mindex++) + g->matchjump[mindex] = 2*g->mlen - mindex - 1; + + /* Compute pmatches[] */ + for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0; + mindex--, suffix--) { + pmatches[mindex] = suffix; + + /* + * If a mismatch is found, interrupting the substring, + * compute the matchjump for that position. 
If no + * mismatch is found, then a text substring mismatched + * against the suffix will also mismatch against the + * substring. + */ + while (suffix < g->mlen && g->must[mindex] != g->must[suffix]) { + g->matchjump[suffix] = MIN(g->matchjump[suffix], + g->mlen - mindex - 1); + suffix = pmatches[suffix]; + } + } + + /* + * Compute the matchjump up to the last substring found to jump + * to the beginning of the largest must pattern prefix matching + * it's own suffix. + */ + for (mindex = 0; mindex <= suffix; mindex++) + g->matchjump[mindex] = MIN(g->matchjump[mindex], + g->mlen + suffix - mindex); + + ssuffix = pmatches[suffix]; + while (suffix < g->mlen) { + while (suffix <= ssuffix && suffix < g->mlen) { + g->matchjump[suffix] = MIN(g->matchjump[suffix], + g->mlen + ssuffix - suffix); + suffix++; + } + if (suffix < g->mlen) + ssuffix = pmatches[ssuffix]; + } + + free(pmatches); +} + +/* + * pluscount - count + nesting + */ +static sopno /* nesting depth */ +pluscount(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop s; + sopno plusnest = 0; + sopno maxnest = 0; + + if (p->error != 0) + return (0); /* there may not be an OEND */ + + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OPLUS_: + plusnest++; + break; + case O_PLUS: + if (plusnest > maxnest) + maxnest = plusnest; + plusnest--; + break; + } + } while (OP(s) != OEND); + if (plusnest != 0) + g->iflags |= BAD; + return (maxnest); +} diff --git a/usr/src/lib/libc/port/locale/regerror.c b/usr/src/lib/libc/port/locale/regerror.c new file mode 100644 index 0000000000..2a81ed67f1 --- /dev/null +++ b/usr/src/lib/libc/port/locale/regerror.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include "file64.h" +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <stdlib.h> +#include <regex.h> + +#include "utils.h" + +/* + * Maps each regerror() code to its symbolic name and explanatory text. + * The entry with code 0 ends the table and supplies the fallback message + * for unrecognized codes. + */ +static const struct rerr { + int code; + const char *name; + const char *explain; +} rerrs[] = { + {REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"}, + {REG_BADPAT, "REG_BADPAT", "invalid regular expression"}, + {REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"}, + {REG_ECTYPE, "REG_ECTYPE", "invalid character class"}, + {REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)"}, + {REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"}, + {REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced"}, + {REG_EPAREN, "REG_EPAREN", "parentheses not balanced"}, + {REG_EBRACE, "REG_EBRACE", "braces not balanced"}, + {REG_BADBR, "REG_BADBR", "invalid repetition count(s)"}, + {REG_ERANGE, "REG_ERANGE", "invalid character range"}, + {REG_ESPACE, "REG_ESPACE", "out of memory"}, + {REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid"}, +#ifdef REG_EMPTY + {REG_EMPTY, "REG_EMPTY", "empty (sub)expression"}, +#endif + {REG_EFATAL, "REG_EFATAL", "\"can't happen\" -- you found a bug"}, +#ifdef REG_INVARG + {REG_INVARG, "REG_INVARG", "invalid argument to regex routine"}, +#endif + {REG_ECHAR, "REG_ECHAR", "illegal byte sequence"}, + {REG_ENOSYS, "REG_ENOSYS", "function not supported"}, + {REG_STACK, "REG_STACK", "backtrack stack overflow"}, + {REG_ENSUB, "REG_ENSUB", "more than 9 \\( \\) pairs"}, + {REG_ENEWLINE, "REG_ENEWLINE", "\\n found before end of pattern"}, + {0, "", "*** unknown regexp error code ***"} +}; + + +/* + * regerror - the interface to error numbers + * + * Looks errcode up in rerrs[] and copies the matching explanation (or the + * fallback text when no entry matches) into errbuf. When errbuf_size is + * nonzero the copy is truncated to errbuf_size - 1 bytes and always + * NUL-terminated; when it is zero errbuf is not touched. Returns the + * buffer size needed for the complete message, including the NUL. + * preg is unused. + */ +/* ARGSUSED */ +size_t +regerror(int errcode, const regex_t *_RESTRICT_KYWD preg, + char *_RESTRICT_KYWD errbuf, size_t errbuf_size) +{ + const struct rerr *r; + size_t len; + const char *s; + + /* stops on a match or on the terminating code-0 entry */ + for (r = rerrs; r->code != 0; r++) + if (r->code == errcode) + break; + + s = r->explain; + + len = strlen(s) + 1; + if
(errbuf_size > 0) { + if (errbuf_size > len) + (void) strcpy(errbuf, s); + else { + /* message does not fit: truncate and NUL-terminate */ + (void) strncpy(errbuf, s, errbuf_size-1); + errbuf[errbuf_size-1] = '\0'; + } + } + + return (len); +} diff --git a/usr/src/lib/libc/port/locale/regex2.h b/usr/src/lib/libc/port/locale/regex2.h new file mode 100644 index 0000000000..1393386c0d --- /dev/null +++ b/usr/src/lib/libc/port/locale/regex2.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex2.h 8.4 (Berkeley) 3/20/94 + * $FreeBSD: src/lib/libc/regex/regex2.h,v 1.11 2007/01/09 00:28:04 imp Exp $ + */ + +/* + * First, the stuff that ends up in the outside-world include file + * typedef off_t regoff_t; + * typedef struct { + * int re_magic; + * size_t re_nsub; // number of parenthesized subexpressions + * const char *re_endp; // end pointer for REG_PEND + * struct re_guts *re_g; // none of your business :-) + * } regex_t; + * typedef struct { + * regoff_t rm_so; // start of match + * regoff_t rm_eo; // end of match + * } regmatch_t; + */ +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. + * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. 
+ * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. + */ +typedef unsigned long sop; /* strip operator */ +typedef long sopno; +#define OPRMASK 0xf8000000U +#define OPDMASK 0x07ffffffU +#define OPSHIFT ((unsigned)27) +#define OP(n) ((n)&OPRMASK) +#define OPND(n) ((n)&OPDMASK) +#define SOP(op, opnd) ((op)|(opnd)) +/* operators meaning operand */ +/* (back, fwd are offsets) */ +#define OEND (1U<<OPSHIFT) /* endmarker - */ +#define OCHAR (2U<<OPSHIFT) /* character wide character */ +#define OBOL (3U<<OPSHIFT) /* left anchor - */ +#define OEOL (4U<<OPSHIFT) /* right anchor - */ +#define OANY (5U<<OPSHIFT) /* . - */ +#define OANYOF (6U<<OPSHIFT) /* [...] set number */ +#define OBACK_ (7U<<OPSHIFT) /* begin \d paren number */ +#define O_BACK (8U<<OPSHIFT) /* end \d paren number */ +#define OPLUS_ (9U<<OPSHIFT) /* + prefix fwd to suffix */ +#define O_PLUS (10U<<OPSHIFT) /* + suffix back to prefix */ +#define OQUEST_ (11U<<OPSHIFT) /* ? prefix fwd to suffix */ +#define O_QUEST (12U<<OPSHIFT) /* ? suffix back to prefix */ +#define OLPAREN (13U<<OPSHIFT) /* ( fwd to ) */ +#define ORPAREN (14U<<OPSHIFT) /* ) back to ( */ +#define OCH_ (15U<<OPSHIFT) /* begin choice fwd to OOR2 */ +#define OOR1 (16U<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 (17U<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH (18U<<OPSHIFT) /* end choice back to OOR1 */ +#define OBOW (19U<<OPSHIFT) /* begin word - */ +#define OEOW (20U<<OPSHIFT) /* end word - */ + +/* + * Structures for [] character-set representation. 
+ */ +typedef struct { + wint_t min; + wint_t max; +} crange; +typedef struct { + unsigned char bmp[NC / 8]; + wctype_t *types; + int ntypes; + wint_t *wides; + int nwides; + crange *ranges; + int nranges; + int invert; + int icase; +} cset; + +static int +CHIN1(cset *cs, wint_t ch) +{ + int i; + + assert(ch >= 0); + if (ch < NC) + return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ + cs->invert); + for (i = 0; i < cs->nwides; i++) + if (ch == cs->wides[i]) + return (!cs->invert); + for (i = 0; i < cs->nranges; i++) + if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max) + return (!cs->invert); + for (i = 0; i < cs->ntypes; i++) + if (iswctype(ch, cs->types[i])) + return (!cs->invert); + return (cs->invert); +} + +static int +CHIN(cset *cs, wint_t ch) +{ + + assert(ch >= 0); + if (ch < NC) + return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ + cs->invert); + else if (cs->icase) + return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) || + CHIN1(cs, towupper(ch))); + else + return (CHIN1(cs, ch)); +} + +/* + * main compiled-expression structure + */ +struct re_guts { + int magic; +#define MAGIC2 ((('R'^0200)<<8)|'E') + sop *strip; /* malloced area for strip */ + int ncsets; /* number of csets in use */ + cset *sets; /* -> cset [ncsets] */ + int cflags; /* copy of regcomp() cflags argument */ + sopno nstates; /* = number of sops */ + sopno firststate; /* the initial OEND (normally 0) */ + sopno laststate; /* the final OEND */ + int iflags; /* internal flags */ +#define USEBOL 01 /* used ^ */ +#define USEEOL 02 /* used $ */ +#define BAD 04 /* something wrong */ + int nbol; /* number of ^ used */ + int neol; /* number of $ used */ + char *must; /* match must contain this string */ + int moffset; /* latest point at which must may be located */ + int *charjump; /* Boyer-Moore char jump table */ + int *matchjump; /* Boyer-Moore match jump table */ + int mlen; /* length of must */ + size_t nsub; /* copy of re_nsub */ + int backrefs; /* does it use back references? 
*/ + sopno nplus; /* how deep does it nest +s? */ +}; + +/* misc utilities */ +#define OUT (CHAR_MIN - 1) /* a non-character value */ +#define ISWORD(c) (iswalnum((uch)(c)) || (c) == '_') diff --git a/usr/src/lib/libc/port/locale/regexec.c b/usr/src/lib/libc/port/locale/regexec.c new file mode 100644 index 0000000000..f824fa2af4 --- /dev/null +++ b/usr/src/lib/libc/port/locale/regexec.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * the outer shell of regexec() + * + * This file includes engine.c three times, after muchos fiddling with the + * macros that code uses. This lets the same code operate on two different + * representations for state sets and characters. + */ +#include "lint.h" +#include "file64.h" +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <ctype.h> +#include <regex.h> +#include <wchar.h> +#include <wctype.h> +#include <note.h> +#include <assert.h> + +#include "utils.h" +#include "regex2.h" + +/* we want _NOTE, but not NOTE (which collides with our own use) */ +#undef NOTE + +/* + * Decode one multibyte character from s (at most n bytes) with mbrtowc() + * and store it through wi when wi is non-NULL. A decoded NUL is reported + * as length 1. On an invalid or incomplete sequence the conversion state + * is zeroed, dummy is stored instead of a decoded character, and 1 is + * returned. Otherwise the byte count from mbrtowc() is returned. + */ +static size_t +xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) +{ + size_t nr; + wchar_t wc = 0; /* mbrtowc() stores nothing on failure */ + + nr = mbrtowc(&wc, s, n, mbs); + if (wi != NULL) + *wi = wc; + if (nr == 0) + return (1); + else if (nr == (size_t)-1 || nr == (size_t)-2) { + (void) memset(mbs, 0, sizeof (*mbs)); + if (wi != NULL) + *wi = dummy; + return (1); + } else + return (nr); +} + +/* + * Single-byte stand-in for xmbrtowc(): stores the byte at s (as an unsigned + * char) through wi when wi is non-NULL and always returns 1. n, mbs and + * dummy are ignored. + */ +static size_t +xmbrtowc_dummy(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, + wint_t dummy) +{ + _NOTE(ARGUNUSED(n)); + _NOTE(ARGUNUSED(mbs)); + _NOTE(ARGUNUSED(dummy)); + + if (wi != NULL) + *wi = (unsigned char)*s; + return (1); +} + +/* macros for manipulating states, small version */ +#define states long
+#define states1 states /* for later use in regexec() decision */ +#define CLEAR(v) ((v) = 0) +#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) +#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) +#define ASSIGN(d, s) ((d) = (s)) +#define EQ(a, b) ((a) == (b)) +#define STATEVARS long dummy /* dummy version */ +#define STATESETUP(m, n) /* nothing */ +#define STATETEARDOWN(m) /* nothing */ +#define SETUP(v) ((v) = 0) +#define onestate long +#define INIT(o, n) ((o) = (unsigned long)1 << (n)) +#define INC(o) ((o) <<= 1) +#define ISSTATEIN(v, o) (((v) & (o)) != 0) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* no multibyte support */ +#define XMBRTOWC xmbrtowc_dummy +#define ZAPSTATE(mbs) ((void)(mbs)) +/* function names */ +#define SNAMES /* engine.c looks after details */ + +#include "engine.c" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES +#undef XMBRTOWC +#undef ZAPSTATE + +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) (void) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) (void) memcpy(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS long vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space 
== NULL) \ + return (REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate long +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* no multibyte support */ +#define XMBRTOWC xmbrtowc_dummy +#define ZAPSTATE(mbs) ((void)(mbs)) +/* function names */ +#define LNAMES /* flag */ + +#include "engine.c" + +/* multibyte character & large states version */ +#undef LNAMES +#undef XMBRTOWC +#undef ZAPSTATE +#define XMBRTOWC xmbrtowc +#define ZAPSTATE(mbs) (void) memset((mbs), 0, sizeof (*(mbs))) +#define MNAMES + +#include "engine.c" + +/* + * regexec - interface for matching + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. 
+ */ +int /* 0 success, REG_NOMATCH failure */ +regexec(const regex_t *_RESTRICT_KYWD preg, + const char *_RESTRICT_KYWD string, size_t nmatch, + regmatch_t pmatch[_RESTRICT_KYWD], int eflags) +{ + struct re_guts *g = preg->re_g; +#ifdef REDEBUG +#define GOODFLAGS(f) (f) +#else +#ifdef REG_STARTEND +#define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#else +#define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL)) +#endif +#endif + + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return (REG_BADPAT); + assert(!(g->iflags&BAD)); + if (g->iflags&BAD) /* backstop for no-debug case */ + return (REG_BADPAT); + eflags = GOODFLAGS(eflags); + + if (MB_CUR_MAX > 1) + return (mmatcher(g, (char *)string, nmatch, pmatch, eflags)); +#ifdef REG_LARGE + else if (g->nstates <= CHAR_BIT*sizeof (states1) && !(eflags&REG_LARGE)) +#else + else if (g->nstates <= CHAR_BIT*sizeof (states1)) +#endif + return (smatcher(g, (char *)string, nmatch, pmatch, eflags)); + else + return (lmatcher(g, (char *)string, nmatch, pmatch, eflags)); +} diff --git a/usr/src/lib/libc/port/locale/regfree.c b/usr/src/lib/libc/port/locale/regfree.c new file mode 100644 index 0000000000..e35784214a --- /dev/null +++ b/usr/src/lib/libc/port/locale/regfree.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "file64.h" +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <regex.h> +#include <wchar.h> +#include <wctype.h> + +#include "utils.h" +#include "regex2.h" + +/* + * regfree - free everything + */ +void +regfree(regex_t *preg) +{ + struct re_guts *g; + int i; + +#ifdef __lint + /* shut up lint! 
*/ + CHIN(NULL, 0); +#endif + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ + return; + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) { + for (i = 0; i < g->ncsets; i++) { + free(g->sets[i].ranges); + free(g->sets[i].wides); + free(g->sets[i].types); + } + free((char *)g->sets); + } + if (g->must != NULL) + free(g->must); + if (g->charjump != NULL) + free(&g->charjump[CHAR_MIN]); + if (g->matchjump != NULL) + free(g->matchjump); + free((char *)g); +} diff --git a/usr/src/lib/libc/port/locale/rune.c b/usr/src/lib/libc/port/locale/rune.c new file mode 100644 index 0000000000..787a4a663b --- /dev/null +++ b/usr/src/lib/libc/port/locale/rune.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "file64.h" +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <netinet/in.h> + +#include "runetype.h" +#include "runefile.h" + +_RuneLocale *_Read_RuneMagi(FILE *); + +_RuneLocale * +_Read_RuneMagi(FILE *fp) +{ + char *fdata, *data; + void *lastp; + _FileRuneLocale *frl; + _RuneLocale *rl; + _FileRuneEntry *frr; + _RuneEntry *rr; + struct stat sb; + int x, saverr; + void *variable; + _FileRuneEntry *runetype_ext_ranges; + _FileRuneEntry *maplower_ext_ranges; + _FileRuneEntry *mapupper_ext_ranges; + int runetype_ext_len = 0; + + if (fstat(fileno(fp), &sb) < 0) + return (NULL); + + if ((size_t)sb.st_size < sizeof (_FileRuneLocale)) { + errno = EINVAL; + return (NULL); + } + + if ((fdata = malloc(sb.st_size)) == NULL) + return (NULL); + + errno = 0; + rewind(fp); /* Someone might have read the magic number once already */ + if (errno) { + saverr = errno; + free(fdata); + errno = saverr; + return (NULL); + } + + if (fread(fdata, sb.st_size, 1, fp) != 1) { + saverr = 
errno; + free(fdata); + errno = saverr; + return (NULL); + } + + frl = (_FileRuneLocale *)(void *)fdata; + lastp = fdata + sb.st_size; + + variable = frl + 1; + + if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof (frl->magic))) { + free(fdata); + errno = EINVAL; + return (NULL); + } + + frl->variable_len = ntohl(frl->variable_len); + frl->runetype_ext_nranges = ntohl(frl->runetype_ext_nranges); + frl->maplower_ext_nranges = ntohl(frl->maplower_ext_nranges); + frl->mapupper_ext_nranges = ntohl(frl->mapupper_ext_nranges); + + for (x = 0; x < _CACHED_RUNES; ++x) { + frl->runetype[x] = ntohl(frl->runetype[x]); + frl->maplower[x] = ntohl(frl->maplower[x]); + frl->mapupper[x] = ntohl(frl->mapupper[x]); + } + + runetype_ext_ranges = (_FileRuneEntry *)variable; + variable = runetype_ext_ranges + frl->runetype_ext_nranges; + if (variable > lastp) { + free(fdata); + errno = EINVAL; + return (NULL); + } + + maplower_ext_ranges = (_FileRuneEntry *)variable; + variable = maplower_ext_ranges + frl->maplower_ext_nranges; + if (variable > lastp) { + free(fdata); + errno = EINVAL; + return (NULL); + } + + mapupper_ext_ranges = (_FileRuneEntry *)variable; + variable = mapupper_ext_ranges + frl->mapupper_ext_nranges; + if (variable > lastp) { + free(fdata); + errno = EINVAL; + return (NULL); + } + + frr = runetype_ext_ranges; + for (x = 0; x < frl->runetype_ext_nranges; ++x) { + uint32_t *types; + + frr[x].min = ntohl(frr[x].min); + frr[x].max = ntohl(frr[x].max); + frr[x].map = ntohl(frr[x].map); + if (frr[x].map == 0) { + int len = frr[x].max - frr[x].min + 1; + types = variable; + variable = types + len; + runetype_ext_len += len; + if (variable > lastp) { + free(fdata); + errno = EINVAL; + return (NULL); + } + while (len-- > 0) + types[len] = ntohl(types[len]); + } + } + + frr = maplower_ext_ranges; + for (x = 0; x < frl->maplower_ext_nranges; ++x) { + frr[x].min = ntohl(frr[x].min); + frr[x].max = ntohl(frr[x].max); + frr[x].map = ntohl(frr[x].map); + } + + frr = 
mapupper_ext_ranges; + for (x = 0; x < frl->mapupper_ext_nranges; ++x) { + frr[x].min = ntohl(frr[x].min); + frr[x].max = ntohl(frr[x].max); + frr[x].map = ntohl(frr[x].map); + } + if ((char *)variable + frl->variable_len > (char *)lastp) { + free(fdata); + errno = EINVAL; + return (NULL); + } + + /* + * Convert from disk format to host format. + */ + data = malloc(sizeof (_RuneLocale) + + (frl->runetype_ext_nranges + frl->maplower_ext_nranges + + frl->mapupper_ext_nranges) * sizeof (_RuneEntry) + + runetype_ext_len * sizeof (*rr->__types) + + frl->variable_len); + if (data == NULL) { + saverr = errno; + free(fdata); + errno = saverr; + return (NULL); + } + + rl = (_RuneLocale *)(void *)data; + rl->__variable = rl + 1; + + (void) memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof (rl->__magic)); + (void) memcpy(rl->__encoding, frl->encoding, sizeof (rl->__encoding)); + + rl->__variable_len = frl->variable_len; + rl->__runetype_ext.__nranges = frl->runetype_ext_nranges; + rl->__maplower_ext.__nranges = frl->maplower_ext_nranges; + rl->__mapupper_ext.__nranges = frl->mapupper_ext_nranges; + + for (x = 0; x < _CACHED_RUNES; ++x) { + rl->__runetype[x] = frl->runetype[x]; + rl->__maplower[x] = frl->maplower[x]; + rl->__mapupper[x] = frl->mapupper[x]; + } + + rl->__runetype_ext.__ranges = (_RuneEntry *)rl->__variable; + rl->__variable = rl->__runetype_ext.__ranges + + rl->__runetype_ext.__nranges; + + rl->__maplower_ext.__ranges = (_RuneEntry *)rl->__variable; + rl->__variable = rl->__maplower_ext.__ranges + + rl->__maplower_ext.__nranges; + + rl->__mapupper_ext.__ranges = (_RuneEntry *)rl->__variable; + rl->__variable = rl->__mapupper_ext.__ranges + + rl->__mapupper_ext.__nranges; + + variable = mapupper_ext_ranges + frl->mapupper_ext_nranges; + frr = runetype_ext_ranges; + rr = rl->__runetype_ext.__ranges; + for (x = 0; x < rl->__runetype_ext.__nranges; ++x) { + uint32_t *types; + + rr[x].__min = frr[x].min; + rr[x].__max = frr[x].max; + rr[x].__map = frr[x].map; + if 
(rr[x].__map == 0) { + int len = rr[x].__max - rr[x].__min + 1; + types = variable; + variable = types + len; + rr[x].__types = rl->__variable; + rl->__variable = rr[x].__types + len; + while (len-- > 0) + rr[x].__types[len] = types[len]; + } else + rr[x].__types = NULL; + } + + frr = maplower_ext_ranges; + rr = rl->__maplower_ext.__ranges; + for (x = 0; x < rl->__maplower_ext.__nranges; ++x) { + rr[x].__min = frr[x].min; + rr[x].__max = frr[x].max; + rr[x].__map = frr[x].map; + } + + frr = mapupper_ext_ranges; + rr = rl->__mapupper_ext.__ranges; + for (x = 0; x < rl->__mapupper_ext.__nranges; ++x) { + rr[x].__min = frr[x].min; + rr[x].__max = frr[x].max; + rr[x].__map = frr[x].map; + } + + (void) memcpy(rl->__variable, variable, rl->__variable_len); + free(fdata); + + /* + * Go out and zero pointers that should be zero. + */ + if (!rl->__variable_len) + rl->__variable = NULL; + + if (!rl->__runetype_ext.__nranges) + rl->__runetype_ext.__ranges = NULL; + + if (!rl->__maplower_ext.__nranges) + rl->__maplower_ext.__ranges = NULL; + + if (!rl->__mapupper_ext.__nranges) + rl->__mapupper_ext.__ranges = NULL; + + return (rl); +} diff --git a/usr/src/lib/libc/port/locale/runefile.h b/usr/src/lib/libc/port/locale/runefile.h new file mode 100644 index 0000000000..b2a6e287ad --- /dev/null +++ b/usr/src/lib/libc/port/locale/runefile.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2005 Ruslan Ermilov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _RUNEFILE_H_ +#define _RUNEFILE_H_ + +#include <sys/types.h> + +#ifndef _CACHED_RUNES +#define _CACHED_RUNES (1 << 8) +#endif + +typedef struct { + int32_t min; + int32_t max; + int32_t map; +} _FileRuneEntry; + +typedef struct { + char magic[8]; + char encoding[32]; + + uint32_t runetype[_CACHED_RUNES]; + int32_t maplower[_CACHED_RUNES]; + int32_t mapupper[_CACHED_RUNES]; + + int32_t runetype_ext_nranges; + int32_t maplower_ext_nranges; + int32_t mapupper_ext_nranges; + + int32_t variable_len; +} _FileRuneLocale; + +#define _FILE_RUNE_MAGIC_1 "RuneMag1" + +#endif /* !_RUNEFILE_H_ */ diff --git a/usr/src/lib/libc/port/locale/runetype.c b/usr/src/lib/libc/port/locale/runetype.c new file mode 100644 index 0000000000..d8018e6a22 --- /dev/null +++ b/usr/src/lib/libc/port/locale/runetype.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <ctype.h> +#include <stdio.h> +#include "runetype.h" + +unsigned long +___runetype(__ct_rune_t c) +{ + size_t lim; + _RuneRange *rr = &_CurrentRuneLocale->__runetype_ext; + _RuneEntry *base, *re; + + if (c < 0 || c == EOF) + return (0L); + + /* Binary search -- see bsearch.c for explanation. 
*/ + base = rr->__ranges; + for (lim = rr->__nranges; lim != 0; lim >>= 1) { + re = base + (lim >> 1); + if (re->__min <= c && c <= re->__max) { + if (re->__types) + return (re->__types[c - re->__min]); + else + return (re->__map); + } else if (c > re->__max) { + base = re + 1; + lim--; + } + } + + return (0L); +} diff --git a/usr/src/lib/libc/port/locale/runetype.h b/usr/src/lib/libc/port/locale/runetype.h new file mode 100644 index 0000000000..e55860c4c8 --- /dev/null +++ b/usr/src/lib/libc/port/locale/runetype.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Because we borrowed the __rune_t definitions from _types.h, we need + * this copyright notice as well: + * + * Copyright (c) 2002 Mike Barcroft <mike@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _RUNETYPE_H_
+#define	_RUNETYPE_H_
+
+#define	_CACHED_RUNES	(1 << 8)	/* Must be a power of 2 */
+#define	_CRMASK		(~(_CACHED_RUNES - 1))	/* bits above the cached range; (c & _CRMASK) == 0 iff 0 <= c < _CACHED_RUNES */
+
+/*
+ * rune_t is declared to be an ``int'' instead of the more natural
+ * ``unsigned long'' or ``long''. Two things are happening here. It is not
+ * unsigned so that EOF (-1) can be naturally assigned to it and used. Also,
+ * it looks like 10646 will be a 31 bit standard. This means that if your
+ * ints cannot hold 32 bits, you will be in trouble. The reason an int was
+ * chosen over a long is that the is*() and to*() routines take ints (says
+ * ANSI C), but they use __ct_rune_t instead of int.
+ *
+ * NOTE: rune_t is not covered by ANSI nor other standards, and should not
+ * be instantiated outside of lib/libc/locale. Use wchar_t. wchar_t and
+ * rune_t must be the same type. Also, wint_t must be no narrower than
+ * wchar_t, and should be able to hold all members of the largest
+ * character set plus one extra value (WEOF), and must be at least 16 bits.
+ *
+ * For compatibility with Solaris, we want to use long in ILP32, and int in
+ * LP64. This is due to historical Solaris legacy. (See <wchar.h> for
+ * the definition.)
+ *
+ * NOTE(review): both branches of the #if below currently declare
+ * __ct_rune_t as int, which does not match the "long in ILP32" statement
+ * above.  Confirm which of the two is intended before relying on either.
+ */
+
+#if defined(_LP64)
+typedef int	__ct_rune_t;	/* arg type for ctype funcs */
+#else
+typedef int	__ct_rune_t;	/* same type as the _LP64 branch -- see NOTE(review) above */
+#endif
+
+typedef int	__rune_t;	/* rune_t (see above) */
+
+/*
+ * The lower 8 bits of runetype[] contain the digit value of the rune.
+ *
+ * One _RuneEntry describes the inclusive rune range [__min, __max].
+ * ___runetype() indexes __types with (rune - __min) when __types is
+ * non-NULL and otherwise returns __map for every rune in the range.
+ */
+typedef struct {
+	__rune_t	__min;		/* First rune of the range */
+	__rune_t	__max;		/* Last rune (inclusive) of the range */
+	__rune_t	__map;		/* What first maps to in maps */
+	unsigned long	*__types;	/* Array of types in range */
+} _RuneEntry;
+
+typedef struct {	/* table of __nranges entries; ___runetype() binary-searches it, so presumably sorted by __min -- confirm */
+	int		__nranges;	/* Number of ranges stored */
+	_RuneEntry	*__ranges;	/* Pointer to the ranges */
+} _RuneRange;
+
+typedef struct {	/* one loaded LC_CTYPE description */
+	char		__magic[8];	/* Magic saying what version we are */
+	char		__encoding[32];	/* ASCII name of this encoding */
+
+	unsigned int	__runetype[_CACHED_RUNES];	/* type bits for runes below _CACHED_RUNES */
+	__rune_t	__maplower[_CACHED_RUNES];	/* lower-case mapping for runes below _CACHED_RUNES */
+	__rune_t	__mapupper[_CACHED_RUNES];	/* upper-case mapping for runes below _CACHED_RUNES */
+
+	/*
+	 * The following are to deal with Runes larger than _CACHED_RUNES - 1.
+	 * Their data is actually contiguous with this structure so as to make
+	 * it easier to read/write from/to disk.
+	 */
+	_RuneRange	__runetype_ext;
+	_RuneRange	__maplower_ext;
+	_RuneRange	__mapupper_ext;
+
+	void		*__variable;	/* Data which depends on the encoding */
+	int		__variable_len;	/* how long that data is */
+} _RuneLocale;
+
+#define	_RUNE_MAGIC_1	"RuneMagi"	/* Indicates version 0 of RuneLocale */
+
+extern _RuneLocale	_DefaultRuneLocale;
+extern _RuneLocale	*_CurrentRuneLocale;
+
+#endif	/* !_RUNETYPE_H_ */
diff --git a/usr/src/lib/libc/port/locale/setlocale.c b/usr/src/lib/libc/port/locale/setlocale.c
new file mode 100644
index 0000000000..c9eda31517
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/setlocale.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 1996 - 2002 FreeBSD Project
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#include "lint.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <alloca.h>
+#include "collate.h"
+#include "lmonetary.h"	/* for __monetary_load_locale() */
+#include "lnumeric.h"	/* for __numeric_load_locale() */
+#include "lmessages.h"	/* for __messages_load_locale() */
+#include "setlocale.h"
+#include "ldpart.h"
+#include "timelocal.h"	/* for __time_load_locale() */
+#include "../i18n/_loc_path.h"
+
+#define	NUM_CATS	7
+/*
+ * Category names for getenv() Note that this was modified
+ * for Solaris. See <iso/locale_iso.h>.
+ *
+ * The table is indexed by the LC_* category value: LC_CTYPE is the first
+ * entry (index 0) and LC_ALL is the last (index NUM_CATS - 1).  Every loop
+ * over the real categories therefore runs from 0 up to, but excluding,
+ * LC_ALL.
+ */
+static char *categories[NUM_CATS] = {
+	"LC_CTYPE",
+	"LC_NUMERIC",
+	"LC_TIME",
+	"LC_COLLATE",
+	"LC_MONETARY",
+	"LC_MESSAGES",
+	"LC_ALL",
+};
+
+/*
+ * Current locales for each category
+ */
+static char current_categories[NUM_CATS][ENCODING_LEN + 1] = {
+	"C",
+	"C",
+	"C",
+	"C",
+	"C",
+	"C",
+	"C",
+};
+
+/*
+ * Path to locale storage directory. See ../i18n/_loc_path.h
+ */
+char	*_PathLocale = _DFLT_LOC_PATH;
+
+/*
+ * The locales we are going to try and load
+ */
+static char new_categories[NUM_CATS][ENCODING_LEN + 1];
+static char saved_categories[NUM_CATS][ENCODING_LEN + 1];
+static char current_locale_string[NUM_CATS * (ENCODING_LEN + 1 + 1)];
+
+static char *currentlocale(void);
+static char *loadlocale(int);
+static const char *__get_locale_env(int);
+
+/*
+ * Query or change the locale of one category, or of all of them.
+ *
+ * category	LC_* value in the range [0, NUM_CATS).
+ * locale	NULL to query; "" to take the names from the environment;
+ *		a single locale name; or, for LC_ALL only, a composite
+ *		"name/name/..." string with exactly one component per real
+ *		category, in category order.
+ *
+ * Returns the resulting locale string (static storage), or NULL with
+ * errno set when a name is too long, the composite string is malformed,
+ * or a category fails to load.  When an LC_ALL change fails part way, the
+ * categories that were already switched are put back.
+ */
+char *
+setlocale(int category, const char *locale)
+{
+	int i, j, saverr;
+	const char *env, *r;
+
+	if (category < 0 || category >= NUM_CATS) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	if (locale == NULL)
+		return (category != LC_ALL ?
+		    current_categories[category] : currentlocale());
+
+	/*
+	 * Default to the current locale for everything.
+	 */
+	for (i = 0; i < NUM_CATS; ++i)
+		(void) strcpy(new_categories[i], current_categories[i]);
+
+	/*
+	 * Now go fill up new_categories from the locale argument
+	 */
+	if (!*locale) {
+		if (category == LC_ALL) {
+			for (i = 0; i < NUM_CATS; ++i) {
+				env = __get_locale_env(i);
+				if (strlen(env) > ENCODING_LEN) {
+					errno = EINVAL;
+					return (NULL);
+				}
+				(void) strcpy(new_categories[i], env);
+			}
+		} else {
+			env = __get_locale_env(category);
+			if (strlen(env) > ENCODING_LEN) {
+				errno = EINVAL;
+				return (NULL);
+			}
+			(void) strcpy(new_categories[category], env);
+		}
+	} else if (category != LC_ALL) {
+		if (strlen(locale) > ENCODING_LEN) {
+			errno = EINVAL;
+			return (NULL);
+		}
+		(void) strcpy(new_categories[category], locale);
+	} else {
+		if ((r = strchr(locale, '/')) == NULL) {
+			if (strlen(locale) > ENCODING_LEN) {
+				errno = EINVAL;
+				return (NULL);
+			}
+			/*
+			 * Start at 0: LC_CTYPE is the first category here,
+			 * so starting at 1 would leave it unchanged.
+			 */
+			for (i = 0; i < NUM_CATS; ++i)
+				(void) strcpy(new_categories[i], locale);
+		} else {
+			char *buf;
+			char *save;
+
+			/*
+			 * strtok_r() modifies the string it scans, so work
+			 * on a private copy of the caller's argument.
+			 */
+			buf = alloca(strlen(locale) + 1);
+			(void) strcpy(buf, locale);
+
+			for (i = 0, save = NULL; i <= LC_ALL; i++) {
+				r = strtok_r(buf, "/", &save);
+				if (r == NULL) {
+					if (i == LC_ALL) {
+						/* Good! Fully specified! */
+						break;
+					}
+					/*
+					 * Composite Locale is inadequately
+					 * specified! (Or with empty fields.)
+					 * The old code would fill fields
+					 * out from the last one, but I think
+					 * this is suboptimal.
+					 */
+					errno = EINVAL;
+					return (NULL);
+				}
+				if (i == LC_ALL) {
+					/* Too many components */
+					errno = EINVAL;
+					return (NULL);
+				}
+				/*
+				 * Reject over-long components instead of
+				 * silently truncating them, as every other
+				 * branch above does.
+				 */
+				if (strlen(r) > ENCODING_LEN) {
+					errno = EINVAL;
+					return (NULL);
+				}
+				(void) strlcpy(new_categories[i], r,
+				    ENCODING_LEN + 1);
+				buf = NULL;	/* for strtok's benefit */
+			}
+		}
+	}
+
+	if (category != LC_ALL)
+		return (loadlocale(category));
+
+	for (i = 0; i < LC_ALL; ++i) {
+		(void) strcpy(saved_categories[i], current_categories[i]);
+		if (loadlocale(i) == NULL) {
+			saverr = errno;
+			/*
+			 * Undo every category already switched, beginning
+			 * with LC_CTYPE at index 0.
+			 */
+			for (j = 0; j < i; j++) {
+				(void) strcpy(new_categories[j],
+				    saved_categories[j]);
+				if (loadlocale(j) == NULL) {
+					(void) strcpy(new_categories[j], "C");
+					(void) loadlocale(j);
+				}
+			}
+			errno = saverr;
+			return (NULL);
+		}
+	}
+	return (currentlocale());
+}
+
+/*
+ * Build the string that describes all current categories.  When every
+ * real category uses the same locale that single name is returned;
+ * otherwise the names are joined with '/' in category order.  The result
+ * lives in the static current_locale_string buffer.
+ */
+static char *
+currentlocale(void)
+{
+	int i;
+
+	(void) strcpy(current_locale_string, current_categories[0]);
+
+	/* Compare against category 0, the name already in the buffer. */
+	for (i = 1; i < LC_ALL; ++i)
+		if (strcmp(current_categories[0], current_categories[i])) {
+			for (i = 1; i < LC_ALL; ++i) {
+				(void) strcat(current_locale_string, "/");
+				(void) strcat(current_locale_string,
+				    current_categories[i]);
+			}
+			break;
+		}
+	return (current_locale_string);
+}
+
+/*
+ * Switch one category to the name staged in new_categories[category].
+ *
+ * Names equal to "." or "..", or containing '/', are rejected with
+ * EINVAL, as is any category without a loader (LC_ALL included).  When
+ * the staged name equals the current one nothing is loaded.  Returns the
+ * category's current name on success, NULL on failure.
+ */
+static char *
+loadlocale(int category)
+{
+	char *new = new_categories[category];
+	char *old = current_categories[category];
+	int (*func)(const char *);
+
+	if ((new[0] == '.' &&
+	    (new[1] == '\0' || (new[1] == '.' && new[2] == '\0'))) ||
+	    strchr(new, '/') != NULL) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	switch (category) {
+	case LC_CTYPE:
+		func = __wrap_setrunelocale;
+		break;
+	case LC_COLLATE:
+		func = __collate_load_tables;
+		break;
+	case LC_TIME:
+		func = __time_load_locale;
+		break;
+	case LC_NUMERIC:
+		func = __numeric_load_locale;
+		break;
+	case LC_MONETARY:
+		func = __monetary_load_locale;
+		break;
+	case LC_MESSAGES:
+		func = __messages_load_locale;
+		break;
+	default:
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	if (strcmp(new, old) == 0)
+		return (old);
+
+	if (func(new) != _LDP_ERROR) {
+		(void) strcpy(old, new);
+		return (old);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the locale name the environment requests for a category, using
+ * the first non-empty value of LC_ALL, the category's own LC_* variable
+ * and LANG, and falling back to "C".
+ */
+static const char *
+__get_locale_env(int category)
+{
+	const char *env;
+
+	/* 1. check LC_ALL (the last table entry, not index 0). */
+	env = getenv(categories[LC_ALL]);
+
+	/* 2. check LC_* */
+	if (env == NULL || !*env)
+		env = getenv(categories[category]);
+
+	/* 3. check LANG */
+	if (env == NULL || !*env)
+		env = getenv("LANG");
+
+	/* 4. if none is set, fall to "C" */
+	if (env == NULL || !*env)
+		env = "C";
+
+	return (env);
+}
diff --git a/usr/src/lib/libc/port/locale/setlocale.h b/usr/src/lib/libc/port/locale/setlocale.h
new file mode 100644
index 0000000000..ee27ed804a
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/setlocale.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 1997 by Andrey A. Chernov, Moscow, Russia.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SETLOCALE_H_
+#define	_SETLOCALE_H_
+
+#define	_PATH_LOCALE	"/usr/share/locale"	/* NOTE(review): setlocale.c initializes _PathLocale from _DFLT_LOC_PATH, not from this -- confirm it is still used */
+#define	ENCODING_LEN	31	/* longest accepted locale name; name buffers are ENCODING_LEN + 1 bytes */
+#define	CATEGORY_LEN	11	/* presumably the longest LC_* category name ("LC_MONETARY", "LC_MESSAGES") -- confirm */
+
+extern char	*_PathLocale;	/* locale storage directory; defined in setlocale.c */
+
+int	__detect_path_locale(void);	/* not defined in this part of the change -- purpose to be confirmed */
+int	__wrap_setrunelocale(const char *);	/* LC_CTYPE loader; returns _LDP_LOADED, or _LDP_ERROR with errno set */
+
+#endif	/* !_SETLOCALE_H_ */
diff --git a/usr/src/lib/libc/port/locale/setrunelocale.c b/usr/src/lib/libc/port/locale/setrunelocale.c
new file mode 100644
index 0000000000..db5fda6dd6
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/setrunelocale.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 1993
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#include "lint.h"
+#include "file64.h"
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <wchar.h>
+#include "runetype.h"
+#include "ldpart.h"
+#include "mblocal.h"
+#include "setlocale.h"
+#include "_ctype.h"
+
+extern _RuneLocale *_Read_RuneMagi(FILE *);
+
+static int __setrunelocale(const char *);
+/*
+ * Make the LC_CTYPE data of locale "encoding" the current rune locale.
+ *
+ * "C" and "POSIX" are served from _DefaultRuneLocale.  Any other name is
+ * read from <_PathLocale>/<encoding>/LC_CTYPE, unless it is the name that
+ * was loaded most recently, in which case the cached copy and its
+ * conversion functions are reinstated.
+ *
+ * Returns 0 on success and an errno-style code on failure; errno itself
+ * is not set here (__wrap_setrunelocale() does that).  On failure the
+ * conversion function pointers are put back to their previous values.
+ */
+static int
+__setrunelocale(const char *encoding)
+{
+	FILE *fp;
+	char name[PATH_MAX];
+	_RuneLocale *rl;
+	int saverr, ret;
+	/* conversion hooks in effect on entry, restored if the load fails */
+	size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
+	    const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
+	size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
+	    mbstate_t *_RESTRICT_KYWD);
+	int (*old__mbsinit)(const mbstate_t *);
+	size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
+	    const char **_RESTRICT_KYWD, size_t, size_t,
+	    mbstate_t *_RESTRICT_KYWD);
+	size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
+	    const wchar_t **_RESTRICT_KYWD, size_t, size_t,
+	    mbstate_t *_RESTRICT_KYWD);
+	/* single-entry cache: the last successfully loaded locale */
+	static char ctype_encoding[ENCODING_LEN + 1];
+	static _RuneLocale *CachedRuneLocale;
+	static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
+	    const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
+	static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
+	    mbstate_t *_RESTRICT_KYWD);
+	static int (*Cached__mbsinit)(const mbstate_t *);
+	static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
+	    const char **_RESTRICT_KYWD, size_t, size_t,
+	    mbstate_t *_RESTRICT_KYWD);
+	static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
+	    const wchar_t **_RESTRICT_KYWD, size_t, size_t,
+	    mbstate_t *_RESTRICT_KYWD);
+
+	/*
+	 * The "C" and "POSIX" locale are always here.
+	 * The result of _none_init() is ignored and the cache is untouched.
+	 */
+	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
+		(void) _none_init(&_DefaultRuneLocale);
+		return (0);
+	}
+
+	/*
+	 * If the locale name is the same as our cache, use the cache.
+	 */
+	if (CachedRuneLocale != NULL &&
+	    strcmp(encoding, ctype_encoding) == 0) {
+		_CurrentRuneLocale = CachedRuneLocale;
+		__mbrtowc = Cached__mbrtowc;
+		__mbsinit = Cached__mbsinit;
+		__mbsnrtowcs = Cached__mbsnrtowcs;
+		__wcrtomb = Cached__wcrtomb;
+		__wcsnrtombs = Cached__wcsnrtombs;
+		return (0);
+	}
+
+	/*
+	 * Slurp the locale file into the cache.
+	 */
+
+	/* Range checking not needed, encoding length already checked before */
+	(void) strcpy(name, _PathLocale);
+	(void) strcat(name, "/");
+	(void) strcat(name, encoding);
+	(void) strcat(name, "/LC_CTYPE");
+
+	if ((fp = fopen(name, "r")) == NULL)
+		return (errno == 0 ? ENOENT : errno);
+
+	if ((rl = _Read_RuneMagi(fp)) == NULL) {
+		saverr = (errno == 0 ? EINVAL : errno);
+		(void) fclose(fp);
+		return (saverr);
+	}
+	(void) fclose(fp);
+
+	old__mbrtowc = __mbrtowc;
+	old__mbsinit = __mbsinit;
+	old__mbsnrtowcs = __mbsnrtowcs;
+	old__wcrtomb = __wcrtomb;
+	old__wcsnrtombs = __wcsnrtombs;
+
+	/* defaults installed before the encoding-specific init routine runs */
+	__mbrtowc = NULL;
+	__mbsinit = NULL;
+	__mbsnrtowcs = __mbsnrtowcs_std;
+	__wcrtomb = NULL;
+	__wcsnrtombs = __wcsnrtombs_std;
+
+	/* dispatch on the encoding name recorded in the locale file */
+	if (strcmp(rl->__encoding, "NONE") == 0)
+		ret = _none_init(rl);
+	else if (strcmp(rl->__encoding, "ASCII") == 0)
+		ret = _ascii_init(rl);
+	else if (strcmp(rl->__encoding, "UTF-8") == 0)
+		ret = _UTF8_init(rl);
+	else if (strcmp(rl->__encoding, "EUC") == 0)
+		ret = _EUC_init(rl);
+	else if (strcmp(rl->__encoding, "GB18030") == 0)
+		ret = _GB18030_init(rl);
+	else if (strcmp(rl->__encoding, "GB2312") == 0)
+		ret = _GB2312_init(rl);
+	else if (strcmp(rl->__encoding, "GBK") == 0)
+		ret = _GBK_init(rl);
+	else if (strcmp(rl->__encoding, "BIG5") == 0)
+		ret = _BIG5_init(rl);
+	else if (strcmp(rl->__encoding, "MSKanji") == 0)
+		ret = _MSKanji_init(rl);
+	else
+		ret = EINVAL;
+
+	if (ret == 0) {
+		if (CachedRuneLocale != NULL) {
+			/* See euc.c */
+			if (strcmp(CachedRuneLocale->__encoding, "EUC") == 0)
+				free(CachedRuneLocale->__variable);
+			free(CachedRuneLocale);
+		}
+		/* presumably the init routine pointed _CurrentRuneLocale at rl -- confirm */
+		CachedRuneLocale = _CurrentRuneLocale;
+		Cached__mbrtowc = __mbrtowc;
+		Cached__mbsinit = __mbsinit;
+		Cached__mbsnrtowcs = __mbsnrtowcs;
+		Cached__wcrtomb = __wcrtomb;
+		Cached__wcsnrtombs = __wcsnrtombs;
+		(void) strcpy(ctype_encoding, encoding);
+
+		/*
+		 * We need to overwrite the _ctype array. This requires
+		 * some finagling. This is because references to it may
+		 * have been baked into applications.
+		 *
+		 * Note that it is interesting that toupper/tolower only
+		 * produce defined results when the input is representable
+		 * as a byte.
+		 */
+
+		/*
+		 * The top half is the type mask array. Because we
+		 * want to support both legacy Solaris code (which have
+		 * mask values baked in to them), and we want to be able
+		 * to import locale files from other sources (FreeBSD)
+		 * which probably uses different masks, we have to perform
+		 * a conversion here. Ugh. Note that the _CTYPE definitions
+		 * we use from FreeBSD are richer than the Solaris legacy.
+		 *
+		 * We have to cope with these limitations though, because the
+		 * inadequate Solaris definitions were baked into binaries.
+		 */
+		for (int i = 0; i < _CACHED_RUNES; i++) {
+			/* ctype can only encode the lower 8 bits. */
+			__ctype[i+1] = rl->__runetype[i] & 0xff;
+			__ctype_mask[i] = rl->__runetype[i];
+		}
+
+		/* The bottom half is the toupper/lower array */
+		for (int i = 0; i < _CACHED_RUNES; i++) {
+			/* identity unless a non-zero mapping differs from i; a lower mapping wins over an upper one */
+			__ctype[258 + i] = i;
+			if (rl->__mapupper[i] && rl->__mapupper[i] != i)
+				__ctype[258+i] = rl->__mapupper[i];
+			if (rl->__maplower[i] && rl->__maplower[i] != i)
+				__ctype[258+i] = rl->__maplower[i];
+
+			/* Don't forget these annoyances either! */
+			__trans_upper[i] = rl->__mapupper[i];
+			__trans_lower[i] = rl->__maplower[i];
+		}
+
+		/*
+		 * Note that we expect the init code will have populated
+		 * the CSWIDTH array (__ctype[514-520]) properly.
+		 */
+	} else {
+		/* init failed: reinstate the previous hooks, drop the new data */
+		__mbrtowc = old__mbrtowc;
+		__mbsinit = old__mbsinit;
+		__mbsnrtowcs = old__mbsnrtowcs;
+		__wcrtomb = old__wcrtomb;
+		__wcsnrtombs = old__wcsnrtombs;
+		free(rl);
+	}
+
+	return (ret);
+}
+/*
+ * Adapter giving __setrunelocale() the loader calling convention used by
+ * setlocale(): returns _LDP_LOADED on success, or stores the error code in
+ * errno and returns _LDP_ERROR.
+ */
+int
+__wrap_setrunelocale(const char *locale)
+{
+	int ret = __setrunelocale(locale);
+
+	if (ret != 0) {
+		errno = ret;
+		return (_LDP_ERROR);
+	}
+	return (_LDP_LOADED);
+}
diff --git a/usr/src/lib/libc/port/locale/strcoll.c b/usr/src/lib/libc/port/locale/strcoll.c
new file mode 100644
index 0000000000..1888063512
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/strcoll.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
+ *		at Electronni Visti IA, Kiev, Ukraine.
+ *			All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "collate.h"
+
+/*
+ * Compare two strings using the loaded collation tables.
+ *
+ * Without usable tables (__collate_load_error) this is strcmp().
+ * Otherwise both strings are optionally run through
+ * __collate_substitute() and then walked one collating element at a
+ * time: the first difference in primary weight decides the result, and
+ * the first difference in secondary weight is remembered as a
+ * tie-breaker for strings whose primaries all match.
+ *
+ * If a substitution buffer cannot be allocated, errno is set to ENOMEM
+ * and the strcmp() result is returned; callers have to check errno to
+ * notice.
+ */
+int
+strcoll(const char *s, const char *s2)
+{
+	const char *lhs, *rhs;
+	char *lhs_buf = NULL, *rhs_buf = NULL;
+	int lhs_len = 1, rhs_len = 1;
+	int lhs_prim, rhs_prim, lhs_sec, rhs_sec;
+	int result = 0, sec_diff = 0;
+
+	if (__collate_load_error)
+		return (strcmp(s, s2));
+
+	if (!__collate_substitute_nontrivial) {
+		lhs = s;
+		rhs = s2;
+	} else {
+		lhs = lhs_buf = __collate_substitute(s);
+		rhs = rhs_buf = __collate_substitute(s2);
+		if (lhs_buf == NULL || rhs_buf == NULL) {
+			errno = ENOMEM;
+			/* free(NULL) is a no-op, so no guards are needed */
+			free(lhs_buf);
+			free(rhs_buf);
+			return (strcmp(s, s2));
+		}
+	}
+
+	while (*lhs && *rhs) {
+		/* advance each side to its next element with a primary */
+		lhs_prim = rhs_prim = 0;
+		for (; *lhs && !lhs_prim; lhs += lhs_len)
+			__collate_lookup(lhs, &lhs_len, &lhs_prim, &lhs_sec);
+		for (; *rhs && !rhs_prim; rhs += rhs_len)
+			__collate_lookup(rhs, &rhs_len, &rhs_prim, &rhs_sec);
+
+		if (!lhs_prim || !rhs_prim)
+			break;
+		if (lhs_prim != rhs_prim) {
+			result = lhs_prim - rhs_prim;
+			goto end;
+		}
+		if (!sec_diff)
+			sec_diff = lhs_sec - rhs_sec;
+	}
+
+	if (*lhs) {
+		if (!*rhs)
+			result = (uchar_t)*lhs;
+	} else if (*rhs) {
+		result = -(int)((uchar_t)*rhs);
+	} else {
+		result = sec_diff;
+	}
+end:
+	free(lhs_buf);
+	free(rhs_buf);
+
+	return (result);
+}
diff --git a/usr/src/lib/libc/port/locale/strfmon.c b/usr/src/lib/libc/port/locale/strfmon.c
new file mode 100644
index 0000000000..456c595a41
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/strfmon.c
@@ -0,0 +1,626 @@
+/*
+ * Copyright (c) 2001 Alexey Zelkin <phantom@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LCONV_C99
+#define	_LCONV_C99
+#endif
+
+#include "lint.h"
+#include <sys/types.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <monetary.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* internal flags */
+#define	NEED_GROUPING		0x01	/* print digits grouped (default) */
+#define	SIGN_POSN_USED		0x02	/* '+' or '(' usage flag */
+#define	LOCALE_POSN		0x04	/* use locale defined +/- (default) */
+#define	PARENTH_POSN		0x08	/* enclose negative amount in () */
+#define	SUPRESS_CURR_SYMBOL	0x10	/* suppress the currency from output */
+#define	LEFT_JUSTIFY		0x20	/* left justify */
+#define	USE_INTL_CURRENCY	0x40	/* use international currency symbol */
+#define	IS_NEGATIVE		0x80	/* is argument value negative ? */
+
+/*
+ * internal macros
+ *
+ * PRINT, PRINTS and GET_NUMBER are only usable inside strfmon(): they
+ * read and write its locals (dst, s, maxsize, fmt) and jump to its
+ * e2big_error label when the output buffer or an int would overflow.
+ */
+#define	PRINT(CH) { \
+	if (dst >= s + maxsize) \
+		goto e2big_error; \
+	*dst++ = CH; \
+}
+
+#define	PRINTS(STR) { \
+	char *tmps = STR; \
+	while (*tmps != '\0') \
+		PRINT(*tmps++); \
+}
+
+#define	GET_NUMBER(VAR) { \
+	VAR = 0; \
+	while (isdigit((unsigned char)*fmt)) { \
+		if (VAR > INT_MAX / 10) \
+			goto e2big_error; \
+		VAR *= 10; \
+		VAR += *fmt - '0'; \
+		if (VAR < 0) \
+			goto e2big_error; \
+		fmt++; \
+	} \
+}
+
+#define	GRPCPY(howmany) { \
+	int i = howmany; \
+	while (i-- > 0) { \
+		avalue_size--; \
+		*--bufend = *(avalue+avalue_size+padded); \
+	} \
+}
+
+#define	GRPSEP { \
+	*--bufend = thousands_sep; \
+	groups++; \
+}
+
+static void __setup_vars(int, char *, char *, char *, char **);
+static int __calc_left_pad(int, char *);
+static char *__format_grouped_double(double, int *, int, int, int);
+
+/*
+ * Format monetary values into s according to format.
+ *
+ * s		output buffer
+ * maxsize	size of s in bytes, including room for the trailing '\0'
+ * format	text with %[flags][width][#left][.right]{i|n} conversions;
+ *		each conversion consumes one double argument and "%%"
+ *		produces a literal '%'
+ *
+ * Returns the number of bytes stored, not counting the trailing '\0'.
+ * On failure returns -1 with errno set: E2BIG when the result does not
+ * fit (or a numeric field overflows), EINVAL for a malformed format, or
+ * the errno left by a failed allocation.
+ */
+ssize_t
+strfmon(char *_RESTRICT_KYWD s, size_t maxsize,
+    const char *_RESTRICT_KYWD format, ...)
+{
+	va_list ap;
+	char *dst;		/* output destination pointer */
+	const char *fmt;	/* current format position pointer */
+	struct lconv *lc;	/* pointer to lconv structure */
+	char *asciivalue;	/* formatted double pointer */
+
+	int flags;		/* formatting options */
+	int pad_char;		/* padding character */
+	int pad_size;		/* pad size */
+	int width;		/* field width */
+	int left_prec;		/* left precision */
+	int right_prec;		/* right precision */
+	double value;		/* just value */
+	char space_char = ' ';	/* space after currency */
+
+	char cs_precedes;	/* values gathered from struct lconv */
+	char sep_by_space;
+	char sign_posn;
+	char *signstr;
+	char *currency_symbol;
+
+	char *tmpptr;		/* temporary vars */
+	int sverrno;
+
+	va_start(ap, format);
+
+	lc = localeconv();
+	dst = s;
+	fmt = format;
+	asciivalue = NULL;
+	currency_symbol = NULL;
+	pad_size = 0;
+
+	while (*fmt) {
+		/* pass nonformatting characters AS IS */
+		if (*fmt != '%')
+			goto literal;
+
+		/* '%' found ! */
+
+		/* "%%" mean just '%' */
+		if (*(fmt+1) == '%') {
+			fmt++;
+	literal:
+			PRINT(*fmt++);
+			continue;
+		}
+
+		/* set up initial values */
+		flags = (NEED_GROUPING|LOCALE_POSN);
+		pad_char = ' ';		/* padding character is "space" */
+		/*
+		 * pad_size doubles as scratch space in the right-justify
+		 * code at the bottom of this loop; reset it so a conversion
+		 * without '#' does not print a previous field's leftover.
+		 */
+		pad_size = 0;		/* no sign padding initially */
+		left_prec = -1;		/* no left precision specified */
+		right_prec = -1;	/* no right precision specified */
+		width = -1;		/* no width specified */
+		value = 0;		/* we have no value to print now */
+
+		/* Flags */
+		for (;;) {
+			switch (*++fmt) {
+			case '=':	/* fill character */
+				pad_char = *++fmt;
+				if (pad_char == '\0')
+					goto format_error;
+				continue;
+			case '^':	/* not group currency */
+				flags &= ~(NEED_GROUPING);
+				continue;
+			case '+':	/* use locale defined signs */
+				if (flags & SIGN_POSN_USED)
+					goto format_error;
+				flags |= (SIGN_POSN_USED|LOCALE_POSN);
+				continue;
+			case '(':	/* enclose negatives with () */
+				if (flags & SIGN_POSN_USED)
+					goto format_error;
+				flags |= (SIGN_POSN_USED|PARENTH_POSN);
+				continue;
+			case '!':	/* suppress currency symbol */
+				flags |= SUPRESS_CURR_SYMBOL;
+				continue;
+			case '-':	/* alignment (left) */
+				flags |= LEFT_JUSTIFY;
+				continue;
+			default:
+				break;
+			}
+			break;
+		}
+
+		/* field Width */
+		if (isdigit((unsigned char)*fmt)) {
+			GET_NUMBER(width);
+			/*
+			 * Do we have enough space to put number with
+			 * required width ?
+			 */
+			if ((unsigned int)width >= maxsize - (dst - s))
+				goto e2big_error;
+		}
+
+		/* Left precision */
+		if (*fmt == '#') {
+			if (!isdigit((unsigned char)*++fmt))
+				goto format_error;
+			GET_NUMBER(left_prec);
+			if ((unsigned int)left_prec >= maxsize - (dst - s))
+				goto e2big_error;
+		}
+
+		/* Right precision */
+		if (*fmt == '.') {
+			if (!isdigit((unsigned char)*++fmt))
+				goto format_error;
+			GET_NUMBER(right_prec);
+			if ((unsigned int)right_prec >= maxsize - (dst - s) -
+			    left_prec)
+				goto e2big_error;
+		}
+
+		/* Conversion Characters */
+		switch (*fmt++) {
+		case 'i':	/* use international currency format */
+			flags |= USE_INTL_CURRENCY;
+			break;
+		case 'n':	/* use national currency format */
+			flags &= ~(USE_INTL_CURRENCY);
+			break;
+		default:
+			/* required char missing or premature EOS */
+			goto format_error;
+		}
+
+		if (currency_symbol != NULL)
+			free(currency_symbol);
+		if (flags & USE_INTL_CURRENCY) {
+			currency_symbol = strdup(lc->int_curr_symbol);
+			if (currency_symbol != NULL)
+				space_char = *(currency_symbol+3);
+		} else
+			currency_symbol = strdup(lc->currency_symbol);
+
+		if (currency_symbol == NULL)
+			goto end_error;	/* ENOMEM. */
+
+		/* value itself */
+		value = va_arg(ap, double);
+
+		/* detect sign */
+		if (value < 0) {
+			flags |= IS_NEGATIVE;
+			value = -value;
+		}
+
+		/* fill left_prec with amount of padding chars */
+		if (left_prec >= 0) {
+			pad_size = __calc_left_pad((flags ^ IS_NEGATIVE),
+			    currency_symbol) -
+			    __calc_left_pad(flags, currency_symbol);
+			if (pad_size < 0)
+				pad_size = 0;
+		}
+
+		if (asciivalue != NULL)
+			free(asciivalue);
+		asciivalue = __format_grouped_double(value, &flags,
+		    left_prec, right_prec, pad_char);
+		if (asciivalue == NULL)
+			goto end_error;	/* errno already set */
+					/* to ENOMEM by malloc() */
+
+		/* set some variables for later use */
+		__setup_vars(flags, &cs_precedes, &sep_by_space, &sign_posn,
+		    &signstr);
+
+		/*
+		 * Description of some LC_MONETARY's values:
+		 *
+		 * p_cs_precedes & n_cs_precedes
+		 *
+		 * = 1 - $currency_symbol precedes the value
+		 *	for a monetary quantity with a non-negative value
+		 * = 0 - symbol succeeds the value
+		 *
+		 * p_sep_by_space & n_sep_by_space
+		 *
+		 * = 0 - no space separates $currency_symbol
+		 *	from the value for a monetary quantity with a
+		 *	non-negative value
+		 * = 1 - space separates the symbol from the value
+		 * = 2 - space separates the symbol and the sign string,
+		 *	if adjacent.
+		 *
+		 * p_sign_posn & n_sign_posn
+		 *
+		 * = 0 - parentheses enclose the quantity and the
+		 *	$currency_symbol
+		 * = 1 - the sign string precedes the quantity and the
+		 *	$currency_symbol
+		 * = 2 - the sign string succeeds the quantity and the
+		 *	$currency_symbol
+		 * = 3 - the sign string precedes the $currency_symbol
+		 * = 4 - the sign string succeeds the $currency_symbol
+		 *
+		 */
+
+		tmpptr = dst;
+
+		while (pad_size-- > 0)
+			PRINT(' ');
+
+		if (sign_posn == 0 && (flags & IS_NEGATIVE))
+			PRINT('(');
+
+		if (cs_precedes == 1) {
+			if (sign_posn == 1 || sign_posn == 3) {
+				PRINTS(signstr);
+				if (sep_by_space == 2)	/* XXX: ? */
+					PRINT(' ');
+			}
+
+			if (!(flags & SUPRESS_CURR_SYMBOL)) {
+				PRINTS(currency_symbol);
+
+				if (sign_posn == 4) {
+					if (sep_by_space == 2)
+						PRINT(space_char);
+					PRINTS(signstr);
+					if (sep_by_space == 1)
+						PRINT(' ');
+				} else if (sep_by_space == 1)
+					PRINT(space_char);
+			}
+		} else if (sign_posn == 1)
+			PRINTS(signstr);
+
+		PRINTS(asciivalue);
+
+		if (cs_precedes == 0) {
+			if (sign_posn == 3) {
+				if (sep_by_space == 1)
+					PRINT(' ');
+				PRINTS(signstr);
+			}
+
+			if (!(flags & SUPRESS_CURR_SYMBOL)) {
+				if ((sign_posn == 3 && sep_by_space == 2) ||
+				    (sep_by_space == 1 && (sign_posn == 0 ||
+				    sign_posn == 1 || sign_posn == 2 ||
+				    sign_posn == 4)))
+					PRINT(space_char);
+				PRINTS(currency_symbol); /* XXX: len */
+				if (sign_posn == 4) {
+					if (sep_by_space == 2)
+						PRINT(' ');
+					PRINTS(signstr);
+				}
+			}
+		}
+
+		if (sign_posn == 2) {
+			if (sep_by_space == 2)
+				PRINT(' ');
+			PRINTS(signstr);
+		}
+
+		if (sign_posn == 0 && (flags & IS_NEGATIVE))
+			PRINT(')');
+
+		if (dst - tmpptr < width) {
+			if (flags & LEFT_JUSTIFY) {
+				while (dst - tmpptr < width)
+					PRINT(' ');
+			} else {
+				/*
+				 * Right justify: shift what was just written
+				 * to the end of the field and blank-fill the
+				 * front.  The width check above guarantees
+				 * the field fits in the buffer.
+				 */
+				pad_size = dst-tmpptr;
+				memmove(tmpptr + width-pad_size, tmpptr,
+				    pad_size);
+				(void) memset(tmpptr, ' ', width-pad_size);
+				dst += width-pad_size;
+			}
+		}
+	}
+
+	PRINT('\0');
+	va_end(ap);
+	free(asciivalue);
+	free(currency_symbol);
+	return (dst - s - 1);	/* size of put data except trailing '\0' */
+
+e2big_error:
+	errno = E2BIG;
+	goto end_error;
+
+format_error:
+	errno = EINVAL;
+
+end_error:
+	/* keep the errno that describes the failure across the cleanup */
+	sverrno = errno;
+	if (asciivalue != NULL)
+		free(asciivalue);
+	if (currency_symbol != NULL)
+		free(currency_symbol);
+	errno = sverrno;
+	va_end(ap);
+	return (-1);
+}
+
+static void
+__setup_vars(int flags, char *cs_precedes, char *sep_by_space,
+    char *sign_posn, char **signstr)
+{
+
+	struct lconv *lc = localeconv();
+
+	if ((flags & IS_NEGATIVE) && (flags & USE_INTL_CURRENCY)) {
+		*cs_precedes = lc->int_n_cs_precedes;
+		*sep_by_space = lc->int_n_sep_by_space;
+		*sign_posn = (flags & PARENTH_POSN) ?
0 : lc->int_n_sign_posn; + *signstr = (lc->negative_sign[0] == '\0') ? "-" + : lc->negative_sign; + } else if (flags & USE_INTL_CURRENCY) { + *cs_precedes = lc->int_p_cs_precedes; + *sep_by_space = lc->int_p_sep_by_space; + *sign_posn = (flags & PARENTH_POSN) ? 0 : lc->int_p_sign_posn; + *signstr = lc->positive_sign; + } else if (flags & IS_NEGATIVE) { + *cs_precedes = lc->n_cs_precedes; + *sep_by_space = lc->n_sep_by_space; + *sign_posn = (flags & PARENTH_POSN) ? 0 : lc->n_sign_posn; + *signstr = (lc->negative_sign[0] == '\0') ? "-" + : lc->negative_sign; + } else { + *cs_precedes = lc->p_cs_precedes; + *sep_by_space = lc->p_sep_by_space; + *sign_posn = (flags & PARENTH_POSN) ? 0 : lc->p_sign_posn; + *signstr = lc->positive_sign; + } + + /* Set defult values for unspecified information. */ + if (*cs_precedes != 0) + *cs_precedes = 1; + if (*sep_by_space == CHAR_MAX) + *sep_by_space = 0; + if (*sign_posn == CHAR_MAX) + *sign_posn = 0; +} + +static int +__calc_left_pad(int flags, char *cur_symb) +{ + + char cs_precedes, sep_by_space, sign_posn, *signstr; + int left_chars = 0; + + __setup_vars(flags, &cs_precedes, &sep_by_space, &sign_posn, &signstr); + + if (cs_precedes != 0) { + left_chars += strlen(cur_symb); + if (sep_by_space != 0) + left_chars++; + } + + switch (sign_posn) { + case 1: + left_chars += strlen(signstr); + break; + case 3: + case 4: + if (cs_precedes != 0) + left_chars += strlen(signstr); + } + return (left_chars); +} + +static int +get_groups(int size, char *grouping) +{ + + int chars = 0; + + if (*grouping == CHAR_MAX || *grouping <= 0) /* no grouping ? */ + return (0); + + while (size > (int)*grouping) { + chars++; + size -= (int)*grouping++; + /* no more grouping ? */ + if (*grouping == CHAR_MAX) + break; + /* rest grouping with same value ? 
*/ + if (*grouping == 0) { + chars += (size - 1) / *(grouping - 1); + break; + } + } + return (chars); +} + +/* convert double to ASCII */ +static char * +__format_grouped_double(double value, int *flags, + int left_prec, int right_prec, int pad_char) +{ + + char *rslt; + char *avalue; + int avalue_size; + char fmt[32]; + + size_t bufsize; + char *bufend; + + int padded; + + struct lconv *lc = localeconv(); + char *grouping; + char decimal_point; + char thousands_sep; + + int groups = 0; + + grouping = lc->mon_grouping; + decimal_point = *lc->mon_decimal_point; + if (decimal_point == '\0') + decimal_point = *lc->decimal_point; + thousands_sep = *lc->mon_thousands_sep; + if (thousands_sep == '\0') + thousands_sep = *lc->thousands_sep; + + /* fill left_prec with default value */ + if (left_prec == -1) + left_prec = 0; + + /* fill right_prec with default value */ + if (right_prec == -1) { + if (*flags & USE_INTL_CURRENCY) + right_prec = lc->int_frac_digits; + else + right_prec = lc->frac_digits; + + if (right_prec == CHAR_MAX) /* POSIX locale ? 
*/ + right_prec = 2; + } + + if (*flags & NEED_GROUPING) + left_prec += get_groups(left_prec, grouping); + + /* convert to string */ + (void) snprintf(fmt, sizeof (fmt), "%%%d.%df", + left_prec + right_prec + 1, right_prec); + avalue_size = asprintf(&avalue, fmt, value); + if (avalue_size < 0) + return (NULL); + + /* make sure that we've enough space for result string */ + bufsize = strlen(avalue)*2+1; + rslt = calloc(1, bufsize); + if (rslt == NULL) { + free(avalue); + return (NULL); + } + bufend = rslt + bufsize - 1; /* reserve space for trailing '\0' */ + + /* skip spaces at beggining */ + padded = 0; + while (avalue[padded] == ' ') { + padded++; + avalue_size--; + } + + if (right_prec > 0) { + bufend -= right_prec; + (void) memcpy(bufend, avalue + avalue_size+padded-right_prec, + right_prec); + *--bufend = decimal_point; + avalue_size -= (right_prec + 1); + } + + if ((*flags & NEED_GROUPING) && + thousands_sep != '\0' && /* XXX: need investigation */ + *grouping != CHAR_MAX && + *grouping > 0) { + while (avalue_size > (int)*grouping) { + GRPCPY(*grouping); + GRPSEP; + grouping++; + + /* no more grouping ? */ + if (*grouping == CHAR_MAX) + break; + + /* rest grouping with same value ? 
*/ + if (*grouping == 0) { + grouping--; + while (avalue_size > *grouping) { + GRPCPY(*grouping); + GRPSEP; + } + } + } + if (avalue_size != 0) + GRPCPY(avalue_size); + padded -= groups; + + } else { + bufend -= avalue_size; + (void) memcpy(bufend, avalue+padded, avalue_size); + if (right_prec == 0) + padded--; /* decrease assumed $decimal_point */ + } + + /* do padding with pad_char */ + if (padded > 0) { + bufend -= padded; + (void) memset(bufend, pad_char, padded); + } + + bufsize = bufsize - (bufend - rslt) + 1; + memmove(rslt, bufend, bufsize); + free(avalue); + return (rslt); +} diff --git a/usr/src/lib/libc/port/locale/strftime.c b/usr/src/lib/libc/port/locale/strftime.c new file mode 100644 index 0000000000..9e7e30f17c --- /dev/null +++ b/usr/src/lib/libc/port/locale/strftime.c @@ -0,0 +1,527 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include "tzfile.h" +#include <fcntl.h> +#include <sys/stat.h> +#include <string.h> +#include <stdio.h> +#include "timelocal.h" + +static char *_add(const char *, char *, const char *); +static char *_conv(int, const char *, char *, const char *); +static char *_fmt(const char *, const struct tm *, char *, const char * const); +static char *_yconv(int, int, int, int, char *, const char *); + +extern char *tzname[]; + +#define IN_NONE 0 +#define IN_SOME 1 +#define IN_THIS 2 +#define IN_ALL 3 + +#define PAD_DEFAULT 0 +#define PAD_LESS 1 +#define PAD_SPACE 2 +#define PAD_ZERO 3 + +static const char *fmt_padding[][4] = { + /* DEFAULT, LESS, SPACE, ZERO */ +#define PAD_FMT_MONTHDAY 0 +#define PAD_FMT_HMS 0 +#define PAD_FMT_CENTURY 0 +#define PAD_FMT_SHORTYEAR 0 +#define PAD_FMT_MONTH 0 +#define PAD_FMT_WEEKOFYEAR 0 +#define PAD_FMT_DAYOFMONTH 0 + { "%02d", "%d", "%2d", "%02d" }, +#define PAD_FMT_SDAYOFMONTH 1 +#define PAD_FMT_SHMS 1 + { "%2d", "%d", "%2d", "%02d" }, +#define PAD_FMT_DAYOFYEAR 2 + { "%03d", "%d", "%3d", "%03d" }, +#define PAD_FMT_YEAR 3 + { "%04d", "%d", "%4d", "%04d" } +}; + + +size_t +strftime(char *_RESTRICT_KYWD s, size_t maxsize, + const char *_RESTRICT_KYWD format, const struct tm *_RESTRICT_KYWD t) +{ + char *p; + + tzset(); + p = _fmt(((format == NULL) ? "%c" : format), t, s, s + maxsize); + if (p == s + maxsize) + return (0); + *p = '\0'; + return (p - s); +} + +static char * +_fmt(const char *format, const struct tm *t, char *pt, const char * const ptlim) +{ + int Ealternative, Oalternative, PadIndex; + struct lc_time_T *tptr = __get_current_time_locale(); + +#define PADDING(x) fmt_padding[x][PadIndex] + + for (; *format; ++format) { + if (*format == '%') { + Ealternative = 0; + Oalternative = 0; + PadIndex = PAD_DEFAULT; +label: + switch (*++format) { + case '\0': + --format; + break; + case 'A': + pt = _add((t->tm_wday < 0 || + t->tm_wday >= DAYSPERWEEK) ? + "?" 
: tptr->weekday[t->tm_wday], + pt, ptlim); + continue; + case 'a': + pt = _add((t->tm_wday < 0 || + t->tm_wday >= DAYSPERWEEK) ? + "?" : tptr->wday[t->tm_wday], + pt, ptlim); + continue; + case 'B': + pt = _add((t->tm_mon < 0 || + t->tm_mon >= MONSPERYEAR) ? + "?" : (tptr->month)[t->tm_mon], + pt, ptlim); + continue; + case 'b': + case 'h': + pt = _add((t->tm_mon < 0 || + t->tm_mon >= MONSPERYEAR) ? + "?" : tptr->mon[t->tm_mon], + pt, ptlim); + continue; + case 'C': + /* + * %C used to do a... + * _fmt("%a %b %e %X %Y", t); + * ...whereas now POSIX 1003.2 calls for + * something completely different. + * (ado, 1993-05-24) + */ + pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 0, + pt, ptlim); + continue; + case 'c': + pt = _fmt(tptr->c_fmt, t, pt, ptlim); + continue; + case 'D': + pt = _fmt("%m/%d/%y", t, pt, ptlim); + continue; + case 'd': + pt = _conv(t->tm_mday, + PADDING(PAD_FMT_DAYOFMONTH), pt, ptlim); + continue; + case 'E': + if (Ealternative || Oalternative) + break; + Ealternative++; + goto label; + case 'O': + /* + * C99 locale modifiers. + * The sequences + * %Ec %EC %Ex %EX %Ey %EY + * %Od %oe %OH %OI %Om %OM + * %OS %Ou %OU %OV %Ow %OW %Oy + * are supposed to provide alternate + * representations. + */ + if (Ealternative || Oalternative) + break; + Oalternative++; + goto label; + case 'e': + pt = _conv(t->tm_mday, + PADDING(PAD_FMT_SDAYOFMONTH), pt, ptlim); + continue; + case 'F': + pt = _fmt("%Y-%m-%d", t, pt, ptlim); + continue; + case 'H': + pt = _conv(t->tm_hour, PADDING(PAD_FMT_HMS), + pt, ptlim); + continue; + case 'I': + pt = _conv((t->tm_hour % 12) ? + (t->tm_hour % 12) : 12, + PADDING(PAD_FMT_HMS), pt, ptlim); + continue; + case 'j': + pt = _conv(t->tm_yday + 1, + PADDING(PAD_FMT_DAYOFYEAR), pt, ptlim); + continue; + case 'k': + /* + * This used to be... + * _conv(t->tm_hour % 12 ? + * t->tm_hour % 12 : 12, 2, ' '); + * ...and has been changed to the below to + * match SunOS 4.1.1 and Arnold Robbins' + * strftime version 3.0. 
That is, "%k" and + * "%l" have been swapped. + * (ado, 1993-05-24) + */ + pt = _conv(t->tm_hour, + PADDING(PAD_FMT_SHMS), pt, ptlim); + continue; + case 'l': + /* + * This used to be... + * _conv(t->tm_hour, 2, ' '); + * ...and has been changed to the below to + * match SunOS 4.1.1 and Arnold Robbin's + * strftime version 3.0. That is, "%k" and + * "%l" have been swapped. + * (ado, 1993-05-24) + */ + pt = _conv((t->tm_hour % 12) ? + (t->tm_hour % 12) : 12, + PADDING(PAD_FMT_SHMS), pt, ptlim); + continue; + case 'M': + pt = _conv(t->tm_min, PADDING(PAD_FMT_HMS), + pt, ptlim); + continue; + case 'm': + pt = _conv(t->tm_mon + 1, + PADDING(PAD_FMT_MONTH), + pt, ptlim); + continue; + case 'n': + pt = _add("\n", pt, ptlim); + continue; + case 'p': + pt = _add((t->tm_hour >= (HOURSPERDAY / 2)) ? + tptr->pm : tptr->am, pt, ptlim); + continue; + case 'R': + pt = _fmt("%H:%M", t, pt, ptlim); + continue; + case 'r': + pt = _fmt(tptr->ampm_fmt, t, pt, ptlim); + continue; + case 'S': + pt = _conv(t->tm_sec, PADDING(PAD_FMT_HMS), + pt, ptlim); + continue; + + /* + * Note: 's' for seconds since epoch was removed. + * While FreeBSD and Linux appear to support this, + * Sun Solaris does not. Furthermore, the FreeBSD + * implementation was not correct for _LP64. + */ + + case 'T': + pt = _fmt("%H:%M:%S", t, pt, ptlim); + continue; + case 't': + pt = _add("\t", pt, ptlim); + continue; + case 'U': + pt = _conv((t->tm_yday + DAYSPERWEEK - + t->tm_wday) / DAYSPERWEEK, + PADDING(PAD_FMT_WEEKOFYEAR), + pt, ptlim); + continue; + case 'u': + /* + * From Arnold Robbins' strftime version 3.0: + * "ISO 8601: Weekday as a decimal number + * [1 (Monday) - 7]" + * (ado, 1993-05-24) + */ + pt = _conv((t->tm_wday == 0) ? 
+ DAYSPERWEEK : t->tm_wday, + "%d", pt, ptlim); + continue; + case 'V': /* ISO 8601 week number */ + case 'G': /* ISO 8601 year (four digits) */ + case 'g': /* ISO 8601 year (two digits) */ +/* + * From Arnold Robbins' strftime version 3.0: "the week number of the + * year (the first Monday as the first day of week 1) as a decimal number + * (01-53)." + * (ado, 1993-05-24) + * + * From "http://www.ft.uni-erlangen.de/~mskuhn/iso-time.html" by Markus Kuhn: + * "Week 01 of a year is per definition the first week which has the + * Thursday in this year, which is equivalent to the week which contains + * the fourth day of January. In other words, the first week of a new year + * is the week which has the majority of its days in the new year. Week 01 + * might also contain days from the previous year and the week before week + * 01 of a year is the last week (52 or 53) of the previous year even if + * it contains days from the new year. A week starts with Monday (day 1) + * and ends with Sunday (day 7). For example, the first week of the year + * 1997 lasts from 1996-12-30 to 1997-01-05..." + * (ado, 1996-01-02) + */ + { + int year; + int base; + int yday; + int wday; + int w; + + year = t->tm_year; + base = TM_YEAR_BASE; + yday = t->tm_yday; + wday = t->tm_wday; + for (;;) { + int len; + int bot; + int top; + + len = isleap_sum(year, base) ? + DAYSPERLYEAR : DAYSPERNYEAR; + /* + * What yday (-3 ... 3) does + * the ISO year begin on? + */ + bot = ((yday + 11 - wday) % + DAYSPERWEEK) - 3; + /* + * What yday does the NEXT + * ISO year begin on? + */ + top = bot - (len % DAYSPERWEEK); + if (top < -3) + top += DAYSPERWEEK; + top += len; + if (yday >= top) { + ++base; + w = 1; + break; + } + if (yday >= bot) { + w = 1 + ((yday - bot) / + DAYSPERWEEK); + break; + } + --base; + yday += isleap_sum(year, base) ? 
+ DAYSPERLYEAR : DAYSPERNYEAR; + } +#ifdef XPG4_1994_04_09 + if ((w == 52 && t->tm_mon == TM_JANUARY) || + (w == 1 && t->tm_mon == TM_DECEMBER)) + w = 53; +#endif /* defined XPG4_1994_04_09 */ + if (*format == 'V') + pt = _conv(w, + PADDING(PAD_FMT_WEEKOFYEAR), + pt, ptlim); + else if (*format == 'g') { + pt = _yconv(year, base, 0, 1, + pt, ptlim); + } else + pt = _yconv(year, base, 1, 1, + pt, ptlim); + } + continue; + case 'v': + /* + * From Arnold Robbins' strftime version 3.0: + * "date as dd-bbb-YYYY" + * (ado, 1993-05-24) + */ + pt = _fmt("%e-%b-%Y", t, pt, ptlim); + continue; + case 'W': + pt = _conv((t->tm_yday + DAYSPERWEEK - + (t->tm_wday ? + (t->tm_wday - 1) : + (DAYSPERWEEK - 1))) / DAYSPERWEEK, + PADDING(PAD_FMT_WEEKOFYEAR), + pt, ptlim); + continue; + case 'w': + pt = _conv(t->tm_wday, "%d", pt, ptlim); + continue; + case 'X': + pt = _fmt(tptr->X_fmt, t, pt, ptlim); + continue; + case 'x': + pt = _fmt(tptr->x_fmt, t, pt, ptlim); + continue; + case 'y': + pt = _yconv(t->tm_year, TM_YEAR_BASE, 0, 1, + pt, ptlim); + continue; + case 'Y': + pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 1, + pt, ptlim); + continue; + case 'Z': + if (t->tm_isdst >= 0) + pt = _add(tzname[t->tm_isdst != 0], + pt, ptlim); + /* + * C99 says that %Z must be replaced by the + * empty string if the time zone is not + * determinable. + */ + continue; + case 'z': + { + int diff; + char const * sign; + + if (t->tm_isdst < 0) + continue; + /* + * C99 says that the UTC offset must + * be computed by looking only at + * tm_isdst. This requirement is + * incorrect, since it means the code + * must rely on magic (in this case + * altzone and timezone), and the + * magic might not have the correct + * offset. Doing things correctly is + * tricky and requires disobeying C99; + * see GNU C strftime for details. + * For now, punt and conform to the + * standard, even though it's incorrect. 
+ * + * C99 says that %z must be replaced by the + * empty string if the time zone is not + * determinable, so output nothing if the + * appropriate variables are not available. + */ + if (t->tm_isdst == 0) + diff = -timezone; + else + diff = -altzone; + if (diff < 0) { + sign = "-"; + diff = -diff; + } else + sign = "+"; + pt = _add(sign, pt, ptlim); + diff /= SECSPERMIN; + diff = (diff / MINSPERHOUR) * 100 + + (diff % MINSPERHOUR); + pt = _conv(diff, PADDING(PAD_FMT_YEAR), + pt, ptlim); + } + continue; + case '+': + pt = _fmt(tptr->date_fmt, t, pt, ptlim); + continue; + case '-': + if (PadIndex != PAD_DEFAULT) + break; + PadIndex = PAD_LESS; + goto label; + case '_': + if (PadIndex != PAD_DEFAULT) + break; + PadIndex = PAD_SPACE; + goto label; + case '0': + if (PadIndex != PAD_DEFAULT) + break; + PadIndex = PAD_ZERO; + goto label; + case '%': + /* + * X311J/88-090 (4.12.3.5): if conversion char is + * undefined, behavior is undefined. Print out the + * character itself as printf(3) also does. + */ + default: + break; + } + } + if (pt == ptlim) + break; + *pt++ = *format; + } + return (pt); +} + +static char * +_conv(const int n, const char *format, char *const pt, + const char *const ptlim) +{ + char buf[12]; + + (void) sprintf(buf, format, n); + return (_add(buf, pt, ptlim)); +} + +static char * +_add(const char *str, char *pt, const char *const ptlim) +{ + while (pt < ptlim && (*pt = *str++) != '\0') + ++pt; + return (pt); +} + +/* + * POSIX and the C Standard are unclear or inconsistent about + * what %C and %y do if the year is negative or exceeds 9999. + * Use the convention that %C concatenated with %y yields the + * same output as %Y, and that %Y contains at least 4 bytes, + * with more only if necessary. 
+ */ + +static char * +_yconv(const int a, const int b, const int convert_top, const int convert_yy, + char *pt, const char * const ptlim) +{ + register int lead; + register int trail; + +#define DIVISOR 100 + trail = a % DIVISOR + b % DIVISOR; + lead = a / DIVISOR + b / DIVISOR + trail / DIVISOR; + trail %= DIVISOR; + if (trail < 0 && lead > 0) { + trail += DIVISOR; + --lead; + } else if (lead < 0 && trail > 0) { + trail -= DIVISOR; + ++lead; + } + if (convert_top) { + if (lead == 0 && trail < 0) + pt = _add("-0", pt, ptlim); + else pt = _conv(lead, "%02d", pt, ptlim); + } + if (convert_yy) + pt = _conv(((trail < 0) ? -trail : trail), "%02d", pt, ptlim); + return (pt); +} diff --git a/usr/src/lib/libc/port/locale/strptime.c b/usr/src/lib/libc/port/locale/strptime.c new file mode 100644 index 0000000000..5b59acaab1 --- /dev/null +++ b/usr/src/lib/libc/port/locale/strptime.c @@ -0,0 +1,539 @@ +/* + * Powerdog Industries kindly requests feedback from anyone modifying + * this function: + * + * Date: Thu, 05 Jun 1997 23:17:17 -0400 + * From: Kevin Ruddy <kevin.ruddy@powerdog.com> + * To: James FitzGibbon <james@nexis.net> + * Subject: Re: Use of your strptime(3) code (fwd) + * + * The reason for the "no mod" clause was so that modifications would + * come back and we could integrate them and reissue so that a wider + * audience could use it (thereby spreading the wealth). This has + * made it possible to get strptime to work on many operating systems. + * I'm not sure why that's "plain unacceptable" to the FreeBSD team. + * + * Anyway, you can change it to "with or without modification" as + * you see fit. Enjoy. + * + * Kevin Ruddy + * Powerdog Industries, Inc. + */ +/* + * Copyright (c) 1994 Powerdog Industries. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed by Powerdog Industries. + * 4. The name of Powerdog Industries may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright 2010, Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#include "lint.h" +#include <time.h> +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> +#include <alloca.h> +#include "timelocal.h" + +#define asizeof(a) (sizeof (a) / sizeof ((a)[0])) + +static char * +__strptime(const char *buf, const char *fmt, struct tm *tm) +{ + char c; + const char *ptr; + int i, len; + int Ealternative, Oalternative; + struct lc_time_T *tptr = __get_current_time_locale(); + + ptr = fmt; + while (*ptr != 0) { + if (*buf == 0) + break; + + c = *ptr++; + + if (c != '%') { + if (isspace((unsigned char)c)) + while (*buf != 0 && + isspace((unsigned char)*buf)) + buf++; + else if (c != *buf++) + return (0); + continue; + } + + Ealternative = 0; + Oalternative = 0; +label: + c = *ptr++; + switch (c) { + case 0: + case '%': + if (*buf++ != '%') + return (0); + break; + + case '+': + buf = __strptime(buf, tptr->date_fmt, tm); + if (buf == 0) + return (0); + break; + + case 'C': + if (!isdigit((unsigned char)*buf)) + return (0); + + /* XXX This will break for 3-digit centuries. 
*/ + len = 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 19) + return (0); + + tm->tm_year = i * 100 - 1900; + break; + + case 'c': + buf = __strptime(buf, tptr->c_fmt, tm); + if (buf == 0) + return (0); + break; + + case 'D': + buf = __strptime(buf, "%m/%d/%y", tm); + if (buf == 0) + return (0); + break; + + case 'E': + if (Ealternative || Oalternative) + break; + Ealternative++; + goto label; + + case 'O': + if (Ealternative || Oalternative) + break; + Oalternative++; + goto label; + + case 'F': + buf = __strptime(buf, "%Y-%m-%d", tm); + if (buf == 0) + return (0); + break; + + case 'R': + buf = __strptime(buf, "%H:%M", tm); + if (buf == 0) + return (0); + break; + + case 'r': + buf = __strptime(buf, tptr->ampm_fmt, tm); + if (buf == 0) + return (0); + break; + + case 'T': + buf = __strptime(buf, "%H:%M:%S", tm); + if (buf == 0) + return (0); + break; + + case 'X': + buf = __strptime(buf, tptr->X_fmt, tm); + if (buf == 0) + return (0); + break; + + case 'x': + buf = __strptime(buf, tptr->x_fmt, tm); + if (buf == 0) + return (0); + break; + + case 'j': + if (!isdigit((unsigned char)*buf)) + return (0); + + len = 3; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 1 || i > 366) + return (0); + + tm->tm_yday = i - 1; + break; + + case 'M': + case 'S': + if (*buf == 0 || isspace((unsigned char)*buf)) + break; + + if (!isdigit((unsigned char)*buf)) + return (0); + + len = 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + + if (c == 'M') { + if (i > 59) + return (0); + tm->tm_min = i; + } else { + if (i > 60) + return (0); + tm->tm_sec = i; + } + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'H': + case 'I': + case 'k': + case 'l': + /* + * Of these, %l is the only specifier explicitly 
+ * documented as not being zero-padded. However, + * there is no harm in allowing zero-padding. + * + * XXX The %l specifier may gobble one too many + * digits if used incorrectly. + */ + if (!isdigit((unsigned char)*buf)) + return (0); + + len = 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (c == 'H' || c == 'k') { + if (i > 23) + return (0); + } else if (i > 12) + return (0); + + tm->tm_hour = i; + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'p': + /* + * XXX This is bogus if parsed before hour-related + * specifiers. + */ + len = strlen(tptr->am); + if (strncasecmp(buf, tptr->am, len) == 0) { + if (tm->tm_hour > 12) + return (0); + if (tm->tm_hour == 12) + tm->tm_hour = 0; + buf += len; + break; + } + + len = strlen(tptr->pm); + if (strncasecmp(buf, tptr->pm, len) == 0) { + if (tm->tm_hour > 12) + return (0); + if (tm->tm_hour != 12) + tm->tm_hour += 12; + buf += len; + break; + } + + return (0); + + case 'A': + case 'a': + for (i = 0; i < asizeof(tptr->weekday); i++) { + len = strlen(tptr->weekday[i]); + if (strncasecmp(buf, tptr->weekday[i], len) == + 0) + break; + len = strlen(tptr->wday[i]); + if (strncasecmp(buf, tptr->wday[i], len) == 0) + break; + } + if (i == asizeof(tptr->weekday)) + return (0); + + tm->tm_wday = i; + buf += len; + break; + + case 'U': + case 'W': + /* + * XXX This is bogus, as we can not assume any valid + * information present in the tm structure at this + * point to calculate a real value, so just check the + * range for now. 
+ */ + if (!isdigit((unsigned char)*buf)) + return (0); + + len = 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i > 53) + return (0); + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'w': + if (!isdigit((unsigned char)*buf)) + return (0); + + i = *buf - '0'; + if (i > 6) + return (0); + + tm->tm_wday = i; + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'd': + case 'e': + /* + * The %e specifier is explicitly documented as not + * being zero-padded but there is no harm in allowing + * such padding. + * + * XXX The %e specifier may gobble one too many + * digits if used incorrectly. + */ + if (!isdigit((unsigned char)*buf)) + return (0); + + len = 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i > 31) + return (0); + + tm->tm_mday = i; + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'B': + case 'b': + case 'h': + for (i = 0; i < asizeof(tptr->month); i++) { + len = strlen(tptr->month[i]); + if (strncasecmp(buf, tptr->month[i], len) == 0) + break; + } + /* + * Try the abbreviated month name if the full name + * wasn't found. 
+ */ + if (i == asizeof(tptr->month)) { + for (i = 0; i < asizeof(tptr->month); i++) { + len = strlen(tptr->mon[i]); + if (strncasecmp(buf, tptr->mon[i], + len) == 0) + break; + } + } + if (i == asizeof(tptr->month)) + return (0); + + tm->tm_mon = i; + buf += len; + break; + + case 'm': + if (!isdigit((unsigned char)*buf)) + return (0); + + len = 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 1 || i > 12) + return (0); + + tm->tm_mon = i - 1; + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'Y': + case 'y': + if (*buf == 0 || isspace((unsigned char)*buf)) + break; + + if (!isdigit((unsigned char)*buf)) + return (0); + + len = (c == 'Y') ? 4 : 2; + for (i = 0; + len && isdigit((unsigned char)*buf); + buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (c == 'Y') + i -= 1900; + if (c == 'y' && i < 69) + i += 100; + if (i < 0) + return (0); + + tm->tm_year = i; + + if (*buf != 0 && isspace((unsigned char)*buf)) + while (*ptr != 0 && + !isspace((unsigned char)*ptr)) + ptr++; + break; + + case 'Z': + { + const char *cp = buf; + char *zonestr; + + while (isupper((unsigned char)*cp)) + ++cp; + if (cp - buf) { + zonestr = alloca(cp - buf + 1); + (void) strncpy(zonestr, buf, cp - buf); + zonestr[cp - buf] = '\0'; + tzset(); + /* + * Once upon a time this supported "GMT", + * for GMT, but we removed this as Solaris + * doesn't have it, and we lack the needed + * timegm function. + */ + if (0 == strcmp(zonestr, tzname[0])) { + tm->tm_isdst = 0; + } else if (0 == strcmp(zonestr, tzname[1])) { + tm->tm_isdst = 1; + } else { + return (0); + } + buf += cp - buf; + } + } + break; + + /* + * Note that there used to be support %z and %s, but these + * are not supported by Solaris, so we have removed them. + * They would have required timegm() which is missing. 
+ */ + } + } + return ((char *)buf); +} + +char * +strptime(const char *buf, const char *fmt, struct tm *tm) +{ + /* Legacy Solaris strptime clears the incoming tm structure. */ + (void) memset(tm, 0, sizeof (*tm)); + + return (__strptime(buf, fmt, tm)); +} + +/* + * This is used by Solaris, and is a variant that does not clear the + * incoming tm. It is triggered by -D_STRPTIME_DONTZERO. + */ +char * +__strptime_dontzero(const char *buf, const char *fmt, struct tm *tm) +{ + return (__strptime(buf, fmt, tm)); +} diff --git a/usr/src/lib/libc/port/locale/strxfrm.c b/usr/src/lib/libc/port/locale/strxfrm.c new file mode 100644 index 0000000000..2166d18dc9 --- /dev/null +++ b/usr/src/lib/libc/port/locale/strxfrm.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "collate.h" + +size_t +strxfrm(char *_RESTRICT_KYWD dest, const char *_RESTRICT_KYWD src, size_t len) +{ + int prim, sec, l; + size_t slen; + char *s, *ss; + + if (!*src) { + if (len > 0) + *dest = '\0'; + return (0); + } + + if (__collate_load_error) + return (strlcpy(dest, src, len)); + + slen = 0; + prim = sec = 0; + ss = s = __collate_substitute(src); + if (s == NULL) { + /* Best effort, caller must check errno per spec. */ + errno = ENOMEM; + return (strlcpy(dest, src, len)); + } + while (*s) { + while (*s && !prim) { + __collate_lookup(s, &l, &prim, &sec); + s += l; + } + if (prim) { + if (len > 1) { + *dest++ = (char)prim; + len--; + } + slen++; + prim = 0; + } + } + free(ss); + if (len > 0) + *dest = '\0'; + + return (slen); +} diff --git a/usr/src/lib/libc/port/locale/table.c b/usr/src/lib/libc/port/locale/table.c new file mode 100644 index 0000000000..1502b237cf --- /dev/null +++ b/usr/src/lib/libc/port/locale/table.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <ctype.h> +#include <wchar.h> +#include "runetype.h" +#include "mblocal.h" +#include "_ctype.h" + +_RuneLocale _DefaultRuneLocale = { + _RUNE_MAGIC_1, + "NONE", + { + /* 00 */ + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + /* 08 */ + _CTYPE_C, + _CTYPE_C|_CTYPE_S|_CTYPE_B, + _CTYPE_C|_CTYPE_S, + _CTYPE_C|_CTYPE_S, + _CTYPE_C|_CTYPE_S, + _CTYPE_C|_CTYPE_S, + _CTYPE_C, + _CTYPE_C, + /* 10 */ + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + /* 18 */ + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + _CTYPE_C, + /* 20 */ + _CTYPE_S|_CTYPE_B|_CTYPE_R, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + /* 28 */ + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + /* 30 */ + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + /* 38 */ + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + /* 40 */ + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + 
_CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + /* 48 */ + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + /* 50 */ + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + /* 58 */ + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + /* 60 */ + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + /* 68 */ + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + /* 70 */ + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + 
_CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + /* 78 */ + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_P|_CTYPE_R|_CTYPE_G, + _CTYPE_C, + }, + + /* BEGIN CSTYLED */ + { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, + }, 
+ { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, + }, + /* END CSTYLED */ +}; + +_RuneLocale *_CurrentRuneLocale = &_DefaultRuneLocale; + +/* Taken from former _ctype.c */ +unsigned int *__ctype_mask = _DefaultRuneLocale.__runetype; + +int *__trans_lower = _DefaultRuneLocale.__maplower; +int *__trans_upper = _DefaultRuneLocale.__mapupper; diff --git a/usr/src/lib/libc/port/locale/timelocal.c b/usr/src/lib/libc/port/locale/timelocal.c new file mode 100644 index 0000000000..58dfbe24c7 --- 
/dev/null +++ b/usr/src/lib/libc/port/locale/timelocal.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2001 Alexey Zelkin <phantom@FreeBSD.org> + * Copyright (c) 1997 FreeBSD Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <stddef.h> +#include "ldpart.h" +#include "timelocal.h" + +static struct lc_time_T _time_locale; +static int _time_using_locale; +static char *time_locale_buf; + +#define LCTIME_SIZE (sizeof (struct lc_time_T) / sizeof (char *)) + +static const struct lc_time_T _C_time_locale = { + { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }, { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }, { + "Sun", "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat" + }, { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" + }, + + /* X_fmt */ + "%H:%M:%S", + + /* + * x_fmt + * Since the C language standard calls for + * "date, using locale's date format," anything goes. + * Using just numbers (as here) makes Quakers happier; + * it's also compatible with SVR4. + */ + "%m/%d/%y", + + /* + * c_fmt + */ + "%a %b %e %H:%M:%S %Y", + + /* am */ + "AM", + + /* pm */ + "PM", + + /* date_fmt */ + "%a %b %e %H:%M:%S %Z %Y", + + /* + * ampm_fmt - To determine 12-hour clock format time (empty, if N/A) + */ + "%I:%M:%S %p" +}; + +struct lc_time_T * +__get_current_time_locale(void) +{ + return (_time_using_locale ? &_time_locale : + (struct lc_time_T *)&_C_time_locale); +} + +int +__time_load_locale(const char *name) +{ + return (__part_load_locale(name, &_time_using_locale, + &time_locale_buf, "LC_TIME", LCTIME_SIZE, LCTIME_SIZE, + (const char **)&_time_locale)); +} diff --git a/usr/src/lib/libc/port/locale/timelocal.h b/usr/src/lib/libc/port/locale/timelocal.h new file mode 100644 index 0000000000..d3629ae3b5 --- /dev/null +++ b/usr/src/lib/libc/port/locale/timelocal.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 1997-2002 FreeBSD Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _TIMELOCAL_H_ +#define _TIMELOCAL_H_ + +/* + * Private header file for the strftime and strptime localization + * stuff. 
+ */ +struct lc_time_T { + const char *mon[12]; + const char *month[12]; + const char *wday[7]; + const char *weekday[7]; + const char *X_fmt; + const char *x_fmt; + const char *c_fmt; + const char *am; + const char *pm; + const char *date_fmt; + const char *ampm_fmt; +}; + +struct lc_time_T *__get_current_time_locale(void); +int __time_load_locale(const char *); + +#endif /* !_TIMELOCAL_H_ */ diff --git a/usr/src/lib/libc/port/locale/tolower.c b/usr/src/lib/libc/port/locale/tolower.c new file mode 100644 index 0000000000..1f83c0e900 --- /dev/null +++ b/usr/src/lib/libc/port/locale/tolower.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#include "lint.h" +#include <ctype.h> + +#pragma weak _tolower = tolower +#pragma weak _toupper = toupper + +int +tolower(int c) +{ + return (((unsigned)c > 255) ? c : __trans_lower[c]); +} + +int +toupper(int c) +{ + return (((unsigned)c > 255) ? 
c : __trans_upper[c]); +} diff --git a/usr/src/lib/libc/port/locale/towlower.c b/usr/src/lib/libc/port/locale/towlower.c new file mode 100644 index 0000000000..74d408cfc4 --- /dev/null +++ b/usr/src/lib/libc/port/locale/towlower.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <wctype.h> +#include <stdio.h> +#include "runetype.h" + +static wint_t +__change_case_ext(wint_t c, int lower) +{ + size_t lim; + _RuneRange *rr; + _RuneEntry *base, *re; + + if (c < 0 || c == EOF) + return (c); + + rr = lower ? + &_CurrentRuneLocale->__maplower_ext : + &_CurrentRuneLocale->__mapupper_ext; + /* Binary search -- see bsearch.c for explanation. */ + base = rr->__ranges; + for (lim = rr->__nranges; lim != 0; lim >>= 1) { + re = base + (lim >> 1); + if (re->__min <= c && c <= re->__max) { + return (re->__map + c - re->__min); + } else if (c > re->__max) { + base = re + 1; + lim--; + } + } + + return (c); +} + +#undef towlower +wint_t +towlower(wint_t wc) +{ + return ((wc < 0 || wc >= _CACHED_RUNES) ? + __change_case_ext(wc, 1) : + _CurrentRuneLocale->__maplower[wc]); +} + +#undef towupper +wint_t +towupper(wint_t wc) +{ + return ((wc < 0 || wc >= _CACHED_RUNES) ? + __change_case_ext(wc, 0) : + _CurrentRuneLocale->__mapupper[wc]); +} diff --git a/usr/src/lib/libc/port/i18n/__ungetwc_xpg5.c b/usr/src/lib/libc/port/locale/ungetwc.c index 4a6e6609b1..ff9ca33e0b 100644 --- a/usr/src/lib/libc/port/i18n/__ungetwc_xpg5.c +++ b/usr/src/lib/libc/port/locale/ungetwc.c @@ -24,11 +24,13 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* Copyright (c) 1986 AT&T */ /* All Rights Reserved */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ /* This module is created for NLS on Sep.03.86 */ @@ -50,31 +52,25 @@ #include "stdiom.h" #include "mse.h" -wint_t -__ungetwc_xpg5(wint_t wc, FILE *iop) +static wint_t +__ungetwc_impl(wint_t wc, FILE *iop, int orient) { - char mbs[MB_LEN_MAX]; + char mbs[MB_LEN_MAX]; unsigned char *p; - int n; - void *lc; - int (*fp_wctomb)(void *, char *, wchar_t); + int n; rmutex_t *lk; FLOCKFILE(lk, iop); - if (_set_orientation_wide(iop, &lc, - (void (*(*))(void))&fp_wctomb, FP_WCTOMB) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (WEOF); + if (orient && GET_NO_MODE(iop)) { + _setorientation(iop, _WC_MODE); } - if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0)) { FUNLOCKFILE(lk); return (WEOF); } - n = fp_wctomb(lc, mbs, (wchar_t)wc); + n = wctomb(mbs, (wchar_t)wc); if (n <= 0) { FUNLOCKFILE(lk); return (WEOF); @@ -102,3 +98,16 @@ __ungetwc_xpg5(wint_t wc, FILE *iop) FUNLOCKFILE(lk); return (wc); } + + +wint_t +__ungetwc_xpg5(wint_t wc, FILE *iop) +{ + return (__ungetwc_impl(wc, iop, 1)); +} + +wint_t +ungetwc(wint_t wc, FILE *iop) +{ + return (__ungetwc_impl(wc, iop, 0)); +} diff --git a/usr/src/lib/libc/port/locale/utf8.c b/usr/src/lib/libc/port/locale/utf8.c new file mode 100644 index 0000000000..60dc07bc73 --- /dev/null +++ b/usr/src/lib/libc/port/locale/utf8.c @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <errno.h> +#include <limits.h> +#include "runetype.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "mblocal.h" + +static size_t _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD, + const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +static int _UTF8_mbsinit(const mbstate_t *); +static size_t _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD, + const char **_RESTRICT_KYWD, size_t, size_t, + mbstate_t *_RESTRICT_KYWD); +static size_t _UTF8_wcrtomb(char *_RESTRICT_KYWD, wchar_t, + mbstate_t *_RESTRICT_KYWD); +static size_t _UTF8_wcsnrtombs(char *_RESTRICT_KYWD, + const wchar_t **_RESTRICT_KYWD, + size_t, size_t, mbstate_t *_RESTRICT_KYWD); + +typedef struct { + wchar_t ch; + int want; + wchar_t lbound; +} _UTF8State; + +int +_UTF8_init(_RuneLocale *rl) +{ + __mbrtowc = _UTF8_mbrtowc; + __wcrtomb = _UTF8_wcrtomb; + __mbsinit = _UTF8_mbsinit; + __mbsnrtowcs = _UTF8_mbsnrtowcs; + __wcsnrtombs = _UTF8_wcsnrtombs; + _CurrentRuneLocale = rl; + + /* + * In theory up to 6 bytes can be used for the encoding, + * but only encodings with more than 4 bytes are illegal. + */ + __ctype[520] = 4; + /* + * Note that the other CSWIDTH members are nonsensical for this + * this coding. They only are valid with EUC codings. + */ + + return (0); +} + +static int +_UTF8_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _UTF8State *)ps)->want == 0); +} + +static size_t +_UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s, + size_t n, mbstate_t *_RESTRICT_KYWD ps) +{ + _UTF8State *us; + int ch, i, mask, want; + wchar_t lbound, wch; + + us = (_UTF8State *)ps; + + if (us->want < 0 || us->want > 6) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + + if (us->want == 0 && ((ch = (unsigned char)*s) & ~0x7f) == 0) { + /* Fast path for plain ASCII characters. 
*/ + if (pwc != NULL) + *pwc = ch; + return (ch != '\0' ? 1 : 0); + } + + if (us->want == 0) { + /* + * Determine the number of octets that make up this character + * from the first octet, and a mask that extracts the + * interesting bits of the first octet. We already know + * the character is at least two bytes long. + * + * We also specify a lower bound for the character code to + * detect redundant, non-"shortest form" encodings. For + * example, the sequence C0 80 is _not_ a legal representation + * of the null character. This enforces a 1-to-1 mapping + * between character codes and their multibyte representations. + */ + ch = (unsigned char)*s; + if ((ch & 0x80) == 0) { + mask = 0x7f; + want = 1; + lbound = 0; + } else if ((ch & 0xe0) == 0xc0) { + mask = 0x1f; + want = 2; + lbound = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + mask = 0x0f; + want = 3; + lbound = 0x800; + } else if ((ch & 0xf8) == 0xf0) { + mask = 0x07; + want = 4; + lbound = 0x10000; +#if 0 + /* These would be illegal in the UTF-8 space */ + + } else if ((ch & 0xfc) == 0xf8) { + mask = 0x03; + want = 5; + lbound = 0x200000; + } else if ((ch & 0xfe) == 0xfc) { + mask = 0x01; + want = 6; + lbound = 0x4000000; +#endif + } else { + /* + * Malformed input; input is not UTF-8. + */ + errno = EILSEQ; + return ((size_t)-1); + } + } else { + want = us->want; + lbound = us->lbound; + } + + /* + * Decode the octet sequence representing the character in chunks + * of 6 bits, most significant first. + */ + if (us->want == 0) + wch = (unsigned char)*s++ & mask; + else + wch = us->ch; + + for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) { + if ((*s & 0xc0) != 0x80) { + /* + * Malformed input; bad characters in the middle + * of a character. + */ + errno = EILSEQ; + return ((size_t)-1); + } + wch <<= 6; + wch |= *s++ & 0x3f; + } + if (i < want) { + /* Incomplete multibyte sequence. 
*/ + us->want = want - i; + us->lbound = lbound; + us->ch = wch; + return ((size_t)-2); + } + if (wch < lbound) { + /* + * Malformed input; redundant encoding. + */ + errno = EILSEQ; + return ((size_t)-1); + } + if (pwc != NULL) + *pwc = wch; + us->want = 0; + return (wch == L'\0' ? 0 : want); +} + +static size_t +_UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src, + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + _UTF8State *us; + const char *s; + size_t nchr; + wchar_t wc; + size_t nb; + + us = (_UTF8State *)ps; + + s = *src; + nchr = 0; + + if (dst == NULL) { + /* + * The fast path in the loop below is not safe if an ASCII + * character appears as anything but the first byte of a + * multibyte sequence. Check now to avoid doing it in the loop. + */ + if (nms > 0 && us->want > 0 && (signed char)*s > 0) { + errno = EILSEQ; + return ((size_t)-1); + } + for (;;) { + if (nms > 0 && (signed char)*s > 0) + /* + * Fast path for plain ASCII characters + * excluding NUL. + */ + nb = 1; + else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) == + (size_t)-1) + /* Invalid sequence - mbrtowc() sets errno. */ + return ((size_t)-1); + else if (nb == 0 || nb == (size_t)-2) + return (nchr); + s += nb; + nms -= nb; + nchr++; + } + /*NOTREACHED*/ + } + + /* + * The fast path in the loop below is not safe if an ASCII + * character appears as anything but the first byte of a + * multibyte sequence. Check now to avoid doing it in the loop. + */ + if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) { + errno = EILSEQ; + return ((size_t)-1); + } + while (len-- > 0) { + if (nms > 0 && (signed char)*s > 0) { + /* + * Fast path for plain ASCII characters + * excluding NUL. 
+ */ + *dst = (wchar_t)*s; + nb = 1; + } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) == + (size_t)-1) { + *src = s; + return ((size_t)-1); + } else if (nb == (size_t)-2) { + *src = s + nms; + return (nchr); + } else if (nb == 0) { + *src = NULL; + return (nchr); + } + s += nb; + nms -= nb; + nchr++; + dst++; + } + *src = s; + return (nchr); +} + +static size_t +_UTF8_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps) +{ + _UTF8State *us; + unsigned char lead; + int i, len; + + us = (_UTF8State *)ps; + + if (us->want != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + + if ((wc & ~0x7f) == 0) { + /* Fast path for plain ASCII characters. */ + *s = (char)wc; + return (1); + } + + /* + * Determine the number of octets needed to represent this character. + * We always output the shortest sequence possible. Also specify the + * first few bits of the first octet, which contains the information + * about the sequence length. + */ + if ((wc & ~0x7f) == 0) { + lead = 0; + len = 1; + } else if ((wc & ~0x7ff) == 0) { + lead = 0xc0; + len = 2; + } else if ((wc & ~0xffff) == 0) { + lead = 0xe0; + len = 3; + } else if ((wc & ~0x1fffff) == 0) { + lead = 0xf0; + len = 4; +#if 0 + /* Again, 5 and 6 byte encodings are simply not permitted */ + } else if ((wc & ~0x3ffffff) == 0) { + lead = 0xf8; + len = 5; + } else if ((wc & ~0x7fffffff) == 0) { + lead = 0xfc; + len = 6; +#endif + } else { + errno = EILSEQ; + return ((size_t)-1); + } + + /* + * Output the octets representing the character in chunks + * of 6 bits, least significant last. The first octet is + * a special case because it contains the sequence length + * information. 
+ */ + for (i = len - 1; i > 0; i--) { + s[i] = (wc & 0x3f) | 0x80; + wc >>= 6; + } + *s = (wc & 0xff) | lead; + + return (len); +} + +static size_t +_UTF8_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src, + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps) +{ + _UTF8State *us; + char buf[MB_LEN_MAX]; + const wchar_t *s; + size_t nbytes; + size_t nb; + + us = (_UTF8State *)ps; + + if (us->want != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + s = *src; + nbytes = 0; + + if (dst == NULL) { + while (nwc-- > 0) { + if (0 <= *s && *s < 0x80) + /* Fast path for plain ASCII characters. */ + nb = 1; + else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == + (size_t)-1) + /* Invalid character - wcrtomb() sets errno. */ + return ((size_t)-1); + if (*s == L'\0') + return (nbytes + nb - 1); + s++; + nbytes += nb; + } + return (nbytes); + } + + while (len > 0 && nwc-- > 0) { + if (0 <= *s && *s < 0x80) { + /* Fast path for plain ASCII characters. */ + nb = 1; + *dst = *s; + } else if (len > (size_t)MB_CUR_MAX) { + /* Enough space to translate in-place. */ + if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) { + *src = s; + return ((size_t)-1); + } + } else { + /* + * May not be enough space; use temp. buffer. + */ + if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) { + *src = s; + return ((size_t)-1); + } + if (nb > (int)len) + /* MB sequence for character won't fit. */ + break; + (void) memcpy(dst, buf, nb); + } + if (*s == L'\0') { + *src = NULL; + return (nbytes + nb - 1); + } + s++; + dst += nb; + len -= nb; + nbytes += nb; + } + *src = s; + return (nbytes); +} diff --git a/usr/src/lib/libc/port/locale/utils.h b/usr/src/lib/libc/port/locale/utils.h new file mode 100644 index 0000000000..aec21991ab --- /dev/null +++ b/usr/src/lib/libc/port/locale/utils.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* utility definitions */ +#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? 
*/ +#define INFINITY (DUPMAX + 1) +#define NC (CHAR_MAX - CHAR_MIN + 1) +typedef unsigned char uch; + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif +#include <assert.h> diff --git a/usr/src/lib/libc/port/locale/wcrtomb.c b/usr/src/lib/libc/port/locale/wcrtomb.c new file mode 100644 index 0000000000..2648b25ae1 --- /dev/null +++ b/usr/src/lib/libc/port/locale/wcrtomb.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#include "lint.h"
+#include <wchar.h>
+#include "mblocal.h"
+
+/*
+ * Convert the wide character wc into its multibyte form in s by handing
+ * the request to __wcrtomb().  When the caller passes a NULL ps, a
+ * conversion state private to this function is used instead.
+ */
+size_t
+wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps)
+{
+	static mbstate_t private_state;
+
+	return (__wcrtomb(s, wc, (ps != NULL) ? ps : &private_state));
+}
diff --git a/usr/src/lib/libc/port/locale/wcscoll.c b/usr/src/lib/libc/port/locale/wcscoll.c
new file mode 100644
index 0000000000..0837f1c867
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcscoll.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include "collate.h"
+
+static char *__mbsdup(const wchar_t *);
+
+/*
+ * Stand-in wcscoll().  Both wide strings are converted to multibyte
+ * copies and compared with strcoll(); when that is not possible the
+ * comparison is done with wcscmp() instead.
+ */
+int
+wcscoll(const wchar_t *ws1, const wchar_t *ws2)
+{
+	char *mbs1, *mbs2;
+	int diff, sverrno;
+
+	/*
+	 * Collation data missing or not loaded, or a multibyte character
+	 * set is in effect: plain binary comparison.
+	 */
+	if (__collate_load_error || MB_CUR_MAX > 1)
+		return (wcscmp(ws1, ws2));
+
+	/* The second copy is attempted only when the first succeeded. */
+	mbs1 = __mbsdup(ws1);
+	mbs2 = (mbs1 != NULL) ? __mbsdup(ws2) : NULL;
+	if (mbs2 == NULL) {
+		/*
+		 * No memory or an unconvertible wide character.  Use
+		 * wcscmp() for the answer but keep errno describing the
+		 * failure; callers that ignore errno get a plausible,
+		 * though possibly slightly wrong, ordering.
+		 */
+		sverrno = errno;
+		free(mbs1);
+		errno = sverrno;
+		return (wcscmp(ws1, ws2));
+	}
+
+	diff = strcoll(mbs1, mbs2);
+
+	/* free() must not disturb the errno left by strcoll(). */
+	sverrno = errno;
+	free(mbs1);
+	free(mbs2);
+	errno = sverrno;
+
+	return (diff);
+}
+
+/*
+ * Return a malloc()ed multibyte copy of the wide string ws, or NULL when
+ * the conversion or the allocation fails.  The caller frees the result.
+ */
+static char *
+__mbsdup(const wchar_t *ws)
+{
+	static const mbstate_t initial = { 0 };
+	mbstate_t st = initial;
+	const wchar_t *probe = ws;
+	size_t need;
+	char *copy;
+
+	/* First pass only measures the multibyte length. */
+	need = wcsrtombs(NULL, &probe, 0, &st);
+	if (need == (size_t)-1)
+		return (NULL);
+
+	copy = malloc(need + 1);
+	if (copy != NULL) {
+		/* Second pass converts, starting from a fresh state. */
+		st = initial;
+		(void) wcsrtombs(copy, &ws, need + 1, &st);
+	}
+
+	return (copy);
+}
diff --git a/usr/src/lib/libc/port/locale/wcsftime.c b/usr/src/lib/libc/port/locale/wcsftime.c
new file mode 100644
index 0000000000..80afc62a7a
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcsftime.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include "mse_int.h" +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <wchar.h> +#include <alloca.h> + +/* + * Convert date and time to a wide-character string. + * + * This is the wide-character counterpart of strftime(). So that we do not + * have to duplicate the code of strftime(), we convert the format string to + * multibyte, call strftime(), then convert the result back into wide + * characters. 
+ *
+ * This technique loses in the presence of stateful multibyte encoding if any
+ * of the conversions in the format string change conversion state. When
+ * stateful encoding is implemented, we will need to reset the state between
+ * format specifications in the format string.
+ *
+ * Note carefully that prior to xpg5, the format was char *, not wchar_t.
+ */
+
+/*
+ * Hmmm this is probably a bit backwards. As we are converting to single
+ * byte formats, perhaps we should not be doing a redundant conversion.
+ * Something to look at for the future.
+ */
+
+/*
+ * Format timeptr according to the wide format string and store at most
+ * maxsize wide characters (terminator included) in wcs.
+ *
+ * Returns the number of wide characters stored, not counting the
+ * terminator, or 0 when the format cannot be converted, memory cannot be
+ * allocated, or the result does not fit.  errno as set by the failing
+ * step is preserved across the cleanup.
+ */
+size_t
+__wcsftime_xpg5(wchar_t *wcs, size_t maxsize, const wchar_t *format,
+    const struct tm *timeptr)
+{
+	static const mbstate_t initial = { 0 };
+	mbstate_t mbs;
+	char *dst, *sformat;
+	const char *dstp;
+	const wchar_t *formatp;
+	size_t n, sflen, dstsize;
+	int sverrno;
+
+	sformat = dst = NULL;
+
+	/*
+	 * Convert the supplied format string to a multibyte representation
+	 * for strftime(), which only handles single-byte characters.
+	 */
+	mbs = initial;
+	formatp = format;
+	sflen = wcsrtombs(NULL, &formatp, 0, &mbs);
+	if (sflen == (size_t)-1)
+		goto error;
+	if ((sformat = malloc(sflen + 1)) == NULL)
+		goto error;
+	mbs = initial;
+	formatp = format;
+	(void) wcsrtombs(sformat, &formatp, sflen + 1, &mbs);
+
+	/*
+	 * Allocate memory for longest multibyte sequence that will fit
+	 * into the caller's buffer and call strftime() to fill it.
+	 * Then, copy and convert the result back into wide characters in
+	 * the caller's buffer.
+	 */
+	if (LONG_MAX / MB_CUR_MAX <= maxsize) {
+		/* maxsize is preposterously large - avoid int. overflow. */
+		errno = EINVAL;
+		goto error;
+	}
+	dstsize = maxsize * MB_CUR_MAX;
+	if ((dst = malloc(dstsize)) == NULL)
+		goto error;
+	/*
+	 * Let strftime() use the whole buffer.  Limiting it to maxsize
+	 * bytes would reject results whose multibyte form is longer than
+	 * maxsize bytes even though they fit in maxsize wide characters;
+	 * the mbsrtowcs() call below enforces the wide-character limit.
+	 */
+	if (strftime(dst, dstsize, sformat, timeptr) == 0)
+		goto error;
+	dstp = dst;
+	mbs = initial;
+	n = mbsrtowcs(wcs, &dstp, maxsize, &mbs);
+	/* dstp is only NULL when the terminator was reached and stored. */
+	if (n == (size_t)-2 || n == (size_t)-1 || dstp != NULL)
+		goto error;
+
+	free(sformat);
+	free(dst);
+	return (n);
+
+error:
+	sverrno = errno;
+	free(sformat);
+	free(dst);
+	errno = sverrno;
+	return (0);
+}
+
+/*
+ * Variant taking a multibyte format string.  The format is widened and
+ * passed to __wcsftime_xpg5(); returns 0 if it cannot be widened or if
+ * memory for the wide copy cannot be obtained.
+ */
+size_t
+wcsftime(wchar_t *wcs, size_t maxsize, const char *format,
+    const struct tm *timeptr)
+{
+	size_t len, rv;
+	wchar_t *wfmt;
+	int sverrno;
+
+	/*
+	 * Convert the format (mb string) to wide char array.  The copy
+	 * lives on the heap because the format length is caller-controlled
+	 * and must not be able to exhaust the stack; calloc() also guards
+	 * the size multiplication.
+	 */
+	len = strlen(format) + 1;
+	if ((wfmt = calloc(len, sizeof (wchar_t))) == NULL)
+		return (0);
+	if (mbstowcs(wfmt, format, len) == (size_t)-1)
+		rv = 0;
+	else
+		rv = __wcsftime_xpg5(wcs, maxsize, wfmt, timeptr);
+
+	sverrno = errno;
+	free(wfmt);
+	errno = sverrno;
+	return (rv);
+}
diff --git a/usr/src/lib/libc/port/locale/wcsnrtombs.c b/usr/src/lib/libc/port/locale/wcsnrtombs.c
new file mode 100644
index 0000000000..d053419341
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcsnrtombs.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2002-2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include "mblocal.h"
+
+/*
+ * Public entry point: hands the request to __wcsnrtombs(), substituting
+ * a conversion state private to this function when ps is NULL.
+ */
+size_t
+wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
+    size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
+{
+	static mbstate_t private_state;
+
+	return (__wcsnrtombs(dst, src, nwc, len,
+	    (ps != NULL) ? ps : &private_state));
+}
+
+/*
+ * Convert at most nwc wide characters from *src into at most len bytes
+ * at dst, one character at a time through __wcrtomb().
+ *
+ * With dst == NULL nothing is stored and *src is left alone; the return
+ * value is the number of bytes the conversion would produce.  Otherwise
+ * *src is set to NULL when the terminating null was converted, or to the
+ * first wide character not converted.  The returned byte count never
+ * includes the terminator.  (size_t)-1 is returned when __wcrtomb()
+ * rejects a character.
+ */
+size_t
+__wcsnrtombs_std(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
+    size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
+{
+	mbstate_t saved;
+	char scratch[MB_LEN_MAX];
+	const wchar_t *wp;
+	size_t total;
+	size_t nb;
+
+	wp = *src;
+	total = 0;
+
+	if (dst == NULL) {
+		/* Measuring pass: convert into scratch space only. */
+		for (; nwc > 0; nwc--, wp++) {
+			nb = __wcrtomb(scratch, *wp, ps);
+			if (nb == (size_t)-1) {
+				/* Invalid character - wcrtomb() sets errno. */
+				return ((size_t)-1);
+			}
+			if (*wp == L'\0')
+				return (total + nb - 1);
+			total += nb;
+		}
+		return (total);
+	}
+
+	for (; len > 0 && nwc > 0; nwc--) {
+		if (len > (size_t)MB_CUR_MAX) {
+			/* Any character fits: convert straight into dst. */
+			nb = __wcrtomb(dst, *wp, ps);
+			if (nb == (size_t)-1) {
+				*src = wp;
+				return ((size_t)-1);
+			}
+		} else {
+			/*
+			 * The character might not fit.  Convert into
+			 * scratch space first, keeping a copy of the
+			 * conversion state so it can be put back if the
+			 * result turns out to be too long.
+			 */
+			saved = *ps;
+			nb = __wcrtomb(scratch, *wp, ps);
+			if (nb == (size_t)-1) {
+				*src = wp;
+				return ((size_t)-1);
+			}
+			if (nb > len) {
+				/* MB sequence for character won't fit. */
+				*ps = saved;
+				break;
+			}
+			(void) memcpy(dst, scratch, nb);
+		}
+		if (*wp == L'\0') {
+			*src = NULL;
+			return (total + nb - 1);
+		}
+		wp++;
+		dst += nb;
+		len -= nb;
+		total += nb;
+	}
+	*src = wp;
+	return (total);
+}
diff --git a/usr/src/lib/libc/port/locale/wcsrtombs.c b/usr/src/lib/libc/port/locale/wcsrtombs.c
new file mode 100644
index 0000000000..60c9132dd9
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcsrtombs.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2002-2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include "mblocal.h"
+
+/*
+ * Convert the wide string at *src into at most len bytes at dst by way
+ * of __wcsnrtombs(), with ULONG_MAX as the wide-character limit.  A NULL
+ * ps selects a conversion state private to this function.
+ */
+size_t
+wcsrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
+    size_t len, mbstate_t *_RESTRICT_KYWD ps)
+{
+	static mbstate_t private_state;
+
+	return (__wcsnrtombs(dst, src, ULONG_MAX, len,
+	    (ps != NULL) ? ps : &private_state));
+}
diff --git a/usr/src/lib/libc/port/locale/wcstombs.c b/usr/src/lib/libc/port/locale/wcstombs.c
new file mode 100644
index 0000000000..4155d93cc4
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcstombs.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2002-2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "lint.h"
+#include <limits.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include "mblocal.h"
+
+/*
+ * Convert the wide string pwcs into at most n bytes at s.  Each call
+ * begins from a zeroed conversion state held in a local, and the work is
+ * done by __wcsnrtombs() with ULONG_MAX as the wide-character limit.
+ */
+size_t
+wcstombs(char *_RESTRICT_KYWD s, const wchar_t *_RESTRICT_KYWD pwcs, size_t n)
+{
+	static const mbstate_t initial = { 0 };
+	mbstate_t state = initial;
+	const wchar_t *cursor = pwcs;
+
+	return (__wcsnrtombs(s, &cursor, ULONG_MAX, n, &state));
+}
diff --git a/usr/src/lib/libc/port/locale/wcswidth.c b/usr/src/lib/libc/port/locale/wcswidth.c
new file mode 100644
index 0000000000..1988460a5a
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcswidth.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <wchar.h>
+
+/*
+ * Add up the wcwidth() of the leading wide characters of pwcs, stopping
+ * at a null wide character or after n characters, whichever comes
+ * first.  Returns -1 as soon as wcwidth() reports a negative width.
+ */
+int
+wcswidth(const wchar_t *pwcs, size_t n)
+{
+	size_t i;
+	int total, w;
+
+	total = 0;
+	for (i = 0; i < n && pwcs[i] != L'\0'; i++) {
+		w = wcwidth(pwcs[i]);
+		if (w < 0)
+			return (-1);
+		total += w;
+	}
+	return (total);
+}
diff --git a/usr/src/lib/libc/port/locale/wcsxfrm.c b/usr/src/lib/libc/port/locale/wcsxfrm.c
new file mode 100644
index 0000000000..5d98aa3441
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wcsxfrm.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
+ * at Electronni Visti IA, Kiev, Ukraine.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "collate.h" + +static char *__mbsdup(const wchar_t *); + +/* + * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of + * the logic used. 
+ */
+/*
+ * Transform src and store at most len wide characters (terminator
+ * included) in dest.  The return value is the length of the complete
+ * transformed string, which may exceed what was stored.
+ *
+ * When collation data is unavailable, the character set is multibyte,
+ * or src cannot be turned into a multibyte string, the "transformation"
+ * is a plain copy of src.  Otherwise each non-zero primary weight found
+ * by __collate_lookup() becomes one output wide character.
+ */
+size_t
+wcsxfrm(wchar_t *_RESTRICT_KYWD dest,
+    const wchar_t *_RESTRICT_KYWD src, size_t len)
+{
+	int prim, sec, l;
+	size_t slen;
+	char *mbsrc, *s, *ss;
+
+	if (*src == L'\0') {
+		if (len != 0)
+			*dest = L'\0';
+		return (0);
+	}
+
+	/*
+	 * __mbsdup() is only attempted when the collation path applies.
+	 * If it fails (no memory, or a wide character with no multibyte
+	 * form) fall back to the plain copy, as wcscoll() falls back to
+	 * wcscmp(), instead of handing NULL to __collate_substitute().
+	 * errno is not touched here, so it still describes the failure.
+	 */
+	if (__collate_load_error || MB_CUR_MAX > 1 ||
+	    (mbsrc = __mbsdup(src)) == NULL) {
+		slen = wcslen(src);
+		if (len > 0) {
+			if (slen < len)
+				(void) wcscpy(dest, src);
+			else {
+				/* Truncate, always leaving a terminator. */
+				(void) wcsncpy(dest, src, len - 1);
+				dest[len - 1] = L'\0';
+			}
+		}
+		return (slen);
+	}
+
+	slen = 0;
+	prim = sec = 0;
+	ss = s = __collate_substitute(mbsrc);
+	while (*s != '\0') {
+		/* Advance until an element with a primary weight is found. */
+		while (*s != '\0' && prim == 0) {
+			__collate_lookup(s, &l, &prim, &sec);
+			s += l;
+		}
+		if (prim != 0) {
+			/* Store only while room remains for the terminator. */
+			if (len > 1) {
+				*dest++ = (wchar_t)prim;
+				len--;
+			}
+			slen++;
+			prim = 0;
+		}
+	}
+	free(ss);
+	free(mbsrc);
+	if (len != 0)
+		*dest = L'\0';
+
+	return (slen);
+}
+
+/*
+ * Return a malloc()ed multibyte copy of the wide string ws, or NULL when
+ * the conversion or the allocation fails.  The caller frees the result.
+ */
+static char *
+__mbsdup(const wchar_t *ws)
+{
+	static const mbstate_t initial = { 0 };
+	mbstate_t st;
+	const wchar_t *wcp;
+	size_t len;
+	char *mbs;
+
+	/* First pass only measures the multibyte length. */
+	wcp = ws;
+	st = initial;
+	if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
+		return (NULL);
+	if ((mbs = malloc(len + 1)) == NULL)
+		return (NULL);
+	/* Second pass converts, starting from a fresh state. */
+	st = initial;
+	(void) wcsrtombs(mbs, &ws, len + 1, &st);
+
+	return (mbs);
+}
diff --git a/usr/src/lib/libc/port/locale/wctob.c b/usr/src/lib/libc/port/locale/wctob.c
new file mode 100644
index 0000000000..5f3ec17790
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wctob.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2002-2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <limits.h>
+#include <stdio.h>
+#include <wchar.h>
+#include "mblocal.h"
+
+/*
+ * Return the single byte that __wcrtomb() produces for c, as an
+ * unsigned char value.  Returns EOF when c is WEOF or when the
+ * conversion does not yield exactly one byte.
+ */
+int
+wctob(wint_t c)
+{
+	static const mbstate_t initial = { 0 };
+	mbstate_t state = initial;
+	char out[MB_LEN_MAX];
+
+	if (c == WEOF)
+		return (EOF);
+	if (__wcrtomb(out, c, &state) != 1)
+		return (EOF);
+	return ((unsigned char)out[0]);
+}
diff --git a/usr/src/lib/libc/port/locale/wctomb.c b/usr/src/lib/libc/port/locale/wctomb.c
new file mode 100644
index 0000000000..4760b51dde
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wctomb.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2002-2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "lint.h"
+#include <stdlib.h>
+#include <wchar.h>
+#include "mblocal.h"
+
+/*
+ * Convert wchar to its multibyte form in s using __wcrtomb() and a
+ * conversion state kept in this function between calls.  Returns the
+ * number of bytes stored, or -1 when __wcrtomb() fails.  A NULL s clears
+ * the kept state and returns 0.
+ */
+int
+wctomb(char *s, wchar_t wchar)
+{
+	static const mbstate_t initial = { 0 };
+	static mbstate_t kept_state;
+	size_t nbytes;
+
+	if (s == NULL) {
+		/* No support for state dependent encodings. */
+		kept_state = initial;
+		return (0);
+	}
+
+	nbytes = __wcrtomb(s, wchar, &kept_state);
+	return ((nbytes == (size_t)-1) ? -1 : (int)nbytes);
+}
diff --git a/usr/src/lib/libc/port/locale/wctrans.c b/usr/src/lib/libc/port/locale/wctrans.c
new file mode 100644
index 0000000000..276704a9a0
--- /dev/null
+++ b/usr/src/lib/libc/port/locale/wctrans.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2002 Tim J. Robbins.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <errno.h> +#include <string.h> +#include <wctype.h> + +enum { + _WCT_ERROR = 0, + _WCT_TOLOWER = 1, + _WCT_TOUPPER = 2 +}; + +wint_t +towctrans(wint_t wc, wctrans_t desc) +{ + + switch (desc) { + case _WCT_TOLOWER: + wc = towlower(wc); + break; + case _WCT_TOUPPER: + wc = towupper(wc); + break; + case _WCT_ERROR: + default: + errno = EINVAL; + break; + } + + return (wc); +} + +wctrans_t +wctrans(const char *charclass) +{ + struct { + const char *name; + wctrans_t trans; + } ccls[] = { + { "tolower", _WCT_TOLOWER }, + { "toupper", _WCT_TOUPPER }, + { NULL, _WCT_ERROR }, /* Default */ + }; + int i; + + i = 0; + while (ccls[i].name != NULL && strcmp(ccls[i].name, charclass) != 0) + i++; + + if (ccls[i].trans == _WCT_ERROR) + errno = EINVAL; + return (ccls[i].trans); +} diff --git a/usr/src/lib/libc/port/locale/wctype.c b/usr/src/lib/libc/port/locale/wctype.c new file mode 100644 index 0000000000..22d28340ba --- /dev/null +++ b/usr/src/lib/libc/port/locale/wctype.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "lint.h" +#include <ctype.h> +#include <string.h> +#include <wctype.h> +#include "_ctype.h" + +wctype_t +wctype(const char *property) +{ + static const struct { + const char *name; + wctype_t mask; + } props[] = { + { "alnum", _CTYPE_A|_CTYPE_D }, + { "alpha", _CTYPE_A }, + { "blank", _CTYPE_B }, + { "cntrl", _CTYPE_C }, + { "digit", _CTYPE_D }, + { "graph", _CTYPE_G }, + { "lower", _CTYPE_L }, + { "print", _CTYPE_R }, + { "punct", _CTYPE_P }, + { "space", _CTYPE_S }, + { "upper", _CTYPE_U }, + { "xdigit", _CTYPE_X }, + { "ideogram", _CTYPE_I }, /* BSD extension */ + { "special", _CTYPE_T }, /* BSD extension */ + { "phonogram", _CTYPE_Q }, /* BSD extension */ + { NULL, 0UL }, /* Default */ + }; + int i; + + i = 0; + while (props[i].name != NULL && strcmp(props[i].name, property) != 0) + i++; + + return (props[i].mask); +} diff --git a/usr/src/lib/libc/port/locale/wcwidth.c b/usr/src/lib/libc/port/locale/wcwidth.c new file mode 100644 index 0000000000..fb63c723c1 --- /dev/null +++ b/usr/src/lib/libc/port/locale/wcwidth.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. 
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "lint.h" +#include <wchar.h> +#include "_ctype.h" +#include "runetype.h" + +#undef wcwidth + +int +wcwidth(wchar_t wc) +{ + unsigned int x; + + if (wc == 0) + return (0); + + x = ((wc < 0 || wc >= _CACHED_RUNES) ? ___runetype(wc) : + _CurrentRuneLocale->__runetype[wc]) & (_CTYPE_SWM|_CTYPE_R); + + if ((x & _CTYPE_SWM) != 0) + return ((x & _CTYPE_SWM) >> _CTYPE_SWS); + return ((x & _CTYPE_R) != 0 ? 1 : -1); +} + +#pragma weak _scrwidth = scrwidth + +/* + * This is a Solaris extension. I don't really understand why a different + * function was needed. Anecdotally, it appears that perhaps some versions + * of the Sun wcwidth didn't handle illegal encodings well, and scrwidth + * seems to have been better. It's not an issue for our implementation. + */ +int +scrwidth(wchar_t wc) +{ + return (wcwidth(wc)); +} diff --git a/usr/src/lib/libc/port/locale/wscol.c b/usr/src/lib/libc/port/locale/wscol.c new file mode 100644 index 0000000000..d55096e4db --- /dev/null +++ b/usr/src/lib/libc/port/locale/wscol.c @@ -0,0 +1,39 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "lint.h" +#include <wchar.h> + +int +wscol(const wchar_t *s) +{ + int l = 0; + while (*s) { + l += wcwidth(*s); + s++; + } + return (l); +} diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index a3dae49082..b0f99cab15 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -21,6 +21,9 @@ # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. # +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. 
+# # # MAPFILE HEADER START @@ -2493,14 +2496,6 @@ SYMBOL_VERSION SUNWprivate_1.1 { __inf_written; __i_size; _isnanf { TYPE = FUNCTION; FILTER = libm.so.2 }; - __lc_charmap; - __lc_collate; - __lc_ctype; - __lc_locale; - __lc_messages; - __lc_monetary; - __lc_numeric; - __lc_time; __libc_threaded; _lib_version { FLAGS = NODIRECT }; _logb { TYPE = FUNCTION; FILTER = libm.so.2 }; @@ -2597,14 +2592,9 @@ $endif attr_to_option; attr_to_xattr_view; _autofssys; - __btowc_dense; - __btowc_euc; - __btowc_sb; _bufsync; - __charmap_init; _cladm; __class_quadruple; - __collate_init; core_get_default_content; core_get_default_path; core_get_global_content; @@ -2619,7 +2609,6 @@ $endif core_set_options; core_set_process_content; core_set_process_path; - __ctype_init; dbm_close_status; dbm_do_nextkey; dbm_setdefwrite; @@ -2640,24 +2629,17 @@ $endif _doscan; _errfp; _errxfp; - __eucpctowc_gen; exportfs; _F_cplx_div; _F_cplx_div_ix; _F_cplx_div_rx; _F_cplx_mul; - __fgetwc_dense; - __fgetwc_euc; - __fgetwc_sb; __fgetwc_xpg5; __fgetws_xpg5; _findbuf; _findiop; __fini_daemon_priv; _finite; - __fnmatch_C; - __fnmatch_sb; - __fnmatch_std; _fork1 { FLAGS = NODYNSORT }; _forkall { FLAGS = NODYNSORT }; _fpclass; @@ -2670,12 +2652,10 @@ $endif _fpsetsticky; __fputwc_xpg5; __fputws_xpg5; - __fseterror_u; _ftw; _gcvt; _getarg; __getcontext; - __getdate_std; _getdents; _get_exit_frame_monitor; _getfp; @@ -2694,12 +2674,7 @@ $endif __init_suid_priv; _insert; inst_sync; - _is_euc_fc; - _is_euc_pc; _iswctype; - __iswctype_bc; - __iswctype_sb; - __iswctype_std; klpd_create; klpd_getpath; klpd_getport; @@ -2711,8 +2686,6 @@ $endif _lgrp_home_fast { FLAGS = NODYNSORT }; _lgrpsys; _lltostr; - __localeconv_std; - __locale_init; _lock_clear; _lock_try; _ltzset; @@ -2720,38 +2693,10 @@ $endif makeut; makeutx; _mbftowc; - __mbftowc_dense; - __mbftowc_euc; - __mbftowc_sb; - __mblen_gen; - __mblen_sb; - __mbrlen_gen; - __mbrlen_sb; - __mbrtowc_dense; - __mbrtowc_euc; - __mbrtowc_sb; - __mbsinit_gen; - 
__mbsrtowcs_dense; - __mbsrtowcs_euc; - __mbsrtowcs_sb; - __mbst_get_consumed_array; - __mbst_get_locale; - __mbst_get_nconsumed; - __mbstowcs_dense; - __mbstowcs_euc; - __mbstowcs_sb; - __mbst_set_consumed_array; - __mbst_set_locale; - __mbst_set_nconsumed; - __mbtowc_dense; - __mbtowc_euc; - __mbtowc_sb; mcfiller; - __messages_init; mntopt; modctl; modutx; - __monetary_init; msgctl64; __multi_innetgr; _mutex_destroy { FLAGS = NODYNSORT }; @@ -2763,7 +2708,6 @@ $endif nfssvc; _nfssys; __nis_get_environment; - __nl_langinfo_std; _nss_db_state_destr; nss_default_key2str; nss_delete; @@ -2784,7 +2728,6 @@ $endif __nsw_freeconfig_v1; __nsw_getconfig_v1; __nthreads; - __numeric_init; __openattrdirat; option_to_attr; __priv_bracket; @@ -2798,12 +2741,6 @@ $endif rctlctl; rctllist; _realbufend; - __regcomp_C; - __regcomp_std; - __regerror_std; - __regexec_C; - __regexec_std; - __regfree_std; _resume; _resume_ret; _rpcsys; @@ -2812,7 +2749,6 @@ $endif semctl64; _semctl64; set_setcontext_enforcement; - set_l10n_alternate_root; _setbufend; __set_errno; setprojrctl; @@ -2846,16 +2782,7 @@ $endif str2group; str2passwd; str2spwd; - __strcoll_C; - __strcoll_sb; - __strcoll_std; - __strfmon_std; - __strftime_std; __strptime_dontzero; - __strptime_std; - __strxfrm_C; - __strxfrm_sb; - __strxfrm_std; stty; syscall; _sysconfig; @@ -2883,14 +2810,7 @@ $endif _thr_suspend_mutator; thr_wait_mutator; _thr_wait_mutator; - __time_init; __tls_get_addr; - __towctrans_bc; - __towctrans_std; - __towlower_bc; - __towlower_std; - __towupper_bc; - __towupper_std; tpool_create; tpool_dispatch; tpool_destroy; @@ -2899,8 +2819,6 @@ $endif tpool_suspended; tpool_resume; tpool_member; - _trwctype; - __trwctype_std; _ttyname_dev; _ucred_alloc; ucred_getamask; @@ -2925,43 +2843,8 @@ $endif _vwarnxfp; _warnfp; _warnxfp; - __wcrtomb_dense; - __wcrtomb_euc; - __wcrtomb_sb; - __wcscoll_bc; - __wcscoll_C; - __wcscoll_std; - __wcsftime_std; __wcsftime_xpg5; - __wcsrtombs_dense; - __wcsrtombs_euc; - 
__wcsrtombs_sb; __wcstok_xpg5; - __wcstombs_dense; - __wcstombs_euc; - __wcstombs_sb; - __wcswidth_bc; - __wcswidth_dense; - __wcswidth_euc; - __wcswidth_sb; - __wcswidth_std; - __wcsxfrm_bc; - __wcsxfrm_C; - __wcsxfrm_std; - __wctob_dense; - __wctob_euc; - __wctob_sb; - __wctoeucpc_gen; - __wctomb_dense; - __wctomb_euc; - __wctomb_sb; - __wctrans_std; - __wctype_std; - __wcwidth_bc; - __wcwidth_dense; - __wcwidth_euc; - __wcwidth_sb; - __wcwidth_std; wdbindf; wdchkind; wddelim; diff --git a/usr/src/lib/libc/port/print/doprnt.c b/usr/src/lib/libc/port/print/doprnt.c index c291970f8e..0c644c33bd 100644 --- a/usr/src/lib/libc/port/print/doprnt.c +++ b/usr/src/lib/libc/port/print/doprnt.c @@ -89,7 +89,7 @@ static const wchar_t widenullstr[] = L"(null)"; #define PUT(p, n) \ { \ int retp; \ - retp = put_wide(iop, &bufptr, bufferend, p, n, sflag, lc, fp); \ + retp = put_wide(iop, &bufptr, bufferend, p, n, sflag); \ if (retp == EOF) { \ return ((ssize_t)EOF); \ } \ @@ -299,7 +299,7 @@ _dowrite(const char *p, ssize_t n, FILE *iop, unsigned char **ptrptr) static int put_wide(FILE *iop, unsigned char **bufptr, unsigned char *bufferend, wchar_t *p, size_t n, - int sflag, void *lc, int (*fp_wctomb)(void *, char *, wchar_t)) + int sflag) { unsigned char *newbufptr; wchar_t *q; @@ -330,7 +330,7 @@ put_wide(FILE *iop, unsigned char **bufptr, q = p; tmpq = tmpp; for (len = 0, i = 0; i < n; i++) { - r = fp_wctomb(lc, tmpq, *q++); + r = wctomb(tmpq, *q++); if (r == -1) { lfree(tmpp, tsize); errno = EILSEQ; @@ -423,9 +423,6 @@ _ndoprnt(const char *format, va_list in_args, FILE *iop, int prflag) #ifdef _WIDE int sflag = 0; size_t maxcount; - mbstate_t *mbst; - void *lc; - int (*fp)(void *, char *, wchar_t); #else int snflag = 0; #endif /* _WIDE */ @@ -592,13 +589,6 @@ _ndoprnt(const char *format, va_list in_args, FILE *iop, int prflag) sflag = 1; if (!sflag) { - mbst = _getmbstate(iop); - if (mbst == NULL) { - errno = EBADF; - return (EOF); - } - lc = __mbst_get_lc_and_fp((const 
mbstate_t *)mbst, - (void (*(*))(void))&fp, FP_WCTOMB); #endif /* _WIDE */ /* if first I/O to the stream get a buffer */ /* Note that iop->_base should not equal 0 for sprintf and vsprintf */ diff --git a/usr/src/lib/libc/port/print/vwprintf.c b/usr/src/lib/libc/port/print/vwprintf.c index ae64a62688..3ba770c96a 100644 --- a/usr/src/lib/libc/port/print/vwprintf.c +++ b/usr/src/lib/libc/port/print/vwprintf.c @@ -23,8 +23,10 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ #include "lint.h" #include "file64.h" @@ -65,11 +67,8 @@ vwprintf(const wchar_t *format, va_list ap) FLOCKFILE(lk, stdout); - if (_set_orientation_wide(stdout, NULL, NULL, 0) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } + if (GET_NO_MODE(stdout)) + _setorientation(stdout, _WC_MODE); if (!(stdout->_flag & _IOWRT)) { /* if no write flag */ if (stdout->_flag & _IORW) { @@ -114,11 +113,8 @@ vfwprintf(FILE *iop, const wchar_t *format, va_list ap) FLOCKFILE(lk, iop); - if (_set_orientation_wide(iop, NULL, NULL, 0) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } + if (GET_NO_MODE(iop)) + _setorientation(iop, _WC_MODE); if (!(iop->_flag & _IOWRT)) { /* if no write flag */ diff --git a/usr/src/lib/libc/port/print/wprintf.c b/usr/src/lib/libc/port/print/wprintf.c index 4cdfe715fa..dc7d207766 100644 --- a/usr/src/lib/libc/port/print/wprintf.c +++ b/usr/src/lib/libc/port/print/wprintf.c @@ -23,8 +23,11 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ -#pragma ident "%Z%%M% %I% %E% SMI" #include "lint.h" #include "file64.h" @@ -63,11 +66,8 @@ wprintf(const wchar_t *format, ...) 
va_start(ap, format); FLOCKFILE(lk, stdout); - if (_set_orientation_wide(stdout, NULL, NULL, 0) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } + if (GET_NO_MODE(stdout)) + _setorientation(stdout, _WC_MODE); if (!(stdout->_flag & _IOWRT)) { /* if no write flag */ @@ -109,11 +109,8 @@ fwprintf(FILE *iop, const wchar_t *format, ...) FLOCKFILE(lk, iop); - if (_set_orientation_wide(iop, NULL, NULL, 0) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } + if (GET_NO_MODE(iop)) + _setorientation(iop, _WC_MODE); if (!(iop->_flag & _IOWRT)) { /* if no write flag */ diff --git a/usr/src/lib/libc/port/stdio/data.c b/usr/src/lib/libc/port/stdio/data.c index 42293e2a75..b38494f3a5 100644 --- a/usr/src/lib/libc/port/stdio/data.c +++ b/usr/src/lib/libc/port/stdio/data.c @@ -27,7 +27,10 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ #pragma weak __iob = _iob @@ -53,7 +56,7 @@ Uchar _smbuf[_NFILE + 1][_SMBFSZ] = {0}; /* shared library compatibility */ #define DEFAULTMBSTATE \ - { NULL, NULL, {0, 0, 0, 0, 0, 0, 0, 0}, 0, {0, 0}} + { 0 } #ifdef _LP64 diff --git a/usr/src/lib/libc/port/stdio/mse.c b/usr/src/lib/libc/port/stdio/mse.c index 4d5ee6874a..2b50dfbc69 100644 --- a/usr/src/lib/libc/port/stdio/mse.c +++ b/usr/src/lib/libc/port/stdio/mse.c @@ -24,7 +24,10 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Use is subject to license terms. + */ #include "lint.h" #include "mtlib.h" @@ -43,21 +46,6 @@ /* * DESCRIPTION: - * This function sets the error indicator for the specified stream. - * This is a private API for the L10N method functions, especially - * for fgetwc(). - * - * The stream needs to have been properly locked. Usually, the wrapper - * function of fgetwc() locks the stream. 
- */ -void -__fseterror_u(FILE *iop) -{ - iop->_flag |= _IOERR; -} - -/* - * DESCRIPTION: * This function/macro gets the orientation bound to the specified iop. * * RETURNS: @@ -104,132 +92,3 @@ _setorientation(FILE *iop, _IOP_orientation_t mode) break; } } - -static mbstate_t **__top_mbstates = NULL; -static mutex_t __top_mbstates_lock = DEFAULTMUTEX; - -void -_clear_internal_mbstate(void) -{ - int i; - - lmutex_lock(&__top_mbstates_lock); - if (__top_mbstates) { - for (i = 0; i <= _MAX_MB_FUNC; i++) { - if (*(__top_mbstates + i)) { - lfree(*(__top_mbstates + i), - sizeof (mbstate_t)); - } - } - lfree(__top_mbstates, - (_MAX_MB_FUNC + 1) * sizeof (mbstate_t *)); - __top_mbstates = NULL; - } - lmutex_unlock(&__top_mbstates_lock); -} - -mbstate_t * -_get_internal_mbstate(int item) -{ - if (item < 0 || item > _MAX_MB_FUNC) - return (NULL); - - lmutex_lock(&__top_mbstates_lock); - if (__top_mbstates == NULL) { - __top_mbstates = - lmalloc((_MAX_MB_FUNC + 1) * sizeof (mbstate_t *)); - if (__top_mbstates == NULL) { - lmutex_unlock(&__top_mbstates_lock); - return (NULL); - } - *(__top_mbstates + item) = lmalloc(sizeof (mbstate_t)); - if (*(__top_mbstates + item) == NULL) { - lmutex_unlock(&__top_mbstates_lock); - return (NULL); - } - lmutex_unlock(&__top_mbstates_lock); - return (*(__top_mbstates + item)); - } - if (*(__top_mbstates + item) == NULL) { - *(__top_mbstates + item) = lmalloc(sizeof (mbstate_t)); - if (*(__top_mbstates + item) == NULL) { - lmutex_unlock(&__top_mbstates_lock); - return (NULL); - } - } - lmutex_unlock(&__top_mbstates_lock); - return (*(__top_mbstates + item)); -} - -/* - * From page 32 of XSH5 - * Once a wide-character I/O function has been applied - * to a stream without orientation, the stream becomes - * wide-orientated. Similarly, once a byte I/O function - * has been applied to a stream without orientation, - * the stream becomes byte-orientated. 
Only a call to - * the freopen() function or the fwide() function can - * otherwise alter the orientation of a stream. - */ - -/* - * void - * _set_orientation_byte(FILE *iop) - * - * Note: this is now implemented as macro __SET_ORIENTATION_BYTE() - * (in libc/inc/mse.h) for performance improvement. - */ - -/* Returns the value of 'ps->__nconsumed' */ -char -__mbst_get_nconsumed(const mbstate_t *ps) -{ - return (ps->__nconsumed); -} - -/* Sets 'n' to 'ps->__nconsumed' */ -void -__mbst_set_nconsumed(mbstate_t *ps, char n) -{ - ps->__nconsumed = n; -} - -/* Copies 'len' bytes from '&ps->__consumed[index]' to 'str' */ -int -__mbst_get_consumed_array(const mbstate_t *ps, char *str, - size_t index, size_t len) -{ - if ((index + len) > 8) { - /* The max size of __consumed[] is 8 */ - return (-1); - } - (void) memcpy((void *)str, (const void *)&ps->__consumed[index], len); - return (0); -} - -/* Copies 'len' bytes from 'str' to '&ps->__consumed[index]' */ -int -__mbst_set_consumed_array(mbstate_t *ps, const char *str, - size_t index, size_t len) -{ - if ((index + len) > 8) { - /* The max size of __consumed[] is 8 */ - return (-1); - } - (void) memcpy((void *)&ps->__consumed[index], (const void *)str, len); - return (0); -} - -/* Returns 'ps->__lc_locale' */ -void * -__mbst_get_locale(const mbstate_t *ps) -{ - return (ps->__lc_locale); -} - -/* Sets 'loc' to 'ps->__lc_locale' */ -void -__mbst_set_locale(mbstate_t *ps, const void *loc) -{ - ps->__lc_locale = (void *)loc; -} diff --git a/usr/src/lib/libc/port/stdio/vwscanf.c b/usr/src/lib/libc/port/stdio/vwscanf.c index 7448e6ffad..76cfabf4ae 100644 --- a/usr/src/lib/libc/port/stdio/vwscanf.c +++ b/usr/src/lib/libc/port/stdio/vwscanf.c @@ -54,11 +54,8 @@ vwscanf(const wchar_t *fmt, va_list ap) FLOCKFILE(lk, stdin); - if (_set_orientation_wide(stdin, NULL, NULL, 0) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } + if (GET_NO_MODE(stdin)) + _setorientation(stdin, _WC_MODE); #ifdef _C89_INTMAX32 ret = 
__wdoscan_u(stdin, fmt, ap, _F_INTMAX32); @@ -81,11 +78,8 @@ vfwscanf(FILE *iop, const wchar_t *fmt, va_list ap) FLOCKFILE(lk, iop); - if (_set_orientation_wide(iop, NULL, NULL, 0) == -1) { - errno = EBADF; - FUNLOCKFILE(lk); - return (EOF); - } + if (GET_NO_MODE(iop)) + _setorientation(iop, _WC_MODE); #ifdef _C89_INTMAX32 diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index b03c067933..07c5f2e8e6 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ b/usr/src/lib/libc/sparc/Makefile.com @@ -22,6 +22,9 @@ # # Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. # +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. +# LIBCDIR= $(SRC)/lib/libc LIB_PIC= libc_pic.a @@ -717,11 +720,6 @@ PORTSTDIO= \ wscanf.o PORTI18N= \ - __fgetwc_xpg5.o \ - __fgetws_xpg5.o \ - __fputwc_xpg5.o \ - __fputws_xpg5.o \ - __ungetwc_xpg5.o \ getwchar.o \ putwchar.o \ putws.o \ @@ -766,12 +764,79 @@ PORTI18N= \ wdresolve.o \ _ctype.o \ isascii.o \ + isdigit.o \ toascii.o PORTI18N_COND= \ wcstol_longlong.o \ wcstoul_longlong.o +PORTLOCALE= \ + ascii.o \ + big5.o \ + btowc.o \ + collate.o \ + collcmp.o \ + euc.o \ + fnmatch.o \ + fgetwc.o \ + fgetws.o \ + fputwc.o \ + fputws.o \ + fwide.o \ + gb18030.o \ + gb2312.o \ + gbk.o \ + getdate.o \ + iswctype.o \ + ldpart.o \ + lmessages.o \ + lnumeric.o \ + lmonetary.o \ + localeconv.o \ + mbftowc.o \ + mblen.o \ + mbrlen.o \ + mbrtowc.o \ + mbsinit.o \ + mbsrtowcs.o \ + mbstowcs.o \ + mbtowc.o \ + mskanji.o \ + none.o \ + regcomp.o \ + regfree.o \ + regerror.o \ + regexec.o \ + rune.o \ + runetype.o \ + setlocale.o \ + setrunelocale.o \ + strcoll.o \ + strfmon.o \ + strftime.o \ + strptime.o \ + strxfrm.o \ + table.o \ + timelocal.o \ + tolower.o \ + towlower.o \ + ungetwc.o \ + utf8.o \ + wcrtomb.o \ + wcscoll.o \ + wcsftime.o \ + wcsrtombs.o \ + wcswidth.o \ + wcstombs.o \ + wcsxfrm.o \ + wctob.o \ + wctomb.o \ + wctrans.o \ + wctype.o \ + 
wcwidth.o \ + wscol.o + AIOOBJS= \ aio.o \ aio_alloc.o \ @@ -911,6 +976,7 @@ MOSTOBJS= \ $(PORTGEN64) \ $(PORTI18N) \ $(PORTI18N_COND) \ + $(PORTLOCALE) \ $(PORTPRINT) \ $(PORTPRINT_C89) \ $(PORTPRINT_W) \ @@ -1051,6 +1117,7 @@ SRCS= \ $(PORTFP:%.o=$(LIBCDIR)/port/fp/%.c) \ $(PORTGEN:%.o=$(LIBCDIR)/port/gen/%.c) \ $(PORTI18N:%.o=$(LIBCDIR)/port/i18n/%.c) \ + $(PORTLOCALE:%.o=$(LIBCDIR)/port/locale/%.c) \ $(PORTPRINT:%.o=$(LIBCDIR)/port/print/%.c) \ $(PORTREGEX:%.o=$(LIBCDIR)/port/regex/%.c) \ $(PORTSTDIO:%.o=$(LIBCDIR)/port/stdio/%.c) \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index 2144a07e17..f7674e613c 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -22,6 +22,9 @@ # # Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. # +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. +# LIBCDIR= $(SRC)/lib/libc LIB_PIC= libc_pic.a @@ -668,11 +671,6 @@ PORTSTDIO= \ wscanf.o PORTI18N= \ - __fgetwc_xpg5.o \ - __fgetws_xpg5.o \ - __fputwc_xpg5.o \ - __fputws_xpg5.o \ - __ungetwc_xpg5.o \ getwchar.o \ putwchar.o \ putws.o \ @@ -717,12 +715,79 @@ PORTI18N= \ wdresolve.o \ _ctype.o \ isascii.o \ + isdigit.o \ toascii.o PORTI18N_COND= \ wcstol_longlong.o \ wcstoul_longlong.o +PORTLOCALE= \ + ascii.o \ + big5.o \ + btowc.o \ + collate.o \ + collcmp.o \ + euc.o \ + fnmatch.o \ + fgetwc.o \ + fgetws.o \ + fputwc.o \ + fputws.o \ + fwide.o \ + gb18030.o \ + gb2312.o \ + gbk.o \ + getdate.o \ + iswctype.o \ + ldpart.o \ + lmessages.o \ + lnumeric.o \ + lmonetary.o \ + localeconv.o \ + mbftowc.o \ + mblen.o \ + mbrlen.o \ + mbrtowc.o \ + mbsinit.o \ + mbsrtowcs.o \ + mbstowcs.o \ + mbtowc.o \ + mskanji.o \ + none.o \ + regcomp.o \ + regfree.o \ + regerror.o \ + regexec.o \ + rune.o \ + runetype.o \ + setlocale.o \ + setrunelocale.o \ + strcoll.o \ + strfmon.o \ + strftime.o \ + strptime.o \ + strxfrm.o \ + table.o \ + 
timelocal.o \ + tolower.o \ + towlower.o \ + ungetwc.o \ + utf8.o \ + wcrtomb.o \ + wcscoll.o \ + wcsftime.o \ + wcsrtombs.o \ + wcswidth.o \ + wcstombs.o \ + wcsxfrm.o \ + wctob.o \ + wctomb.o \ + wctrans.o \ + wctype.o \ + wcwidth.o \ + wscol.o + AIOOBJS= \ aio.o \ aio_alloc.o \ @@ -861,6 +926,7 @@ MOSTOBJS= \ $(PORTGEN64) \ $(PORTI18N) \ $(PORTI18N_COND) \ + $(PORTLOCALE) \ $(PORTPRINT) \ $(PORTPRINT_W) \ $(PORTREGEX) \ @@ -989,6 +1055,7 @@ SRCS= \ $(PORTFP:%.o=$(LIBCDIR)/port/fp/%.c) \ $(PORTGEN:%.o=$(LIBCDIR)/port/gen/%.c) \ $(PORTI18N:%.o=$(LIBCDIR)/port/i18n/%.c) \ + $(PORTLOCALE:%.o=$(LIBCDIR)/port/locale/%.c) \ $(PORTPRINT:%.o=$(LIBCDIR)/port/print/%.c) \ $(PORTREGEX:%.o=$(LIBCDIR)/port/regex/%.c) \ $(PORTSTDIO:%.o=$(LIBCDIR)/port/stdio/%.c) \ |