diff options
Diffstat (limited to 'usr/src')
53 files changed, 2440 insertions, 651 deletions
diff --git a/usr/src/head/Makefile b/usr/src/head/Makefile index 75e2788897..1f598f5849 100644 --- a/usr/src/head/Makefile +++ b/usr/src/head/Makefile @@ -201,6 +201,7 @@ HDRS= $($(MACH)_HDRS) \ time.h \ tiuser.h \ tzfile.h \ + uchar.h \ ucontext.h \ ucred.h \ ulimit.h \ @@ -231,7 +232,7 @@ ISOHDRS = \ locale_iso.h \ math_c99.h \ math_iso.h \ - setjmp_iso.h \ + setjmp_iso.h \ signal_iso.h \ stdarg_c99.h \ stdarg_iso.h \ @@ -247,7 +248,7 @@ ISOHDRS = \ wchar_iso.h \ wctype_iso.h -ARPAHDRS = \ +ARPAHDRS = \ ftp.h \ inet.h \ nameser.h \ @@ -414,7 +415,7 @@ $(ROOT)/usr/include/%: % .PARALLEL: $(ROOTHDRS) $(CHECKHDRS) -install_h: $(ROOTDIRS) .WAIT $(ROOTHDRS) $(SYMHDRASSERT) $(SYMHDRERRNO) \ +install_h: $(ROOTDIRS) .WAIT $(ROOTHDRS) $(SYMHDRASSERT) $(SYMHDRERRNO) \ $(SYMHDRFLOAT) $(SYMHDRISO646) check: $(CHECKHDRS) diff --git a/usr/src/head/uchar.h b/usr/src/head/uchar.h new file mode 100644 index 0000000000..83d998b4a1 --- /dev/null +++ b/usr/src/head/uchar.h @@ -0,0 +1,74 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +#ifndef _UCHAR_H +#define _UCHAR_H + +/* + * C11 Unicode utilities support. + * + * Note, we do not define either __STDC_UTF_16__ or __STDC_UTF_32__. While the + * functions that are implemented work in that fashion, the ability to represent + * any UTF-16 or UTF-32 code point depends on the current locale. Though in + * practice they function that way. 
+ */ + +#include <sys/isa_defs.h> +#include <sys/feature_tests.h> +#include <wchar_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(_SIZE_T) || __cplusplus >= 199711L +#define _SIZE_T +#if defined(_LP64) || defined(_I32LPx) +typedef unsigned long size_t; /* size of something in bytes */ +#else +typedef unsigned int size_t; /* (historical version) */ +#endif +#endif /* _SIZE_T */ + +#if !defined(_MBSTATE_T) || __cplusplus >= 199711L +#define _MBSTATE_T +typedef __mbstate_t mbstate_t; +#endif /* _MBSTATE_T */ + +/* + * These types must match the uint_least16_t and uint_least32_t. They are + * defined in terms of the same type so as to minimize the needed includes. + * C++11 also defines these types and they are considered built in, so we should + * not define them in that context. + */ +#if __cplusplus < 201103L +typedef unsigned short char16_t; +typedef unsigned int char32_t; +#endif + +extern size_t mbrtoc16(char16_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +extern size_t mbrtoc32(char32_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, + size_t, mbstate_t *_RESTRICT_KYWD); +extern size_t c16rtomb(char *_RESTRICT_KYWD, char16_t, + mbstate_t *_RESTRICT_KYWD); +extern size_t c32rtomb(char *_RESTRICT_KYWD, char32_t, + mbstate_t *_RESTRICT_KYWD); + +#ifdef __cplusplus +} +#endif + +#endif /* _UCHAR_H */ diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index c21ef76ee2..a134e08c87 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -747,6 +747,8 @@ PORTI18N_COND= \ PORTLOCALE= \ big5.o \ btowc.o \ + c16rtomb.o \ + c32rtomb.o \ collate.o \ collcmp.o \ euc.o \ @@ -772,6 +774,8 @@ PORTLOCALE= \ mbftowc.o \ mblen.o \ mbrlen.o \ + mbrtoc16.o \ + mbrtoc32.o \ mbrtowc.o \ mbsinit.o \ mbsnrtowcs.o \ diff --git a/usr/src/lib/libc/amd64/unwind/call_frame_inst.c b/usr/src/lib/libc/amd64/unwind/call_frame_inst.c index 82d357fb0c..3e23a5e25b 100644 ---
a/usr/src/lib/libc/amd64/unwind/call_frame_inst.c +++ b/usr/src/lib/libc/amd64/unwind/call_frame_inst.c @@ -23,6 +23,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2012 Milan Jurik. All rights reserved. + * Copyright 2020 Oxide Computer Company */ /* @@ -471,19 +472,25 @@ _Unw_get_val(void **datap, ptrdiff_t reloc, static uint64_t get_encoded_val(void **datap, ptrdiff_t reloc, int enc) { - int val = enc & 0xf; - int rel = (enc >> 4) & 0xf; + const uint8_t val = enc & 0xf; + const uint8_t rel = enc & 0x70; + const boolean_t indirect = (enc & 0x80) != 0; intptr_t loc = ((intptr_t)*datap) + reloc; uint64_t res = 0; + /* + * Calculate the offset represented by the pointer encoding. These + * DWARF extensions are defined in the Core Generic document set of the + * LSB specification. + */ switch (val) { case 0x01: res = _Unw_get_val(datap, reloc, ULEB128, 1, 1, 0); break; - case 0x2: + case 0x02: res = _Unw_get_val(datap, reloc, UNUM16, 1, 1, 0); break; - case 0x3: + case 0x03: res = _Unw_get_val(datap, reloc, UNUM32, 1, 1, 0); break; case 0x04: @@ -502,11 +509,11 @@ get_encoded_val(void **datap, ptrdiff_t reloc, int enc) res = _Unw_get_val(datap, reloc, SNUM64, 1, 1, 0); break; } - switch (rel) { - case 0: + case 0x00: break; - case 1: + case 0x10: + /* DW_EH_PE_pcrel */ if (res != 0) res += loc; break; @@ -514,6 +521,26 @@ get_encoded_val(void **datap, ptrdiff_t reloc, int enc) /* remainder not implemented */ break; } + + /* + * The high bit of the pointer encoding (DW_EH_PE_indirect = 0x80) + * indicates that a pointer-sized value should be read from the + * calculated address as the final result. + * + * Shockingly, this is not documented in any specification to date, but + * has been implemented in various unwind implementations through + * reverse-engineering of GCC. 
+ */ + if (indirect) { + void *addr = (void *)(uintptr_t)res; + + /* + * Built only for amd64, we can count on a 64-bit pointer size + * for the indirect handling. + */ + res = _Unw_get_val(&addr, reloc, UNUM64, 1, 1, 0); + } + return (res); } diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index d9e56ee8ec..13218d9980 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -794,6 +794,8 @@ PORTI18N_COND= \ PORTLOCALE= \ big5.o \ btowc.o \ + c16rtomb.o \ + c32rtomb.o \ collate.o \ collcmp.o \ euc.o \ @@ -819,6 +821,8 @@ PORTLOCALE= \ mbftowc.o \ mblen.o \ mbrlen.o \ + mbrtoc16.o \ + mbrtoc32.o \ mbrtowc.o \ mbsinit.o \ mbsnrtowcs.o \ diff --git a/usr/src/lib/libc/port/locale/big5.c b/usr/src/lib/libc/port/locale/big5.c index 889b182de5..53b3a31f59 100644 --- a/usr/src/lib/libc/port/locale/big5.c +++ b/usr/src/lib/libc/port/locale/big5.c @@ -55,10 +55,6 @@ static size_t _BIG5_wcsnrtombs(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); -typedef struct { - wchar_t ch; -} _BIG5State; - void _BIG5_init(struct lc_ctype *lct) { diff --git a/usr/src/lib/libc/port/locale/c16rtomb.c b/usr/src/lib/libc/port/locale/c16rtomb.c new file mode 100644 index 0000000000..050f06206e --- /dev/null +++ b/usr/src/lib/libc/port/locale/c16rtomb.c @@ -0,0 +1,78 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +/* + * C11 c16rtomb(3C) support. + * + * Convert a series of char16_t values into a series of multi-byte characters. 
+ * We may be given a surrogate value, so we need to potentially store that in + * the interim. + */ + +#include <uchar.h> +#include <errno.h> +#include "mblocal.h" +#include "unicode.h" + +static mbstate_t c16rtomb_state; + +size_t +c16rtomb(char *restrict str, char16_t c16, mbstate_t *restrict ps) +{ + char32_t c32; + _CHAR16State *c16s; + + if (ps == NULL) { + ps = &c16rtomb_state; + } + + if (str == NULL) { + c16 = L'\0'; + } + + c16s = (_CHAR16State *)ps; + if (c16s->c16_surrogate != 0) { + if (c16 > UNICODE_SUR_MAX || c16 < UNICODE_SUR_MIN || + (c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_LOWER) { + errno = EILSEQ; + return ((size_t)-1); + } + + c32 = UNICODE_SUR_UVALUE(c16s->c16_surrogate) | + UNICODE_SUR_LVALUE(c16); + c32 += UNICODE_SUP_START; + c16s->c16_surrogate = 0; + } else if (c16 >= UNICODE_SUR_MIN && c16 <= UNICODE_SUR_MAX) { + /* + * The lower surrogate pair mask (dc00) overlaps the upper mask + * (d800), hence why we do a binary and with the upper mask. + */ + if ((c16 & UNICODE_SUR_LOWER) != UNICODE_SUR_UPPER) { + errno = EILSEQ; + return ((size_t)-1); + } + + c16s->c16_surrogate = c16; + return (0); + } else { + c32 = c16; + } + + /* + * Call c32rtomb() and not wcrtomb() so that way all of the unicode code + * point validation is performed. + */ + return (c32rtomb(str, c32, ps)); +} diff --git a/usr/src/lib/libc/port/locale/c32rtomb.c b/usr/src/lib/libc/port/locale/c32rtomb.c new file mode 100644 index 0000000000..a4d7a591f1 --- /dev/null +++ b/usr/src/lib/libc/port/locale/c32rtomb.c @@ -0,0 +1,50 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +/* + * C11 c32rtomb(3C) support. + * + * The char32_t type is designed to represent a UTF-32 value, which is what we + * can represent with a wchar_t. This is basically a wrapper around wcrtomb(). + */ + +#include <locale.h> +#include <wchar.h> +#include <xlocale.h> +#include <uchar.h> +#include <errno.h> +#include "unicode.h" + +static mbstate_t c32rtomb_state; + +size_t +c32rtomb(char *restrict str, char32_t c32, mbstate_t *restrict ps) +{ + if ((c32 >= UNICODE_SUR_MIN && c32 <= UNICODE_SUR_MAX) || + c32 > UNICODE_SUP_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + + if (ps == NULL) { + ps = &c32rtomb_state; + } + + if (str == NULL) { + c32 = L'\0'; + } + + return (wcrtomb_l(str, (wchar_t)c32, ps, uselocale((locale_t)0))); +} diff --git a/usr/src/lib/libc/port/locale/euc.c b/usr/src/lib/libc/port/locale/euc.c index 1d1d25b17b..97c7bc3bea 100644 --- a/usr/src/lib/libc/port/locale/euc.c +++ b/usr/src/lib/libc/port/locale/euc.c @@ -101,12 +101,6 @@ static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD, static int _EUC_mbsinit(const mbstate_t *); -typedef struct { - wchar_t ch; - int set; - int want; -} _EucState; - int _EUC_mbsinit(const mbstate_t *ps) { diff --git a/usr/src/lib/libc/port/locale/gb18030.c b/usr/src/lib/libc/port/locale/gb18030.c index 36c48c5cc5..3901270a8d 100644 --- a/usr/src/lib/libc/port/locale/gb18030.c +++ b/usr/src/lib/libc/port/locale/gb18030.c @@ -55,12 +55,6 @@ static size_t _GB18030_wcsnrtombs(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); - -typedef struct { - int count; - uchar_t bytes[4]; -} _GB18030State; - void _GB18030_init(struct lc_ctype *lct) { diff --git a/usr/src/lib/libc/port/locale/gb2312.c b/usr/src/lib/libc/port/locale/gb2312.c index bfb6c0177b..9b42060d10 100644 --- a/usr/src/lib/libc/port/locale/gb2312.c +++ b/usr/src/lib/libc/port/locale/gb2312.c @@ -50,12 +50,6 @@ static size_t _GB2312_wcsnrtombs(char 
*_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); - -typedef struct { - int count; - uchar_t bytes[2]; -} _GB2312State; - void _GB2312_init(struct lc_ctype *lct) { diff --git a/usr/src/lib/libc/port/locale/gbk.c b/usr/src/lib/libc/port/locale/gbk.c index f422ce8fb5..6202091f87 100644 --- a/usr/src/lib/libc/port/locale/gbk.c +++ b/usr/src/lib/libc/port/locale/gbk.c @@ -55,10 +55,6 @@ static size_t _GBK_wcsnrtombs(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); -typedef struct { - wchar_t ch; -} _GBKState; - void _GBK_init(struct lc_ctype *lct) { diff --git a/usr/src/lib/libc/port/locale/mblocal.h b/usr/src/lib/libc/port/locale/mblocal.h index 3d958e364e..2873962944 100644 --- a/usr/src/lib/libc/port/locale/mblocal.h +++ b/usr/src/lib/libc/port/locale/mblocal.h @@ -31,6 +31,64 @@ #include "runetype.h" #include "lctype.h" +#include <uchar.h> + +/* + * Actual implementation structures for mbstate_t data. + * + * All of the conversion states are independent of one another, with the + * exception of that used for mbrtoc16(). That needs to encode data not as a + * wide-character but as UTF-16 data, which means handling surrogate pairs. To + * minimize the amount of state in each locale, we instead have a conversion + * state for this which includes all the other conversion states, plus extra + * data to accommodate this.
+ */ +typedef struct { + wchar_t ch; +} _BIG5State; + +typedef struct { + wchar_t ch; + int set; + int want; +} _EucState; + +typedef struct { + int count; + uchar_t bytes[4]; +} _GB18030State; + +typedef struct { + int count; + uchar_t bytes[2]; +} _GB2312State; + +typedef struct { + wchar_t ch; +} _GBKState; + +typedef struct { + wchar_t ch; +} _MSKanjiState; + +typedef struct { + wchar_t ch; + int want; + wchar_t lbound; +} _UTF8State; + +typedef struct { + union { + _BIG5State c16_big5; + _EucState c16_euc; + _GB18030State c16_gb18030; + _GB2312State c16_gb2312; + _GBKState c16_gbk; + _MSKanjiState c16_mskanji; + _UTF8State c16_utf8; + } c16_state; + char16_t c16_surrogate; +} _CHAR16State; /* * Rune initialization function prototypes. diff --git a/usr/src/lib/libc/port/locale/mbrtoc16.c b/usr/src/lib/libc/port/locale/mbrtoc16.c new file mode 100644 index 0000000000..a8e6e8119b --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbrtoc16.c @@ -0,0 +1,91 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +/* + * C11 mbrtoc16(3C) support. + * + * The char16_t represents a UTF-16 encoding. This means that we have to deal + * with surrogate pairs. + */ + +#include <locale.h> +#include <wchar.h> +#include <xlocale.h> +#include <uchar.h> +#include "mblocal.h" +#include "unicode.h" + +#include <sys/debug.h> + +/* + * Ensure that we never cause our save state to ever exceed that of the + * mbstate_t. See the block comment in mblocal.h. 
+ */ +CTASSERT(sizeof (_CHAR16State) <= sizeof (mbstate_t)); + +static mbstate_t mbrtoc16_state; + +size_t +mbrtoc16(char16_t *restrict pc16, const char *restrict str, size_t len, + mbstate_t *restrict ps) +{ + wchar_t wc; + size_t ret; + char16_t out; + _CHAR16State *c16s; + + if (ps == NULL) { + ps = &mbrtoc16_state; + } + + if (str == NULL) { + pc16 = NULL; + str = ""; + len = 1; + } + + c16s = (_CHAR16State *)ps; + if (c16s->c16_surrogate != 0) { + if (pc16 != NULL) { + *pc16 = c16s->c16_surrogate; + } + c16s->c16_surrogate = 0; + return ((size_t)-3); + } + + ret = mbrtowc_l(&wc, str, len, ps, uselocale(NULL)); + if ((ssize_t)ret < 0) { + return (ret); + } + + /* + * If this character is not in the basic multilingual plane then we need + * a surrogate character to represent it in UTF-16 and we will need to + * write that out on the next iteration. + */ + if (wc >= UNICODE_SUP_START) { + wc -= UNICODE_SUP_START; + c16s->c16_surrogate = UNICODE_SUR_LOWER | UNICODE_SUR_LMASK(wc); + out = UNICODE_SUR_UPPER | UNICODE_SUR_UMASK(wc); + } else { + out = (char16_t)wc; + } + + if (pc16 != NULL) { + *pc16 = out; + } + + return (ret); +} diff --git a/usr/src/lib/libc/port/locale/mbrtoc32.c b/usr/src/lib/libc/port/locale/mbrtoc32.c new file mode 100644 index 0000000000..46df3fb1f0 --- /dev/null +++ b/usr/src/lib/libc/port/locale/mbrtoc32.c @@ -0,0 +1,46 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +/* + * C11 mbrtoc32(3C) support. + * + * The char32_t type is designed to represent UTF-32. Conveniently, the wchar_t + * is as well. 
In this case, we can just pass this directly to mbrtowc_l(). + */ + +#include <locale.h> +#include <wchar.h> +#include <xlocale.h> +#include <uchar.h> + +static mbstate_t mbrtoc32_state; + +size_t +mbrtoc32(char32_t *restrict pc32, const char *restrict str, size_t len, + mbstate_t *restrict ps) +{ + if (ps == NULL) { + ps = &mbrtoc32_state; + } + + if (str == NULL) { + pc32 = NULL; + str = ""; + len = 1; + } + + return (mbrtowc_l((wchar_t *)pc32, str, len, ps, + uselocale((locale_t)0))); +} diff --git a/usr/src/lib/libc/port/locale/mbsinit.c b/usr/src/lib/libc/port/locale/mbsinit.c index ae956d0f15..e6227ee62e 100644 --- a/usr/src/lib/libc/port/locale/mbsinit.c +++ b/usr/src/lib/libc/port/locale/mbsinit.c @@ -17,10 +17,28 @@ #include <locale.h> #include "localeimpl.h" #include "lctype.h" +#include "mblocal.h" int mbsinit_l(const mbstate_t *s, locale_t loc) { + + /* + * To implement support for the C11 char16_t conversion functions + * (mbrtoc16() and c16rtomb()) we opted to leverage all of the existing + * conversion infrastructure, including the per-locale conversion + * structures. The char16_t conversion functions tack an extra member in + * the mbstate_t that occurs after all others have placed their data. + * Therefore, before we go to the per-locale backend we need to see if + * there is any outstanding state in the char16_t specific state. 
+ */ + if (s != NULL) { + const _CHAR16State *c16s = (const _CHAR16State *)s; + if (c16s->c16_surrogate != 0) { + return (0); + } + } + return (loc->ctype->lc_mbsinit(s)); } diff --git a/usr/src/lib/libc/port/locale/mskanji.c b/usr/src/lib/libc/port/locale/mskanji.c index 69955e5afa..a0a1f193ce 100644 --- a/usr/src/lib/libc/port/locale/mskanji.c +++ b/usr/src/lib/libc/port/locale/mskanji.c @@ -57,10 +57,6 @@ static size_t _MSKanji_wcsnrtombs(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); -typedef struct { - wchar_t ch; -} _MSKanjiState; - void _MSKanji_init(struct lc_ctype *lct) { diff --git a/usr/src/lib/libc/port/locale/unicode.h b/usr/src/lib/libc/port/locale/unicode.h new file mode 100644 index 0000000000..558ef2be13 --- /dev/null +++ b/usr/src/lib/libc/port/locale/unicode.h @@ -0,0 +1,68 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +#ifndef _UNICODE_H +#define _UNICODE_H + +/* + * Common definitions for dealing with Unicode. + * + * UTF-16 encodes data as a series of two byte values. However, there are more + * than 16-bit of code points. Code points inside of the first 16-bits are + * referred to as existing in the 'basic multilingual plane' (BMP). Those + * outside of it are in the 'supplementary plane'. When such a code point is + * encountered, it is encoded as a series of two uint16_t values. + * + * A value which is up to 20 bits (the current limit of the unicode code point + * space) is encoded by splitting it into two 10-bit values. 
The upper 10 bits + * are ORed with 0xd800 and the lower 10 bits are ORed with 0xdc00. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Range of Unicode code points reserved for surrogate characters. + */ +#define UNICODE_SUR_MIN 0xd800 +#define UNICODE_SUR_MAX 0xdfff + +/* + * Range of Unicode code points in supplementary planes. + */ +#define UNICODE_SUP_START 0x10000 +#define UNICODE_SUP_MAX 0x10ffff + +/* + * Starting constants for surrogate pairs. + */ +#define UNICODE_SUR_UPPER 0xd800 +#define UNICODE_SUR_LOWER 0xdc00 + +/* + * Macros to extract the value from a surrogate pair and to take a code point + * and transform it into the surrogate version. + */ +#define UNICODE_SUR_UVALUE(x) (((x) & 0x3ff) << 10) +#define UNICODE_SUR_LVALUE(x) ((x) & 0x3ff) +#define UNICODE_SUR_UMASK(x) (((x) >> 10) & 0x3ff) +#define UNICODE_SUR_LMASK(x) ((x) & 0x3ff) + +#ifdef __cplusplus +} +#endif + +#endif /* _UNICODE_H */ diff --git a/usr/src/lib/libc/port/locale/utf8.c b/usr/src/lib/libc/port/locale/utf8.c index a6e037d94e..133bd3bf01 100644 --- a/usr/src/lib/libc/port/locale/utf8.c +++ b/usr/src/lib/libc/port/locale/utf8.c @@ -48,12 +48,6 @@ static size_t _UTF8_wcsnrtombs(char *_RESTRICT_KYWD, const wchar_t **_RESTRICT_KYWD, size_t, size_t, mbstate_t *_RESTRICT_KYWD); -typedef struct { - wchar_t ch; - int want; - wchar_t lbound; -} _UTF8State; - void _UTF8_init(struct lc_ctype *lct) { diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index da8de86f25..eb81629318 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -78,6 +78,14 @@ $if _x86 && _ELF64 $add amd64 $endif +SYMBOL_VERSION ILLUMOS_0.33 { + protected: + c16rtomb; + c32rtomb; + mbrtoc16; + mbrtoc32; +} ILLUMOS_0.32; + SYMBOL_VERSION ILLUMOS_0.32 { protected: fmemopen; diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index ee580e8513..0d5febe066 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ 
b/usr/src/lib/libc/sparc/Makefile.com @@ -823,6 +823,8 @@ PORTI18N_COND= \ PORTLOCALE= \ big5.o \ btowc.o \ + c16rtomb.o \ + c32rtomb.o \ collate.o \ collcmp.o \ euc.o \ @@ -848,6 +850,8 @@ PORTLOCALE= \ mbftowc.o \ mblen.o \ mbrlen.o \ + mbrtoc16.o \ + mbrtoc32.o \ mbrtowc.o \ mbsinit.o \ mbsnrtowcs.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index 3aacce8f61..2a1dc8d0cd 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -766,6 +766,8 @@ PORTI18N_COND= \ PORTLOCALE= \ big5.o \ btowc.o \ + c16rtomb.o \ + c32rtomb.o \ collate.o \ collcmp.o \ euc.o \ @@ -791,6 +793,8 @@ PORTLOCALE= \ mbftowc.o \ mblen.o \ mbrlen.o \ + mbrtoc16.o \ + mbrtoc32.o \ mbrtowc.o \ mbsinit.o \ mbsnrtowcs.o \ diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile index 726125d3e9..0387bbd608 100644 --- a/usr/src/man/man3c/Makefile +++ b/usr/src/man/man3c/Makefile @@ -62,6 +62,7 @@ MANFILES= __fbufsize.3c \ bstring.3c \ btowc.3c \ byteorder.3c \ + c16rtomb.3c \ call_once.3c \ catgets.3c \ catopen.3c \ @@ -241,7 +242,7 @@ MANFILES= __fbufsize.3c \ malloc.3c \ mblen.3c \ mbrlen.3c \ - mbrtowc.3c \ + mbrtoc16.3c \ mbsinit.3c \ mbsrtowcs.3c \ mbstowcs.3c \ @@ -543,7 +544,6 @@ MANFILES= __fbufsize.3c \ waitpid.3c \ walkcontext.3c \ wcpcpy.3c \ - wcrtomb.3c \ wcscasecmp.3c \ wcscoll.3c \ wcsdup.3c \ @@ -729,6 +729,7 @@ MANLINKS= FD_CLR.3c \ bindtextdomain.3c \ btowc_l.3c \ bzero.3c \ + c32rtomb.3c \ calloc.3c \ canonicalize_file_name.3c \ catclose.3c \ @@ -1006,6 +1007,8 @@ MANLINKS= FD_CLR.3c \ major.3c \ mblen_l.3c \ mbrlen_l.3c \ + mbrtoc32.3c \ + mbrtowc.3c \ mbrtowc_l.3c \ mbsinit_l.3c \ mbsnrtowcs.3c \ @@ -1385,6 +1388,7 @@ MANLINKS= FD_CLR.3c \ watol.3c \ watoll.3c \ wcpncpy.3c \ + wcrtomb.3c \ wcrtomb_l.3c \ wcscasecmp_l.3c \ wcscat.3c \ @@ -1599,6 +1603,10 @@ ntohl.3c := LINKSRC = byteorder.3c ntohll.3c := LINKSRC = byteorder.3c ntohs.3c := LINKSRC = byteorder.3c +c32rtomb.3c := 
LINKSRC = c16rtomb.3c +wcrtomb.3c := LINKSRC = c16rtomb.3c +wcrtomb_l.3c := LINKSRC = c16rtomb.3c + canonicalize_file_name.3c := LINKSRC = realpath.3c catclose.3c := LINKSRC = catopen.3c @@ -2012,10 +2020,12 @@ mblen_l.3c := LINKSRC = mblen.3c mbrlen_l.3c := LINKSRC = mbrlen.3c -mbrtowc_l.3c := LINKSRC = mbrtowc.3c - mbsinit_l.3c := LINKSRC = mbsinit.3c +mbrtoc32.3c := LINKSRC = mbrtoc16.3c +mbrtowc.3c := LINKSRC = mbrtoc16.3c +mbrtowc_l.3c := LINKSRC = mbrtoc16.3c + mbsnrtowcs.3c := LINKSRC = mbsrtowcs.3c mbsnrtowcs_l.3c := LINKSRC = mbsrtowcs.3c mbsrtowcs_l.3c := LINKSRC = mbsrtowcs.3c @@ -2509,8 +2519,6 @@ printstack.3c := LINKSRC = walkcontext.3c wcpncpy.3c := LINKSRC = wcpcpy.3c -wcrtomb_l.3c := LINKSRC = wcrtomb.3c - wcscasecmp_l.3c := LINKSRC = wcscasecmp.3c wcsncasecmp.3c := LINKSRC = wcscasecmp.3c wcsncasecmp_l.3c := LINKSRC = wcscasecmp.3c diff --git a/usr/src/man/man3c/c16rtomb.3c b/usr/src/man/man3c/c16rtomb.3c new file mode 100644 index 0000000000..33c6189dd3 --- /dev/null +++ b/usr/src/man/man3c/c16rtomb.3c @@ -0,0 +1,285 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. 
+.\" +.\" +.\" Copyright 2020 Robert Mustacchi +.\" +.Dd April 23, 2020 +.Dt C16RTOMB 3C +.Os +.Sh NAME +.Nm c16rtomb , +.Nm c32rtomb , +.Nm wcrtomb , +.Nm wcrtomb_l +.Nd convert wide-characters to character sequences +.Sh SYNOPSIS +.In uchar.h +.Ft size_t +.Fo c16rtomb +.Fa "char *restrict str" +.Fa "char16_t c16" +.Fa "mbstate_t *restrict ps" +.Fc +.Ft size_t +.Fo c32rtomb +.Fa "char *restrict str" +.Fa "char32_t c32" +.Fa "mbstate_t *restrict ps" +.Fc +.In wchar.h +.Ft size_t +.Fo wcrtomb +.Fa "char *restrict str" +.Fa "wchar_t wc" +.Fa "mbstate_t *restrict ps" +.Fc +.In wchar.h +.In xlocale.h +.Ft size_t +.Fo wcrtomb_l +.Fa "char *restrict str" +.Fa "wchar_t wc" +.Fa "mbstate_t *restrict ps" +.Fa "locale_t loc" +.Fc +.Sh DESCRIPTION +The +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions convert wide-character sequences into a series of multi-byte +characters. +The functions work in the following formats: +.Bl -tag -width wcrtomb_l +.It Fn c16rtomb +A UTF-16 code sequence, where every code point is represented by one or +two +.Vt char16_t . +The UTF-16 encoding will encode certain Unicode code points as a pair of +two 16-bit code sequences, commonly referred to as a surrogate pair. +.It Fn c32rtomb +A UTF-32 code sequence, where every code point is represented by a +single +.Vt char32_t . +It is illegal to pass reserved Unicode code points. +.It Fn wcrtomb , Fn wcrtomb_l +Wide characters, being a 32-bit value where every code point is +represented by a single +.Vt wchar_t . +While the +.Vt wchar_t +and +.Vt char32_t +are different types, in this implementation, they are similar encodings. +.El +.Pp +The functions all work by looking at the passed in wide-character +.Po +.Fa c16 , +.Fa c32 , +.Fa wc +.Pc +and appending it to the current conversion state, +.Fa ps . +Once a valid code point, based on the current locale, is found, then it +will be converted into a series of characters that are stored in +.Fa str .
+Up to +.Dv MB_CUR_MAX +bytes will be stored in +.Fa str . +It is the caller's responsibility to ensure that there is sufficient +space in +.Fa str . +.Pp +The functions are all influenced by the +.Dv LC_CTYPE +category of the current locale for determining what is considered a +valid character. +For example, in the +.Sy C +locale, +only ASCII characters are recognized, while in a +.Sy UTF-8 +based locale like +.Sy en_US.UTF-8 , +all valid Unicode code points are recognized and will be converted into +the corresponding multi-byte sequence. +The +.Fn wcrtomb_l +function uses the locale passed in +.Fa loc +rather than the locale of the current thread. +.Pp +The +.Fa ps +argument represents a multi-byte conversion state which can be used +across multiple calls to a given function +.Pq but not mixed between functions . +These allow for characters to be consumed from subsequent buffers, e.g. +different values of +.Fa str . +The functions may be called from multiple threads as long as they use +unique values for +.Fa ps . +If +.Fa ps +is +.Dv NULL , +then a function-specific buffer will be used for the conversion state; +however, this is stored between all threads and its use is not +recommended. +.Pp +The functions all have a special behavior when +.Dv NULL +is passed for +.Fa str . +They instead will treat it as though the NULL wide-character was +passed in +.Fa c16 , +.Fa c32 , +or +.Fa wc +and an internal buffer +.Pq buf +will be used to write out the results of the +conversion. +In other words, the functions would be called as: +.Bd -literal -offset indent +c16rtomb(buf, L'\\0', ps) +c32rtomb(buf, L'\\0', ps) +wcrtomb(buf, L'\\0', ps) +wcrtomb_l(buf, L'\\0', ps, loc) +.Ed +.Ss Locale Details +Not all locales in the system are Unicode based locales. +For example, ISO 8859 family locales have code points with values that +do not match their counterparts in Unicode.
+When using these functions with non-Unicode based locales, the code +points returned will be those determined by the locale. +They will not be converted from the corresponding Unicode code point. +For example, if using the Euro sign in ISO 8859-15, these functions +will not encode the Unicode value 0x20ac into the ISO 8859-15 value +0xa4. +.Pp +Regardless of the locale, the characters returned will be encoded as +though the code point were the corresponding value in Unicode. +This means that when using UTF-16, if the corresponding code point were +in the range for surrogate pairs, then the +.Fn c16rtomb +function will expect to receive that code point in that fashion. +.Pp +This behavior of the +.Fn c16rtomb +and +.Fn c32rtomb +functions should not be relied upon, is not portable, and subject to +change for non-Unicode locales. +.Sh RETURN VALUES +Upon successful completion, the +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions return the number of bytes stored in +.Fa str . +Otherwise, +.Sy (size_t)-1 +is returned to indicate an encoding error and +.Va errno +is set. +.Sh EXAMPLES +.Sy Example 1 +Converting a UTF-32 character into a multi-byte character sequence.
+.Bd -literal +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdio.h> +#include <uchar.h> + +int +main(void) +{ + mbstate_t mbs; + size_t ret; + char buf[MB_CUR_MAX]; + char32_t val = 0x5149; + const char *uchar_exp = "\exe5\ex85\ex89"; + + (void) memset(&mbs, 0, sizeof (mbs)); + (void) setlocale(LC_CTYPE, "en_US.UTF-8"); + ret = c32rtomb(buf, val, &mbs); + if (ret != strlen(uchar_exp)) { + errx(EXIT_FAILURE, "failed to convert string, got %zd", + ret); + } + + if (strncmp(buf, uchar_exp, ret) != 0) { + errx(EXIT_FAILURE, "converted char32_t does not match " + "expected value"); + } + + return (0); +} +.Ed +.Sh ERRORS +The +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions will fail if: +.Bl -tag -width Er +.It Er EINVAL +The conversion state in +.Fa ps +is invalid. +.It Er EILSEQ +An invalid character sequence has been detected. +.El +.Sh MT-LEVEL +The +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions are +.Sy MT-Safe +as long as different +.Vt mbstate_t +structures are passed in +.Fa ps . +If +.Fa ps +is +.Dv NULL +or different threads use the same value for +.Fa ps , +then the functions are +.Sy Unsafe . +.Sh INTERFACE STABILITY +.Sy Committed +.Sh SEE ALSO +.Xr mbrtoc16 3C , +.Xr mbrtoc32 3C , +.Xr mbrtowc 3C , +.Xr newlocale 3C , +.Xr setlocale 3C , +.Xr uselocale 3C , +.Xr uchar.h 3HEAD , +.Xr environ 5 diff --git a/usr/src/man/man3c/mbrtoc16.3c b/usr/src/man/man3c/mbrtoc16.3c new file mode 100644 index 0000000000..d1b3ab478b --- /dev/null +++ b/usr/src/man/man3c/mbrtoc16.3c @@ -0,0 +1,397 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. 
A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 Robert Mustacchi +.\" +.Dd April 23, 2020 +.Dt MBRTOC16 3C +.Os +.Sh NAME +.Nm mbrtoc16 , +.Nm mbrtoc32 , +.Nm mbrtowc , +.Nm mbrtowc_l +.Nd convert characters to wide characters +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fo mbrtowc +.Fa "wchar_t *restrict pwc" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fc +.In wchar.h +.In xlocale.h +.Ft size_t +.Fo mbrtowc_l +.Fa "wchar_t *restrict pwc" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fa "locale_t loc" +.Fc +.In uchar.h +.Ft size_t +.Fo mbrtoc16 +.Fa "char16_t *restrict p16c" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fc +.Ft size_t +.Fo mbrtoc32 +.Fa "char32_t *restrict p32c" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fc +.Sh DESCRIPTION +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions convert character sequences, which may contain multi-byte +characters, into different character formats. +The functions work in the following formats: +.Bl -tag -width mbrtowc_l +.It Fn mbrtoc16 +A UTF-16 code sequence, where every code point is represented by one or +two +.Vt char16_t . +The UTF-16 encoding will encode certain Unicode code points as a pair of +two 16-bit code sequences, commonly referred to as a surrogate pair. +.It Fn mbrtoc32 +A UTF-32 code sequence, where every code point is represented by a +single +.Vt char32_t . +.It Fn mbrtowc , Fn mbrtowc_l +Wide characters, being a 32-bit value where every code point is +represented by a single +.Vt wchar_t . +While the +.Vt wchar_t +and +.Vt char32_t +are different types, in this implementation, they are similar encodings.
+.El +.Pp +The functions consume up to +.Fa len +characters from the string +.Fa str +and accumulate them in +.Fa ps +until a valid character is found, which is influenced by +the +.Dv LC_CTYPE +category of the current locale. +For example, in the +.Sy C +locale, only ASCII characters are recognized, while in a +.Sy UTF-8 +based locale like +.Sy en_US.UTF-8 , +UTF-8 multi-byte character sequences that represent Unicode code points +are recognized. +The +.Fn mbrtowc_l +function uses the locale passed in +.Fa loc +rather than the locale of the current thread. +.Pp +When a valid character sequence has been found, it is converted to +either a 16-bit character sequence for +.Fn mbrtoc16 +or a 32-bit character sequence for +.Fn mbrtoc32 +and will be stored in +.Fa p16c +and +.Fa p32c +respectively. +.Pp +The +.Fa ps +argument represents a multi-byte conversion state which can be used +across multiple calls to a given function +.Pq but not mixed between functions . +These allow for characters to be consumed from subsequent buffers, e.g. +different values of +.Fa str . +The functions may be called from multiple threads as long as they use +unique values for +.Fa ps . +If +.Fa ps +is +.Dv NULL , +then a function-specific buffer will be used for the conversion state; +however, this is shared between all threads and its use is not +recommended. +.Pp +When using these functions, more than one character may be output for a +given set of consumed input characters. +An example of this is when a given code point is represented as a +surrogate pair in UTF-16, which requires two 16-bit characters to +represent a code point. +When this occurs, the functions return the special return value +.Sy -3 . +.Pp +The functions all have a special behavior when +.Dv NULL +is passed for +.Fa str .
+They instead will treat it as though +.Fa pwc , +.Fa p16c , +or +.Fa p32c +were +.Dv NULL , +.Fa str +had been passed as the empty string, "" and the length, +.Fa len , +would appear as the value 1. +In other words, the functions would be called as: +.Bd -literal -offset indent +mbrtowc(NULL, "", 1, ps) +mbrtowc_l(NULL, "", 1, ps, loc) +mbrtoc16(NULL, "", 1, ps) +mbrtoc32(NULL, "", 1, ps) +.Ed +.Ss Locale Details +Not all locales in the system are Unicode based locales. +For example, ISO 8859 family locales have code points with values that +do not match their counterparts in Unicode. +When using these functions with non-Unicode based locales, the code +points returned will be those determined by the locale. +They will not be converted to the corresponding Unicode code point. +For example, if using the Euro sign in ISO 8859-15, these functions +might return the code point 0xa4 and not the Unicode value 0x20ac. +.Pp +Regardless of the locale, the characters returned will be encoded as +though the code point were the corresponding value in Unicode. +This means that if a locale returns a value that would be a surrogate +pair in the UTF-16 encoding, it will still be encoded as a UTF-16 +character. +.Pp +This behavior of the +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions should not be relied upon, is not portable, and is subject to +change for non-Unicode locales. +.Sh RETURN VALUES +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions return the following values: +.Bl -tag -width (size_t)-3 +.It Sy 0 +.Fa len +or fewer bytes of +.Fa str +were consumed and the null wide character was written into the wide +character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy between 1 and len +The specified number of bytes were consumed and a single character was +written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy (size_t)-1 +An encoding error has occurred.
+The next +.Fa len +bytes of +.Fa str +do not contribute to a valid character. +.Va errno +has been set to +.Er EILSEQ . +No data was written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy (size_t)-2 +.Fa len +bytes of +.Fa str +were consumed, but a complete multi-byte character sequence has not been +found and no data was written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy (size_t)-3 +A character has been written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +This character was from a previous call (such as another part of a +UTF-16 surrogate pair) and no input was consumed. +This is limited to the +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions. +.El +.Sh EXAMPLES +.Sy Example 1 +Using the +.Fn mbrtoc32 +function to convert a multibyte string. +.Bd -literal +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdio.h> +#include <uchar.h> + +int +main(void) +{ + mbstate_t mbs; + char32_t out; + size_t ret; + const char *uchar_str = "\exe5\ex85\ex89"; + + (void) memset(&mbs, 0, sizeof (mbs)); + (void) setlocale(LC_CTYPE, "en_US.UTF-8"); + ret = mbrtoc32(&out, uchar_str, strlen(uchar_str), &mbs); + if (ret != strlen(uchar_str)) { + errx(EXIT_FAILURE, "failed to convert string, got %zd", + ret); + } + + (void) printf("Converted %zu bytes into UTF-32 character " + "0x%x\n", ret, out); + return (0); +} +.Ed +.Pp +When compiled and run, this produces: +.Bd -literal -offset indent +$ ./a.out +Converted 3 bytes into UTF-32 character 0x5149 +.Ed +.Pp +.Sy Example 2 +Handling surrogate pairs from the +.Fn mbrtoc16 +function. 
+.Bd -literal +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdio.h> +#include <uchar.h> + +int +main(void) +{ + mbstate_t mbs; + char16_t first, second; + size_t ret; + const char *uchar_str = "\exf0\ex9f\ex92\exa9"; + + (void) memset(&mbs, '\0', sizeof (mbs)); + (void) setlocale(LC_CTYPE, "en_US.UTF-8"); + ret = mbrtoc16(&first, uchar_str, strlen(uchar_str), &mbs); + if (ret != strlen(uchar_str)) { + errx(EXIT_FAILURE, "failed to convert string, got %zd", + ret); + } + + ret = mbrtoc16(&second, "", 0, &mbs); + if (ret != (size_t)-3) { + errx(EXIT_FAILURE, "didn't get second surrogate pair, " + "got %zd", ret); + } + + (void) printf("UTF-16 surrogates: 0x%x 0x%x\n", first, second); + return (0); +} +.Ed +.Pp +When compiled and run, this produces: +.Bd -literal -offset indent +$ ./a.out +UTF-16 surrogates: 0xd83d 0xdca9 +.Ed +.Sh ERRORS +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions will fail if: +.Bl -tag -width Er +.It Er EINVAL +The conversion state in +.Fa ps +is invalid. +.It Er EILSEQ +An invalid character sequence has been detected. +.El +.Sh MT-LEVEL +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions are +.Sy MT-Safe +as long as different +.Vt mbstate_t +structures are passed in +.Fa ps . +If +.Fa ps +is +.Dv NULL +or different threads use the same value for +.Fa ps , +then the functions are +.Sy Unsafe . +.Sh INTERFACE STABILITY +.Sy Committed +.Sh SEE ALSO +.Xr c16rtomb 3C , +.Xr c32rtomb 3C , +.Xr newlocale 3C , +.Xr setlocale 3C , +.Xr uselocale 3C , +.Xr wcrtomb 3C , +.Xr uchar.h 3HEAD , +.Xr environ 5 diff --git a/usr/src/man/man3c/mbrtowc.3c b/usr/src/man/man3c/mbrtowc.3c deleted file mode 100644 index 70fe73d25e..0000000000 --- a/usr/src/man/man3c/mbrtowc.3c +++ /dev/null @@ -1,231 +0,0 @@ -.\" -.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for -.\" permission to reproduce portions of its copyrighted documentation. 
-.\" Original documentation from The Open Group can be obtained online at -.\" http://www.opengroup.org/bookstore/. -.\" -.\" The Institute of Electrical and Electronics Engineers and The Open -.\" Group, have given us permission to reprint portions of their -.\" documentation. -.\" -.\" In the following statement, the phrase ``this text'' refers to portions -.\" of the system documentation. -.\" -.\" Portions of this text are reprinted and reproduced in electronic form -.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition, -.\" Standard for Information Technology -- Portable Operating System -.\" Interface (POSIX), The Open Group Base Specifications Issue 6, -.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics -.\" Engineers, Inc and The Open Group. In the event of any discrepancy -.\" between these versions and the original IEEE and The Open Group -.\" Standard, the original IEEE and The Open Group Standard is the referee -.\" document. The original Standard can be obtained online at -.\" http://www.opengroup.org/unix/online.html. -.\" -.\" This notice shall appear on any product containing this material. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" -.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. -.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright 2014 Garrett D'Amore <garrett@damore.org> -.\" -.TH MBRTOWC 3C "Jun 23, 2014" -.SH NAME -mbrtowc, mbrtowc_l \- convert a character to a wide-character code (restartable) -.SH SYNOPSIS -.LP -.nf -#include <wchar.h> - -\fBsize_t\fR \fBmbrtowc\fR(\fBwchar_t *restrict\fR \fIpwc\fR, \fBconst char *restrict\fR \fIs\fR, \fBsize_t\fR \fIn\fR, - \fBmbstate_t *restrict\fR \fIps\fR); -.fi -.LP -.nf -#include <wchar.h> -#include <xlocale.h> - -\fBsize_t\fR \fBmbrtowc_l\fR(\fBwchar_t *restrict\fR \fIpwc\fR, \fBconst char *restrict\fR \fIs\fR, \fBsize_t\fR \fIn\fR, - \fBmbstate_t *restrict\fR \fIps\fR, \fBlocale_t\fR \fIloc\fR); -.fi -.SH DESCRIPTION -.LP -If -.I s -is a null pointer, the -.B mbrtowc() -function is equivalent to the call: -.sp -.in +2 -.nf -\fBmbrtowc\fR(NULL, "", 1, \fIps\fR) -.fi -.in -2 -.LP -Likewise, if -.I s -is a null pointer, the -.B mbrtowc_l() -function is equivalent to the call: -.sp -.in +2 -.nf -\fBmbrtowc_l\fR(NULL, "", 1, \fIps\fR, \fIloc\fR); -.fi -.in -2 -.LP -In these cases, the values of the arguments -.I pwc -and -.I n -are ignored. -.LP -If -.I s -is not a null pointer, these functions inspect at most -.I n -bytes beginning at the byte pointed to by -.I s -to determine the number of bytes needed to complete the next character -(including any shift sequences). If the functions determine that the next -character is completed, -they determine the value of the corresponding wide-character and then, if -.I pwc -is not a null pointer, stores that value in the object pointed to by -.IR pwc . 
-If the corresponding wide-character is the null wide-character, the -resulting state described is the initial conversion state. -.LP -If -.I ps -is a null pointer, these functions use their own -internal -.B mbstate_t -object, which is initialized at program startup to the -initial conversion state. Otherwise, the -.B mbstate_t -object pointed to by -.I ps -is used to completely describe the current conversion state of the -associated character sequence. The system will behave as if no function defined -in the Reference Manual calls -.B mbrtowc() -or -.BR Bmbrtowc_l() . -.LP -The behavior of -.B mbrtowc() -is affected by the -.B LC_CTYPE -category of the current locale. The -.B mbrtowc_l() -function is affected by the -.B LC_CTYPE -category of the specified -.I loc -locale object. See -.B environ (5). -.SH RETURN VALUES -.LP -The -.B mbrtowc() -and -.B mbrtowc_l() -functions return the first of the following that applies: -.IP \fB0\fR -If the next -.I n -or fewer bytes complete the character that corresponds to -the null wide-character (which is the value stored). -.IP \fBpositive\fR -If the next -.I n -or fewer bytes complete a valid character (which is the -value stored); the value returned is the number of bytes that complete the -character. -.IP \fB(size_t)\(mi2\fR -If the next -.I n -bytes contribute to an incomplete but potentially valid -character, and all -.I n -bytes have been processed (no value is stored). -When -.I n -has at least the value of the -.B MB_CUR_MAX -macro, this case can only occur if -.I s -points at a sequence of redundant shift sequences -(for implementations with state-dependent encodings). -.IP \fB(size_t)\(mi1\fR -If an encoding error occurs, in which case the next \fIn\fR or fewer bytes do -not contribute to a complete and valid character (no value is stored). In -this case, -.B EILSEQ -is stored in -.B errno -and the conversion state is undefined. 
-.SH ERRORS -.LP -The -.B mbrtowc() -and -.B mbrtowc_l() -functions may fail if: -.IP \fBEINVAL\fR -The -.I ps -argument points to an object that contains an invalid conversion -state. -.IP \fBEILSEQ\fR -Invalid character sequence is detected. -.SH ATTRIBUTES -.LP -See \fBattributes\fR(5) for descriptions of the following attributes: -.TS -box; -c | c -l | l . -ATTRIBUTE TYPE ATTRIBUTE VALUE -_ -Interface Stability See below. -_ -MT-Level See below. -.TE - -.LP -The -.B mbrtowc() -function is Standard. The -.B mbrtowc_l() -function is Uncommitted. -.LP -If -.I ps -is a null pointer, these functions are Unsafe for use in -multithreaded applications. Otherwise they are MT-Safe. -.SH SEE ALSO -.LP -.BR mbsinit (3C), -.BR newlocale (3C), -.BR setlocale (3C), -.BR uselocale (3C), -.BR attributes (5), -.BR environ (5), -.BR standards (5) diff --git a/usr/src/man/man3c/wcrtomb.3c b/usr/src/man/man3c/wcrtomb.3c deleted file mode 100644 index 9a3478cb07..0000000000 --- a/usr/src/man/man3c/wcrtomb.3c +++ /dev/null @@ -1,146 +0,0 @@ -.\" -.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for -.\" permission to reproduce portions of its copyrighted documentation. -.\" Original documentation from The Open Group can be obtained online at -.\" http://www.opengroup.org/bookstore/. -.\" -.\" The Institute of Electrical and Electronics Engineers and The Open -.\" Group, have given us permission to reprint portions of their -.\" documentation. -.\" -.\" In the following statement, the phrase ``this text'' refers to portions -.\" of the system documentation. -.\" -.\" Portions of this text are reprinted and reproduced in electronic form -.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition, -.\" Standard for Information Technology -- Portable Operating System -.\" Interface (POSIX), The Open Group Base Specifications Issue 6, -.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics -.\" Engineers, Inc and The Open Group. 
In the event of any discrepancy -.\" between these versions and the original IEEE and The Open Group -.\" Standard, the original IEEE and The Open Group Standard is the referee -.\" document. The original Standard can be obtained online at -.\" http://www.opengroup.org/unix/online.html. -.\" -.\" This notice shall appear on any product containing this material. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" -.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. -.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. 
-.\" Copyright 2014 Garrett D'Amore <garrett@damore.org> -.\" -.TH WCRTOMB 3C "Jun 24, 2014" -.SH NAME -wcrtomb, wcrtomb_l \- convert a wide-character code to a character (restartable) -.SH SYNOPSIS -.LP -.nf -#include <stdio.h> - -\fBsize_t\fR \fBwcrtomb\fR(\fBchar *restrict\fR \fIs\fR, \fBwchar_t\fR \fIwc\fR, \fBmbstate_t *restrict\fR \fIps\fR); -.fi -.LP -.nf -#include <stdio.h> -#include <xlocale.h> - -\fBsize_t\fR \fBwcrtomb_l\fR(\fBchar *restrict\fR \fIs\fR, \fBwchar_t\fR \fIwc\fR, \fBmbstate_t *restrict\fR \fIps\fR, - \fBlocale_t\fR \fIloc\fR); -.fi -.SH DESCRIPTION -.LP -If \fIs\fR is a null pointer, the \fBwcrtomb()\fR function is equivalent to the -call: -.IP -\fBwcrtomb\fR(\fIbuf\fR, L'\e0', \fIps\fR); -.LP -where \fIbuf\fR is an internal buffer. -.LP -If \fIs\fR is not a null pointer, the \fBwcrtomb()\fR function determines the -number of bytes needed to represent the character that corresponds to the -wide-character given by \fIwc\fR (including any shift sequences), and stores -the resulting bytes in the array whose first element is pointed to by \fIs\fR. -At most \fBMB_CUR_MAX\fR bytes are stored. If \fIwc\fR is a null -wide-character, a null byte is stored, preceded by any shift sequence needed to -restore the initial shift state. The resulting state described is the initial -conversion state. -.LP -If \fIps\fR is a null pointer, the \fBwcrtomb()\fR function uses its own -internal \fBmbstate_t\fR object, which is initialized at program startup to the -initial conversion state. Otherwise, the \fBmbstate_t\fR object pointed to -by \fIps\fR is used to completely describe the current conversion state of the -associated character sequence. The system will behave as if no function -defined in the Reference Manual calls \fBwcrtomb()\fR. -.LP -The behavior of \fBwcrtomb()\fR is affected by the \fBLC_CTYPE\fR category of the -current locale. See \fBenviron\fR(5). 
The function \fBwcrtomb_l()\fR behaves -identically to \fBwcrtomb()\fR, except instead of operating in the current -locale, it operates in the locale specified by \fIloc\fR. -.SH RETURN VALUES -.LP -The \fBwcrtomb()\fR function returns the number of bytes stored in the array -object (including any shift sequences). When \fIwc\fR is not a valid -wide-character, an encoding error occurs. In this case, the function stores -the value of the macros \fBEILSEQ\fR in \fBerrno\fR and returns -\fB(size_t)\(mi1\fR; the conversion state is undefined. -.SH ERRORS -.LP -The \fBwcrtomb()\fR function may fail if: -.sp -.ne 2 -.na -\fB\fBEINVAL\fR\fR -.ad -.RS 10n -The \fIps\fR argument points to an object that contains an invalid conversion -state. -.RE - -.sp -.ne 2 -.na -\fB\fBEILSEQ\fR\fR -.ad -.RS 10n -Invalid wide-character code is detected. -.RE -.SH ATTRIBUTES -.LP -See \fBattributes\fR(5) for descriptions of the following attributes: -.TS -box; -c | c -l | l . -ATTRIBUTE TYPE ATTRIBUTE VALUE -_ -Interface Stability See below. -_ -MT-Level See below. -.TE - -.LP -The \fBwcrtomb()\fR function is Standard. The -\fBwcrtomb_l()\fR function is Uncommitted. -.LP -If \fIps\fR is a null pointer, these functions should be considered Unsafe -for use in multithreaded applications. Otherwise, they are MT-Safe. 
-.SH SEE ALSO -.LP -\fBmbsinit\fR(3C), \fBnewlocale\fR(3C), \fBsetlocale\fR(3C), -\fBuselocale\fR(3C), \fBattributes\fR(5), -\fBstandards\fR(5), \fBenviron\fR(5) diff --git a/usr/src/man/man3head/Makefile b/usr/src/man/man3head/Makefile index e212c67457..fdc94ef4ef 100644 --- a/usr/src/man/man3head/Makefile +++ b/usr/src/man/man3head/Makefile @@ -17,98 +17,99 @@ include $(SRC)/Makefile.master -MANSECT= 3head +MANSECT= 3head MANFILES= acct.h.3head \ - aio.h.3head \ - ar.h.3head \ - archives.h.3head \ - assert.h.3head \ - complex.h.3head \ - cpio.h.3head \ - dirent.h.3head \ + aio.h.3head \ + ar.h.3head \ + archives.h.3head \ + assert.h.3head \ + complex.h.3head \ + cpio.h.3head \ + dirent.h.3head \ endian.h.3head \ - errno.h.3head \ - fcntl.h.3head \ - fenv.h.3head \ - float.h.3head \ - floatingpoint.h.3head \ - fmtmsg.h.3head \ - fnmatch.h.3head \ - ftw.h.3head \ - glob.h.3head \ - grp.h.3head \ - iconv.h.3head \ - if.h.3head \ - in.h.3head \ - inet.h.3head \ - inttypes.h.3head \ - ipc.h.3head \ - iso646.h.3head \ - langinfo.h.3head \ - libgen.h.3head \ - libintl.h.3head \ - limits.h.3head \ - locale.h.3head \ - math.h.3head \ - mman.h.3head \ - monetary.h.3head \ - mqueue.h.3head \ - msg.h.3head \ - ndbm.h.3head \ - netdb.h.3head \ - nl_types.h.3head \ - poll.h.3head \ - pthread.h.3head \ - pwd.h.3head \ + errno.h.3head \ + fcntl.h.3head \ + fenv.h.3head \ + float.h.3head \ + floatingpoint.h.3head \ + fmtmsg.h.3head \ + fnmatch.h.3head \ + ftw.h.3head \ + glob.h.3head \ + grp.h.3head \ + iconv.h.3head \ + if.h.3head \ + in.h.3head \ + inet.h.3head \ + inttypes.h.3head \ + ipc.h.3head \ + iso646.h.3head \ + langinfo.h.3head \ + libgen.h.3head \ + libintl.h.3head \ + limits.h.3head \ + locale.h.3head \ + math.h.3head \ + mman.h.3head \ + monetary.h.3head \ + mqueue.h.3head \ + msg.h.3head \ + ndbm.h.3head \ + netdb.h.3head \ + nl_types.h.3head \ + poll.h.3head \ + pthread.h.3head \ + pwd.h.3head \ queue.h.3head \ - regex.h.3head \ - resource.h.3head \ - 
sched.h.3head \ - search.h.3head \ - select.h.3head \ - sem.h.3head \ - semaphore.h.3head \ - setjmp.h.3head \ - shm.h.3head \ - siginfo.h.3head \ - signal.h.3head \ - socket.h.3head \ - spawn.h.3head \ - stat.h.3head \ - statvfs.h.3head \ - stdbool.h.3head \ - stddef.h.3head \ - stdint.h.3head \ - stdio.h.3head \ - stdlib.h.3head \ - string.h.3head \ - strings.h.3head \ - stropts.h.3head \ - syslog.h.3head \ - tar.h.3head \ - tcp.h.3head \ - termios.h.3head \ - tgmath.h.3head \ - time.h.3head \ - timeb.h.3head \ - times.h.3head \ - types.h.3head \ - types32.h.3head \ - ucontext.h.3head \ - uio.h.3head \ - ulimit.h.3head \ - un.h.3head \ - unistd.h.3head \ - utime.h.3head \ - utmpx.h.3head \ - utsname.h.3head \ - values.h.3head \ - wait.h.3head \ - wchar.h.3head \ - wctype.h.3head \ - wordexp.h.3head \ - xlocale.h.3head + regex.h.3head \ + resource.h.3head \ + sched.h.3head \ + search.h.3head \ + select.h.3head \ + sem.h.3head \ + semaphore.h.3head \ + setjmp.h.3head \ + shm.h.3head \ + siginfo.h.3head \ + signal.h.3head \ + socket.h.3head \ + spawn.h.3head \ + stat.h.3head \ + statvfs.h.3head \ + stdbool.h.3head \ + stddef.h.3head \ + stdint.h.3head \ + stdio.h.3head \ + stdlib.h.3head \ + string.h.3head \ + strings.h.3head \ + stropts.h.3head \ + syslog.h.3head \ + tar.h.3head \ + tcp.h.3head \ + termios.h.3head \ + tgmath.h.3head \ + time.h.3head \ + timeb.h.3head \ + times.h.3head \ + types.h.3head \ + types32.h.3head \ + uchar.h.3head \ + ucontext.h.3head \ + uio.h.3head \ + ulimit.h.3head \ + un.h.3head \ + unistd.h.3head \ + utime.h.3head \ + utmpx.h.3head \ + utsname.h.3head \ + values.h.3head \ + wait.h.3head \ + wchar.h.3head \ + wctype.h.3head \ + wordexp.h.3head \ + xlocale.h.3head MANLINKS= \ LIST_CLASS_ENTRY.3head \ diff --git a/usr/src/man/man3head/uchar.h.3head b/usr/src/man/man3head/uchar.h.3head new file mode 100644 index 0000000000..cb34b7f69a --- /dev/null +++ b/usr/src/man/man3head/uchar.h.3head @@ -0,0 +1,95 @@ +.\" +.\" This file and its 
contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 Robert Mustacchi +.\" +.Dd April 22, 2020 +.Dt UCHAR.H 3HEAD +.Os +.Sh NAME +.Nm uchar.h +.Nd Unicode utilities header +.Sh SYNOPSIS +.In uchar.h +.Sh DESCRIPTION +The +.In uchar.h +header provides support for the C11 Unicode utilities. +The types and functions provide means for working with data encoded as +UTF-16 and UTF-32. +When working in non-Unicode locales, these types may not represent +Unicode code points. +.Pp +The +.In uchar.h +header defines the following types: +.Bl -tag -width Vt +.It Vt char16_t +An unsigned integer that can represent 16-bit characters, generally a +single UTF-16 code unit. +A Unicode code point may be one or two UTF-16 code units due to +surrogate pairs. +.It Vt char32_t +An unsigned integer that can represent 32-bit characters, generally a +single UTF-32 code unit. +.It Vt size_t +An unsigned integer that represents the size of various objects. +This can hold the result of the +.Sy sizeof +operator. +See also +.Xr stddef.h 3HEAD . +.It Vt mbstate_t +An object that holds the state for converting between character +sequences and wide characters +.Po +.Vt wchar_t , +.Vt char16_t , +.Vt char32_t +.Pc . +See also +.Xr wchar.h 3HEAD . +.El +The +.In uchar.h +header also defines the following functions which are used to convert +between +.Vt char16_t +and +.Vt char32_t +sequences and other character sequences: +.Bl -tag -width cr16rtomb +.It Xr c16rtomb 3C +Convert +.Vt char16_t +sequences to multi-byte character sequences. +.It Xr c32rtomb 3C +Convert +.Vt char32_t +sequences to multi-byte character sequences.
+.It Xr mbrtoc16 3C +Convert multi-byte character sequences to +.Vt char16_t +sequences. +.It Xr mbrtoc32 3C +Convert multi-byte character sequences to +.Vt char32_t +sequences. +.El +.Sh INTERFACE STABILITY +.Sy Committed +.Sh SEE ALSO +.Xr c16rtomb 3C , +.Xr c32rtomb 3C , +.Xr mbrtoc16 3C , +.Xr mbrtoc32 3C , +.Xr stddef.h 3HEAD , +.Xr wchar.h 3HEAD diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index c95634b16c..a76896db15 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1605,6 +1605,7 @@ file path=usr/include/time.h file path=usr/include/tiuser.h file path=usr/include/tsol/label.h file path=usr/include/tzfile.h +file path=usr/include/uchar.h file path=usr/include/ucontext.h file path=usr/include/ucred.h file path=usr/include/uid_stp.h @@ -1911,6 +1912,7 @@ file path=usr/share/man/man3head/timeb.h.3head file path=usr/share/man/man3head/times.h.3head file path=usr/share/man/man3head/types.h.3head file path=usr/share/man/man3head/types32.h.3head +file path=usr/share/man/man3head/uchar.h.3head file path=usr/share/man/man3head/ucontext.h.3head file path=usr/share/man/man3head/uio.h.3head file path=usr/share/man/man3head/ulimit.h.3head diff --git a/usr/src/pkg/manifests/system-library.man3c.inc b/usr/src/pkg/manifests/system-library.man3c.inc index 7317e92fce..ce6056ce82 100644 --- a/usr/src/pkg/manifests/system-library.man3c.inc +++ b/usr/src/pkg/manifests/system-library.man3c.inc @@ -58,6 +58,7 @@ file path=usr/share/man/man3c/bsearch.3c file path=usr/share/man/man3c/bstring.3c file path=usr/share/man/man3c/btowc.3c file path=usr/share/man/man3c/byteorder.3c +file path=usr/share/man/man3c/c16rtomb.3c file path=usr/share/man/man3c/call_once.3c file path=usr/share/man/man3c/catgets.3c file path=usr/share/man/man3c/catopen.3c @@ -234,7 +235,7 @@ file path=usr/share/man/man3c/makedev.3c file path=usr/share/man/man3c/malloc.3c file path=usr/share/man/man3c/mblen.3c file 
path=usr/share/man/man3c/mbrlen.3c -file path=usr/share/man/man3c/mbrtowc.3c +file path=usr/share/man/man3c/mbrtoc16.3c file path=usr/share/man/man3c/mbsinit.3c file path=usr/share/man/man3c/mbsrtowcs.3c file path=usr/share/man/man3c/mbstowcs.3c @@ -536,7 +537,6 @@ file path=usr/share/man/man3c/wait3.3c file path=usr/share/man/man3c/waitpid.3c file path=usr/share/man/man3c/walkcontext.3c file path=usr/share/man/man3c/wcpcpy.3c -file path=usr/share/man/man3c/wcrtomb.3c file path=usr/share/man/man3c/wcscasecmp.3c file path=usr/share/man/man3c/wcscoll.3c file path=usr/share/man/man3c/wcsdup.3c @@ -722,6 +722,7 @@ link path=usr/share/man/man3c/bind_textdomain_codeset.3c target=gettext.3c link path=usr/share/man/man3c/bindtextdomain.3c target=gettext.3c link path=usr/share/man/man3c/btowc_l.3c target=btowc.3c link path=usr/share/man/man3c/bzero.3c target=bstring.3c +link path=usr/share/man/man3c/c32rtomb.3c target=c16rtomb.3c link path=usr/share/man/man3c/calloc.3c target=malloc.3c link path=usr/share/man/man3c/canonicalize_file_name.3c target=realpath.3c link path=usr/share/man/man3c/catclose.3c target=catopen.3c @@ -1006,7 +1007,9 @@ link path=usr/share/man/man3c/lrand48.3c target=drand48.3c link path=usr/share/man/man3c/major.3c target=makedev.3c link path=usr/share/man/man3c/mblen_l.3c target=mblen.3c link path=usr/share/man/man3c/mbrlen_l.3c target=mbrlen.3c -link path=usr/share/man/man3c/mbrtowc_l.3c target=mbrtowc.3c +link path=usr/share/man/man3c/mbrtoc32.3c target=mbrtoc16.3c +link path=usr/share/man/man3c/mbrtowc.3c target=mbrtoc16.3c +link path=usr/share/man/man3c/mbrtowc_l.3c target=mbrtoc16.3c link path=usr/share/man/man3c/mbsinit_l.3c target=mbsinit.3c link path=usr/share/man/man3c/mbsnrtowcs.3c target=mbsrtowcs.3c link path=usr/share/man/man3c/mbsnrtowcs_l.3c target=mbsrtowcs.3c @@ -1457,7 +1460,8 @@ link path=usr/share/man/man3c/watoi.3c target=wcstol.3c link path=usr/share/man/man3c/watol.3c target=wcstol.3c link path=usr/share/man/man3c/watoll.3c 
target=wcstol.3c link path=usr/share/man/man3c/wcpncpy.3c target=wcpcpy.3c -link path=usr/share/man/man3c/wcrtomb_l.3c target=wcrtomb.3c +link path=usr/share/man/man3c/wcrtomb.3c target=c16rtomb.3c +link path=usr/share/man/man3c/wcrtomb_l.3c target=c16rtomb.3c link path=usr/share/man/man3c/wcscasecmp_l.3c target=wcscasecmp.3c link path=usr/share/man/man3c/wcscat.3c target=wcstring.3c link path=usr/share/man/man3c/wcschr.3c target=wcstring.3c diff --git a/usr/src/pkg/manifests/system-test-libctest.mf b/usr/src/pkg/manifests/system-test-libctest.mf index 472e4f3432..c3d9b87bdb 100644 --- a/usr/src/pkg/manifests/system-test-libctest.mf +++ b/usr/src/pkg/manifests/system-test-libctest.mf @@ -185,6 +185,8 @@ file path=opt/libc-tests/tests/symbols/symbols_test.$(ARCH64) mode=0555 file path=opt/libc-tests/tests/thread_name mode=0555 file path=opt/libc-tests/tests/timespec_get.32 mode=0555 file path=opt/libc-tests/tests/timespec_get.64 mode=0555 +file path=opt/libc-tests/tests/uchar.32 mode=0555 +file path=opt/libc-tests/tests/uchar.64 mode=0555 file path=opt/libc-tests/tests/wcsncasecmp-7344.32 mode=0555 file path=opt/libc-tests/tests/wcsncasecmp-7344.64 mode=0555 file path=opt/libc-tests/tests/wcsncasecmp-7350.32 mode=0555 diff --git a/usr/src/test/libc-tests/runfiles/default.run b/usr/src/test/libc-tests/runfiles/default.run index d69ed758c3..2556c6916c 100644 --- a/usr/src/test/libc-tests/runfiles/default.run +++ b/usr/src/test/libc-tests/runfiles/default.run @@ -116,6 +116,8 @@ timeout = 600 [/opt/libc-tests/tests/thread_name] [/opt/libc-tests/tests/timespec_get.32] [/opt/libc-tests/tests/timespec_get.64] +[/opt/libc-tests/tests/uchar.32] +[/opt/libc-tests/tests/uchar.64] [/opt/libc-tests/tests/pthread_attr_get_np] diff --git a/usr/src/test/libc-tests/tests/Makefile b/usr/src/test/libc-tests/tests/Makefile index a8932e21c4..9ea35b5525 100644 --- a/usr/src/test/libc-tests/tests/Makefile +++ b/usr/src/test/libc-tests/tests/Makefile @@ -51,7 +51,8 @@ PROGS = \ 
timespec_get \ wcsncasecmp \ wcsncasecmp-7344 \ - wcsncasecmp-7350 + wcsncasecmp-7350 \ + uchar SCRIPTS = \ quick_exit \ diff --git a/usr/src/test/libc-tests/tests/uchar.c b/usr/src/test/libc-tests/tests/uchar.c new file mode 100644 index 0000000000..895711fe5e --- /dev/null +++ b/usr/src/test/libc-tests/tests/uchar.c @@ -0,0 +1,789 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Robert Mustacchi + */ + +/* + * Test the implementation of various pieces of uchar.h(3HEAD) functionality. + */ + +#include <locale.h> +#include <err.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <strings.h> +#include <wchar.h> +#include <uchar.h> +#include <errno.h> + +static const char *uchar_wide = "光"; +static const char32_t uchar_value = 0x5149; +static const char *uchar_hello = "hello"; + +static void +update_locale(const char *loc) +{ + const char *newloc = setlocale(LC_CTYPE, loc); + if (newloc == NULL) { + err(EXIT_FAILURE, "TEST FAILED: failed to update locale to %s", + loc); + } + + if (strcmp(newloc, loc) != 0) { + errx(EXIT_FAILURE, "TEST FAILED: locale set to %s, but got %s", + loc, newloc); + } +} + +static boolean_t +mbrtoc32_ascii(mbstate_t *mbs) +{ + char32_t out; + size_t len; + boolean_t ret = B_TRUE; + + if ((len = mbrtoc32(&out, uchar_hello, 5, mbs)) != 1) { + warnx("expected mbrtoc32 to return 1, returned %zu", len); + ret = B_FALSE; + } + + if (out != 'h') { + warnx("got bad char32_t, expected 0x%x, found 0x%x\n", 'h', + out); + ret = B_FALSE; + } + + if ((len = mbrtoc32(&out, uchar_hello + 1, 4, mbs)) != 1) { + warnx("expected 
mbrtoc32 to return 1, returned %zu", len); + ret = B_FALSE; + } + + if (out != 'e') { + warnx("got bad char32_t, expected 0x%x, found 0x%x\n", 'e', + out); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc32_ascii_internal(void) +{ + return (mbrtoc32_ascii(NULL)); +} + +static boolean_t +mbrtoc32_ascii_mbstate(void) +{ + mbstate_t mbs; + + bzero(&mbs, sizeof (mbs)); + return (mbrtoc32_ascii(&mbs)); +} + +static boolean_t +mbrtoc32_badseq_utf8(void) +{ + mbstate_t mbs; + size_t len; + char32_t out; + boolean_t ret = B_TRUE; + char *badstr; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc32(&out, "\xa9", 1, &mbs); + if (len != (size_t)-1) { + warnx("mbrtoc32 returned %zu, not %zu", len, (size_t)-1); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("found bad errno, expected %d, found %d\n", EILSEQ, + errno); + ret = B_FALSE; + } + + badstr = strdup(uchar_wide); + if (badstr == NULL) { + warn("failed to duplicate uchar_wide"); + return (B_FALSE); + } + + badstr[1] = '?'; + bzero(&mbs, sizeof (mbs)); + len = mbrtoc32(&out, badstr, strlen(badstr), &mbs); + free(badstr); + if (len != (size_t)-1) { + warnx("mbrtoc32 returned %zu, not %zu", len, (size_t)-1); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("found bad errno, expected %d, found %d\n", EILSEQ, + errno); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc32_roundtrip(void) +{ + char32_t out; + size_t len, clen; + mbstate_t mbs; + char buf[MB_CUR_MAX]; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc32(&out, uchar_wide, strlen(uchar_wide), &mbs); + if (len != 3) { + warnx("mbrtoc32 returned %zu, expected %u", len, 3); + ret = B_FALSE; + } + + if (out != uchar_value) { + warnx("mbrtoc32 converted character to 0x%x not 0x%x", + out, uchar_value); + ret = B_FALSE; + } + + clen = c32rtomb(buf, out, &mbs); + if (clen != len) { + warnx("c32rtomb returned %zu bytes, but we originally used %zu", + clen, len); + ret = B_FALSE; + } + + if (strncmp(buf, 
uchar_wide, len) != 0) { + warnx("round trip string comparison failed"); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc32_partial(void) +{ + char32_t out; + size_t len, i; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + for (i = 0; i < strlen(uchar_wide) - 1; i++) { + len = mbrtoc32(&out, uchar_wide + i, 1, &mbs); + if (len != (size_t)-2) { + warnx("partial mbrtoc32 returned %zu, not -2", len); + ret = B_FALSE; + } + } + + len = mbrtoc32(&out, uchar_wide + i, 1, &mbs); + if (len != 1) { + warnx("partial mbrtoc32 returned %zu, not 1", len); + ret = B_FALSE; + } + + if (out != uchar_value) { + warnx("mbrtoc32 converted character to 0x%x not 0x%x", + out, uchar_value); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc32_zero(void) +{ + char32_t out, exp = L'\0'; + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc32(&out, "", 1, &mbs); + if (len != 0) { + warnx("partial mbrtoc32 returned %zu, not 0", len); + ret = B_FALSE; + } + + if (out != exp) { + warnx("mbrtoc32 converted character to 0x%x not 0x%x", + out, exp); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc32_zero_len(void) +{ + char32_t out = 0x12345, exp = 0x12345; + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc32(&out, uchar_wide, 0, &mbs); + if (len != (size_t)-2) { + warnx("partial mbrtoc32 returned %zu, not -2", len); + ret = B_FALSE; + } + + if (out != exp) { + warnx("mbrtoc32 incorrectly wrote to char32_t value with " + "zero string, found 0x%x not 0x%x", out, exp); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc32_null(void) +{ + char32_t out = 0x123456, exp = 0x123456; + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc32(&out, NULL, 1, &mbs); + if (len != 0) { + warnx("partial mbrtoc32 returned %zu, not 0", len); + ret = B_FALSE; + } 
+ + if (out != exp) { + warnx("mbrtoc32 incorrectly wrote to char32_t value with " + "null string, found 0x%x not 0x%x", out, exp); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_ascii(mbstate_t *mbs) +{ + char16_t out; + size_t len; + boolean_t ret = B_TRUE; + + if ((len = mbrtoc16(&out, uchar_hello, 5, mbs)) != 1) { + warnx("expected mbrtoc16 to return 1, returned %zu", len); + ret = B_FALSE; + } + + if (out != 'h') { + warnx("got bad char16_t, expected 0x%x, found 0x%x\n", 'h', + out); + ret = B_FALSE; + } + + if ((len = mbrtoc16(&out, uchar_hello + 1, 4, mbs)) != 1) { + warnx("expected mbrtoc16 to return 1, returned %zu", len); + ret = B_FALSE; + } + + if (out != 'e') { + warnx("got bad char16_t, expected 0x%x, found 0x%x\n", 'h', + out); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_ascii_internal(void) +{ + return (mbrtoc16_ascii(NULL)); +} + +static boolean_t +mbrtoc16_ascii_mbstate(void) +{ + mbstate_t mbs; + + bzero(&mbs, sizeof (mbs)); + return (mbrtoc16_ascii(&mbs)); +} + +static boolean_t +mbrtoc16_null(void) +{ + char16_t out = 0x1234, exp = 0x1234; + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc16(&out, NULL, 1, &mbs); + if (len != 0) { + warnx("partial mbrtoc16 returned %zu, not 0", len); + ret = B_FALSE; + } + + if (out != exp) { + warnx("mbrtoc16 incorrectly wrote to char16_t value with " + "null string, found 0x%x not 0x%x", out, exp); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_zero(void) +{ + char16_t out, exp = L'\0'; + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc16(&out, "", 1, &mbs); + if (len != 0) { + warnx("partial mbrtoc16 returned %zu, not 0", len); + ret = B_FALSE; + } + + if (out != exp) { + warnx("mbrtoc16 converted character to 0x%x not 0x%x", + out, exp); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_zero_len(void) +{ + 
char16_t out = 0x5432, exp = 0x5432; + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc16(&out, uchar_wide, 0, &mbs); + if (len != (size_t)-2) { + warnx("partial mbrtoc16 returned %zu, not -2", len); + ret = B_FALSE; + } + + if (out != exp) { + warnx("mbrtoc16 incorrectly wrote to char16_t value with " + "zero length string, found 0x%x not 0x%x", out, exp); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_roundtrip(void) +{ + char16_t out; + size_t len, clen; + mbstate_t mbs; + char buf[MB_CUR_MAX]; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc16(&out, uchar_wide, strlen(uchar_wide), &mbs); + if (len != 3) { + warnx("mbrtoc16 returned %zu, expected %u", len, 3); + ret = B_FALSE; + } + + if (out != uchar_value) { + warnx("mbrtoc16 converted character to 0x%x not 0x%x", + out, uchar_value); + ret = B_FALSE; + } + + clen = c16rtomb(buf, out, &mbs); + if (clen != len) { + warnx("c16rtomb returned %d bytes, but we originally used %d", + clen, len); + ret = B_FALSE; + } + + if (strncmp(buf, uchar_wide, len) != 0) { + warnx("round trip string comparison failed"); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_partial(void) +{ + char16_t out; + size_t len, i; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + for (i = 0; i < strlen(uchar_wide) - 1; i++) { + len = mbrtoc16(&out, uchar_wide + i, 1, &mbs); + if (len != (size_t)-2) { + warnx("partial mbrtoc16 returned %zu, not -2", len); + ret = B_FALSE; + } + } + + len = mbrtoc16(&out, uchar_wide + i, 1, &mbs); + if (len != 1) { + warnx("partial mbrtoc16 returned %zu, not 1", len); + ret = B_FALSE; + } + + if (out != uchar_value) { + warnx("mbrtoc16 converted character to 0x%x not 0x%x", + out, uchar_value); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +mbrtoc16_surrogate(void) +{ + char16_t out0, out1; + size_t len, clen; + mbstate_t mbs; + const char 
*surrogate = "\xF0\x9F\x92\xA9"; + char16_t exp0 = 0xd83d, exp1 = 0xdca9; + size_t slen = strlen(surrogate); + boolean_t ret = B_TRUE; + char buf[MB_CUR_MAX]; + + bzero(&mbs, sizeof (mbs)); + len = mbrtoc16(&out0, surrogate, slen, &mbs); + if (len != slen) { + warnx("mbrtoc16 returned %zu, expected %u", len, slen); + ret = B_FALSE; + } + + if (out0 != exp0) { + warnx("mbrtoc16 converted character to 0x%x not 0x%x", + out0, exp0); + ret = B_FALSE; + } + + if (mbsinit(&mbs) != 0) { + warnx("mb state with a surrogate character is somehow in the " + "initial state"); + ret = B_FALSE; + } + + len = mbrtoc16(&out1, uchar_wide, strlen(uchar_wide), &mbs); + if (len != (size_t)-3) { + warnx("mbrtoc16 returned %zu, expected -3", len); + ret = B_FALSE; + } + + if (mbsinit(&mbs) == 0) { + warnx("mb state with after both surrogate characters isn't " + "in initial state"); + ret = B_FALSE; + } + + if (out1 != exp1) { + warnx("mbrtoc32 converted character to 0x%x not 0x%x", + out1, exp1); + ret = B_FALSE; + } + + clen = c16rtomb(buf, out0, &mbs); + if (clen != 0) { + warnx("c16rtomb returned %d bytes, but expected zero for the " + "first surrogate", clen); + ret = B_FALSE; + } + + if (mbsinit(&mbs) != 0) { + warnx("mb state with a surrogate character is somehow in the " + "initial state"); + ret = B_FALSE; + } + + clen = c16rtomb(buf, out1, &mbs); + if (clen != slen) { + warnx("c16rtomb returned %zd, expected %u", len, slen); + ret = B_FALSE; + } + + if (mbsinit(&mbs) == 0) { + warnx("mb state with after both surrogate characters isn't " + "in initial state"); + ret = B_FALSE; + } + + if (strncmp(buf, surrogate, slen) != 0) { + warnx("round trip string comparison failed"); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +c32rtomb_eilseq_iso8859(void) +{ + char buf[MB_CUR_MAX]; + mbstate_t mbs; + size_t len; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = c32rtomb(buf, uchar_value, &mbs); + if (len != (size_t)-1) { + warnx("c32rtomb returned %zd, 
expected -1\n", len); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("expected errno set to %d was %d", EILSEQ, errno); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +c16rtomb_eilseq_iso8859(void) +{ + char buf[MB_CUR_MAX]; + mbstate_t mbs; + size_t len; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = c16rtomb(buf, (char16_t)uchar_value, &mbs); + if (len != (size_t)-1) { + warnx("c16rtomb returned %zd, expected -1\n", len); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("expected errno set to %d was %d", EILSEQ, errno); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +c32rtomb_eilseq_utf8(void) +{ + char buf[MB_CUR_MAX]; + mbstate_t mbs; + size_t len; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = c32rtomb(buf, UINT32_MAX, &mbs); + if (len != (size_t)-1) { + warnx("c32rtomb returned %zd, expected -1\n", len); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("expected errno set to %d was %d", EILSEQ, errno); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +c16rtomb_bad_first(void) +{ + char buf[MB_CUR_MAX]; + mbstate_t mbs; + size_t len, i; + char16_t first = 0xd83d; + char16_t bad[] = { 0x0, 0xd7ff, 0xd83d, 0xd900, 0xffff }; + boolean_t ret = B_TRUE; + + for (i = 0; i < ARRAY_SIZE(bad); i++) { + bzero(&mbs, sizeof (mbs)); + len = c16rtomb(buf, first, &mbs); + if (len != 0) { + warnx("c16rtomb returned %zd, expected 0\n", len); + ret = B_FALSE; + } + + len = c16rtomb(buf, bad[i], &mbs); + if (len != (size_t)-1) { + warnx("c16rtomb surrogate %x returned %zd, expected " + "-1\n", bad[i], len); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("expected errno set to %d was %d", EILSEQ, errno); + ret = B_FALSE; + } + } + + return (ret); +} + +static boolean_t +c16rtomb_bad_second(void) +{ + char buf[MB_CUR_MAX]; + mbstate_t mbs; + size_t len, i; + char16_t bad[] = { 0xdc00, 0xdd34, 0xdfff }; + boolean_t ret = B_TRUE; + + for (i = 0; i < ARRAY_SIZE(bad); i++) { + 
bzero(&mbs, sizeof (mbs)); + len = c16rtomb(buf, bad[i], &mbs); + if (len != (size_t)-1) { + warnx("c16rtomb surrogate %x returned %zd, expected " + "-1\n", bad[i], len); + ret = B_FALSE; + } + + if (errno != EILSEQ) { + warnx("expected errno set to %d was %d", EILSEQ, errno); + ret = B_FALSE; + } + } + + return (ret); +} + +static boolean_t +c32rtomb_null(void) +{ + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = c32rtomb(NULL, uchar_value, &mbs); + if (len != 1) { + warnx("c32rtomb returned %zd, expected %zd", len, 1); + ret = B_FALSE; + } + + return (ret); +} + +static boolean_t +c16rtomb_null(void) +{ + size_t len; + mbstate_t mbs; + boolean_t ret = B_TRUE; + + bzero(&mbs, sizeof (mbs)); + len = c16rtomb(NULL, uchar_value, &mbs); + if (len != 1) { + warnx("c16rtomb returned %zd, expected %zd", len, 1); + ret = B_FALSE; + } + + return (ret); +} + +typedef boolean_t (*uchar_test_f)(void); + +typedef struct uchar_test { + uchar_test_f ut_func; + const char *ut_test; + const char *ut_locale; +} uchar_test_t; + +static const uchar_test_t uchar_tests[] = { + { mbrtoc32_ascii_mbstate, "mbrtoc32: ascii conversion" }, + { mbrtoc32_ascii_internal, "mbrtoc32: ascii conversion (internal " + "mbstate_t)" }, + { mbrtoc32_badseq_utf8, "mbrtoc32: bad locale sequence (UTF-8)" }, + { mbrtoc32_roundtrip, "mbrtoc32: round trip conversion" }, + { mbrtoc32_partial, "mbrtoc32: correctly consume partial sequences" }, + { mbrtoc32_zero, "mbrtoc32: correctly handle L'\\0'" }, + { mbrtoc32_zero_len, "mbrtoc32: correctly handle length of zero" }, + { mbrtoc32_null, "mbrtoc32: correctly handle null string" }, + { mbrtoc16_ascii_mbstate, "mbrtoc16: ascii conversion" }, + { mbrtoc16_ascii_internal, "mbrtoc16: ascii conversion (internal " + "mbstate_t)" }, + { mbrtoc16_null, "mbrtoc16: correctly handle null string" }, + { mbrtoc16_zero, "mbrtoc16: correctly handle L'\\0'" }, + { mbrtoc16_zero_len, "mbrtoc16: correctly handle length of zero" }, + { 
mbrtoc16_roundtrip, "mbrtoc16: round trip conversion" }, + { mbrtoc16_partial, "mbrtoc16: correctly consume partial sequences" }, + { mbrtoc16_surrogate, "mbrtoc16: correctly generate surrogate pairs " + "and round trip conversion" }, + { c32rtomb_eilseq_iso8859, "c32rtomb: character outside of locale is " + "caught", "en_US.ISO8859-1" }, + { c16rtomb_eilseq_iso8859, "c16rtomb: character outside of locale is " + "caught", "en_US.ISO8859-1" }, + { c32rtomb_eilseq_utf8, "c32rtomb: character outside of locale is " + "caught" }, + { c16rtomb_bad_first, "c16rtomb: bad first surrogate pair" }, + { c16rtomb_bad_second, "c16rtomb: bad second surrogate pair" }, + { c32rtomb_null, "c32rtomb: correctly handle null buffer" }, + { c16rtomb_null, "c16rtomb: correctly handle null buffer" }, +}; + +int +main(void) +{ + uint_t i; + uint_t passes = 0; + uint_t ntests = ARRAY_SIZE(uchar_tests); + + for (i = 0; i < ntests; i++) { + boolean_t r; + + /* + * Default to a standard UTF-8 locale if none is requested by + * the test. + */ + if (uchar_tests[i].ut_locale != NULL) { + update_locale(uchar_tests[i].ut_locale); + } else { + update_locale("en_US.UTF-8"); + } + + r = uchar_tests[i].ut_func(); + (void) fprintf(stderr, "TEST %s: %s\n", r ? "PASSED" : "FAILED", + uchar_tests[i].ut_test); + if (r) { + passes++; + } + } + + (void) printf("%d/%d test%s passed\n", passes, ntests, + passes > 1 ? "s" : ""); + return (passes == ntests ? EXIT_SUCCESS : EXIT_FAILURE); + +} diff --git a/usr/src/tools/smatch/src/Documentation/smatch.txt b/usr/src/tools/smatch/src/Documentation/smatch.txt index b62e4507ee..dadd05d0da 100644 --- a/usr/src/tools/smatch/src/Documentation/smatch.txt +++ b/usr/src/tools/smatch/src/Documentation/smatch.txt @@ -1,9 +1,13 @@ Smatch +0. Introduction 1. Building Smatch 2. Using Smatch 3. Smatch vs Sparse +Section 0: Introduction + +The Smatch mailing list is <smatch@vger.kernel.org>. 
Section 1: Building Smatch --------------------------- diff --git a/usr/src/tools/smatch/src/Documentation/submitting-patches.md b/usr/src/tools/smatch/src/Documentation/submitting-patches.md index fb176ce51d..66d6cd175a 100644 --- a/usr/src/tools/smatch/src/Documentation/submitting-patches.md +++ b/usr/src/tools/smatch/src/Documentation/submitting-patches.md @@ -19,3 +19,16 @@ Kernel submitting process. Notice that sparse uses the MIT License. +4. Smatch is built on top of Sparse but it is licensed under the GPLv2+. The + git repositories are: + + https://github.com/error27/smatch + https://repo.or.cz/w/smatch.git + + They are identical mirrors so it doesn't matter which you use. + + Send patches for Smatch to <smatch@vger.kernel.org>. If the code is + shared with both Sparse and Smatch then please send it to the Sparse + mailing list instead <linux-sparse@vger.kernel.org> and I will pick it up + from there. + diff --git a/usr/src/tools/smatch/src/Makefile b/usr/src/tools/smatch/src/Makefile index 7cd1db3039..8d7f2e66aa 100644 --- a/usr/src/tools/smatch/src/Makefile +++ b/usr/src/tools/smatch/src/Makefile @@ -1,4 +1,4 @@ -VERSION=0.6.1-rc1-il-5 +VERSION=0.6.1-rc1-il-6 ######################################################################## # The following variables can be overwritten from the command line diff --git a/usr/src/tools/smatch/src/check_atomic_inc_dec.c b/usr/src/tools/smatch/src/check_atomic_inc_dec.c index c83dc893b2..068cb5e535 100644 --- a/usr/src/tools/smatch/src/check_atomic_inc_dec.c +++ b/usr/src/tools/smatch/src/check_atomic_inc_dec.c @@ -24,14 +24,22 @@ static int my_id; STATE(inc); -STATE(orig); +STATE(start_state); STATE(dec); static struct smatch_state *unmatched_state(struct sm_state *sm) { - if (parent_is_gone_var_sym(sm->name, sm->sym)) + /* + * We default to decremented. For example, say we have: + * if (p) + * atomic_dec(p); + * <- p is decremented. 
+ * + */ + if ((sm->state == &dec) && + parent_is_gone_var_sym(sm->name, sm->sym)) return sm->state; - return &undefined; + return &start_state; } static struct stree *start_states; @@ -86,7 +94,7 @@ static struct sm_state *get_best_match(const char *key) return NULL; } -static void db_inc_dec(struct expression *expr, int param, const char *key, const char *value, int inc_dec) +static void db_inc_dec(struct expression *expr, int param, const char *key, int inc_dec) { struct sm_state *start_sm; struct expression *arg; @@ -129,7 +137,7 @@ static void db_inc_dec(struct expression *expr, int param, const char *key, cons set_start_state(name, sym, &inc); if (start_sm && start_sm->state == &inc) - set_state(my_id, name, sym, &orig); + set_state(my_id, name, sym, &start_state); else set_state(my_id, name, sym, &dec); } @@ -139,24 +147,70 @@ free: free_string(name); } +static const char *primitive_funcs[] = { + "atomic_inc_return", + "atomic_add_return", + "atomic_sub_return", + "atomic_sub_and_test", + "atomic_dec_and_test", + "_atomic_dec_and_lock", + "atomic_dec", + "atomic_long_inc", + "atomic_long_dec", + "atomic_inc", + "atomic_sub", + "refcount_inc", + "refcount_dec", + "refcount_add", + "refcount_add_not_zero", + "refcount_inc_not_zero", + "refcount_sub_and_test", + "refcount_dec_and_test", + "atomic_dec_if_positive", +}; + +static bool is_inc_dec_primitive(struct expression *expr) +{ + int i; + + while (expr->type == EXPR_ASSIGNMENT) + expr = strip_expr(expr->right); + if (expr->type != EXPR_CALL) + return false; + + if (expr->fn->type != EXPR_SYMBOL) + return false; + + for (i = 0; i < ARRAY_SIZE(primitive_funcs); i++) { + if (sym_name_is(primitive_funcs[i], expr->fn)) + return true; + } + + return false; +} + static void db_inc(struct expression *expr, int param, char *key, char *value) { - db_inc_dec(expr, param, key, value, ATOMIC_INC); + if (is_inc_dec_primitive(expr)) + return; + db_inc_dec(expr, param, key, ATOMIC_INC); } static void db_dec(struct 
expression *expr, int param, char *key, char *value) { - db_inc_dec(expr, param, key, value, ATOMIC_DEC); + if (is_inc_dec_primitive(expr)) + return; + db_inc_dec(expr, param, key, ATOMIC_DEC); } static void match_atomic_inc(const char *fn, struct expression *expr, void *_unused) { - db_inc_dec(expr, 0, "$->counter", "", ATOMIC_INC); + db_inc_dec(expr, 0, "$->counter", ATOMIC_INC); } static void match_atomic_dec(const char *fn, struct expression *expr, void *_unused) { - db_inc_dec(expr, 0, "$->counter", "", ATOMIC_DEC); + db_inc_dec(expr, 0, "$->counter", ATOMIC_DEC); } static void match_atomic_add(const char *fn, struct expression *expr, void *_unused) @@ -166,26 +220,53 @@ static void match_atomic_add(const char *fn, struct expression *expr, void *_unu amount = get_argument_from_call_expr(expr->args, 0); if (get_implied_value(amount, &sval) && sval_is_negative(sval)) { - db_inc_dec(expr, 1, "$->counter", "", ATOMIC_DEC); + db_inc_dec(expr, 1, "$->counter", ATOMIC_DEC); return; } - db_inc_dec(expr, 1, "$->counter", "", ATOMIC_INC); + db_inc_dec(expr, 1, "$->counter", ATOMIC_INC); } static void match_atomic_sub(const char *fn, struct expression *expr, void *_unused) { - db_inc_dec(expr, 1, "$->counter", "", ATOMIC_DEC); + db_inc_dec(expr, 1, "$->counter", ATOMIC_DEC); } static void refcount_inc(const char *fn, struct expression *expr, void *param) { - db_inc_dec(expr, PTR_INT(param), "$->ref.counter", "", ATOMIC_INC); + db_inc_dec(expr, PTR_INT(param), "$->ref.counter", ATOMIC_INC); } static void refcount_dec(const char *fn, struct expression *expr, void *param) { - db_inc_dec(expr, PTR_INT(param), "$->ref.counter", "", ATOMIC_DEC); + db_inc_dec(expr, PTR_INT(param), "$->ref.counter", ATOMIC_DEC); +} + +static void pm_runtime_get_sync(const char *fn, struct expression *expr, void *param) +{ + db_inc_dec(expr, PTR_INT(param), "$->power.usage_count.counter", ATOMIC_INC); +} + +static void match_implies_inc(const char *fn, struct expression *call_expr, + struct 
expression *assign_expr, void *param) +{ + db_inc_dec(call_expr, PTR_INT(param), "$->ref.counter", ATOMIC_INC); +} + +static void match_implies_atomic_dec(const char *fn, struct expression *call_expr, + struct expression *assign_expr, void *param) +{ + db_inc_dec(call_expr, PTR_INT(param), "$->counter", ATOMIC_DEC); +} + +static bool is_maybe_dec(struct sm_state *sm) +{ + if (sm->state == &dec) + return true; + if (slist_has_state(sm->possible, &dec) && + !slist_has_state(sm->possible, &inc)) + return true; + return false; } static void match_return_info(int return_id, char *return_ranges, struct expression *expr) @@ -194,9 +275,13 @@ static void match_return_info(int return_id, char *return_ranges, struct express const char *param_name; int param; + if (is_impossible_path()) + return; + FOR_EACH_MY_SM(my_id, __get_cur_stree(), sm) { - if (sm->state != &inc && - sm->state != &dec) + if (sm->state != &inc && !is_maybe_dec(sm)) + continue; + if (sm->state == get_state_stree(start_states, my_id, sm->name, sm->sym)) continue; if (parent_is_gone_var_sym(sm->name, sm->sym)) continue; @@ -221,7 +306,7 @@ static int success_fail_positive(struct range_list *rl) if (!rl) return EMPTY; - if (sval_is_negative(rl_min(rl))) + if (!is_whole_rl(rl) && sval_is_negative(rl_min(rl))) return NEGATIVE; if (rl_min(rl).value == 0) @@ -237,14 +322,23 @@ static void check_counter(const char *name, struct symbol *sym) int inc_buckets[NUM_BUCKETS] = {}; int dec_buckets[NUM_BUCKETS] = {}; struct stree *stree, *orig_stree; + struct smatch_state *state; struct sm_state *return_sm; struct sm_state *sm; sval_t line = sval_type_val(&int_ctype, 0); int bucket; + /* static variable are probably just counters */ + if (sym->ctype.modifiers & MOD_STATIC && + !(sym->ctype.modifiers & MOD_TOPLEVEL)) + return; + FOR_EACH_PTR(get_all_return_strees(), stree) { orig_stree = __swap_cur_stree(stree); + if (is_impossible_path()) + goto swap_stree; + return_sm = get_sm_state(RETURN_ID, "return_ranges", NULL); if 
(!return_sm) goto swap_stree; @@ -257,21 +351,23 @@ static void check_counter(const char *name, struct symbol *sym) goto swap_stree; sm = get_sm_state(my_id, name, sym); - if (!sm) - goto swap_stree; + if (sm) + state = sm->state; + else + state = &start_state; - if (sm->state != &inc && - sm->state != &dec && - sm->state != &orig) + if (state != &inc && + state != &dec && + state != &start_state) goto swap_stree; bucket = success_fail_positive(estate_rl(return_sm->state)); - if (sm->state == &inc) { + if (state == &inc) { add_range(&inc_lines, line, line); inc_buckets[bucket] = true; } - if (sm->state == &dec || sm->state == &orig) { + if (state == &dec || state == &start_state) { add_range(&dec_lines, line, line); dec_buckets[bucket] = true; } @@ -345,9 +441,14 @@ void check_atomic_inc_dec(int id) add_function_hook("atomic_add_return", &match_atomic_add, NULL); add_function_hook("atomic_sub_return", &match_atomic_sub, NULL); add_function_hook("atomic_sub_and_test", &match_atomic_sub, NULL); + add_function_hook("atomic_long_sub_and_test", &match_atomic_sub, NULL); + add_function_hook("atomic64_sub_and_test", &match_atomic_sub, NULL); add_function_hook("atomic_dec_and_test", &match_atomic_dec, NULL); + add_function_hook("atomic_long_dec_and_test", &match_atomic_dec, NULL); + add_function_hook("atomic64_dec_and_test", &match_atomic_dec, NULL); add_function_hook("_atomic_dec_and_lock", &match_atomic_dec, NULL); add_function_hook("atomic_dec", &match_atomic_dec, NULL); + add_function_hook("atomic_dec_return", &match_atomic_dec, NULL); add_function_hook("atomic_long_inc", &match_atomic_inc, NULL); add_function_hook("atomic_long_dec", &match_atomic_dec, NULL); add_function_hook("atomic_inc", &match_atomic_inc, NULL); @@ -356,11 +457,17 @@ void check_atomic_inc_dec(int id) add_function_hook("refcount_inc", &refcount_inc, INT_PTR(0)); add_function_hook("refcount_dec", &refcount_dec, INT_PTR(0)); add_function_hook("refcount_add", &refcount_inc, INT_PTR(1)); - 
add_function_hook("refcount_add_not_zero", &refcount_inc, INT_PTR(1)); - add_function_hook("refcount_inc_not_zero", &refcount_inc, INT_PTR(0)); + + return_implies_state("refcount_add_not_zero", 1, 1, &match_implies_inc, INT_PTR(1)); + return_implies_state("refcount_inc_not_zero", 1, 1, &match_implies_inc, INT_PTR(0)); + + return_implies_state("atomic_dec_if_positive", 0, INT_MAX, &match_implies_atomic_dec, INT_PTR(0)); + add_function_hook("refcount_sub_and_test", &refcount_dec, INT_PTR(1)); add_function_hook("refcount_dec_and_test", &refcount_dec, INT_PTR(0)); + add_function_hook("pm_runtime_get_sync", &pm_runtime_get_sync, INT_PTR(0)); + add_hook(&match_check_missed, END_FUNC_HOOK); add_hook(&match_after_func, AFTER_FUNC_HOOK); diff --git a/usr/src/tools/smatch/src/check_kernel_printf.c b/usr/src/tools/smatch/src/check_kernel_printf.c index 8992a83fd7..4b65da5a21 100644 --- a/usr/src/tools/smatch/src/check_kernel_printf.c +++ b/usr/src/tools/smatch/src/check_kernel_printf.c @@ -15,6 +15,8 @@ * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt */ +#define _GNU_SOURCE + #include <assert.h> #include <ctype.h> #include <string.h> @@ -207,7 +209,7 @@ qualifier: return ++fmt - start; case 's': - if (qualifier) + if (qualifier && qualifier != 'l') sm_warning("qualifier '%c' ignored for %%s specifier", qualifier); spec->type = FORMAT_TYPE_STR; @@ -672,6 +674,12 @@ pointer(const char *fmt, struct expression *arg, int vaidx) vaidx, type_to_str(type)); return; } + + /* error pointers */ + if (*fmt == 'e') + fmt++; + + /* Just plain %p, nothing to check. 
*/ if (!isalnum(*fmt)) return; diff --git a/usr/src/tools/smatch/src/smatch_db.c b/usr/src/tools/smatch/src/smatch_db.c index d31c3a17cf..8d3c3ca49d 100644 --- a/usr/src/tools/smatch/src/smatch_db.c +++ b/usr/src/tools/smatch/src/smatch_db.c @@ -2719,8 +2719,9 @@ const char *state_name_to_param_name(const char *state_name, const char *param_n return alloc_sname(buf); } - if (state_name[name_len] == '-' && /* check for '-' from "->" */ - strncmp(state_name, param_name, name_len) == 0) { + /* check for '-' from "->" */ + if (strncmp(state_name, param_name, name_len) == 0 && + state_name[name_len] == '-') { snprintf(buf, sizeof(buf), "%.*s$%s", star_cnt, "**********", state_name + name_len); return alloc_sname(buf); } diff --git a/usr/src/uts/common/sys/int_types.h b/usr/src/uts/common/sys/int_types.h index e57d6f8b75..8f7c423643 100644 --- a/usr/src/uts/common/sys/int_types.h +++ b/usr/src/uts/common/sys/int_types.h @@ -173,6 +173,9 @@ typedef long long int_least64_t; #endif #endif +/* + * If these are changed, please update char16_t and char32_t in head/uchar.h. 
+ */ typedef unsigned char uint_least8_t; typedef unsigned short uint_least16_t; typedef unsigned int uint_least32_t; diff --git a/usr/src/uts/sun4/io/px/px.c b/usr/src/uts/sun4/io/px/px.c index 6d9c5797c4..82948b2609 100644 --- a/usr/src/uts/sun4/io/px/px.c +++ b/usr/src/uts/sun4/io/px/px.c @@ -44,8 +44,6 @@ #include <sys/pcie_pwr.h> #include <sys/pci_cfgacc.h> -/*LINTLIBRARY*/ - /* * function prototypes for dev ops routines: */ @@ -226,7 +224,7 @@ px_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) px_t *px_p; /* per bus state pointer */ int instance = DIP_TO_INST(dip); int ret = DDI_SUCCESS; - devhandle_t dev_hdl = NULL; + devhandle_t dev_hdl = 0; pcie_hp_regops_t regops; pcie_bus_t *bus_p; @@ -521,7 +519,7 @@ px_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) mutex_exit(&px_p->px_mutex); mutex_destroy(&px_p->px_mutex); - px_p->px_dev_hdl = NULL; + px_p->px_dev_hdl = 0; ddi_soft_state_free(px_state_p, instance); return (DDI_SUCCESS); @@ -688,7 +686,7 @@ px_pwr_teardown(dev_info_t *dip) px_t *px_p = INST_TO_STATE(instance); ddi_intr_handle_impl_t hdl; - if (!PCIE_PMINFO(dip) || !PCIE_NEXUS_PMINFO(dip)) + if (PCIE_PMINFO(dip) == NULL || PCIE_NEXUS_PMINFO(dip) == NULL) return; /* Initialize handle */ @@ -848,7 +846,8 @@ px_dma_setup(dev_info_t *dip, dev_info_t *rdip, ddi_dma_req_t *dmareq, ddi_driver_name(rdip), ddi_get_instance(rdip), handlep ? 
"alloc" : "advisory"); - if (!(mp = px_dma_lmts2hdl(dip, rdip, mmu_p, dmareq))) + mp = px_dma_lmts2hdl(dip, rdip, mmu_p, dmareq); + if (mp == NULL) return (DDI_DMA_NORESOURCES); if (mp == (ddi_dma_impl_t *)DDI_DMA_NOMAPPING) return (DDI_DMA_NOMAPPING); @@ -858,14 +857,14 @@ px_dma_setup(dev_info_t *dip, dev_info_t *rdip, ddi_dma_req_t *dmareq, goto freehandle; switch (PX_DMA_TYPE(mp)) { - case PX_DMAI_FLAGS_DVMA: /* LINTED E_EQUALITY_NOT_ASSIGNMENT */ - if ((ret = px_dvma_win(px_p, dmareq, mp)) || !handlep) + case PX_DMAI_FLAGS_DVMA: + ret = px_dvma_win(px_p, dmareq, mp); + if (ret != 0 || handlep == NULL) goto freehandle; if (!PX_DMA_CANCACHE(mp)) { /* try fast track */ if (PX_DMA_CANFAST(mp)) { if (!px_dvma_map_fast(mmu_p, mp)) break; - /* LINTED E_NOP_ELSE_STMT */ } else { PX_DVMA_FASTTRAK_PROF(mp); } @@ -873,8 +872,9 @@ px_dma_setup(dev_info_t *dip, dev_info_t *rdip, ddi_dma_req_t *dmareq, if (ret = px_dvma_map(mp, dmareq, mmu_p)) goto freehandle; break; - case PX_DMAI_FLAGS_PTP: /* LINTED E_EQUALITY_NOT_ASSIGNMENT */ - if ((ret = px_dma_physwin(px_p, dmareq, mp)) || !handlep) + case PX_DMAI_FLAGS_PTP: + ret = px_dma_physwin(px_p, dmareq, mp); + if (ret != 0 || handlep == NULL) goto freehandle; break; case PX_DMAI_FLAGS_BYPASS: @@ -915,7 +915,8 @@ px_dma_allochdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attrp, if (attrp->dma_attr_version != DMA_ATTR_V0) return (DDI_DMA_BADATTR); - if (!(mp = px_dma_allocmp(dip, rdip, waitfp, arg))) + mp = px_dma_allocmp(dip, rdip, waitfp, arg); + if (mp == NULL) return (DDI_DMA_NORESOURCES); /* @@ -989,7 +990,7 @@ px_dma_bindhdl(dev_info_t *dip, dev_info_t *rdip, if (!PX_DMA_CANCACHE(mp)) { /* try fast track */ if (PX_DMA_CANFAST(mp)) { if (!px_dvma_map_fast(mmu_p, mp)) - goto mapped; /*LINTED E_NOP_ELSE_STMT*/ + goto mapped; } else { PX_DVMA_FASTTRAK_PROF(mp); } diff --git a/usr/src/uts/sun4/io/px/px_fm.c b/usr/src/uts/sun4/io/px/px_fm.c index 9db9e7e50a..5f0de5be0a 100644 --- a/usr/src/uts/sun4/io/px/px_fm.c +++ 
b/usr/src/uts/sun4/io/px/px_fm.c @@ -329,7 +329,7 @@ px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) switch (ranges_p->child_high & PCI_ADDR_MASK) { case PCI_ADDR_CONFIG: acc_type = PF_ADDR_CFG; - addr = NULL; + addr = 0; bdf = (pcie_req_id_t)((fault_addr >> 12) & 0xFFFF); break; @@ -448,7 +448,7 @@ px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid) } /* Ensure that the rid of the fabric message will get scanned. */ - pfd_p = px_rp_en_q(px_p, rid, NULL, NULL); + pfd_p = px_rp_en_q(px_p, rid, 0, 0); PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_FABRIC; rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); @@ -739,7 +739,7 @@ px_get_pfd(px_t *px_p) { PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0; PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE; PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL; - PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = NULL; + PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0; PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF; PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = 0; PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0; @@ -890,7 +890,7 @@ px_err_pio_hdl_check(dev_info_t *dip, const void *handle, const void *arg1, */ size = hp->ah_len; if (((fault_addr >= base_addr) && (fault_addr < (base_addr + size))) || - ((fault_addr == NULL) && (PCIE_CHECK_VALID_BDF(bdf) && + ((fault_addr == 0) && (PCIE_CHECK_VALID_BDF(bdf) && (bdf == PCIE_DIP2BUS(dip)->bus_bdf)))) status = DDI_FM_NONFATAL; @@ -927,7 +927,7 @@ px_err_dma_hdl_check(dev_info_t *dip, const void *handle, const void *arg1, * know the BDF and ADDR == 0. 
*/ if (((addr >= base_addr) && (addr < (base_addr + size))) || - ((addr == NULL) && PCIE_CHECK_VALID_BDF(bdf))) + ((addr == 0) && PCIE_CHECK_VALID_BDF(bdf))) status = DDI_FM_NONFATAL; return (status); diff --git a/usr/src/uts/sun4/io/px/px_tools.c b/usr/src/uts/sun4/io/px/px_tools.c index 8d424f7957..ab089009fd 100644 --- a/usr/src/uts/sun4/io/px/px_tools.c +++ b/usr/src/uts/sun4/io/px/px_tools.c @@ -488,7 +488,7 @@ pxtool_get_phys_addr(px_t *px_p, int space, uint64_t offset) "space:0x%d, offset:0x%" PRIx64 "\n", space, offset); if (rval != DDI_SUCCESS) - return (NULL); + return ((uintptr_t)NULL); /* Bustype here returns the high order address bits. */ xlated_regspec.regspec_bustype &= px_get_rng_parent_hi_mask(px_p); diff --git a/usr/src/uts/sun4/io/px/px_util.c b/usr/src/uts/sun4/io/px/px_util.c index e2b4bdbbc4..893690a5d9 100644 --- a/usr/src/uts/sun4/io/px/px_util.c +++ b/usr/src/uts/sun4/io/px/px_util.c @@ -41,8 +41,6 @@ #include "px_obj.h" #include <sys/pcie_pwr.h> -/*LINTLIBRARY*/ - /* * px_get_props * @@ -407,7 +405,7 @@ px_init_child(px_t *px_p, dev_info_t *child) pci_regspec_t *pci_rp; char name[10]; int i, no_config; - intptr_t ppd = NULL; + intptr_t ppd = (intptr_t)NULL; /* * The following is a special case for pcimem nodes. 
diff --git a/usr/src/uts/sun4u/io/px/px_err.c b/usr/src/uts/sun4u/io/px/px_err.c index 3ff353d2e9..5db4380371 100644 --- a/usr/src/uts/sun4u/io/px/px_err.c +++ b/usr/src/uts/sun4u/io/px/px_err.c @@ -570,14 +570,14 @@ px_err_bit_desc_t px_err_lpug_tbl[] = { /* LPU Registers Addresses */ #define LR4(pre) \ - NULL, \ + 0, \ LPU_ ## pre ## _INTERRUPT_MASK, \ LPU_ ## pre ## _INTERRUPT_AND_STATUS, \ LPU_ ## pre ## _INTERRUPT_AND_STATUS /* LPU Registers Addresses with Irregularities */ #define LR4_FIXME(pre) \ - NULL, \ + 0, \ LPU_ ## pre ## _INTERRUPT_MASK, \ LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS, \ LPU_ ## pre ## _LAYER_INTERRUPT_AND_STATUS @@ -737,7 +737,7 @@ px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base) uint64_t log_mask = *reg_desc_p->log_mask_p; /* Enable logs if it exists */ - if (reg_desc_p->log_addr != NULL) + if (reg_desc_p->log_addr != 0) CSR_XS(csr_base, reg_desc_p->log_addr, log_mask); /* @@ -758,7 +758,7 @@ px_err_reg_enable(px_err_id_t reg_id, caddr_t csr_base) CSR_XR(csr_base, reg_desc_p->status_addr)); DBG(DBG_ATTACH, NULL, "%s Clear: 0x%llx\n", reg_desc_p->msg, CSR_XR(csr_base, reg_desc_p->clear_addr)); - if (reg_desc_p->log_addr != NULL) { + if (reg_desc_p->log_addr != 0) { DBG(DBG_ATTACH, NULL, "%s Log: 0x%llx\n", reg_desc_p->msg, CSR_XR(csr_base, reg_desc_p->log_addr)); } @@ -770,7 +770,7 @@ px_err_reg_disable(px_err_id_t reg_id, caddr_t csr_base) const px_err_reg_desc_t *reg_desc_p = &px_err_reg_tbl[reg_id]; uint64_t val = (reg_id >= PX_ERR_LPU_LINK) ? 
-1 : 0; - if (reg_desc_p->log_addr != NULL) + if (reg_desc_p->log_addr != 0) CSR_XS(csr_base, reg_desc_p->log_addr, val); CSR_XS(csr_base, reg_desc_p->enable_addr, val); } @@ -1722,7 +1722,7 @@ PX_ERPT_SEND_DEC(mmu_tfar_tfsr) s_status = PCI_STAT_S_TARG_AB; /* Only PIO Fault Addresses are valid, this is DMA */ - (void) px_rp_en_q(px_p, fault_bdf, NULL, s_status); + (void) px_rp_en_q(px_p, fault_bdf, 0, s_status); } (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); @@ -1793,8 +1793,7 @@ px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, goto done; bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); - (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL, - bdf); + (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, 0, bdf); done: return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, @@ -1824,8 +1823,7 @@ px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, goto done; bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); - (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, NULL, - bdf); + (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_ADDR_DMA, 0, bdf); done: return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, diff --git a/usr/src/uts/sun4u/io/px/px_lib4u.c b/usr/src/uts/sun4u/io/px/px_lib4u.c index 57cbeac22f..1180f3fbd9 100644 --- a/usr/src/uts/sun4u/io/px/px_lib4u.c +++ b/usr/src/uts/sun4u/io/px/px_lib4u.c @@ -698,13 +698,9 @@ px_lib_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle, * CPU's internal "invalidate FIFOs" are flushed. */ -#if !defined(lint) kpreempt_disable(); -#endif jbus_stst_order(); -#if !defined(lint) kpreempt_enable(); -#endif return (DDI_SUCCESS); } @@ -1416,7 +1412,7 @@ oberon_set_cb(dev_info_t *dip, uint64_t val) * register array. 
*/ for (ubc_id = 0; ubc_id < OBERON_UBC_ID_MAX; ubc_id++) { - if (px_oberon_ubc_scratch_regs[ubc_id] != NULL) + if (px_oberon_ubc_scratch_regs[ubc_id] != 0) return; } @@ -1559,7 +1555,7 @@ px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr) } } - (void) px_rp_en_q(px_p, bdf, addr_low, NULL); + (void) px_rp_en_q(px_p, bdf, addr_low, 0); /* * XXX - Current code scans the fabric for all px_tool accesses. @@ -1877,8 +1873,8 @@ px_goto_l23ready(px_t *px_p) int mutex_held = 1; /* If no PM info, return failure */ - if (!PCIE_PMINFO(px_p->px_dip) || - !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip))) + if (PCIE_PMINFO(px_p->px_dip) == NULL || + (pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)) == NULL) return (DDI_FAILURE); mutex_enter(&pwr_p->pwr_lock); @@ -2001,8 +1997,8 @@ px_pre_pwron_check(px_t *px_p) pcie_pwr_t *pwr_p; /* If no PM info, return failure */ - if (!PCIE_PMINFO(px_p->px_dip) || - !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip))) + if (PCIE_PMINFO(px_p->px_dip) == NULL || + (pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)) == NULL) return (DDI_FAILURE); /* @@ -2023,8 +2019,8 @@ px_goto_l0(px_t *px_p) uint64_t time_spent = 0; /* If no PM info, return failure */ - if (!PCIE_PMINFO(px_p->px_dip) || - !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip))) + if (PCIE_PMINFO(px_p->px_dip) == NULL || + (pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)) == NULL) return (DDI_FAILURE); mutex_enter(&pwr_p->pwr_lock); @@ -2139,7 +2135,7 @@ px_cb_intr_redist(void *arg) mutex_enter(&cb_p->cb_mutex); pxl = cb_p->pxl; - if (!pxl) + if (pxl == NULL) goto cb_done; pxp = pxl->pxp; @@ -2275,7 +2271,7 @@ px_cb_rem_intr(px_fault_t *fault_p) pxl = pxl->next; for (; pxl && (pxl->pxp != px_p); prev = pxl, pxl = pxl->next) { }; - if (!pxl) { + if (pxl == NULL) { cmn_err(CE_WARN, "px_cb_rem_intr: can't find px_p 0x%p " "in registered CB list.", (void *)px_p); mutex_exit(&cb_p->cb_mutex); @@ -2342,7 +2338,7 @@ px_cb_intr(caddr_t arg) mutex_enter(&cb_p->cb_mutex); - if (!cb_p->pxl) { + if (cb_p->pxl == NULL) { 
mutex_exit(&cb_p->cb_mutex); return (DDI_INTR_UNCLAIMED); } diff --git a/usr/src/uts/sun4u/px/Makefile b/usr/src/uts/sun4u/px/Makefile index 79ef780ea3..ca7c77da9c 100644 --- a/usr/src/uts/sun4u/px/Makefile +++ b/usr/src/uts/sun4u/px/Makefile @@ -39,7 +39,6 @@ UTSBASE = ../.. # MODULE = px OBJECTS = $(PX_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(PX_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE) # @@ -51,7 +50,6 @@ include $(UTSBASE)/sun4u/Makefile.sun4u # Define targets # ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # @@ -60,9 +58,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) INC_PATH += -I$(UTSBASE)/sun4/io/px INC_PATH += -I$(UTSBASE)/sun4u/io/px -# -# lint pass one enforcement -# CFLAGS += $(CCVERBOSE) # @@ -70,18 +65,6 @@ CFLAGS += $(CCVERBOSE) # CFLAGS += -dalign -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. 
-# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - CERRWARN += -_gcc=-Wno-parentheses CERRWARN += -_gcc=-Wno-type-limits CERRWARN += $(CNOWARN_UNINIT) @@ -107,12 +90,6 @@ clean: $(CLEAN_DEPS) clobber: $(CLOBBER_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) # diff --git a/usr/src/uts/sun4v/io/px/px_err.c b/usr/src/uts/sun4v/io/px/px_err.c index e99ca6b882..3bdd99aaf6 100644 --- a/usr/src/uts/sun4v/io/px/px_err.c +++ b/usr/src/uts/sun4v/io/px/px_err.c @@ -151,7 +151,7 @@ px_err_fill_pfd(dev_info_t *dip, pf_data_t *pfd_p, px_rc_err_t *epkt) { case BLOCK_MMU: /* Only PIO Fault Addresses are valid, this is DMA */ s_status = PCI_STAT_S_TARG_AB; - fault_addr = NULL; + fault_addr = 0; if (epkt->rc_descr.H) { fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16); @@ -836,7 +836,7 @@ px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) pcie_req_id_t bdf = PCIE_INVALID_BDF; if (epkt->rc_descr.H) { - bdf = (uint32_t)((epkt->hdr[0] >> 16) && 0xFFFF); + bdf = (uint32_t)((epkt->hdr[0] >> 16) & 0xFFFF); } return (pf_hdl_lookup(dip, derr->fme_ena, PF_ADDR_DMA, addr, diff --git a/usr/src/uts/sun4v/io/px/px_lib4v.c b/usr/src/uts/sun4v/io/px/px_lib4v.c index 54bcd03bef..710379950c 100644 --- a/usr/src/uts/sun4v/io/px/px_lib4v.c +++ b/usr/src/uts/sun4v/io/px/px_lib4v.c @@ -1686,7 +1686,7 @@ px_lib_log_safeacc_err(px_t *px_p, ddi_acc_handle_t handle, int fme_flag, } } - (void) px_rp_en_q(px_p, bdf, addr, NULL); + (void) px_rp_en_q(px_p, bdf, addr, 0); (void) px_scan_fabric(px_p, px_p->px_dip, &derr); px_fm_exit(px_p); } diff --git a/usr/src/uts/sun4v/io/px/px_tools_4v.c b/usr/src/uts/sun4v/io/px/px_tools_4v.c index 42ec37636b..288fa61e10 100644 --- 
a/usr/src/uts/sun4v/io/px/px_tools_4v.c +++ b/usr/src/uts/sun4v/io/px/px_tools_4v.c @@ -184,7 +184,7 @@ pxtool_phys_access(px_t *px_p, uintptr_t dev_addr, from_addr = dev_addr; } - rval = hv_hpriv((void *)pfunc, from_addr, to_addr, NULL); + rval = hv_hpriv((void *)pfunc, from_addr, to_addr, 0); switch (rval) { case H_ENOACCESS: /* Returned by non-debug hypervisor. */ rval = ENOTSUP; diff --git a/usr/src/uts/sun4v/px/Makefile b/usr/src/uts/sun4v/px/Makefile index d3b224707a..c63335e3b0 100644 --- a/usr/src/uts/sun4v/px/Makefile +++ b/usr/src/uts/sun4v/px/Makefile @@ -37,7 +37,6 @@ UTSBASE = ../.. # MODULE = px OBJECTS = $(PX_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(PX_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE) # @@ -55,7 +54,6 @@ CLEANFILES += $(MODSTUBS_O) # Define targets # ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # @@ -64,27 +62,10 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) INC_PATH += -I$(UTSBASE)/sun4/io/px # -# lint pass one enforcement -# -CFLAGS += $(CCVERBOSE) - -# # Turn on doubleword alignment for 64 bit registers # CFLAGS += -dalign -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED -LINTTAGS += -erroff=E_STATIC_UNUSED -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - CERRWARN += -_gcc=-Wno-parentheses CERRWARN += -_gcc=-Wno-type-limits CERRWARN += -_gcc=-Wno-clobbered @@ -109,12 +90,6 @@ clean: $(CLEAN_DEPS) clobber: $(CLOBBER_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) # |