diff options
Diffstat (limited to 'usr/src/man/man3c')
-rw-r--r-- | usr/src/man/man3c/Makefile | 20 | ||||
-rw-r--r-- | usr/src/man/man3c/c16rtomb.3c | 285 | ||||
-rw-r--r-- | usr/src/man/man3c/mbrtoc16.3c | 397 | ||||
-rw-r--r-- | usr/src/man/man3c/mbrtowc.3c | 231 | ||||
-rw-r--r-- | usr/src/man/man3c/wcrtomb.3c | 146 |
5 files changed, 696 insertions, 383 deletions
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile index 726125d3e9..0387bbd608 100644 --- a/usr/src/man/man3c/Makefile +++ b/usr/src/man/man3c/Makefile @@ -62,6 +62,7 @@ MANFILES= __fbufsize.3c \ bstring.3c \ btowc.3c \ byteorder.3c \ + c16rtomb.3c \ call_once.3c \ catgets.3c \ catopen.3c \ @@ -241,7 +242,7 @@ MANFILES= __fbufsize.3c \ malloc.3c \ mblen.3c \ mbrlen.3c \ - mbrtowc.3c \ + mbrtoc16.3c \ mbsinit.3c \ mbsrtowcs.3c \ mbstowcs.3c \ @@ -543,7 +544,6 @@ MANFILES= __fbufsize.3c \ waitpid.3c \ walkcontext.3c \ wcpcpy.3c \ - wcrtomb.3c \ wcscasecmp.3c \ wcscoll.3c \ wcsdup.3c \ @@ -729,6 +729,7 @@ MANLINKS= FD_CLR.3c \ bindtextdomain.3c \ btowc_l.3c \ bzero.3c \ + c32rtomb.3c \ calloc.3c \ canonicalize_file_name.3c \ catclose.3c \ @@ -1006,6 +1007,8 @@ MANLINKS= FD_CLR.3c \ major.3c \ mblen_l.3c \ mbrlen_l.3c \ + mbrtoc32.3c \ + mbrtowc.3c \ mbrtowc_l.3c \ mbsinit_l.3c \ mbsnrtowcs.3c \ @@ -1385,6 +1388,7 @@ MANLINKS= FD_CLR.3c \ watol.3c \ watoll.3c \ wcpncpy.3c \ + wcrtomb.3c \ wcrtomb_l.3c \ wcscasecmp_l.3c \ wcscat.3c \ @@ -1599,6 +1603,10 @@ ntohl.3c := LINKSRC = byteorder.3c ntohll.3c := LINKSRC = byteorder.3c ntohs.3c := LINKSRC = byteorder.3c +c32rtomb.3c := LINKSRC = c16rtomb.3c +wcrtomb.3c := LINKSRC = c16rtomb.3c +wcrtomb_l.3c := LINKSRC = c16rtomb.3c + canonicalize_file_name.3c := LINKSRC = realpath.3c catclose.3c := LINKSRC = catopen.3c @@ -2012,10 +2020,12 @@ mblen_l.3c := LINKSRC = mblen.3c mbrlen_l.3c := LINKSRC = mbrlen.3c -mbrtowc_l.3c := LINKSRC = mbrtowc.3c - mbsinit_l.3c := LINKSRC = mbsinit.3c +mbrtoc32.3c := LINKSRC = mbrtoc16.3c +mbrtowc.3c := LINKSRC = mbrtoc16.3c +mbrtowc_l.3c := LINKSRC = mbrtoc16.3c + mbsnrtowcs.3c := LINKSRC = mbsrtowcs.3c mbsnrtowcs_l.3c := LINKSRC = mbsrtowcs.3c mbsrtowcs_l.3c := LINKSRC = mbsrtowcs.3c @@ -2509,8 +2519,6 @@ printstack.3c := LINKSRC = walkcontext.3c wcpncpy.3c := LINKSRC = wcpcpy.3c -wcrtomb_l.3c := LINKSRC = wcrtomb.3c - wcscasecmp_l.3c := LINKSRC = wcscasecmp.3c wcsncasecmp.3c 
:= LINKSRC = wcscasecmp.3c wcsncasecmp_l.3c := LINKSRC = wcscasecmp.3c diff --git a/usr/src/man/man3c/c16rtomb.3c b/usr/src/man/man3c/c16rtomb.3c new file mode 100644 index 0000000000..33c6189dd3 --- /dev/null +++ b/usr/src/man/man3c/c16rtomb.3c @@ -0,0 +1,285 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 Robert Mustacchi +.\" +.Dd April 23, 2020 +.Dt C16RTOMB 3C +.Os +.Sh NAME +.Nm c16rtomb , +.Nm c32rtomb , +.Nm wcrtomb , +.Nm wcrtomb_l +.Nd convert wide-characters to character sequences +.Sh SYNOPSIS +.In uchar.h +.Ft size_t +.Fo c16rtomb +.Fa "char *restrict str" +.Fa "char16_t c16" +.Fa "mbstate_t *restrict ps" +.Fc +.Ft size_t +.Fo c32rtomb +.Fa "char *restrict str" +.Fa "char32_t c32" +.Fa "mbstate_t *restrict ps" +.Fc +.In stdio.h +.Ft size_t +.Fo wcrtomb +.Fa "char *restrict str" +.Fa "wchar_t wc" +.Fa "mbstate_t *restrict ps" +.Fc +.In stdio.h +.In xlocale.h +.Ft size_t +.Fo wcrtomb_l +.Fa "char *restrict str" +.Fa "wchar_t wc" +.Fa "mbstate_t *restrict ps" +.Fa "locale_t loc" +.Fc +.Sh DESCRIPTION +The +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions convert wide-character sequences into a series of multi-byte +characters. +The functions work in the following formats: +.Bl -tag -width wcrtomb_l +.It Fn c16rtomb +A UTF-16 code sequence, where every code point is represented by one or +two +.Vt char16_t . +The UTF-16 encoding will encode certain Unicode code points as a pair of +two 16-bit code sequences, commonly referred to as a surrogate pair. 
+.It Fn c32rtomb +A UTF-32 code sequence, where every code point is represented by a +single +.Vt char32_t . +It is illegal to pass reserved Unicode code points. +.It Fn wcrtomb , Fn wcrtomb_l +Wide characters, being a 32-bit value where every code point is +represented by a single +.Vt wchar_t . +While the +.Vt wchar_t +and +.Vt char32_t +are different types, in this implementation, they are similar encodings. +.El +.Pp +The functions all work by looking at the passed in wide-character +.Po +.Fa c16 , +.Fa c32 , +.Fa wc +.Pc +and appending it to the current conversion state, +.Fa ps . +Once a valid code point, based on the current locale, is found, then it +will be converted into a series of characters that are stored in +.Fa str . +Up to +.Dv MB_CUR_MAX +bytes will be stored in +.Fa str . +It is the caller's responsibility to ensure that there is sufficient +space in +.Fa str . +.Pp +The functions are all influenced by the +.Dv LC_CTYPE +category of the current locale for determining what is considered a +valid character. +For example, in the +.Sy C +locale, +only ASCII characters are recognized, while in a +.Sy UTF-8 +based locale like +.Sy en_US.UTF-8 , +all valid Unicode code points are recognized and will be converted into +the corresponding multi-byte sequence. +The +.Fn wcrtomb_l +function uses the locale passed in +.Fa loc +rather than the locale of the current thread. +.Pp +The +.Fa ps +argument represents a multi-byte conversion state which can be used +across multiple calls to a given function +.Pq but not mixed between functions . +These allow for characters to be consumed from subsequent buffers, e.g. +different values of +.Fa str . +The functions may be called from multiple threads as long as they use +unique values for +.Fa ps . +If +.Fa ps +is +.Dv NULL , +then a function-specific buffer will be used for the conversion state; +however, this is shared between all threads and its use is not +recommended. 
+.Pp +The functions all have a special behavior when +.Dv NULL +is passed for +.Fa str . +They instead will treat it as though the NULL wide-character was +passed in +.Fa c16 , +.Fa c32 , +or +.Fa wc +and an internal buffer +.Pq buf +will be used to write out the results of the +conversion. +In other words, the functions would be called as: +.Bd -literal -offset indent +c16rtomb(buf, L'\\0', ps) +c32rtomb(buf, L'\\0', ps) +wcrtomb(buf, L'\\0', ps) +wcrtomb_l(buf, L'\\0', ps, loc) +.Ed +.Ss Locale Details +Not all locales in the system are Unicode based locales. +For example, ISO 8859 family locales have code points with values that +do not match their counterparts in Unicode. +When using these functions with non-Unicode based locales, the code +points returned will be those determined by the locale. +They will not be converted from the corresponding Unicode code point. +For example, if using the Euro sign in ISO 8859-15, these functions +will not encode the Unicode value 0x20ac into the ISO 8859-15 value +0xa4. +.Pp +Regardless of the locale, the characters returned will be encoded as +though the code point were the corresponding value in Unicode. +This means that when using UTF-16, if the corresponding code point were +in the range for surrogate pairs, then the +.Fn c16rtomb +function will expect to receive that code point in that fashion. +.Pp +This behavior of the +.Fn c16rtomb +and +.Fn c32rtomb +functions should not be relied upon, is not portable, and is subject to +change for non-Unicode locales. +.Sh RETURN VALUES +Upon successful completion, the +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions return the number of bytes stored in +.Fa str . +Otherwise, +.Sy (size_t)-1 +is returned to indicate an encoding error and +.Va errno +is set. +.Sh EXAMPLES +.Sy Example 1 +Converting a UTF-32 character into a multi-byte character sequence. 
+.Bd -literal +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdio.h> +#include <uchar.h> + +int +main(void) +{ + mbstate_t mbs; + size_t ret; + char buf[MB_CUR_MAX]; + char32_t val = 0x5149; + const char *uchar_exp = "\exe5\ex85\ex89"; + + (void) memset(&mbs, 0, sizeof (mbs)); + (void) setlocale(LC_CTYPE, "en_US.UTF-8"); + ret = c32rtomb(buf, val, &mbs); + if (ret != strlen(uchar_exp)) { + errx(EXIT_FAILURE, "failed to convert string, got %zd", + ret); + } + + if (strncmp(buf, uchar_exp, ret) != 0) { + errx(EXIT_FAILURE, "converted char32_t does not match " + "expected value"); + } + + return (0); +} +.Ed +.Sh ERRORS +The +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions will fail if: +.Bl -tag -width Er +.It Er EINVAL +The conversion state in +.Fa ps +is invalid. +.It Er EILSEQ +An invalid character sequence has been detected. +.El +.Sh MT-LEVEL +The +.Fn c16rtomb , +.Fn c32rtomb , +.Fn wcrtomb , +and +.Fn wcrtomb_l +functions are +.Sy MT-Safe +as long as different +.Vt mbstate_t +structures are passed in +.Fa ps . +If +.Fa ps +is +.Dv NULL +or different threads use the same value for +.Fa ps , +then the functions are +.Sy Unsafe . +.Sh INTERFACE STABILITY +.Sy Committed +.Sh SEE ALSO +.Xr mbrtoc16 3C , +.Xr mbrtoc32 3C , +.Xr mbrtowc 3C , +.Xr newlocale 3C , +.Xr setlocale 3C , +.Xr uselocale 3C , +.Xr uchar.h 3HEAD , +.Xr environ 5 diff --git a/usr/src/man/man3c/mbrtoc16.3c b/usr/src/man/man3c/mbrtoc16.3c new file mode 100644 index 0000000000..d1b3ab478b --- /dev/null +++ b/usr/src/man/man3c/mbrtoc16.3c @@ -0,0 +1,397 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. 
A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 Robert Mustacchi +.\" +.Dd April 23, 2020 +.Dt MBRTOC16 3C +.Os +.Sh NAME +.Nm mbrtoc16 , +.Nm mbrtoc32 , +.Nm mbrtowc , +.Nm mbrtowc_l +.Nd convert characters to wide characters +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fo mbrtowc +.Fa "wchar_t *restrict pwc" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fc +.In wchar.h +.In xlocale.h +.Ft size_t +.Fo mbrtowc_l +.Fa "wchar_t *restrict pwc" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fa "locale_t loc" +.Fc +.In uchar.h +.Ft size_t +.Fo mbrtoc16 +.Fa "char16_t *restrict p16c" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fc +.Ft size_t +.Fo mbrtoc32 +.Fa "char32_t *restrict p32c" +.Fa "const char *restrict str" +.Fa "size_t len" +.Fa "mbstate_t *restrict ps" +.Fc +.Sh DESCRIPTION +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions convert character sequences, which may contain multi-byte +characters, into different character formats. +The functions work in the following formats: +.Bl -tag -width mbrtowc_l +.It Fn mbrtoc16 +A UTF-16 code sequence, where every code point is represented by one or +two +.Vt char16_t . +The UTF-16 encoding will encode certain Unicode code points as a pair of +two 16-bit code sequences, commonly referred to as a surrogate pair. +.It Fn mbrtoc32 +A UTF-32 code sequence, where every code point is represented by a +single +.Vt char32_t . +.It Fn mbrtowc , Fn mbrtowc_l +Wide characters, being a 32-bit value where every code point is +represented by a single +.Vt wchar_t . +While the +.Vt wchar_t +and +.Vt char32_t +are different types, in this implementation, they are similar encodings. 
+.El +.Pp +The functions consume up to +.Fa len +bytes from the string +.Fa str +and accumulate them in +.Fa ps +until a valid character is found, which is influenced by +the +.Dv LC_CTYPE +category of the current locale. +For example, in the +.Sy C +locale, only ASCII characters are recognized, while in a +.Sy UTF-8 +based locale like +.Sy en_US.UTF-8 , +UTF-8 multi-byte character sequences that represent Unicode code points +are recognized. +The +.Fn mbrtowc_l +function uses the locale passed in +.Fa loc +rather than the locale of the current thread. +.Pp +When a valid character sequence has been found, it is converted to +either a 16-bit character sequence for +.Fn mbrtoc16 +or a 32-bit character sequence for +.Fn mbrtoc32 +and will be stored in +.Fa p16c +and +.Fa p32c +respectively. +.Pp +The +.Fa ps +argument represents a multi-byte conversion state which can be used +across multiple calls to a given function +.Pq but not mixed between functions . +These allow for characters to be consumed from subsequent buffers, e.g. +different values of +.Fa str . +The functions may be called from multiple threads as long as they use +unique values for +.Fa ps . +If +.Fa ps +is +.Dv NULL , +then a function-specific buffer will be used for the conversion state; +however, this is shared between all threads and its use is not +recommended. +.Pp +When using these functions, more than one character may be output for a +given set of consumed input characters. +An example of this is when a given code point is represented as a +surrogate pair in UTF-16, which requires two 16-bit characters to +represent a code point. +When this occurs, the functions return the special return value +.Sy (size_t)-3 . +.Pp +The functions all have a special behavior when +.Dv NULL +is passed for +.Fa str . 
+They instead will treat it as though +.Fa pwc , +.Fa p16c , +or +.Fa p32c +were +.Dv NULL , +.Fa str +had been passed as the empty string, "", and the length, +.Fa len , +would appear as the value 1. +In other words, the functions would be called as: +.Bd -literal -offset indent +mbrtowc(NULL, "", 1, ps) +mbrtowc_l(NULL, "", 1, ps, loc) +mbrtoc16(NULL, "", 1, ps) +mbrtoc32(NULL, "", 1, ps) +.Ed +.Ss Locale Details +Not all locales in the system are Unicode based locales. +For example, ISO 8859 family locales have code points with values that +do not match their counterparts in Unicode. +When using these functions with non-Unicode based locales, the code +points returned will be those determined by the locale. +They will not be converted to the corresponding Unicode code point. +For example, if using the Euro sign in ISO 8859-15, these functions +might return the code point 0xa4 and not the Unicode value 0x20ac. +.Pp +Regardless of the locale, the characters returned will be encoded as +though the code point were the corresponding value in Unicode. +This means that if a locale returns a value that would be a surrogate +pair in the UTF-16 encoding, it will still be encoded as a UTF-16 +character. +.Pp +This behavior of the +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions should not be relied upon, is not portable, and subject to +change for non-Unicode locales. +.Sh RETURN VALUES +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions return the following values: +.Bl -tag -width (size_t)-3 +.It Sy 0 +.Fa len +or fewer bytes of +.Fa str +were consumed and the null wide character was written into the wide +character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy between 1 and len +The specified number of bytes were consumed and a single character was +written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy (size_t)-1 +An encoding error has occurred. 
+The next +.Fa len +bytes of +.Fa str +do not contribute to a valid character. +.Va errno +has been set to +.Er EILSEQ . +No data was written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy (size_t)-2 +.Fa len +bytes of +.Fa str +were consumed, but a complete multi-byte character sequence has not been +found and no data was written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +.It Sy (size_t)-3 +A character has been written into the wide character buffer +.Po +.Fa pwc , +.Fa p16c , +.Fa p32c +.Pc . +This character was from a previous call (such as another part of a +UTF-16 surrogate pair) and no input was consumed. +This is limited to the +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions. +.El +.Sh EXAMPLES +.Sy Example 1 +Using the +.Fn mbrtoc32 +function to convert a multibyte string. +.Bd -literal +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdio.h> +#include <uchar.h> + +int +main(void) +{ + mbstate_t mbs; + char32_t out; + size_t ret; + const char *uchar_str = "\exe5\ex85\ex89"; + + (void) memset(&mbs, 0, sizeof (mbs)); + (void) setlocale(LC_CTYPE, "en_US.UTF-8"); + ret = mbrtoc32(&out, uchar_str, strlen(uchar_str), &mbs); + if (ret != strlen(uchar_str)) { + errx(EXIT_FAILURE, "failed to convert string, got %zd", + ret); + } + + (void) printf("Converted %zu bytes into UTF-32 character " + "0x%x\n", ret, out); + return (0); +} +.Ed +.Pp +When compiled and run, this produces: +.Bd -literal -offset indent +$ ./a.out +Converted 3 bytes into UTF-32 character 0x5149 +.Ed +.Pp +.Sy Example 2 +Handling surrogate pairs from the +.Fn mbrtoc16 +function. 
+.Bd -literal +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdio.h> +#include <uchar.h> + +int +main(void) +{ + mbstate_t mbs; + char16_t first, second; + size_t ret; + const char *uchar_str = "\exf0\ex9f\ex92\exa9"; + + (void) memset(&mbs, '\0', sizeof (mbs)); + (void) setlocale(LC_CTYPE, "en_US.UTF-8"); + ret = mbrtoc16(&first, uchar_str, strlen(uchar_str), &mbs); + if (ret != strlen(uchar_str)) { + errx(EXIT_FAILURE, "failed to convert string, got %zd", + ret); + } + + ret = mbrtoc16(&second, "", 0, &mbs); + if (ret != (size_t)-3) { + errx(EXIT_FAILURE, "didn't get second surrogate pair, " + "got %zd", ret); + } + + (void) printf("UTF-16 surrogates: 0x%x 0x%x\n", first, second); + return (0); +} +.Ed +.Pp +When compiled and run, this produces: +.Bd -literal -offset indent +$ ./a.out +UTF-16 surrogates: 0xd83d 0xdca9 +.Ed +.Sh ERRORS +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions will fail if: +.Bl -tag -width Er +.It Er EINVAL +The conversion state in +.Fa ps +is invalid. +.It Er EILSEQ +An invalid character sequence has been detected. +.El +.Sh MT-LEVEL +The +.Fn mbrtoc16 , +.Fn mbrtoc32 , +.Fn mbrtowc , +and +.Fn mbrtowc_l +functions are +.Sy MT-Safe +as long as different +.Vt mbstate_t +structures are passed in +.Fa ps . +If +.Fa ps +is +.Dv NULL +or different threads use the same value for +.Fa ps , +then the functions are +.Sy Unsafe . +.Sh INTERFACE STABILITY +.Sy Committed +.Sh SEE ALSO +.Xr c16rtomb 3C , +.Xr c32rtomb 3C , +.Xr newlocale 3C , +.Xr setlocale 3C , +.Xr uselocale 3C , +.Xr wcrtomb 3C , +.Xr uchar.h 3HEAD , +.Xr environ 5 diff --git a/usr/src/man/man3c/mbrtowc.3c b/usr/src/man/man3c/mbrtowc.3c deleted file mode 100644 index 70fe73d25e..0000000000 --- a/usr/src/man/man3c/mbrtowc.3c +++ /dev/null @@ -1,231 +0,0 @@ -.\" -.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for -.\" permission to reproduce portions of its copyrighted documentation. 
-.\" Original documentation from The Open Group can be obtained online at -.\" http://www.opengroup.org/bookstore/. -.\" -.\" The Institute of Electrical and Electronics Engineers and The Open -.\" Group, have given us permission to reprint portions of their -.\" documentation. -.\" -.\" In the following statement, the phrase ``this text'' refers to portions -.\" of the system documentation. -.\" -.\" Portions of this text are reprinted and reproduced in electronic form -.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition, -.\" Standard for Information Technology -- Portable Operating System -.\" Interface (POSIX), The Open Group Base Specifications Issue 6, -.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics -.\" Engineers, Inc and The Open Group. In the event of any discrepancy -.\" between these versions and the original IEEE and The Open Group -.\" Standard, the original IEEE and The Open Group Standard is the referee -.\" document. The original Standard can be obtained online at -.\" http://www.opengroup.org/unix/online.html. -.\" -.\" This notice shall appear on any product containing this material. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" -.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. -.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright 2014 Garrett D'Amore <garrett@damore.org> -.\" -.TH MBRTOWC 3C "Jun 23, 2014" -.SH NAME -mbrtowc, mbrtowc_l \- convert a character to a wide-character code (restartable) -.SH SYNOPSIS -.LP -.nf -#include <wchar.h> - -\fBsize_t\fR \fBmbrtowc\fR(\fBwchar_t *restrict\fR \fIpwc\fR, \fBconst char *restrict\fR \fIs\fR, \fBsize_t\fR \fIn\fR, - \fBmbstate_t *restrict\fR \fIps\fR); -.fi -.LP -.nf -#include <wchar.h> -#include <xlocale.h> - -\fBsize_t\fR \fBmbrtowc_l\fR(\fBwchar_t *restrict\fR \fIpwc\fR, \fBconst char *restrict\fR \fIs\fR, \fBsize_t\fR \fIn\fR, - \fBmbstate_t *restrict\fR \fIps\fR, \fBlocale_t\fR \fIloc\fR); -.fi -.SH DESCRIPTION -.LP -If -.I s -is a null pointer, the -.B mbrtowc() -function is equivalent to the call: -.sp -.in +2 -.nf -\fBmbrtowc\fR(NULL, "", 1, \fIps\fR) -.fi -.in -2 -.LP -Likewise, if -.I s -is a null pointer, the -.B mbrtowc_l() -function is equivalent to the call: -.sp -.in +2 -.nf -\fBmbrtowc_l\fR(NULL, "", 1, \fIps\fR, \fIloc\fR); -.fi -.in -2 -.LP -In these cases, the values of the arguments -.I pwc -and -.I n -are ignored. -.LP -If -.I s -is not a null pointer, these functions inspect at most -.I n -bytes beginning at the byte pointed to by -.I s -to determine the number of bytes needed to complete the next character -(including any shift sequences). If the functions determine that the next -character is completed, -they determine the value of the corresponding wide-character and then, if -.I pwc -is not a null pointer, stores that value in the object pointed to by -.IR pwc . 
-If the corresponding wide-character is the null wide-character, the -resulting state described is the initial conversion state. -.LP -If -.I ps -is a null pointer, these functions use their own -internal -.B mbstate_t -object, which is initialized at program startup to the -initial conversion state. Otherwise, the -.B mbstate_t -object pointed to by -.I ps -is used to completely describe the current conversion state of the -associated character sequence. The system will behave as if no function defined -in the Reference Manual calls -.B mbrtowc() -or -.BR Bmbrtowc_l() . -.LP -The behavior of -.B mbrtowc() -is affected by the -.B LC_CTYPE -category of the current locale. The -.B mbrtowc_l() -function is affected by the -.B LC_CTYPE -category of the specified -.I loc -locale object. See -.B environ (5). -.SH RETURN VALUES -.LP -The -.B mbrtowc() -and -.B mbrtowc_l() -functions return the first of the following that applies: -.IP \fB0\fR -If the next -.I n -or fewer bytes complete the character that corresponds to -the null wide-character (which is the value stored). -.IP \fBpositive\fR -If the next -.I n -or fewer bytes complete a valid character (which is the -value stored); the value returned is the number of bytes that complete the -character. -.IP \fB(size_t)\(mi2\fR -If the next -.I n -bytes contribute to an incomplete but potentially valid -character, and all -.I n -bytes have been processed (no value is stored). -When -.I n -has at least the value of the -.B MB_CUR_MAX -macro, this case can only occur if -.I s -points at a sequence of redundant shift sequences -(for implementations with state-dependent encodings). -.IP \fB(size_t)\(mi1\fR -If an encoding error occurs, in which case the next \fIn\fR or fewer bytes do -not contribute to a complete and valid character (no value is stored). In -this case, -.B EILSEQ -is stored in -.B errno -and the conversion state is undefined. 
-.SH ERRORS -.LP -The -.B mbrtowc() -and -.B mbrtowc_l() -functions may fail if: -.IP \fBEINVAL\fR -The -.I ps -argument points to an object that contains an invalid conversion -state. -.IP \fBEILSEQ\fR -Invalid character sequence is detected. -.SH ATTRIBUTES -.LP -See \fBattributes\fR(5) for descriptions of the following attributes: -.TS -box; -c | c -l | l . -ATTRIBUTE TYPE ATTRIBUTE VALUE -_ -Interface Stability See below. -_ -MT-Level See below. -.TE - -.LP -The -.B mbrtowc() -function is Standard. The -.B mbrtowc_l() -function is Uncommitted. -.LP -If -.I ps -is a null pointer, these functions are Unsafe for use in -multithreaded applications. Otherwise they are MT-Safe. -.SH SEE ALSO -.LP -.BR mbsinit (3C), -.BR newlocale (3C), -.BR setlocale (3C), -.BR uselocale (3C), -.BR attributes (5), -.BR environ (5), -.BR standards (5) diff --git a/usr/src/man/man3c/wcrtomb.3c b/usr/src/man/man3c/wcrtomb.3c deleted file mode 100644 index 9a3478cb07..0000000000 --- a/usr/src/man/man3c/wcrtomb.3c +++ /dev/null @@ -1,146 +0,0 @@ -.\" -.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for -.\" permission to reproduce portions of its copyrighted documentation. -.\" Original documentation from The Open Group can be obtained online at -.\" http://www.opengroup.org/bookstore/. -.\" -.\" The Institute of Electrical and Electronics Engineers and The Open -.\" Group, have given us permission to reprint portions of their -.\" documentation. -.\" -.\" In the following statement, the phrase ``this text'' refers to portions -.\" of the system documentation. -.\" -.\" Portions of this text are reprinted and reproduced in electronic form -.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition, -.\" Standard for Information Technology -- Portable Operating System -.\" Interface (POSIX), The Open Group Base Specifications Issue 6, -.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics -.\" Engineers, Inc and The Open Group. 
In the event of any discrepancy -.\" between these versions and the original IEEE and The Open Group -.\" Standard, the original IEEE and The Open Group Standard is the referee -.\" document. The original Standard can be obtained online at -.\" http://www.opengroup.org/unix/online.html. -.\" -.\" This notice shall appear on any product containing this material. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" -.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. -.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. 
-.\" Copyright 2014 Garrett D'Amore <garrett@damore.org> -.\" -.TH WCRTOMB 3C "Jun 24, 2014" -.SH NAME -wcrtomb, wcrtomb_l \- convert a wide-character code to a character (restartable) -.SH SYNOPSIS -.LP -.nf -#include <stdio.h> - -\fBsize_t\fR \fBwcrtomb\fR(\fBchar *restrict\fR \fIs\fR, \fBwchar_t\fR \fIwc\fR, \fBmbstate_t *restrict\fR \fIps\fR); -.fi -.LP -.nf -#include <stdio.h> -#include <xlocale.h> - -\fBsize_t\fR \fBwcrtomb_l\fR(\fBchar *restrict\fR \fIs\fR, \fBwchar_t\fR \fIwc\fR, \fBmbstate_t *restrict\fR \fIps\fR, - \fBlocale_t\fR \fIloc\fR); -.fi -.SH DESCRIPTION -.LP -If \fIs\fR is a null pointer, the \fBwcrtomb()\fR function is equivalent to the -call: -.IP -\fBwcrtomb\fR(\fIbuf\fR, L'\e0', \fIps\fR); -.LP -where \fIbuf\fR is an internal buffer. -.LP -If \fIs\fR is not a null pointer, the \fBwcrtomb()\fR function determines the -number of bytes needed to represent the character that corresponds to the -wide-character given by \fIwc\fR (including any shift sequences), and stores -the resulting bytes in the array whose first element is pointed to by \fIs\fR. -At most \fBMB_CUR_MAX\fR bytes are stored. If \fIwc\fR is a null -wide-character, a null byte is stored, preceded by any shift sequence needed to -restore the initial shift state. The resulting state described is the initial -conversion state. -.LP -If \fIps\fR is a null pointer, the \fBwcrtomb()\fR function uses its own -internal \fBmbstate_t\fR object, which is initialized at program startup to the -initial conversion state. Otherwise, the \fBmbstate_t\fR object pointed to -by \fIps\fR is used to completely describe the current conversion state of the -associated character sequence. The system will behave as if no function -defined in the Reference Manual calls \fBwcrtomb()\fR. -.LP -The behavior of \fBwcrtomb()\fR is affected by the \fBLC_CTYPE\fR category of the -current locale. See \fBenviron\fR(5). 
The function \fBwcrtomb_l()\fR behaves -identically to \fBwcrtomb()\fR, except instead of operating in the current -locale, it operates in the locale specified by \fIloc\fR. -.SH RETURN VALUES -.LP -The \fBwcrtomb()\fR function returns the number of bytes stored in the array -object (including any shift sequences). When \fIwc\fR is not a valid -wide-character, an encoding error occurs. In this case, the function stores -the value of the macros \fBEILSEQ\fR in \fBerrno\fR and returns -\fB(size_t)\(mi1\fR; the conversion state is undefined. -.SH ERRORS -.LP -The \fBwcrtomb()\fR function may fail if: -.sp -.ne 2 -.na -\fB\fBEINVAL\fR\fR -.ad -.RS 10n -The \fIps\fR argument points to an object that contains an invalid conversion -state. -.RE - -.sp -.ne 2 -.na -\fB\fBEILSEQ\fR\fR -.ad -.RS 10n -Invalid wide-character code is detected. -.RE -.SH ATTRIBUTES -.LP -See \fBattributes\fR(5) for descriptions of the following attributes: -.TS -box; -c | c -l | l . -ATTRIBUTE TYPE ATTRIBUTE VALUE -_ -Interface Stability See below. -_ -MT-Level See below. -.TE - -.LP -The \fBwcrtomb()\fR function is Standard. The -\fBwcrtomb_l()\fR function is Uncommitted. -.LP -If \fIps\fR is a null pointer, these functions should be considered Unsafe -for use in multithreaded applications. Otherwise, they are MT-Safe. -.SH SEE ALSO -.LP -\fBmbsinit\fR(3C), \fBnewlocale\fR(3C), \fBsetlocale\fR(3C), -\fBuselocale\fR(3C), \fBattributes\fR(5), -\fBstandards\fR(5), \fBenviron\fR(5) |