5 files changed, 696 insertions, 383 deletions
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile
index 726125d3e9..0387bbd608 100644
--- a/usr/src/man/man3c/Makefile
+++ b/usr/src/man/man3c/Makefile
@@ -62,6 +62,7 @@ MANFILES=	__fbufsize.3c					\
 		bstring.3c					\
 		btowc.3c					\
 		byteorder.3c					\
+		c16rtomb.3c					\
 		call_once.3c					\
 		catgets.3c					\
 		catopen.3c					\
@@ -241,7 +242,7 @@ MANFILES=	__fbufsize.3c					\
 		malloc.3c					\
 		mblen.3c					\
 		mbrlen.3c					\
-		mbrtowc.3c					\
+		mbrtoc16.3c					\
 		mbsinit.3c					\
 		mbsrtowcs.3c					\
 		mbstowcs.3c					\
@@ -543,7 +544,6 @@ MANFILES=	__fbufsize.3c					\
 		waitpid.3c					\
 		walkcontext.3c					\
 		wcpcpy.3c					\
-		wcrtomb.3c					\
 		wcscasecmp.3c					\
 		wcscoll.3c					\
 		wcsdup.3c					\
@@ -729,6 +729,7 @@ MANLINKS=	FD_CLR.3c				\
 		bindtextdomain.3c			\
 		btowc_l.3c				\
 		bzero.3c				\
+		c32rtomb.3c				\
 		calloc.3c				\
 		canonicalize_file_name.3c		\
 		catclose.3c				\
@@ -1006,6 +1007,8 @@ MANLINKS=	FD_CLR.3c				\
 		major.3c				\
 		mblen_l.3c				\
 		mbrlen_l.3c				\
+		mbrtoc32.3c				\
+		mbrtowc.3c				\
 		mbrtowc_l.3c				\
 		mbsinit_l.3c				\
 		mbsnrtowcs.3c				\
@@ -1385,6 +1388,7 @@ MANLINKS=	FD_CLR.3c				\
 		watol.3c				\
 		watoll.3c				\
 		wcpncpy.3c				\
+		wcrtomb.3c				\
 		wcrtomb_l.3c				\
 		wcscasecmp_l.3c				\
 		wcscat.3c				\
@@ -1599,6 +1603,10 @@ ntohl.3c				:= LINKSRC = byteorder.3c
 ntohll.3c				:= LINKSRC = byteorder.3c
 ntohs.3c				:= LINKSRC = byteorder.3c
 
+c32rtomb.3c				:= LINKSRC = c16rtomb.3c
+wcrtomb.3c				:= LINKSRC = c16rtomb.3c
+wcrtomb_l.3c				:= LINKSRC = c16rtomb.3c
+
 canonicalize_file_name.3c		:= LINKSRC = realpath.3c
 
 catclose.3c				:= LINKSRC = catopen.3c
@@ -2012,10 +2020,12 @@ mblen_l.3c				:= LINKSRC = mblen.3c
 
 mbrlen_l.3c				:= LINKSRC = mbrlen.3c
 
-mbrtowc_l.3c				:= LINKSRC = mbrtowc.3c
-
 mbsinit_l.3c				:= LINKSRC = mbsinit.3c
 
+mbrtoc32.3c				:= LINKSRC = mbrtoc16.3c
+mbrtowc.3c				:= LINKSRC = mbrtoc16.3c
+mbrtowc_l.3c				:= LINKSRC = mbrtoc16.3c
+
 mbsnrtowcs.3c				:= LINKSRC = mbsrtowcs.3c
 mbsnrtowcs_l.3c				:= LINKSRC = mbsrtowcs.3c
 mbsrtowcs_l.3c				:= LINKSRC = mbsrtowcs.3c
@@ -2509,8 +2519,6 @@ printstack.3c				:= LINKSRC = walkcontext.3c
 
 wcpncpy.3c				:= LINKSRC = wcpcpy.3c
 
-wcrtomb_l.3c				:= LINKSRC = wcrtomb.3c
-
 wcscasecmp_l.3c				:= LINKSRC = wcscasecmp.3c
 wcsncasecmp.3c				:= LINKSRC = wcscasecmp.3c
 wcsncasecmp_l.3c			:= LINKSRC = wcscasecmp.3c
diff --git a/usr/src/man/man3c/c16rtomb.3c b/usr/src/man/man3c/c16rtomb.3c
new file mode 100644
index 0000000000..33c6189dd3
--- /dev/null
+++ b/usr/src/man/man3c/c16rtomb.3c
@@ -0,0 +1,285 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source.  A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 Robert Mustacchi
+.\"
+.Dd April 23, 2020
+.Dt C16RTOMB 3C
+.Os
+.Sh NAME
+.Nm c16rtomb ,
+.Nm c32rtomb ,
+.Nm wcrtomb ,
+.Nm wcrtomb_l
+.Nd convert wide-characters to character sequences
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fo c16rtomb
+.Fa "char *restrict str"
+.Fa "char16_t c16"
+.Fa "mbstate_t *restrict ps"
+.Fc
+.Ft size_t
+.Fo c32rtomb
+.Fa "char *restrict str"
+.Fa "char32_t c32"
+.Fa "mbstate_t *restrict ps"
+.Fc
+.In wchar.h
+.Ft size_t
+.Fo wcrtomb
+.Fa "char *restrict str"
+.Fa "wchar_t wc"
+.Fa "mbstate_t *restrict ps"
+.Fc
+.In wchar.h
+.In xlocale.h
+.Ft size_t
+.Fo wcrtomb_l
+.Fa "char *restrict str"
+.Fa "wchar_t wc"
+.Fa "mbstate_t *restrict ps"
+.Fa "locale_t loc"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn c16rtomb ,
+.Fn c32rtomb ,
+.Fn wcrtomb ,
+and
+.Fn wcrtomb_l
+functions convert wide-character sequences into a series of multi-byte
+characters.
+The functions work in the following formats:
+.Bl -tag -width wcrtomb_l
+.It Fn c16rtomb
+A UTF-16 code sequence, where every code point is represented by one or
+two
+.Vt char16_t .
+The UTF-16 encoding will encode certain Unicode code points as a pair of
+two 16-bit code sequences, commonly referred to as a surrogate pair.
+.It Fn c32rtomb
+A UTF-32 code sequence, where every code point is represented by a
+single
+.Vt char32_t .
+It is illegal to pass reserved Unicode code points.
+.It Fn wcrtomb , Fn wcrtomb_l
+Wide characters, being a 32-bit value where every code point is
+represented by a single
+.Vt wchar_t .
+While the
+.Vt wchar_t
+and
+.Vt char32_t
+are different types, in this implementation, they are similar encodings.
+.El
+.Pp
+The functions all work by looking at the passed in wide-character
+.Po
+.Fa c16 ,
+.Fa c32 ,
+.Fa wc
+.Pc
+and appending it to the current conversion state,
+.Fa ps .
+Once a valid code point, based on the current locale, is found, then it
+will be converted into a series of characters that are stored in
+.Fa str .
+Up to
+.Dv MB_CUR_MAX
+bytes will be stored in
+.Fa str .
+It is the caller's responsibility to ensure that there is sufficient
+space in
+.Fa str .
+.Pp
+The functions are all influenced by the
+.Dv LC_CTYPE
+category of the current locale for determining what is considered a
+valid character.
+For example, in the
+.Sy C
+locale,
+only ASCII characters are recognized, while in a
+.Sy UTF-8
+based locale like
+.Sy en_US.UTF-8 ,
+all valid Unicode code points are recognized and will be converted into
+the corresponding multi-byte sequence.
+The
+.Fn wcrtomb_l
+function uses the locale passed in
+.Fa loc
+rather than the locale of the current thread.
+.Pp
+The
+.Fa ps
+argument represents a multi-byte conversion state which can be used
+across multiple calls to a given function
+.Pq but not mixed between functions .
+These allow for characters to be consumed from subsequent buffers, e.g.
+different values of
+.Fa str .
+The functions may be called from multiple threads as long as they use
+unique values for
+.Fa ps .
+If
+.Fa ps
+is
+.Dv NULL ,
+then a function-specific buffer will be used for the conversion state;
+however, this is shared between all threads and its use is not
+recommended.
+.Pp
+The functions all have a special behavior when
+.Dv NULL
+is passed for
+.Fa str .
+They instead will treat it as though the null wide-character was
+passed in
+.Fa c16 ,
+.Fa c32 ,
+or
+.Fa wc
+and an internal buffer
+.Pq buf
+will be used to write out the results of the
+conversion.
+In other words, the functions would be called as:
+.Bd -literal -offset indent
+c16rtomb(buf, L'\\0', ps)
+c32rtomb(buf, L'\\0', ps)
+wcrtomb(buf, L'\\0', ps)
+wcrtomb_l(buf, L'\\0', ps, loc)
+.Ed
+.Ss Locale Details
+Not all locales in the system are Unicode based locales.
+For example, ISO 8859 family locales have code points with values that
+do not match their counterparts in Unicode.
+When using these functions with non-Unicode based locales, the code
+points returned will be those determined by the locale.
+They will not be converted from the corresponding Unicode code point.
+For example, if using the Euro sign in ISO 8859-15, these functions
+will not encode the Unicode value 0x20ac into the ISO 8859-15 value
+0xa4.
+.Pp
+Regardless of the locale, the characters returned will be encoded as
+though the code point were the corresponding value in Unicode.
+This means that when using UTF-16, if the corresponding code point were
+in the range for surrogate pairs, then the
+.Fn c16rtomb
+function will expect to receive that code point in that fashion.
+.Pp
+This behavior of the
+.Fn c16rtomb
+and
+.Fn c32rtomb
+functions should not be relied upon, is not portable, and is subject to
+change for non-Unicode locales.
+.Sh RETURN VALUES
+Upon successful completion, the
+.Fn c16rtomb ,
+.Fn c32rtomb ,
+.Fn wcrtomb ,
+and
+.Fn wcrtomb_l
+functions return the number of bytes stored in
+.Fa str .
+Otherwise,
+.Sy (size_t)-1
+is returned to indicate an encoding error and
+.Va errno
+is set.
+.Sh EXAMPLES
+.Sy Example 1
+Converting a UTF-32 character into a multi-byte character sequence.
+.Bd -literal
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <limits.h>
+#include <uchar.h>
+
+int
+main(void)
+{
+        mbstate_t mbs;
+        size_t ret;
+        char buf[MB_LEN_MAX];
+        char32_t val = 0x5149;
+        const char *uchar_exp = "\exe5\ex85\ex89";
+
+        (void) memset(&mbs, 0, sizeof (mbs));
+        (void) setlocale(LC_CTYPE, "en_US.UTF-8");
+        ret = c32rtomb(buf, val, &mbs);
+        if (ret != strlen(uchar_exp)) {
+                errx(EXIT_FAILURE, "failed to convert string, got %zd",
+                    ret);
+        }
+
+        if (strncmp(buf, uchar_exp, ret) != 0) {
+                errx(EXIT_FAILURE, "converted char32_t does not match "
+                    "expected value");
+        }
+
+        return (0);
+}
+.Ed
+.Sh ERRORS
+The
+.Fn c16rtomb ,
+.Fn c32rtomb ,
+.Fn wcrtomb ,
+and
+.Fn wcrtomb_l
+functions will fail if:
+.Bl -tag -width Er
+.It Er EINVAL
+The conversion state in
+.Fa ps
+is invalid.
+.It Er EILSEQ
+An invalid character sequence has been detected.
+.El
+.Sh MT-LEVEL
+The
+.Fn c16rtomb ,
+.Fn c32rtomb ,
+.Fn wcrtomb ,
+and
+.Fn wcrtomb_l
+functions are
+.Sy MT-Safe
+as long as different
+.Vt mbstate_t
+structures are passed in
+.Fa ps .
+If
+.Fa ps
+is
+.Dv NULL
+or different threads use the same value for
+.Fa ps ,
+then the functions are
+.Sy Unsafe .
+.Sh INTERFACE STABILITY
+.Sy Committed
+.Sh SEE ALSO
+.Xr mbrtoc16 3C ,
+.Xr mbrtoc32 3C ,
+.Xr mbrtowc 3C ,
+.Xr newlocale 3C ,
+.Xr setlocale 3C ,
+.Xr uselocale 3C ,
+.Xr uchar.h 3HEAD ,
+.Xr environ 5
diff --git a/usr/src/man/man3c/mbrtoc16.3c b/usr/src/man/man3c/mbrtoc16.3c
new file mode 100644
index 0000000000..d1b3ab478b
--- /dev/null
+++ b/usr/src/man/man3c/mbrtoc16.3c
@@ -0,0 +1,397 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source.  A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 Robert Mustacchi
+.\"
+.Dd April 23, 2020
+.Dt MBRTOC16 3C
+.Os
+.Sh NAME
+.Nm mbrtoc16 ,
+.Nm mbrtoc32 ,
+.Nm mbrtowc ,
+.Nm mbrtowc_l
+.Nd convert characters to wide characters
+.Sh SYNOPSIS
+.In wchar.h
+.Ft size_t
+.Fo mbrtowc
+.Fa "wchar_t *restrict pwc"
+.Fa "const char *restrict str"
+.Fa "size_t len"
+.Fa "mbstate_t *restrict ps"
+.Fc
+.In wchar.h
+.In xlocale.h
+.Ft size_t
+.Fo mbrtowc_l
+.Fa "wchar_t *restrict pwc"
+.Fa "const char *restrict str"
+.Fa "size_t len"
+.Fa "mbstate_t *restrict ps"
+.Fa "locale_t loc"
+.Fc
+.In uchar.h
+.Ft size_t
+.Fo mbrtoc16
+.Fa "char16_t *restrict p16c"
+.Fa "const char *restrict str"
+.Fa "size_t len"
+.Fa "mbstate_t *restrict ps"
+.Fc
+.Ft size_t
+.Fo mbrtoc32
+.Fa "char32_t *restrict p32c"
+.Fa "const char *restrict str"
+.Fa "size_t len"
+.Fa "mbstate_t *restrict ps"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn mbrtoc16 ,
+.Fn mbrtoc32 ,
+.Fn mbrtowc ,
+and
+.Fn mbrtowc_l
+functions convert character sequences, which may contain multi-byte
+characters, into different character formats.
+The functions work in the following formats:
+.Bl -tag -width mbrtowc_l
+.It Fn mbrtoc16
+A UTF-16 code sequence, where every code point is represented by one or
+two
+.Vt char16_t .
+The UTF-16 encoding will encode certain Unicode code points as a pair of
+two 16-bit code sequences, commonly referred to as a surrogate pair.
+.It Fn mbrtoc32
+A UTF-32 code sequence, where every code point is represented by a
+single
+.Vt char32_t .
+.It Fn mbrtowc , Fn mbrtowc_l
+Wide characters, being a 32-bit value where every code point is
+represented by a single
+.Vt wchar_t .
+While the
+.Vt wchar_t
+and
+.Vt char32_t
+are different types, in this implementation, they are similar encodings.
+.El
+.Pp
+The functions consume up to
+.Fa len
+characters from the string
+.Fa str
+and accumulate them in
+.Fa ps
+until a valid character is found, which is influenced by
+the
+.Dv LC_CTYPE
+category of the current locale.
+For example, in the
+.Sy C
+locale, only ASCII characters are recognized, while in a
+.Sy UTF-8
+based locale like
+.Sy en_US.UTF-8 ,
+UTF-8 multi-byte character sequences that represent Unicode code points
+are recognized.
+The
+.Fn mbrtowc_l
+function uses the locale passed in
+.Fa loc
+rather than the locale of the current thread.
+.Pp
+When a valid character sequence has been found, it is converted to
+either a 16-bit character sequence for
+.Fn mbrtoc16
+or a 32-bit character sequence for
+.Fn mbrtoc32
+and will be stored in
+.Fa p16c
+and
+.Fa p32c
+respectively.
+.Pp
+The
+.Fa ps
+argument represents a multi-byte conversion state which can be used
+across multiple calls to a given function
+.Pq but not mixed between functions .
+These allow for characters to be consumed from subsequent buffers, e.g.
+different values of
+.Fa str .
+The functions may be called from multiple threads as long as they use
+unique values for
+.Fa ps .
+If
+.Fa ps
+is
+.Dv NULL ,
+then a function-specific buffer will be used for the conversion state;
+however, this is shared between all threads and its use is not
+recommended.
+.Pp
+When using these functions, more than one character may be output for a
+given set of consumed input characters.
+An example of this is when a given code point is represented as a set of
+surrogate pairs in UTF-16, which require two 16-bit characters to
+represent a code point.
+When this occurs, the functions return the special return value
+.Sy (size_t)-3 .
+.Pp
+The functions all have a special behavior when
+.Dv NULL
+is passed for
+.Fa str .
+They instead will treat it as though
+.Fa pwc ,
+.Fa p16c ,
+or
+.Fa p32c
+were
+.Dv NULL ,
+.Fa str
+had been passed as the empty string, "" and the length,
+.Fa len ,
+would appear as the value 1.
+In other words, the functions would be called as:
+.Bd -literal -offset indent
+mbrtowc(NULL, "", 1, ps)
+mbrtowc_l(NULL, "", 1, ps, loc)
+mbrtoc16(NULL, "", 1, ps)
+mbrtoc32(NULL, "", 1, ps)
+.Ed
+.Ss Locale Details
+Not all locales in the system are Unicode based locales.
+For example, ISO 8859 family locales have code points with values that
+do not match their counterparts in Unicode.
+When using these functions with non-Unicode based locales, the code
+points returned will be those determined by the locale.
+They will not be converted to the corresponding Unicode code point.
+For example, if using the Euro sign in ISO 8859-15, these functions
+might return the code point 0xa4 and not the Unicode value 0x20ac.
+.Pp
+Regardless of the locale, the characters returned will be encoded as
+though the code point were the corresponding value in Unicode.
+This means that if a locale returns a value that would be a surrogate
+pair in the UTF-16 encoding, it will still be encoded as a UTF-16
+character.
+.Pp
+This behavior of the
+.Fn mbrtoc16
+and
+.Fn mbrtoc32
+functions should not be relied upon, is not portable, and is subject to
+change for non-Unicode locales.
+.Sh RETURN VALUES
+The
+.Fn mbrtoc16 ,
+.Fn mbrtoc32 ,
+.Fn mbrtowc ,
+and
+.Fn mbrtowc_l
+functions return the following values:
+.Bl -tag -width (size_t)-3
+.It Sy 0
+.Fa len
+or fewer bytes of
+.Fa str
+were consumed and the null wide character was written into the wide
+character buffer
+.Po
+.Fa pwc ,
+.Fa p16c ,
+.Fa p32c
+.Pc .
+.It Sy between 1 and len
+The specified number of bytes were consumed and a single character was
+written into the wide character buffer
+.Po
+.Fa pwc ,
+.Fa p16c ,
+.Fa p32c
+.Pc .
+.It Sy (size_t)-1
+An encoding error has occurred.
+The next
+.Fa len
+bytes of
+.Fa str
+do not contribute to a valid character.
+.Va errno
+has been set to
+.Er EILSEQ .
+No data was written into the wide character buffer
+.Po
+.Fa pwc ,
+.Fa p16c ,
+.Fa p32c
+.Pc .
+.It Sy (size_t)-2
+.Fa len
+bytes of
+.Fa str
+were consumed, but a complete multi-byte character sequence has not been
+found and no data was written into the wide character buffer
+.Po
+.Fa pwc ,
+.Fa p16c ,
+.Fa p32c
+.Pc .
+.It Sy (size_t)-3
+A character has been written into the wide character buffer
+.Po
+.Fa pwc ,
+.Fa p16c ,
+.Fa p32c
+.Pc .
+This character was from a previous call (such as another part of a
+UTF-16 surrogate pair) and no input was consumed.
+This is limited to the
+.Fn mbrtoc16
+and
+.Fn mbrtoc32
+functions.
+.El
+.Sh EXAMPLES
+.Sy Example 1
+Using the
+.Fn mbrtoc32
+function to convert a multibyte string.
+.Bd -literal
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <stdio.h>
+#include <uchar.h>
+
+int
+main(void)
+{
+	mbstate_t mbs;
+	char32_t out;
+	size_t ret;
+	const char *uchar_str = "\exe5\ex85\ex89";
+
+	(void) memset(&mbs, 0, sizeof (mbs));
+	(void) setlocale(LC_CTYPE, "en_US.UTF-8");
+	ret = mbrtoc32(&out, uchar_str, strlen(uchar_str), &mbs);
+	if (ret != strlen(uchar_str)) {
+		errx(EXIT_FAILURE, "failed to convert string, got %zd",
+		    ret);
+	}
+
+	(void) printf("Converted %zu bytes into UTF-32 character "
+	    "0x%x\n", ret, out);
+	return (0);
+}
+.Ed
+.Pp
+When compiled and run, this produces:
+.Bd -literal -offset indent
+$ ./a.out
+Converted 3 bytes into UTF-32 character 0x5149
+.Ed
+.Pp
+.Sy Example 2
+Handling surrogate pairs from the
+.Fn mbrtoc16
+function.
+.Bd -literal
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <stdio.h>
+#include <uchar.h>
+
+int
+main(void)
+{
+        mbstate_t mbs;
+        char16_t first, second;
+        size_t ret;
+        const char *uchar_str = "\exf0\ex9f\ex92\exa9";
+
+        (void) memset(&mbs, '\0', sizeof (mbs));
+        (void) setlocale(LC_CTYPE, "en_US.UTF-8");
+        ret = mbrtoc16(&first, uchar_str, strlen(uchar_str), &mbs);
+        if (ret != strlen(uchar_str)) {
+                errx(EXIT_FAILURE, "failed to convert string, got %zd",
+                    ret);
+        }
+
+        ret = mbrtoc16(&second, "", 0, &mbs);
+        if (ret != (size_t)-3) {
+                errx(EXIT_FAILURE, "didn't get second surrogate pair, "
+                    "got %zd", ret);
+        }
+
+        (void) printf("UTF-16 surrogates: 0x%x 0x%x\n", first, second);
+        return (0);
+}
+.Ed
+.Pp
+When compiled and run, this produces:
+.Bd -literal -offset indent
+$ ./a.out
+UTF-16 surrogates: 0xd83d 0xdca9
+.Ed
+.Sh ERRORS
+The
+.Fn mbrtoc16 ,
+.Fn mbrtoc32 ,
+.Fn mbrtowc ,
+and
+.Fn mbrtowc_l
+functions will fail if:
+.Bl -tag -width Er
+.It Er EINVAL
+The conversion state in
+.Fa ps
+is invalid.
+.It Er EILSEQ
+An invalid character sequence has been detected.
+.El
+.Sh MT-LEVEL
+The
+.Fn mbrtoc16 ,
+.Fn mbrtoc32 ,
+.Fn mbrtowc ,
+and
+.Fn mbrtowc_l
+functions are
+.Sy MT-Safe
+as long as different
+.Vt mbstate_t
+structures are passed in
+.Fa ps .
+If
+.Fa ps
+is
+.Dv NULL
+or different threads use the same value for
+.Fa ps ,
+then the functions are
+.Sy Unsafe .
+.Sh INTERFACE STABILITY
+.Sy Committed
+.Sh SEE ALSO
+.Xr c16rtomb 3C ,
+.Xr c32rtomb 3C ,
+.Xr newlocale 3C ,
+.Xr setlocale 3C ,
+.Xr uselocale 3C ,
+.Xr wcrtomb 3C ,
+.Xr uchar.h 3HEAD ,
+.Xr environ 5
diff --git a/usr/src/man/man3c/mbrtowc.3c b/usr/src/man/man3c/mbrtowc.3c
deleted file mode 100644
index 70fe73d25e..0000000000
--- a/usr/src/man/man3c/mbrtowc.3c
+++ /dev/null
@@ -1,231 +0,0 @@
-.\"
-.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for
-.\" permission to reproduce portions of its copyrighted documentation.
-.\" Original documentation from The Open Group can be obtained online at
-.\" http://www.opengroup.org/bookstore/.
-.\"
-.\" The Institute of Electrical and Electronics Engineers and The Open
-.\" Group, have given us permission to reprint portions of their
-.\" documentation.
-.\"
-.\" In the following statement, the phrase ``this text'' refers to portions
-.\" of the system documentation.
-.\"
-.\" Portions of this text are reprinted and reproduced in electronic form
-.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition,
-.\" Standard for Information Technology -- Portable Operating System
-.\" Interface (POSIX), The Open Group Base Specifications Issue 6,
-.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics
-.\" Engineers, Inc and The Open Group.  In the event of any discrepancy
-.\" between these versions and the original IEEE and The Open Group
-.\" Standard, the original IEEE and The Open Group Standard is the referee
-.\" document.  The original Standard can be obtained online at
-.\" http://www.opengroup.org/unix/online.html.
-.\"
-.\" This notice shall appear on any product containing this material.
-.\"
-.\" The contents of this file are subject to the terms of the
-.\" Common Development and Distribution License (the "License").
-.\" You may not use this file except in compliance with the License.
-.\"
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-.\" or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions
-.\" and limitations under the License.
-.\"
-.\" When distributing Covered Code, include this CDDL HEADER in each
-.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-.\" If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying
-.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.\"
-.\"
-.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
-.\" Portions Copyright (c) 2003, Sun Microsystems, Inc.  All Rights Reserved.
-.\" Copyright 2014 Garrett D'Amore <garrett@damore.org>
-.\"
-.TH MBRTOWC 3C "Jun 23, 2014"
-.SH NAME
-mbrtowc, mbrtowc_l \- convert a character to a wide-character code (restartable)
-.SH SYNOPSIS
-.LP
-.nf
-#include <wchar.h>
-
-\fBsize_t\fR \fBmbrtowc\fR(\fBwchar_t *restrict\fR \fIpwc\fR, \fBconst char *restrict\fR \fIs\fR, \fBsize_t\fR \fIn\fR,
-     \fBmbstate_t *restrict\fR \fIps\fR);
-.fi
-.LP
-.nf
-#include <wchar.h>
-#include <xlocale.h>
-
-\fBsize_t\fR \fBmbrtowc_l\fR(\fBwchar_t *restrict\fR \fIpwc\fR, \fBconst char *restrict\fR \fIs\fR, \fBsize_t\fR \fIn\fR,
-     \fBmbstate_t *restrict\fR \fIps\fR, \fBlocale_t\fR \fIloc\fR);
-.fi
-.SH DESCRIPTION
-.LP
-If
-.I s
-is a null pointer, the
-.B mbrtowc()
-function is equivalent to the call:
-.sp
-.in +2
-.nf
-\fBmbrtowc\fR(NULL, "", 1, \fIps\fR)
-.fi
-.in -2
-.LP
-Likewise, if
-.I s
-is a null pointer, the
-.B mbrtowc_l()
-function is equivalent to the call:
-.sp
-.in +2
-.nf
-\fBmbrtowc_l\fR(NULL, "", 1, \fIps\fR, \fIloc\fR);
-.fi
-.in -2
-.LP
-In these cases, the values of the arguments
-.I pwc
-and
-.I n
-are ignored.
-.LP
-If
-.I s
-is not a null pointer, these functions inspect at most
-.I n
-bytes beginning at the byte pointed to by
-.I s
-to determine the number of bytes needed to complete the next character
-(including any shift sequences).  If the functions determine that the next
-character is completed,
-they determine the value of the corresponding wide-character and then, if
-.I pwc
-is not a null pointer, stores that value in the object pointed to by
-.IR pwc .
-If the corresponding wide-character is the null wide-character, the
-resulting state described is the initial conversion state.
-.LP
-If
-.I ps
-is a null pointer, these functions use their own
-internal
-.B mbstate_t
-object, which is initialized at program startup to the
-initial conversion state. Otherwise, the
-.B mbstate_t
-object pointed to by
-.I ps
-is used to completely describe the current conversion state of the
-associated character sequence. The system will behave as if no function defined
-in the Reference Manual calls
-.B mbrtowc()
-or
-.BR Bmbrtowc_l() .
-.LP
-The behavior of
-.B mbrtowc()
-is affected by the
-.B LC_CTYPE
-category of the current locale.  The
-.B mbrtowc_l()
-function is affected by the
-.B LC_CTYPE
-category of the specified
-.I loc
-locale object.  See
-.B environ (5).
-.SH RETURN VALUES
-.LP
-The
-.B mbrtowc()
-and
-.B mbrtowc_l()
-functions return the first of the following that applies:
-.IP \fB0\fR
-If the next
-.I n
-or fewer bytes complete the character that corresponds to
-the null wide-character (which is the value stored).
-.IP \fBpositive\fR
-If the next
-.I n
-or fewer bytes complete a valid character (which is the
-value stored); the value returned is the number of bytes that complete the
-character.
-.IP \fB(size_t)\(mi2\fR
-If the next
-.I n
-bytes contribute to an incomplete but potentially valid
-character, and all
-.I n
-bytes have been processed (no value is stored).
-When
-.I n
-has at least the value of the
-.B MB_CUR_MAX
-macro, this case can only occur if
-.I s
-points at a sequence of redundant shift sequences
-(for implementations with state-dependent encodings).
-.IP \fB(size_t)\(mi1\fR
-If an encoding error occurs, in which case the next \fIn\fR or fewer bytes do
-not contribute to a complete and valid  character (no value is stored).  In
-this case,
-.B EILSEQ
-is stored in
-.B errno
-and the conversion state is undefined.
-.SH ERRORS
-.LP
-The
-.B mbrtowc()
-and
-.B mbrtowc_l()
-functions may fail if:
-.IP \fBEINVAL\fR
-The
-.I ps
-argument points to an object that contains an invalid conversion
-state.
-.IP \fBEILSEQ\fR
-Invalid character sequence is detected.
-.SH ATTRIBUTES
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE	ATTRIBUTE VALUE
-_
-Interface Stability	See below.
-_
-MT-Level	See below.
-.TE
-
-.LP
-The
-.B mbrtowc()
-function is Standard.  The
-.B mbrtowc_l()
-function is Uncommitted.
-.LP
-If
-.I ps
-is a null pointer, these functions are Unsafe for use in
-multithreaded applications.  Otherwise they are MT-Safe.
-.SH SEE ALSO
-.LP
-.BR mbsinit (3C),
-.BR newlocale (3C),
-.BR setlocale (3C),
-.BR uselocale (3C),
-.BR attributes (5),
-.BR environ (5),
-.BR standards (5)
diff --git a/usr/src/man/man3c/wcrtomb.3c b/usr/src/man/man3c/wcrtomb.3c
deleted file mode 100644
index 9a3478cb07..0000000000
--- a/usr/src/man/man3c/wcrtomb.3c
+++ /dev/null
@@ -1,146 +0,0 @@
-.\"
-.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for
-.\" permission to reproduce portions of its copyrighted documentation.
-.\" Original documentation from The Open Group can be obtained online at
-.\" http://www.opengroup.org/bookstore/.
-.\"
-.\" The Institute of Electrical and Electronics Engineers and The Open
-.\" Group, have given us permission to reprint portions of their
-.\" documentation.
-.\"
-.\" In the following statement, the phrase ``this text'' refers to portions
-.\" of the system documentation.
-.\"
-.\" Portions of this text are reprinted and reproduced in electronic form
-.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition,
-.\" Standard for Information Technology -- Portable Operating System
-.\" Interface (POSIX), The Open Group Base Specifications Issue 6,
-.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics
-.\" Engineers, Inc and The Open Group.  In the event of any discrepancy
-.\" between these versions and the original IEEE and The Open Group
-.\" Standard, the original IEEE and The Open Group Standard is the referee
-.\" document.  The original Standard can be obtained online at
-.\" http://www.opengroup.org/unix/online.html.
-.\"
-.\" This notice shall appear on any product containing this material.
-.\"
-.\" The contents of this file are subject to the terms of the
-.\" Common Development and Distribution License (the "License").
-.\" You may not use this file except in compliance with the License.
-.\"
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-.\" or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions
-.\" and limitations under the License.
-.\"
-.\" When distributing Covered Code, include this CDDL HEADER in each
-.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-.\" If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying
-.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.\"
-.\"
-.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
-.\" Portions Copyright (c) 2003, Sun Microsystems, Inc.  All Rights Reserved.
-.\" Copyright 2014 Garrett D'Amore <garrett@damore.org>
-.\"
-.TH WCRTOMB 3C "Jun 24, 2014"
-.SH NAME
-wcrtomb, wcrtomb_l \- convert a wide-character code to a character (restartable)
-.SH SYNOPSIS
-.LP
-.nf
-#include <stdio.h>
-
-\fBsize_t\fR \fBwcrtomb\fR(\fBchar *restrict\fR \fIs\fR, \fBwchar_t\fR \fIwc\fR, \fBmbstate_t *restrict\fR \fIps\fR);
-.fi
-.LP
-.nf
-#include <stdio.h>
-#include <xlocale.h>
-
-\fBsize_t\fR \fBwcrtomb_l\fR(\fBchar *restrict\fR \fIs\fR, \fBwchar_t\fR \fIwc\fR, \fBmbstate_t *restrict\fR \fIps\fR,
-    \fBlocale_t\fR \fIloc\fR);
-.fi
-.SH DESCRIPTION
-.LP
-If \fIs\fR is a null pointer, the \fBwcrtomb()\fR function is equivalent to the
-call:
-.IP
-\fBwcrtomb\fR(\fIbuf\fR, L'\e0', \fIps\fR);
-.LP
-where \fIbuf\fR is an internal buffer.
-.LP
-If \fIs\fR is not a null pointer, the \fBwcrtomb()\fR function determines the
-number of bytes needed to represent the character that corresponds to the
-wide-character given by \fIwc\fR (including any shift sequences), and stores
-the resulting bytes in the array whose first element is pointed to by \fIs\fR.
-At most \fBMB_CUR_MAX\fR bytes are stored.  If \fIwc\fR is a null
-wide-character, a null byte is stored, preceded by any shift sequence needed to
-restore the initial shift state. The resulting state described is the initial
-conversion state.
-.LP
-If \fIps\fR is a null pointer, the \fBwcrtomb()\fR function uses its own
-internal \fBmbstate_t\fR object, which is initialized at program startup to the
-initial conversion state.   Otherwise, the  \fBmbstate_t\fR object pointed to
-by \fIps\fR is used to completely describe the current conversion state of the
-associated character sequence. The system will behave as if no function
-defined in the Reference Manual calls \fBwcrtomb()\fR.
-.LP
-The behavior of \fBwcrtomb()\fR is affected by the \fBLC_CTYPE\fR category of the
-current locale.  See \fBenviron\fR(5). The function \fBwcrtomb_l()\fR behaves
-identically to \fBwcrtomb()\fR, except instead of operating in the current
-locale, it operates in the locale specified by \fIloc\fR.
-.SH RETURN VALUES
-.LP
-The \fBwcrtomb()\fR function returns the number of bytes stored in the array
-object (including any shift sequences).  When \fIwc\fR is not a valid
-wide-character, an encoding error occurs.  In this case, the function stores
-the value of the macros \fBEILSEQ\fR in \fBerrno\fR and returns
-\fB(size_t)\(mi1\fR; the conversion state is undefined.
-.SH ERRORS
-.LP
-The \fBwcrtomb()\fR function may fail if:
-.sp
-.ne 2
-.na
-\fB\fBEINVAL\fR\fR
-.ad
-.RS 10n
-The \fIps\fR argument points to an object that contains an invalid conversion
-state.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBEILSEQ\fR\fR
-.ad
-.RS 10n
-Invalid wide-character code is detected.
-.RE
-.SH ATTRIBUTES
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE	ATTRIBUTE VALUE
-_
-Interface Stability	See below.
-_
-MT-Level	See below.
-.TE
-
-.LP
-The \fBwcrtomb()\fR function is Standard.  The
-\fBwcrtomb_l()\fR function is Uncommitted.
-.LP
-If \fIps\fR is a null pointer, these functions should be considered Unsafe
-for use in multithreaded applications.  Otherwise, they are MT-Safe.
-.SH SEE ALSO
-.LP
-\fBmbsinit\fR(3C), \fBnewlocale\fR(3C), \fBsetlocale\fR(3C),
-\fBuselocale\fR(3C), \fBattributes\fR(5),
-\fBstandards\fR(5), \fBenviron\fR(5)