30 Need iconv

Reviewed by: Robert Mustacchi <rm@joyent.com> Reviewed by: Patrick Mooney <patrick.mooney@joyent.com> Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com> Approved by: Dan McDonald <danmcd@omniti.com>
author: Gordon Ross <gwr@nexenta.com> 2011-05-28 21:30:16 -0400
committer: Dan McDonald <danmcd@omniti.com> 2016-10-24 14:17:22 -0400
commit: 48edc7cf07b5dccc3ad84bf2dafe4150bd666d60 (patch)
tree: 164586150388a2a388179accfea08d51e34ab040
parent: 3db3a6b813432461e8e60af00e9ad6f0bf0d5eaf (diff)
download: illumos-joyent-48edc7cf07b5dccc3ad84bf2dafe4150bd666d60.tar.gz
13 files changed, 2267 insertions, 2 deletions
diff --git a/exception_lists/closed-bins b/exception_lists/closed-bins
index 3b54696fa2..8a7ecbc066 100644
--- a/exception_lists/closed-bins
+++ b/exception_lists/closed-bins
@@ -81,6 +81,7 @@
 ./usr/lib/nfs/lockd
 ./usr/lib/snmp
 ./usr/lib/snmp/mibiisa
+./usr/bin/iconv
 ./usr/bin/kbdcomp
 ./usr/bin/localedef
 ./usr/bin/od
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 0c8f8fcdac..779f8de7fc 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -197,6 +197,7 @@ COMMON_SUBDIRS=		\
 	hotplugd	\
 	hwdata		\
 	ibd_upgrade	\
+	iconv		\
 	id		\
 	idmap		\
 	infocmp		\
@@ -595,6 +596,7 @@ MSGSUBDIRS=		\
 	head		\
 	hostname	\
 	hotplug		\
+	iconv		\
 	id		\
 	idmap		\
 	isaexec		\
diff --git a/usr/src/cmd/iconv/Makefile b/usr/src/cmd/iconv/Makefile
new file mode 100644
index 0000000000..9e4a83cc18
--- /dev/null
+++ b/usr/src/cmd/iconv/Makefile
@@ -0,0 +1,66 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+#
+
+# Name of the command built in this directory.
+PROG=iconv
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+# parser.tab.o is compiled from the yacc-generated parser.tab.c (see the
+# parser.y rule below); the other objects come from hand-written sources.
+OBJS = iconv_main.o iconv_list.o charmap.o parser.tab.o scanner.o
+
+SRCS	= $(OBJS:%.o=%.c)
+
+C99MODE=	$(C99_ENABLE)
+LDLIBS		+= -lcmdutils -lavl
+# -d asks yacc for a header of token definitions and -b sets the output
+# file prefix, which is what yields the parser.tab.c / parser.tab.h names
+# used throughout this file.
+YFLAGS		= -d -b parser
+CPPFLAGS	+= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+# NOTE(review): $(RELEASE_BUILD) is defined by the included makefiles, not
+# here; presumably it is empty only for release builds so that assert() is
+# compiled out there -- confirm against Makefile.master.
+$(RELEASE_BUILD) CPPFLAGS += -DNDEBUG
+
+CERRWARN +=	-_gcc=-Wno-unused-label
+
+# "clean" removes objects and the generated parser sources.
+CLEANFILES	= $(OBJS) parser.tab.c parser.tab.h
+CLOBBERFILES	= $(PROG) $(POFILE)
+PIFILES		= $(OBJS:%.o=%.i)
+POFILE		= iconv_cmd.po
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+	$(POST_PROCESS)
+
+# Every object depends on the generated token header, so the parser is
+# always generated before anything is compiled.
+$(OBJS):	parser.tab.h
+
+# One yacc run produces both files.
+parser.tab.c parser.tab.h: parser.y
+	$(YACC) $(YFLAGS) parser.y
+
+lint:	$(SRCS)
+	$(LINT.c) $(CPPFLAGS) $(SRCS)
+
+clean:
+	$(RM) $(CLEANFILES)
+
+# Build the message catalog source: extract strings from the preprocessed
+# (.i) files into messages.po, drop the "domain" lines, then remove the
+# intermediates.
+$(POFILE):	$(PIFILES)
+	$(RM) $@
+	$(RM) messages.po
+	$(XGETTEXT) -s $(PIFILES)
+	$(SED) -e '/domain/d' messages.po > $@
+	$(RM) $(PIFILES) messages.po
+
+.KEEP_STATE:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/iconv/charmap.c b/usr/src/cmd/iconv/charmap.c
new file mode 100644
index 0000000000..5d510326c6
--- /dev/null
+++ b/usr/src/cmd/iconv/charmap.c
@@ -0,0 +1,559 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+/*
+ * CHARMAP file handling for iconv.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <sys/avl.h>
+#include <stddef.h>
+#include <unistd.h>
+#include "charmap.h"
+#include "parser.tab.h"
+#include <assert.h>
+
+enum cmap_pass cmap_pass;
+static avl_tree_t	cmap_sym;
+static avl_tree_t	cmap_mbs;
+
+/*
+ * One charmap entry.  An entry is created while parsing the "from" map and
+ * later has its cm_tombs* members filled in while parsing the "to" map.
+ */
+typedef struct charmap {
+	const char *cm_name;		/* symbolic name; key of cmap_sym */
+	struct charmap *cm_alias_of;	/* set when another entry already */
+					/* owns the same "from" encoding */
+	avl_node_t cm_avl_sym;		/* linkage in cmap_sym */
+	avl_node_t cm_avl_mbs;		/* linkage in cmap_mbs */
+	int cm_warned;			/* "does not encode" already warned */
+	int cm_frmbs_len;		/* bytes used in cm_frmbs */
+	int cm_tombs_len;		/* bytes used in cm_tombs; 0 = unset */
+	char cm_frmbs[MB_LEN_MAX + 1]; /* input */
+	char cm_tombs[MB_LEN_MAX + 1]; /* output */
+} charmap_t;
+
+static void add_charmap_impl_fr(char *sym, char *mbs, int mbs_len, int nodups);
+static void add_charmap_impl_to(char *sym, char *mbs, int mbs_len, int nodups);
+
+/*
+ * Array of POSIX specific portable characters.
+ *
+ * Each entry pairs a symbolic character name with its single-byte value.
+ * Several characters are listed under more than one name (for example
+ * "hyphen" and "hyphen-minus").  The table is terminated by an entry
+ * whose name is NULL.
+ */
+static const struct {
+	char	*name;
+	int	ch;
+} portable_chars[] = {
+	{ "NUL",		'\0' },
+	{ "alert",		'\a' },
+	{ "backspace",		'\b' },
+	{ "tab",		'\t' },
+	{ "carriage-return",	'\r' },
+	{ "newline",		'\n' },
+	{ "vertical-tab",	'\v' },
+	{ "form-feed",		'\f' },
+	{ "space",		' ' },
+	{ "exclamation-mark",	'!' },
+	{ "quotation-mark",	'"' },
+	{ "number-sign",	'#' },
+	{ "dollar-sign",	'$' },
+	{ "percent-sign",	'%' },
+	{ "ampersand",		'&' },
+	{ "apostrophe",		'\'' },
+	{ "left-parenthesis",	'(' },
+	{ "right-parenthesis",	')' },
+	{ "asterisk",		'*' },
+	{ "plus-sign",		'+' },
+	{ "comma",		',' },
+	{ "hyphen-minus",	'-' },
+	{ "hyphen",		'-' },
+	{ "full-stop",		'.' },
+	{ "period",		'.' },
+	{ "slash",		'/' },
+	{ "solidus",		'/' },
+	{ "zero",		'0' },
+	{ "one",		'1' },
+	{ "two",		'2' },
+	{ "three",		'3' },
+	{ "four",		'4' },
+	{ "five",		'5' },
+	{ "six",		'6' },
+	{ "seven",		'7' },
+	{ "eight",		'8' },
+	{ "nine",		'9' },
+	{ "colon",		':' },
+	{ "semicolon",		';' },
+	{ "less-than-sign",	'<' },
+	{ "equals-sign",	'=' },
+	{ "greater-than-sign",	'>' },
+	{ "question-mark",	'?' },
+	{ "commercial-at",	'@' },
+	{ "left-square-bracket", '[' },
+	{ "backslash",		'\\' },
+	{ "reverse-solidus",	'\\' },
+	{ "right-square-bracket", ']' },
+	{ "circumflex",		'^' },
+	{ "circumflex-accent",	'^' },
+	{ "low-line",		'_' },
+	{ "underscore",		'_' },
+	{ "grave-accent",	'`' },
+	{ "left-brace",		'{' },
+	{ "left-curly-bracket",	'{' },
+	{ "vertical-line",	'|' },
+	{ "right-brace",	'}' },
+	{ "right-curly-bracket", '}' },
+	{ "tilde",		'~' },
+	{ "A", 'A' },
+	{ "B", 'B' },
+	{ "C", 'C' },
+	{ "D", 'D' },
+	{ "E", 'E' },
+	{ "F", 'F' },
+	{ "G", 'G' },
+	{ "H", 'H' },
+	{ "I", 'I' },
+	{ "J", 'J' },
+	{ "K", 'K' },
+	{ "L", 'L' },
+	{ "M", 'M' },
+	{ "N", 'N' },
+	{ "O", 'O' },
+	{ "P", 'P' },
+	{ "Q", 'Q' },
+	{ "R", 'R' },
+	{ "S", 'S' },
+	{ "T", 'T' },
+	{ "U", 'U' },
+	{ "V", 'V' },
+	{ "W", 'W' },
+	{ "X", 'X' },
+	{ "Y", 'Y' },
+	{ "Z", 'Z' },
+	{ "a", 'a' },
+	{ "b", 'b' },
+	{ "c", 'c' },
+	{ "d", 'd' },
+	{ "e", 'e' },
+	{ "f", 'f' },
+	{ "g", 'g' },
+	{ "h", 'h' },
+	{ "i", 'i' },
+	{ "j", 'j' },
+	{ "k", 'k' },
+	{ "l", 'l' },
+	{ "m", 'm' },
+	{ "n", 'n' },
+	{ "o", 'o' },
+	{ "p", 'p' },
+	{ "q", 'q' },
+	{ "r", 'r' },
+	{ "s", 's' },
+	{ "t", 't' },
+	{ "u", 'u' },
+	{ "v", 'v' },
+	{ "w", 'w' },
+	{ "x", 'x' },
+	{ "y", 'y' },
+	{ "z", 'z' },
+	{ NULL, 0 }
+};
+
+/*
+ * AVL comparator for cmap_sym: orders entries by symbolic name and
+ * normalizes the strcmp() result to -1, 0 or 1.
+ */
+static int
+cmap_compare_sym(const void *n1, const void *n2)
+{
+	const charmap_t *lhs = n1;
+	const charmap_t *rhs = n2;
+	int cmp = strcmp(lhs->cm_name, rhs->cm_name);
+
+	if (cmp < 0)
+		return (-1);
+	if (cmp > 0)
+		return (1);
+	return (0);
+}
+
+/*
+ * AVL comparator for cmap_mbs.
+ *
+ * In order for partial match searches to work, we need these sorted by
+ * mbs contents: plain byte-wise (lexicographic) order, with a sequence
+ * sorting immediately before every longer sequence it is a prefix of.
+ *
+ * Only the bytes that both keys actually contain are compared.  Bytes
+ * past a key's cm_frmbs_len are never examined, so the result does not
+ * depend on what a caller left in the unused tail of a search key's
+ * buffer.  For keys whose tail is zero-filled this yields the same order
+ * as comparing the zero-padded buffers over the longer length.
+ */
+static int
+cmap_compare_mbs(const void *n1, const void *n2)
+{
+	const charmap_t *c1 = n1;
+	const charmap_t *c2 = n2;
+	int len, rv;
+
+	/* length of the common prefix to compare */
+	len = c1->cm_frmbs_len;
+	if (len > c2->cm_frmbs_len)
+		len = c2->cm_frmbs_len;
+	rv = memcmp(c1->cm_frmbs, c2->cm_frmbs, len);
+	if (rv < 0)
+		return (-1);
+	if (rv > 0)
+		return (1);
+	/* they match through the common length; the shorter sorts first */
+	if (c1->cm_frmbs_len < c2->cm_frmbs_len)
+		return (-1);
+	if (c2->cm_frmbs_len < c1->cm_frmbs_len)
+		return (1);
+	return (0);
+}
+
+/*
+ * Build the conversion tables from two charmap files.
+ *
+ * Note the argument order: the "to" map comes first, the "from" map second.
+ * The "from" map is parsed first (creating the entries), then the POSIX
+ * portable characters are added, and only then is the "to" map parsed to
+ * fill in the output encodings.  The yyparse() return values are ignored.
+ */
+void
+charmap_init(char *to_map, char *from_map)
+{
+	avl_create(&cmap_sym, cmap_compare_sym, sizeof (charmap_t),
+	    offsetof(charmap_t, cm_avl_sym));
+
+	avl_create(&cmap_mbs, cmap_compare_mbs, sizeof (charmap_t),
+	    offsetof(charmap_t, cm_avl_mbs));
+
+	/* Pass 1: input encodings. */
+	cmap_pass = CMAP_PASS_FROM;
+	reset_scanner(from_map);
+	(void) yyparse();
+	/* Still in the FROM pass, so these are added as input entries. */
+	add_charmap_posix();
+
+	/* Pass 2: output encodings for the symbols found in pass 1. */
+	cmap_pass = CMAP_PASS_TO;
+	reset_scanner(to_map);
+	(void) yyparse();
+}
+
+/*
+ * Debugging aid: print every entry of cmap_mbs, in tree order, showing
+ * the symbol name followed by its input and output byte sequences in
+ * \xNN notation.
+ */
+void
+charmap_dump()
+{
+	charmap_t *ent;
+	int idx;
+
+	for (ent = avl_first(&cmap_mbs); ent != NULL;
+	    ent = AVL_NEXT(&cmap_mbs, ent)) {
+		(void) printf("name=\"%s\"\n", ent->cm_name);
+
+		/* input ("from") encoding */
+		(void) printf("\timbs=\"");
+		idx = 0;
+		while (idx < ent->cm_frmbs_len) {
+			(void) printf("\\x%02x", ent->cm_frmbs[idx] & 0xFF);
+			idx++;
+		}
+		(void) printf("\"\n");
+
+		/* output ("to") encoding */
+		(void) printf("\tombs=\"");
+		idx = 0;
+		while (idx < ent->cm_tombs_len) {
+			(void) printf("\\x%02x", ent->cm_tombs[idx] & 0xFF);
+			idx++;
+		}
+		(void) printf("\"\n");
+	}
+}
+
+/*
+ * Two charmap files are parsed.  The "from" map comes first: it populates
+ * cmap_mbs and cmap_sym, which are later used to translate the input
+ * stream (mbs encodings) to symbols.  The "to" map comes second: it fills
+ * in the tombs members of entries in cmap_sym (which must already exist),
+ * used later to write the output encoding.
+ *
+ * This function validates one entry and hands it to the handler for the
+ * pass currently in progress.
+ */
+static void
+add_charmap_impl(char *sym, char *mbs, int mbs_len, int nodups)
+{
+
+	/* Both passes need a symbol and an encoding. */
+	if (sym == NULL || mbs == NULL) {
+		errf(_("invalid charmap entry"));
+		return;
+	}
+
+	if (cmap_pass == CMAP_PASS_FROM) {
+		add_charmap_impl_fr(sym, mbs, mbs_len, nodups);
+	} else if (cmap_pass == CMAP_PASS_TO) {
+		add_charmap_impl_to(sym, mbs, mbs_len, nodups);
+	} else {
+		/* no other pass exists */
+		abort();
+	}
+}
+
+/*
+ * "From" pass: create an entry for (sym, mbs) and register it.
+ *
+ * The entry keeps the sym pointer itself (no copy is made).  A symbol
+ * that is already known is dropped, with a warning when nodups is set.
+ * An encoding that is already known makes the new entry an alias, which
+ * is registered by name only.
+ */
+static void
+add_charmap_impl_fr(char *sym, char *mbs, int mbs_len, int nodups)
+{
+	charmap_t	*ent, *by_mbs, *by_sym;
+	avl_index_t	sym_pos, mbs_pos;
+
+	ent = calloc(1, sizeof (*ent));
+	if (ent == NULL) {
+		errf(_("out of memory"));
+		return;
+	}
+	ent->cm_name = sym;
+
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+	(void) memcpy(ent->cm_frmbs, mbs, mbs_len);
+	ent->cm_frmbs_len = mbs_len;
+
+	/* Look the new entry up under both keys before touching a tree. */
+	by_mbs = avl_find(&cmap_mbs, ent, &mbs_pos);
+	by_sym = avl_find(&cmap_sym, ent, &sym_pos);
+
+	/* Known symbol: this is a duplicate. */
+	if (by_sym != NULL) {
+		if (nodups) {
+			warn(_("%s: duplicate character symbol"), sym);
+		}
+		free(ent);
+		return;
+	}
+
+	/* A new symbol always goes into the symbol tree. */
+	avl_insert(&cmap_sym, ent, sym_pos);
+
+	if (by_mbs != NULL) {
+		/*
+		 * Known encoding: the new one is an alias of the original
+		 * and stays out of the mbs tree.
+		 */
+		ent->cm_alias_of = by_mbs;
+	} else {
+		avl_insert(&cmap_mbs, ent, mbs_pos);
+	}
+}
+
+/*
+ * "To" pass: record mbs as the output encoding of the entry named sym.
+ *
+ * Unknown symbols are skipped (with a warning unless sflag is set).  An
+ * alias is resolved to the entry it refers to.  An entry that already has
+ * an output encoding keeps it; that is reported when nodups is set,
+ * except when the entry was reached through an alias.
+ */
+static void
+add_charmap_impl_to(char *sym, char *mbs, int mbs_len, int nodups)
+{
+	charmap_t	key = {0};
+	charmap_t	*ent;
+
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+
+	key.cm_name = sym;
+	ent = avl_find(&cmap_sym, &key, NULL);
+
+	if (ent == NULL) {
+		if (sflag == 0)
+			warn(_("%s: symbol not found"), sym);
+		return;
+	}
+
+	if (ent->cm_alias_of != NULL) {
+		ent = ent->cm_alias_of;
+
+		/* don't warn for dups with aliases */
+		if (ent->cm_tombs_len != 0)
+			return;
+	} else if (ent->cm_tombs_len != 0) {
+		if (nodups) {
+			warn(_("%s: duplicate encoding for"), sym);
+		}
+		return;
+	}
+
+	(void) memcpy(ent->cm_tombs, mbs, mbs_len);
+	ent->cm_tombs_len = mbs_len;
+}
+
+/*
+ * Add a single charmap entry.  mbs is length-prefixed: mbs[0] holds the
+ * number of encoding bytes, which start at mbs[1].  Duplicates are
+ * reported.
+ */
+void
+add_charmap(char *sym, char *mbs)
+{
+	int nbytes = mbs[0];
+
+	assert(0 < nbytes && nbytes <= MB_LEN_MAX);
+	add_charmap_impl(sym, mbs + 1, nbytes, 1);
+}
+
+
+/*
+ * Add a range of charmap entries, e.g. <sym10>...<sym20>.
+ *
+ * This is called by the parser with start/end symbol strings (ssym, esym),
+ * which are allocated in the scanner (T_SYMBOL) and free'd here, on every
+ * return path.  mbs is length-prefixed (mbs[0] is the byte count) and is
+ * the encoding of the first symbol in the range.
+ *
+ * Both symbols must consist of the same non-empty, non-digit prefix
+ * followed only by decimal digits, and the start number must not exceed
+ * the end number.  One entry is generated per number; the generated names
+ * are zero-padded to the digit count of ssym, and the last byte of the
+ * encoding is incremented by one for each successive entry.
+ */
+void
+add_charmap_range(char *ssym, char *esym, char *mbs)
+{
+	int	ls, le;
+	int	si;
+	int	sn, en;
+	int	i;
+	int	mbs_len;
+	char	tmbs[MB_LEN_MAX+1];
+	char	*mb_last;
+
+	static const char *digits = "0123456789";
+
+	/* mbs[0] is the length */
+	mbs_len = *mbs++;
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+	(void) memcpy(tmbs, mbs, mbs_len);
+	mb_last = tmbs + mbs_len - 1;
+
+	ls = strlen(ssym);
+	le = strlen(esym);
+
+	/*
+	 * The strncmp() test runs before esym + si is used, so esym is
+	 * known to be at least si characters long by then.
+	 */
+	if (((si = strcspn(ssym, digits)) == 0) || (si == ls) ||
+	    (strncmp(ssym, esym, si) != 0) ||
+	    (strspn(ssym + si, digits) != (ls - si)) ||
+	    (strspn(esym + si, digits) != (le - si)) ||
+	    ((sn = atoi(ssym + si)) > ((en = atoi(esym + si))))) {
+		errf(_("malformed charmap range"));
+		goto out;
+	}
+
+	/* Cut ssym down to the common prefix. */
+	ssym[si] = 0;
+	for (i = sn; i <= en; i++) {
+		char *nn;
+
+		/*
+		 * Test the return value: the contents of nn are not
+		 * specified when asprintf() fails.
+		 */
+		if (asprintf(&nn, "%s%0*u", ssym, ls - si,
+		    (unsigned int)i) == -1) {
+			errf(_("out of memory"));
+			break;
+		}
+
+		/* nn is handed over to the charmap; it is not freed here. */
+		add_charmap_impl(nn, tmbs, mbs_len, 1);
+		(*mb_last)++;
+	}
+out:
+	free(ssym);
+	free(esym);
+}
+
+/*
+ * Add a single-byte character under the given name, without reporting
+ * duplicates.
+ */
+void
+add_charmap_char(char *name, int c)
+{
+	char one[MB_LEN_MAX+1];
+
+	one[1] = '\0';
+	one[0] = c;
+	add_charmap_impl(name, one, 1, 0);
+}
+
+/*
+ * POSIX insists that certain entries be present, even when not in the
+ * original charmap file.  Walk the portable character table (terminated
+ * by a NULL name) and add each one.
+ */
+void
+add_charmap_posix(void)
+{
+	int	idx = 0;
+
+	while (portable_chars[idx].name != NULL) {
+		add_charmap_char(portable_chars[idx].name,
+		    portable_chars[idx].ch);
+		idx++;
+	}
+}
+
+/*
+ * This is called with a buffer of (typically) MB_LEN_MAX bytes,
+ * which is potentially a multi-byte symbol, but often contains
+ * extra bytes. Find and return the longest match in the charmap.
+ *
+ * Returns NULL when no prefix of the buffer is a known sequence (or when
+ * len is not positive).  At most MB_LEN_MAX bytes are considered, since
+ * that is all a charmap entry can hold.
+ */
+static charmap_t *
+find_mbs(const char *mbs, int len)
+{
+	charmap_t srch = {0};
+	charmap_t *cm = NULL;
+
+	if (len <= 0)
+		return (NULL);
+	if (len > MB_LEN_MAX)
+		len = MB_LEN_MAX;
+
+	/* Copy once; shorter candidates are made by truncating the key. */
+	(void) memcpy(srch.cm_frmbs, mbs, len);
+
+	while (len > 0) {
+		/*
+		 * Clear the byte just past the candidate so the key holds
+		 * exactly len bytes followed by zeros, like the stored
+		 * entries do.  Without this the bytes of the previous,
+		 * longer candidate would still be in the key.
+		 */
+		srch.cm_frmbs[len] = '\0';
+		srch.cm_frmbs_len = len;
+		cm = avl_find(&cmap_mbs, &srch, NULL);
+		if (cm != NULL)
+			break;
+		len--;
+	}
+
+	return (cm);
+}
+
+/*
+ * Report whether the given bytes form the beginning of some sequence
+ * known in this charmap.  An exact match counts as well.  Otherwise the
+ * entry that sorts right after the search key is examined: it is the
+ * candidate for a longer sequence starting with these bytes.
+ */
+static boolean_t
+find_mbs_partial(const char *mbs, int len)
+{
+	charmap_t key = {0};
+	charmap_t *next;
+	avl_index_t pos;
+
+	key.cm_frmbs_len = len;
+	(void) memcpy(key.cm_frmbs, mbs, len);
+
+	/* full match - not expected, but OK */
+	if (avl_find(&cmap_mbs, &key, &pos) != NULL)
+		return (B_TRUE);
+
+	next = avl_nearest(&cmap_mbs, pos, AVL_AFTER);
+	if (next == NULL)
+		return (B_FALSE);
+
+	if (memcmp(next->cm_frmbs, mbs, len) != 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Do like iconv(3), but with charmaps.
+ *
+ * Converts input sequences through the "from"/"to" tables, advancing
+ * *iptr / *optr and decreasing *ileft / *oleft as it goes, also when it
+ * stops early.  Returns 0 once all input has been consumed, or
+ * (size_t)-1 with errno set to:
+ *
+ *	E2BIG	fewer than MB_LEN_MAX bytes of output space are left
+ *		while input remains; the caller should drain the output
+ *		buffer and call again
+ *	EINVAL	the input ends in what may be the start of a longer
+ *		known sequence
+ *	EILSEQ	the input is not a known sequence, or it has no encoding
+ *		in the "to" map and cflag is not set
+ *
+ * Charmaps carry no shift state, so a reset request (NULL input) is a
+ * no-op that returns 0.
+ */
+size_t
+cm_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
+{
+	charmap_t *cm;
+	int mbs_len;
+
+	/* Ignore state reset requests. */
+	if (iptr == NULL || *iptr == NULL)
+		return (0);
+
+	if (*oleft < MB_LEN_MAX) {
+		errno = E2BIG;
+		return ((size_t)-1);
+	}
+
+	/*
+	 * Requiring room for a full MB_LEN_MAX bytes before each step
+	 * means the memcpy() below can never overrun the output buffer.
+	 */
+	while (*ileft > 0 && *oleft >= MB_LEN_MAX) {
+		mbs_len = MB_LEN_MAX;
+		if (*ileft < MB_LEN_MAX)
+			mbs_len = (int)*ileft;
+		cm = find_mbs(*iptr, mbs_len);
+		if (cm == NULL) {
+			if (mbs_len < MB_LEN_MAX &&
+			    find_mbs_partial(*iptr, mbs_len)) {
+				/* incomplete sequence */
+				errno = EINVAL;
+			} else {
+				errno = EILSEQ;
+			}
+			return ((size_t)-1);
+		}
+		assert(cm->cm_frmbs_len > 0);
+		if (cm->cm_tombs_len == 0) {
+			/* Known on input, but the "to" map lacks it. */
+			if (sflag == 0 && cm->cm_warned == 0) {
+				cm->cm_warned = 1;
+				warn(_("To-map does not encode <%s>\n"),
+				    cm->cm_name);
+			}
+			if (cflag == 0) {
+				errno = EILSEQ;
+				return ((size_t)-1);
+			}
+			/* just skip this input seq. */
+			*iptr  += cm->cm_frmbs_len;
+			*ileft -= cm->cm_frmbs_len;
+			continue;
+		}
+
+		*iptr  += cm->cm_frmbs_len;
+		*ileft -= cm->cm_frmbs_len;
+		(void) memcpy(*optr, cm->cm_tombs, cm->cm_tombs_len);
+		*optr  += cm->cm_tombs_len;
+		*oleft -= cm->cm_tombs_len;
+	}
+
+	/*
+	 * Leaving the loop with input still pending means the output
+	 * buffer filled up.  That must be reported: a return of 0 tells
+	 * the caller the whole input was converted, and it would then
+	 * throw the remainder away.
+	 */
+	if (*ileft > 0) {
+		errno = E2BIG;
+		return ((size_t)-1);
+	}
+
+	return (0);
+}
diff --git a/usr/src/cmd/iconv/charmap.h b/usr/src/cmd/iconv/charmap.h
new file mode 100644
index 0000000000..e2c36ea9e7
--- /dev/null
+++ b/usr/src/cmd/iconv/charmap.h
@@ -0,0 +1,68 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet
+ * at http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _CHARMAP_H
+#define	_CHARMAP_H
+
+/*
+ * CHARMAP file handling for iconv.
+ */
+
+/* Common header files. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <libintl.h>
+
+/*
+ * Which of the two charmap files is currently being parsed.  charmap.c
+ * uses this to decide how a parsed entry is recorded.
+ */
+enum cmap_pass {
+	CMAP_PASS_FROM,	/* parsing the "from" (input) charmap */
+	CMAP_PASS_TO	/* parsing the "to" (output) charmap */
+};
+
+extern int com_char;
+extern int esc_char;
+extern int mb_cur_max;
+extern int mb_cur_min;
+extern int last_kw;
+extern int verbose;
+extern int yydebug;
+extern int lineno;
+extern int debug;
+extern int warnings;
+extern int cflag;
+extern int sflag;
+
+int yyparse(void);
+void yyerror(const char *);
+void errf(const char *, ...);
+void warn(const char *, ...);
+
+void reset_scanner(const char *);
+void scan_to_eol(void);
+
+/* charmap.c - CHARMAP handling */
+void init_charmap(void);
+void add_charmap(char *, char *);
+void add_charmap_posix(void);
+void add_charmap_range(char *, char *, char *);
+
+void charmap_init(char *to, char *fr);
+size_t cm_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft);
+void charmap_dump(void);
+
+#define	_(x)	gettext(x)
+
+#endif /* _CHARMAP_H */
diff --git a/usr/src/cmd/iconv/iconv_list.c b/usr/src/cmd/iconv/iconv_list.c
new file mode 100644
index 0000000000..4fac3506d8
--- /dev/null
+++ b/usr/src/cmd/iconv/iconv_list.c
@@ -0,0 +1,298 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+/*
+ * implement "iconv -l"
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/param.h>
+#include <stddef.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#define	PATH_LIBICONV	"/usr/lib/iconv"
+#define	PATH_BTABLES	"/usr/lib/iconv/geniconvtbl/binarytables"
+#define	PATH_ALIASES	"/usr/lib/iconv/alias"
+
+/* One code set name, kept in cs_avl, with the aliases known for it. */
+typedef struct codeset {
+	avl_node_t cs_node;	/* linkage in cs_avl */
+	char *cs_name;		/* heap copy of the name; the AVL key */
+	list_t cs_aliases;	/* list of csalias_t */
+} codeset_t;
+
+/* One alias name, linked on the cs_aliases list of its code set. */
+typedef struct csalias {
+	list_node_t a_node;	/* linkage in codeset_t.cs_aliases */
+	char *a_name;		/* heap copy of the alias name */
+} csalias_t;
+
+static avl_tree_t	cs_avl;
+
+static void alias_destroy(csalias_t *);
+
+/*
+ * codesets
+ */
+
+/*
+ * AVL comparator for cs_avl: orders code sets by name and normalizes
+ * the strcmp() result to -1, 0 or 1.
+ */
+static int
+cs_compare(const void *n1, const void *n2)
+{
+	const codeset_t *lhs = n1;
+	const codeset_t *rhs = n2;
+	int cmp = strcmp(lhs->cs_name, rhs->cs_name);
+
+	if (cmp < 0)
+		return (-1);
+	if (cmp > 0)
+		return (1);
+	return (0);
+}
+
+/*
+ * Add the code set named key to cs_avl unless it is already there.
+ * The name is copied.  Running out of memory is fatal.
+ */
+static void
+cs_insert(char *key)
+{
+	codeset_t probe, *cs;
+	avl_index_t where;
+
+	(void) memset(&probe, 0, sizeof (probe));
+	probe.cs_name = key;
+
+	/* Nothing to do when the name is already known. */
+	if (avl_find(&cs_avl, &probe, &where) != NULL)
+		return;
+
+	if ((cs = calloc(1, sizeof (*cs))) == NULL) {
+		perror("cs_insert:calloc");
+		exit(1);
+	}
+	if ((cs->cs_name = strdup(key)) == NULL) {
+		perror("cs_insert:strdup");
+		exit(1);
+	}
+	list_create(&cs->cs_aliases, sizeof (csalias_t),
+	    offsetof(csalias_t, a_node));
+
+	avl_insert(&cs_avl, cs, where);
+}
+
+/* Introductory text printed before the list of code sets (see cs_dump). */
+const char topmatter[] =
+	"The following are all supported code set names.  All combinations\n"
+	"of those names are not necessarily available for the pair of the\n"
+	"fromcode-tocode.  Some of those code set names have aliases, which\n"
+	"are case-insensitive and described in parentheses following the\n"
+	"canonical name:\n";
+
+
+/*
+ * Print the introductory text, then every code set in cs_avl order, one
+ * per line.  A code set's aliases, if any, follow its name in
+ * parentheses, separated by commas.
+ */
+static void
+cs_dump(void)
+{
+	codeset_t *cs;
+	csalias_t *a;
+	int naliases;
+
+	(void) puts(topmatter);
+
+	cs = avl_first(&cs_avl);
+	while (cs != NULL) {
+		(void) printf("    %s", cs->cs_name);
+
+		naliases = 0;
+		for (a = list_head(&cs->cs_aliases); a != NULL;
+		    a = list_next(&cs->cs_aliases, a)) {
+			/* the first alias opens the parenthesis */
+			if (naliases == 0)
+				(void) printf(" (%s", a->a_name);
+			else
+				(void) printf(", %s", a->a_name);
+			naliases++;
+		}
+		if (naliases != 0)
+			(void) printf(")");
+
+		(void) printf(",\n");
+		cs = AVL_NEXT(&cs_avl, cs);
+	}
+}
+
+/*
+ * Release everything held by cs_avl: each code set, its name, and all
+ * of its aliases; then destroy the tree itself.
+ */
+static void
+cs_destroy(void)
+{
+	void *cookie = NULL;
+	codeset_t *cs;
+	csalias_t *a;
+
+	for (;;) {
+		cs = avl_destroy_nodes(&cs_avl, &cookie);
+		if (cs == NULL)
+			break;
+
+		for (a = list_remove_head(&cs->cs_aliases); a != NULL;
+		    a = list_remove_head(&cs->cs_aliases))
+			alias_destroy(a);
+
+		free(cs->cs_name);
+		free(cs);
+	}
+	avl_destroy(&cs_avl);
+}
+
+/*
+ * aliases
+ */
+
+/*
+ * Attach alias to the code set named codeset.  Aliases of code sets that
+ * are not in cs_avl are ignored.  The alias name is copied and appended
+ * to the end of the code set's alias list.  Running out of memory is
+ * fatal.
+ */
+static void
+alias_insert(char *codeset, char *alias)
+{
+	codeset_t probe, *cs;
+	csalias_t *a;
+
+	(void) memset(&probe, 0, sizeof (probe));
+	probe.cs_name = codeset;
+
+	/* Unknown code set: nothing to attach the alias to. */
+	if ((cs = avl_find(&cs_avl, &probe, NULL)) == NULL)
+		return;
+
+	if ((a = calloc(1, sizeof (*a))) == NULL) {
+		perror("alias_insert:calloc");
+		exit(1);
+	}
+	if ((a->a_name = strdup(alias)) == NULL) {
+		perror("alias_insert:strdup");
+		exit(1);
+	}
+
+	list_insert_tail(&cs->cs_aliases, a);
+}
+
+/* Free an alias (already removed from its list) along with its name. */
+static void
+alias_destroy(csalias_t *a)
+{
+	char *name = a->a_name;
+
+	free(name);
+	free(a);
+}
+
+
+/*
+ * Read the remaining entries of the open directory dh and pick out the
+ * names of the form <A><sep><B><suffix>, where A and B are non-empty and
+ * sep occurs exactly once.  A and B are both added as code set names.
+ * Everything else is skipped.
+ */
+static void
+scan_dir(DIR *dh, char sep, char *suffix)
+{
+	char namebuf[MAXNAMELEN];
+	struct dirent *de;
+
+	for (de = readdir(dh); de != NULL; de = readdir(dh)) {
+		char *ext, *second;
+		size_t need;
+
+		/*
+		 * Work on a copy, since the name gets cut up below.  A
+		 * name too long for the buffer can't possibly be a valid
+		 * pair of codeset names, so it is skipped.
+		 */
+		need = strlcpy(namebuf, de->d_name, sizeof (namebuf));
+		if (need >= sizeof (namebuf))
+			continue;
+
+		/* The name must end in the suffix (.so | .t); drop it. */
+		ext = strrchr(namebuf, *suffix);
+		if (ext == NULL || strcmp(ext, suffix) != 0)
+			continue;
+		*ext = '\0';
+
+		/* Split at the separator ... */
+		second = strchr(namebuf, sep);
+		if (second == NULL)
+			continue;
+		*second = '\0';
+		second++;
+
+		/* ... which must be the only one. */
+		if (strchr(second, sep) != NULL)
+			continue;
+
+		/* Neither half may be empty. */
+		if (namebuf[0] == '\0' || second[0] == '\0')
+			continue;
+
+		/* OK, add both to the map. */
+		cs_insert(namebuf);
+		cs_insert(second);
+	}
+}
+
+/*
+ * Read alias definitions from fh.  A usable line has the form
+ * "<alias> <codeset>\n": the text before the first space is the alias,
+ * the rest of the line is the code set it belongs to.  Lines starting
+ * with '#', lines without a space, and lines without a terminating
+ * newline within the buffer are skipped.
+ */
+static void
+scan_aliases(FILE *fh)
+{
+	char linebuf[256];
+
+	while (fgets(linebuf, sizeof (linebuf), fh) != NULL) {
+		char *codeset, *eol;
+
+		if (linebuf[0] == '#')
+			continue;
+
+		/* terminate the alias; the code set name follows */
+		codeset = strchr(linebuf, ' ');
+		if (codeset == NULL)
+			continue;
+		*codeset = '\0';
+		codeset++;
+
+		/* strip the newline */
+		eol = strchr(codeset, '\n');
+		if (eol == NULL)
+			continue;
+		*eol = '\0';
+
+		alias_insert(codeset, linebuf);
+	}
+}
+
+/*
+ * Implement "iconv -l": collect code set names from the conversion
+ * modules and binary tables, attach the aliases from the alias file, and
+ * print the result.
+ *
+ * Returns 0 on success, or 1 when either directory cannot be opened.  A
+ * missing alias file is reported but is not an error.
+ */
+int
+list_codesets(void)
+{
+	DIR *dh;
+	FILE *fh;
+
+	avl_create(&cs_avl, cs_compare, sizeof (codeset_t),
+	    offsetof(codeset_t, cs_node));
+
+	/* Modules: <from>%<to>.so, then (second scan) <from>.<to>.t */
+	if ((dh = opendir(PATH_LIBICONV)) == NULL) {
+		perror(PATH_LIBICONV);
+		return (1);
+	}
+	scan_dir(dh, '%', ".so");
+	rewinddir(dh);
+	scan_dir(dh, '.', ".t");
+	(void) closedir(dh);
+
+	/* Binary tables: <from>%<to>.bt */
+	if ((dh = opendir(PATH_BTABLES)) == NULL) {
+		perror(PATH_BTABLES);
+		return (1);
+	}
+	scan_dir(dh, '%', ".bt");
+	(void) closedir(dh);
+
+	/* Aliases are optional. */
+	fh = fopen(PATH_ALIASES, "r");
+	if (fh != NULL) {
+		scan_aliases(fh);
+		(void) fclose(fh);
+	} else {
+		perror(PATH_ALIASES);
+		/* let's continue */
+	}
+
+	cs_dump();
+
+	cs_destroy();
+
+	return (0);
+}
diff --git a/usr/src/cmd/iconv/iconv_main.c b/usr/src/cmd/iconv/iconv_main.c
new file mode 100644
index 0000000000..260d6ba9bc
--- /dev/null
+++ b/usr/src/cmd/iconv/iconv_main.c
@@ -0,0 +1,310 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+/*
+ * iconv(1) command.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <limits.h>
+#include <iconv.h>
+#include <libintl.h>
+#include <langinfo.h>
+#include <locale.h>
+#include "charmap.h"
+
+#include <assert.h>
+
+const char *progname;
+
+char *from_cs;
+char *to_cs;
+int debug;
+int cflag;	/* skip invalid characters */
+int sflag;	/* silent */
+int lflag;	/* list conversions */
+
+void iconv_file(FILE *, const char *);
+extern int list_codesets(void);
+
+iconv_t ich;	/* iconv(3c) lib handle */
+size_t (*pconv)(const char **iptr, size_t *ileft,
+		char **optr, size_t *oleft);
+
+/*
+ * Conversion routine used when no charmaps are involved: forwards to
+ * iconv(3C) on the global handle ich.
+ */
+size_t
+lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
+{
+	size_t rv;
+
+	rv = iconv(ich, iptr, ileft, optr, oleft);
+	return (rv);
+}
+
+/* Print both forms of the command synopsis to stderr and exit(1). */
+void
+usage(void)
+{
+	const char *name = progname;
+
+	(void) fprintf(stderr,
+	    gettext("usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
+	    "[file ...]\n"),
+	    name);
+	(void) fprintf(stderr, gettext("\t%s -l\n"), name);
+	exit(1);
+}
+
+/*
+ * iconv(1) entry point.
+ *
+ * Parses the options, then either lists the known code sets (-l) or
+ * converts the named files (or stdin) from the "from" code set to the
+ * "to" code set, writing to stdout.  Exits 0 on success and 1 on a usage
+ * error, a failure to open a conversion or a file, or when the global
+ * "warnings" counter is non-zero at the end.
+ */
+int
+main(int argc, char **argv)
+{
+	FILE *fp;
+	char *fslash, *tslash;
+	int c;
+
+	yydebug = 0;
+	progname = getprogname();
+
+	(void) setlocale(LC_ALL, "");
+
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN	"SYS_TEST"
+#endif
+	(void) textdomain(TEXT_DOMAIN);
+
+	/* -d is accepted here although usage() does not mention it. */
+	while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
+		switch (c) {
+		case 'c':
+			cflag++;
+			break;
+		case 'd':
+			debug++;
+			break;
+		case 'l':
+			lflag++;
+			break;
+		case 's':
+			sflag++;
+			break;
+		case 'f':
+			from_cs = optarg;
+			break;
+		case 't':
+			to_cs = optarg;
+			break;
+		case '?':
+			/* usage() does not return */
+			usage();
+		}
+	}
+
+	/* -l stands alone: no code sets and no file operands. */
+	if (lflag) {
+		if (from_cs != NULL || to_cs != NULL || optind != argc)
+			usage();
+		exit(list_codesets());
+	}
+
+	/* An unspecified side defaults to the current locale's code set. */
+	if (from_cs == NULL)
+		from_cs = nl_langinfo(CODESET);
+	if (to_cs == NULL)
+		to_cs = nl_langinfo(CODESET);
+
+	/*
+	 * If either "from" or "to" contains a slash,
+	 * then we're using charmaps.
+	 */
+	fslash = strchr(from_cs, '/');
+	tslash = strchr(to_cs, '/');
+	if (fslash != NULL || tslash != NULL) {
+		/* Both names are handed to the charmap code in this case. */
+		charmap_init(to_cs, from_cs);
+		pconv = cm_iconv;
+		if (debug)
+			charmap_dump();
+	} else {
+		ich = iconv_open(to_cs, from_cs);
+		if (ich == ((iconv_t)-1)) {
+			switch (errno) {
+			case EINVAL:
+				(void) fprintf(stderr,
+				    _("Not supported %s to %s\n"),
+				    from_cs, to_cs);
+				break;
+			default:
+				(void) fprintf(stderr,
+				    _("iconv_open failed: %s\n"),
+				    strerror(errno));
+				break;
+			}
+			exit(1);
+		}
+		pconv = lib_iconv;
+	}
+
+	/* No operands, or a single "-", means convert standard input. */
+	if (optind == argc ||
+	    (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) {
+		iconv_file(stdin, "stdin");
+		exit(warnings ? 1 : 0);
+	}
+
+	/* Convert each file in turn; the first one that won't open is fatal. */
+	for (; optind < argc; optind++) {
+		fp = fopen(argv[optind], "r");
+		if (fp == NULL) {
+			perror(argv[optind]);
+			exit(1);
+		}
+		iconv_file(fp, argv[optind]);
+		(void) fclose(fp);
+	}
+	exit(warnings ? 1 : 0);
+}
+
+/*
+ * Conversion buffer sizes:
+ *
+ * The input buffer has room to prepend one mbs character if needed for
+ * handling a left-over at the end of a previous conversion buffer.
+ *
+ * Conversions may grow or shrink data, so using a larger output buffer
+ * to reduce the likelihood of leftover input buffer data in each pass.
+ */
+#define	IBUFSIZ	(MB_LEN_MAX + BUFSIZ)
+#define	OBUFSIZ	(2 * BUFSIZ)
+
+/*
+ * Convert the contents of fp through *pconv and write the result to
+ * stdout.  fname is used in diagnostics only.
+ *
+ * Input is read BUFSIZ bytes at a time into ibuf, after a reserved area
+ * of MB_LEN_MAX bytes.  An incomplete sequence at the end of one read is
+ * moved into that reserved area so that it directly precedes the data of
+ * the next read.  An invalid input byte is skipped and, unless cflag is
+ * set, replaced by '?' in the output.  A failure to write to stdout is
+ * fatal.
+ */
+void
+iconv_file(FILE *fp, const char *fname)
+{
+	static char ibuf[IBUFSIZ];
+	static char obuf[OBUFSIZ];
+	const char *iptr;
+	char *optr;
+	off64_t offset;
+	size_t ileft, oleft, ocnt;
+	size_t nr, nw, rc;
+	int iconv_errno;
+
+	offset = 0;
+	ileft = 0;
+	iptr = ibuf + MB_LEN_MAX;
+
+	while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
+
+		assert(iptr <= ibuf+MB_LEN_MAX);
+		assert(ileft <= MB_LEN_MAX);
+		ileft += nr;
+		offset += nr;
+
+		optr = obuf;
+		oleft = OBUFSIZ;
+
+		/*
+		 * Note: the *pconv function is either iconv(3c) or our
+		 * private equivalent when using charmaps. Both update
+		 * ileft, oleft etc. even when conversion stops due to
+		 * an illegal sequence or whatever, so we need to copy
+		 * the partially converted buffer even on error.
+		 */
+	iconv_again:
+		rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
+		iconv_errno = errno;
+
+		/* Write out whatever was produced and start over in obuf. */
+		ocnt = OBUFSIZ - oleft;
+		if (ocnt > 0) {
+			nw = fwrite(obuf, 1, ocnt, stdout);
+			if (nw != ocnt) {
+				perror("fwrite");
+				exit(1);
+			}
+		}
+		optr = obuf;
+		oleft = OBUFSIZ;
+
+		if (rc == (size_t)-1) {
+			switch (iconv_errno) {
+
+			case E2BIG:	/* no room in output buffer */
+				goto iconv_again;
+
+			case EINVAL:	/* incomplete sequence on input */
+				if (debug) {
+					(void) fprintf(stderr,
+			_("Incomplete sequence in %s at offset %lld\n"),
+					    fname,
+					    (long long)(offset -
+					    (off64_t)ileft));
+				}
+				/*
+				 * Copy the remainder to the space reserved
+				 * at the start of the input buffer.
+				 */
+				assert(ileft > 0);
+				if (ileft <= MB_LEN_MAX) {
+					char *p = ibuf+MB_LEN_MAX-ileft;
+					(void) memmove(p, iptr, ileft);
+					iptr = p;
+					continue; /* read again */
+				}
+				/*
+				 * Should not see ileft > MB_LEN_MAX,
+				 * but if we do, handle as EILSEQ.
+				 */
+				/* FALLTHROUGH */
+
+			case EILSEQ:	/* invalid sequence on input */
+				assert(ileft > 0);
+				if (!sflag) {
+					/*
+					 * Show up to three bytes, without
+					 * looking past the end of the
+					 * data; a byte that is not there
+					 * is shown as 00.
+					 */
+					unsigned int b0, b1, b2;
+
+					b0 = iptr[0] & 0xff;
+					b1 = (ileft > 1) ? (iptr[1] & 0xff) : 0;
+					b2 = (ileft > 2) ? (iptr[2] & 0xff) : 0;
+
+					(void) fprintf(stderr,
+			_("Illegal sequence in %s at offset %lld\n"),
+					    fname,
+					    (long long)(offset -
+					    (off64_t)ileft));
+					(void) fprintf(stderr,
+			_("bad seq: \\x%02x\\x%02x\\x%02x\n"),
+					    b0, b1, b2);
+				}
+				/* skip one */
+				iptr++;
+				ileft--;
+				assert(oleft > 0);
+				if (!cflag) {
+					*optr++ = '?';
+					oleft--;
+				}
+				goto iconv_again;
+
+			default:
+				(void) fprintf(stderr,
+			_("iconv error (%s) in file %s at offset %lld\n"),
+				    strerror(iconv_errno), fname,
+				    (long long)(offset - (off64_t)ileft));
+				break;
+			}
+		}
+
+		/* normal iconv return */
+		ileft = 0;
+		iptr = ibuf + MB_LEN_MAX;
+	}
+
+	/*
+	 * End of file
+	 * Flush any shift encodings.
+	 */
+	iptr = NULL;
+	ileft = 0;
+	optr = obuf;
+	oleft = OBUFSIZ;
+	(void) (*pconv)(&iptr, &ileft, &optr, &oleft);
+	ocnt = OBUFSIZ - oleft;
+	if (ocnt > 0) {
+		nw = fwrite(obuf, 1, ocnt, stdout);
+		if (nw != ocnt) {
+			perror("fwrite");
+			exit(1);
+		}
+	}
+}
diff --git a/usr/src/cmd/iconv/parser.y b/usr/src/cmd/iconv/parser.y
new file mode 100644
index 0000000000..5abd7e2024
--- /dev/null
+++ b/usr/src/cmd/iconv/parser.y
@@ -0,0 +1,118 @@
+%{
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+/*
+ * POSIX iconv charmap grammar.
+ */
+
+#include <wchar.h>
+#include <stdio.h>
+#include <limits.h>
+#include "charmap.h"
+
+%}
+%union {
+	char		*token;
+	int		num;
+	char		mbs[MB_LEN_MAX + 2]; /* NB: [0] is length! */
+}
+
+%token		T_CODE_SET
+%token		T_MB_CUR_MAX
+%token		T_MB_CUR_MIN
+%token		T_COM_CHAR
+%token		T_ESC_CHAR
+%token		T_LT
+%token		T_GT
+%token		T_NL
+%token		T_SEMI
+%token		T_COMMA
+%token		T_ELLIPSIS
+%token		T_RPAREN
+%token		T_LPAREN
+%token		T_QUOTE
+%token		T_NULL
+%token		T_END
+%token		T_CHARMAP
+%token		T_WIDTH
+%token		T_WIDTH_DEFAULT
+%token	<mbs>		T_CHAR
+%token	<token>		T_NAME
+%token	<num>		T_NUMBER
+%token	<token>		T_SYMBOL
+
+%%
+
+/* A charmap file: optional settings followed by the CHARMAP section. */
+goal		: setting_list charmap
+		| charmap
+		;
+
+/* A quoted, possibly empty, sequence of characters. */
+string		: T_QUOTE charlist T_QUOTE
+		| T_QUOTE T_QUOTE
+		;
+
+charlist	: charlist T_CHAR
+		| T_CHAR
+		;
+
+setting_list	: setting_list setting
+		| setting
+		;
+
+/*
+ * Settings ahead of the CHARMAP section.  A T_CHAR value is
+ * length-prefixed, so [1] is its first byte.
+ */
+setting		: T_COM_CHAR T_CHAR T_NL
+		{
+			com_char = $2[1];
+		}
+		| T_ESC_CHAR T_CHAR T_NL
+		{
+			esc_char = $2[1];
+		}
+		| T_MB_CUR_MAX T_NUMBER T_NL
+		{
+			mb_cur_max = $2;
+		}
+		| T_MB_CUR_MIN T_NUMBER T_NL
+		{
+			mb_cur_min = $2;
+		}
+		| T_CODE_SET T_NAME T_NL
+		{
+			/* ignore */
+		}
+		| T_CODE_SET string T_NL
+		{
+			/* ignore */
+		}
+		;
+
+/* The section itself: CHARMAP ... END CHARMAP. */
+charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
+		;
+
+charmap_list	: charmap_list charmap_entry
+		| charmap_entry
+		;
+
+/*
+ * One line of the section: a single symbol, a range of symbols, or an
+ * empty line.  Whatever follows the encoding on the line is skipped.
+ */
+charmap_entry	: T_SYMBOL T_CHAR
+		{
+			add_charmap($1, $2);
+			scan_to_eol();
+		}
+		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
+		{
+			add_charmap_range($1, $3, $4);
+			scan_to_eol();
+		}
+		| T_NL
+		;
diff --git a/usr/src/cmd/iconv/scanner.c b/usr/src/cmd/iconv/scanner.c
new file mode 100644
index 0000000000..5c53695282
--- /dev/null
+++ b/usr/src/cmd/iconv/scanner.c
@@ -0,0 +1,682 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+/*
+ * This file contains the "scanner", which tokenizes charmap files
+ * for iconv for processing by the higher level grammar processor.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <widec.h>
+#include <sys/types.h>
+#include <assert.h>
+#include "charmap.h"
+#include "parser.tab.h"
+
+int			com_char = '#';
+int			esc_char = '\\';
+int			mb_cur_min = 1;
+int			mb_cur_max = MB_LEN_MAX;
+int			lineno = 1;
+int			warnings = 0;
+static int		nextline;
+static FILE		*input = stdin;
+static const char	*filename = "<stdin>";
+static int		instring = 0;
+static int		escaped = 0;
+
+/*
+ * Token space ... grows on demand.
+ */
+static char *token = NULL;
+static int tokidx;
+static int toksz = 0;
+static int hadtok = 0;
+
+/*
+ * The last keyword seen.  This is useful to trigger the special lexer rules
+ * for "copy" and also collating symbols and elements.
+ */
+int	last_kw = 0;
+static int	category = T_END;
+
+static struct token {
+	int id;
+	const char *name;
+} keywords[] = {
+	{ T_COM_CHAR,		"comment_char" },
+	{ T_ESC_CHAR,		"escape_char" },
+	{ T_END,		"END" },
+
+	/*
+	 * These are keywords used in the charmap file.  Note that
+	 * Solaris originally used angle brackets to wrap some of them,
+	 * but we removed that to simplify our parser.  The first of these
+	 * items are "global items."
+	 */
+	{ T_CHARMAP,		"CHARMAP" },
+	{ T_WIDTH,		"WIDTH" },
+	{ T_WIDTH_DEFAULT,	"WIDTH_DEFAULT" },
+
+	{ -1, NULL },
+};
+
+/*
+ * These special words are only used in a charmap file, enclosed in <>.
+ */
+static struct token symwords[] = {
+	{ T_COM_CHAR,		"comment_char" },
+	{ T_ESC_CHAR,		"escape_char" },
+	{ T_CODE_SET,		"code_set_name" },
+	{ T_MB_CUR_MAX,		"mb_cur_max" },
+	{ T_MB_CUR_MIN,		"mb_cur_min" },
+	{ -1, NULL },
+};
+
+static int categories[] = {
+	T_CHARMAP,
+	0
+};
+
+/* Point the scanner at fname (stdin if NULL) and reset all lexer state. */
+void
+reset_scanner(const char *fname)
+{
+	/* always release the previous input, even when returning to stdin */
+	if (input != stdin)
+		(void) fclose(input);
+	if (fname == NULL) {
+		input = stdin;
+	} else if ((input = fopen(fname, "r")) == NULL) {
+		perror(fname);
+		exit(1);
+	}
+	filename = (fname != NULL) ? fname : "<stdin>";
+	com_char = '#';
+	esc_char = '\\';
+	instring = 0;
+	escaped = 0;
+	hadtok = 0;
+	lineno = 1;
+	nextline = 1;
+	tokidx = 0;
+	last_kw = 0;
+	category = T_END;
+}
+
+#define	hex(x)	\
+	(isdigit(x) ? (x - '0') : ((islower(x) ? (x - 'a') : (x - 'A')) + 10))
+#define	isodigit(x)	((x >= '0') && (x <= '7'))
+
+static int
+scanc(void)
+{
+	int	c;
+
+	c = getc(input);
+	lineno = nextline;
+	if (c == '\n') {
+		nextline++;
+	}
+	return (c);
+}
+
+static void
+unscanc(int c)
+{
+	if (c == '\n') {
+		nextline--;
+	}
+	if (ungetc(c, input) < 0) {
+		yyerror(_("ungetc failed"));
+	}
+}
+
+static int
+scan_hex_byte(void)
+{
+	int	c1, c2;
+	int	v;
+
+	c1 = scanc();
+	if (!isxdigit(c1)) {
+		yyerror(_("malformed hex digit"));
+		return (0);
+	}
+	c2 = scanc();
+	if (!isxdigit(c2)) {
+		yyerror(_("malformed hex digit"));
+		return (0);
+	}
+	v = ((hex(c1) << 4) | hex(c2));
+	return (v);
+}
+
+/* Scan 2-3 decimal digits of a \d escape; reject values over one byte. */
+static int
+scan_dec_byte(void)
+{
+	int	c1, c2, c3;
+	int	b;
+
+	c1 = scanc();
+	if (!isdigit(c1)) {
+		yyerror(_("malformed decimal digit"));
+		return (0);
+	}
+	c2 = scanc();
+	if (!isdigit(c2)) {
+		yyerror(_("malformed decimal digit"));
+		return (0);
+	}
+	b = ((c1 - '0') * 10) + (c2 - '0');
+	c3 = scanc();
+	if (isdigit(c3))
+		b = (b * 10) + (c3 - '0');
+	else
+		unscanc(c3);	/* third digit is optional */
+	/* up to 999 can be written, but the result has to fit in one byte */
+	if (b > UCHAR_MAX)
+		yyerror(_("decimal value out of range"));
+	return (b);
+}
+
+/* Scan 2-3 octal digits of an octal escape; reject values over one byte. */
+static int
+scan_oct_byte(void)
+{
+	int	c1, c2, c3;
+	int	b;
+
+	c1 = scanc();
+	if (!isodigit(c1)) {
+		yyerror(_("malformed octal digit"));
+		return (0);
+	}
+	c2 = scanc();
+	if (!isodigit(c2)) {
+		yyerror(_("malformed octal digit"));
+		return (0);
+	}
+	b = ((c1 - '0') * 8) + (c2 - '0');
+	c3 = scanc();
+	if (isodigit(c3))
+		b = (b * 8) + (c3 - '0');
+	else
+		unscanc(c3);	/* third digit is optional */
+	/*
+	 * Up to 0777 (511) can be written, but the result fills one byte.
+	 */
+	if (b > UCHAR_MAX)
+		yyerror(_("octal value out of range"));
+	return (b);
+}
+
+void
+add_tok(int c)
+{
+	if ((tokidx + 1) >= toksz) {
+		toksz += 64;
+		if ((token = realloc(token, toksz)) == NULL) {
+			yyerror(_("out of memory"));
+			tokidx = 0;
+			toksz = 0;
+			return;
+		}
+	}
+
+	token[tokidx++] = (char)c;
+	token[tokidx] = 0;
+}
+
+static int
+get_byte(void)
+{
+	int	c;
+
+	if ((c = scanc()) != esc_char) {
+		unscanc(c);
+		return (EOF);
+	}
+	c = scanc();
+
+	switch (c) {
+	case 'd':
+	case 'D':
+		return (scan_dec_byte());
+	case 'x':
+	case 'X':
+		return (scan_hex_byte());
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		/* put the character back so we can get it */
+		unscanc(c);
+		return (scan_oct_byte());
+	default:
+		unscanc(c);
+		unscanc(esc_char);
+		return (EOF);
+	}
+}
+
+int
+get_escaped(int c)
+{
+	switch (c) {
+	case 'n':
+		return ('\n');
+	case 'r':
+		return ('\r');
+	case 't':
+		return ('\t');
+	case 'f':
+		return ('\f');
+	case 'v':
+		return ('\v');
+	case 'b':
+		return ('\b');
+	case 'a':
+		return ('\a');
+	default:
+		return (c);
+	}
+}
+
+/* Gather consecutive numeric escapes into yylval.mbs; returns T_CHAR. */
+int
+get_wide(void)
+{
+	/* NB: yylval.mbs[0] is the length */
+	char *mbs = &yylval.mbs[1];
+	int mbi = 0;
+	int c;
+
+	mbs[mbi] = 0;
+	if (mb_cur_max > MB_LEN_MAX) {
+		yyerror(_("max multibyte character size too big"));
+		return (T_NULL);
+	}
+	for (;;) {
+		if ((c = get_byte()) == EOF)
+			break;
+		/* ">=" so a zero or negative mb_cur_max cannot overrun mbs */
+		if (mbi >= mb_cur_max) {
+			yyerror(_("length > mb_cur_max"));
+			return (T_NULL);
+		}
+		mbs[mbi++] = c;
+		mbs[mbi] = 0;
+	}
+	/* result in yylval.mbs */
+	mbs[-1] = mbi;
+	return (T_CHAR);
+}
+
+/* Scan a <symbol> name after its '<'; yields a keyword id or T_SYMBOL. */
+int
+get_symbol(void)
+{
+	int	c;
+
+	while ((c = scanc()) != EOF) {
+		if (escaped) {
+			escaped = 0;
+			if (c == '\n')
+				continue;
+			add_tok(get_escaped(c));
+			continue;
+		}
+		if (c == esc_char) {
+			escaped = 1;
+			continue;
+		}
+		if (c == '\n') {	/* well that's strange! */
+			yyerror(_("unterminated symbolic name"));
+			continue;
+		}
+		if (c == '>') {		/* end of symbol */
+			/*
+			 * Empty name: token may still hold a stale keyword.
+			 */
+			if (token == NULL || tokidx == 0) {
+				yyerror(_("missing symbolic name"));
+				return (T_NULL);
+			}
+			/*
+			 * This restarts the token from the beginning
+			 * the next time we scan a character.  (This
+			 * token is complete.)
+			 */
+			tokidx = 0;
+
+			/*
+			 * A few symbols are handled as keywords outside
+			 * of the normal categories.
+			 */
+			if (category == T_END) {
+				int i;
+				for (i = 0; symwords[i].name != 0; i++) {
+					if (strcmp(token, symwords[i].name) ==
+					    0) {
+						last_kw = symwords[i].id;
+						return (last_kw);
+					}
+				}
+			}
+			/*
+			 * It's an undefined symbol.  Hand the buffer itself
+			 * to the parser rather than leaking it behind a copy.
+			 */
+			yylval.token = token;
+			token = NULL;
+			toksz = 0;
+			return (T_SYMBOL);
+		}
+		add_tok(c);
+	}
+
+	yyerror(_("unterminated symbolic name"));
+	return (EOF);
+}
+
+
+/* Classify the accumulated token: keyword, number, char or name. */
+static int
+consume_token(void)
+{
+	int	len = tokidx;
+	int	i;
+
+	tokidx = 0;
+	if (token == NULL)
+		return (T_NULL);
+
+	/*
+	 * this one is special, because we don't want it to alter the
+	 * last_kw field.
+	 */
+	if (strcmp(token, "...") == 0)
+		return (T_ELLIPSIS);
+
+	/* search for reserved words first */
+	for (i = 0; keywords[i].name; i++) {
+		int j;
+		if (strcmp(keywords[i].name, token) != 0)
+			continue;
+
+		last_kw = keywords[i].id;
+
+		/* clear the top level category if we're done with it */
+		if (last_kw == T_END)
+			category = T_END;
+
+		/* set the top level category if we're changing */
+		for (j = 0; categories[j]; j++) {
+			if (categories[j] == last_kw)
+				category = last_kw;
+		}
+
+		return (keywords[i].id);
+	}
+
+	/* maybe it's a numeric constant? (casts keep isdigit() well defined) */
+	if (isdigit((unsigned char)token[0]) ||
+	    (token[0] == '-' && isdigit((unsigned char)token[1]))) {
+		char *eptr;
+		yylval.num = strtol(token, &eptr, 10);
+		if (*eptr != 0)
+			yyerror(_("malformed number"));
+		return (T_NUMBER);
+	}
+
+	/*
+	 * A single lone character is treated as a character literal.
+	 * To avoid duplication of effort, we stick in the charmap.
+	 */
+	if (len == 1) {
+		yylval.mbs[0] = 1; /* length */
+		yylval.mbs[1] = token[0];
+		yylval.mbs[2] = '\0';
+		return (T_CHAR);
+	}
+
+	/*
+	 * Anything else is treated as a symbolic name.  The buffer itself
+	 * is handed to the parser rather than leaked behind a copy.
+	 */
+	yylval.token = token;
+	token = NULL;
+	toksz = 0;
+	return (T_NAME);
+}
+
+void
+scan_to_eol(void)
+{
+	int	c;
+	while ((c = scanc()) != '\n') {
+		if (c == EOF) {
+			/* end of file without newline! */
+			errf(_("missing newline"));
+			return;
+		}
+	}
+	assert(c == '\n');
+}
+
+/* Lexer entry point: returns the next token id, or EOF at end of input. */
+int
+yylex(void)
+{
+	int		c;
+
+	while ((c = scanc()) != EOF) {
+		/* special handling for quoted string */
+		if (instring) {
+			if (escaped) {
+				escaped = 0;
+
+				/* if newline, just eat and forget it */
+				if (c == '\n')
+					continue;
+				/* NUL must not match strchr's terminator */
+				if (c != '\0' && strchr("xXdD01234567", c)) {
+					unscanc(c);
+					unscanc(esc_char);
+					return (get_wide());
+				}
+				yylval.mbs[0] = 1; /* length */
+				yylval.mbs[1] = get_escaped(c);
+				yylval.mbs[2] = '\0';
+				return (T_CHAR);
+			}
+			if (c == esc_char) {
+				escaped = 1;
+				continue;
+			}
+			switch (c) {
+			case '<':
+				return (get_symbol());
+			case '>':
+				/* oops! should generate syntax error  */
+				return (T_GT);
+			case '"':
+				instring = 0;
+				return (T_QUOTE);
+			default:
+				yylval.mbs[0] = 1; /* length */
+				yylval.mbs[1] = c;
+				yylval.mbs[2] = '\0';
+				return (T_CHAR);
+			}
+		}
+
+		/* escaped characters first */
+		if (escaped) {
+			escaped = 0;
+			if (c == '\n') {
+				/* eat the newline */
+				continue;
+			}
+			hadtok = 1;
+			if (tokidx) {
+				/* an escape mid-token is nonsense */
+				return (T_NULL);
+			}
+
+			/* numeric escapes are wide chars (a NUL is not one) */
+			if (c != '\0' && strchr("xXdD01234567", c)) {
+				unscanc(c);
+				unscanc(esc_char);
+				return (get_wide());
+			}
+
+			add_tok(get_escaped(c));
+			continue;
+		}
+
+		/* if it is the escape character itself note it */
+		if (c == esc_char) {
+			escaped = 1;
+			continue;
+		}
+
+		/* remove from the comment char to end of line */
+		if (c == com_char) {
+			while (c != '\n') {
+				if ((c = scanc()) == EOF) {
+					/* end of file without newline! */
+					return (EOF);
+				}
+			}
+			assert(c == '\n');
+			if (!hadtok) {
+				/*
+				 * If there were no tokens on this line,
+				 * then just pretend it didn't exist at all.
+				 */
+				continue;
+			}
+			hadtok = 0;
+			return (T_NL);
+		}
+
+		if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
+			/*
+			 * These are all token delimiters.  If there
+			 * is a token already in progress, we need to
+			 * process it.
+			 */
+			unscanc(c);
+			return (consume_token());
+		}
+
+		switch (c) {
+		case '\n':
+			if (!hadtok) {
+				/*
+				 * If the line was completely devoid of tokens,
+				 * then just ignore it.
+				 */
+				continue;
+			}
+			/* we're starting a new line, reset the token state */
+			hadtok = 0;
+			return (T_NL);
+		case ',':
+			hadtok = 1;
+			return (T_COMMA);
+		case ';':
+			hadtok = 1;
+			return (T_SEMI);
+		case '(':
+			hadtok = 1;
+			return (T_LPAREN);
+		case ')':
+			hadtok = 1;
+			return (T_RPAREN);
+		case '>':
+			hadtok = 1;
+			return (T_GT);
+		case '<':
+			/* symbol start! */
+			hadtok = 1;
+			return (get_symbol());
+		case ' ':
+		case '\t':
+			/* whitespace, just ignore it */
+			continue;
+		case '"':
+			hadtok = 1;
+			instring = 1;
+			return (T_QUOTE);
+		default:
+			hadtok = 1;
+			add_tok(c);
+			continue;
+		}
+	}
+	return (EOF);
+}
+
+void
+yyerror(const char *msg)
+{
+	(void) fprintf(stderr, _("%s: %d: error: %s\n"),
+	    filename, lineno, msg);
+	exit(1);
+}
+
+void
+errf(const char *fmt, ...)
+{
+	char	*msg;
+
+	va_list	va;
+	va_start(va, fmt);
+	(void) vasprintf(&msg, fmt, va);
+	va_end(va);
+
+	(void) fprintf(stderr, _("%s: %d: error: %s\n"),
+	    filename, lineno, msg);
+	free(msg);
+	exit(1);
+}
+
+/* Print a formatted warning with file/line context and count it. */
+void
+warn(const char *fmt, ...)
+{
+	char	*msg = NULL;
+	va_list	va;
+	va_start(va, fmt);
+	if (vasprintf(&msg, fmt, va) < 0)
+		msg = NULL;	/* unusable after failure; print fmt */
+	va_end(va);
+	(void) fprintf(stderr, _("%s: %d: warning: %s\n"),
+	    filename, lineno, (msg != NULL) ? msg : fmt);
+	free(msg);
+	warnings++;
+}
diff --git a/usr/src/pkg/manifests/system-test-utiltest.mf b/usr/src/pkg/manifests/system-test-utiltest.mf
index 6d0ec62ca3..9a4ddd4efa 100644
--- a/usr/src/pkg/manifests/system-test-utiltest.mf
+++ b/usr/src/pkg/manifests/system-test-utiltest.mf
@@ -30,6 +30,7 @@ file path=opt/util-tests/bin/print_json mode=0555
 file path=opt/util-tests/bin/utiltest mode=0555
 file path=opt/util-tests/runfiles/default.run mode=0444
 file path=opt/util-tests/tests/allowed-ips mode=0555
+file path=opt/util-tests/tests/iconv_test mode=0555
 file path=opt/util-tests/tests/libnvpair_json/json_00_blank mode=0555
 file path=opt/util-tests/tests/libnvpair_json/json_01_boolean mode=0555
 file path=opt/util-tests/tests/libnvpair_json/json_02_numbers mode=0555
@@ -42,4 +43,5 @@ file path=opt/util-tests/tests/libnvpair_json/json_common mode=0555
 file path=opt/util-tests/tests/printf_test mode=0555
 file path=opt/util-tests/tests/xargs_test mode=0555
 license lic_CDDL license=lic_CDDL
+depend fmri=system/library/iconv/utf-8 type=require
 depend fmri=system/test/testrunner type=require
diff --git a/usr/src/test/util-tests/tests/Makefile b/usr/src/test/util-tests/tests/Makefile
index 4709c7adcd..e12ab73c58 100644
--- a/usr/src/test/util-tests/tests/Makefile
+++ b/usr/src/test/util-tests/tests/Makefile
@@ -14,7 +14,6 @@
 # Copyright 2014 Garrett D'Amore <garrett@damore.org>
 #
 
-SUBDIRS = dladm printf xargs
-SUBDIRS = dladm libnvpair_json printf xargs
+SUBDIRS = dladm iconv libnvpair_json printf xargs
 
 include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/util-tests/tests/iconv/Makefile b/usr/src/test/util-tests/tests/iconv/Makefile
new file mode 100644
index 0000000000..c0fb8a9940
--- /dev/null
+++ b/usr/src/test/util-tests/tests/iconv/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+# Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+SHFILES = iconv_test
+
+ROOTBIN = $(ROOT)/opt/util-tests
+TESTDIR = $(ROOTBIN)/tests
+
+CMDS = $(SHFILES:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+# "all" builds the test script; "install" then copies it into $(TESTDIR).
+all: $(SHFILES)
+
+$(SHFILES): $(SHFILES).sh
+	-$(RM) $(SHFILES)
+	$(CP) $(SHFILES).sh $(SHFILES)
+
+install: all $(CMDS)
+
+lint:
+
+clobber: clean
+	-$(RM) $(SHFILES)
+
+clean:
+
+$(CMDS): $(TESTDIR)
+
+$(TESTDIR):
+	$(INS.dir)
+
+$(TESTDIR)/%: %
+	$(INS.file)
diff --git a/usr/src/test/util-tests/tests/iconv/iconv_test.sh b/usr/src/test/util-tests/tests/iconv/iconv_test.sh
new file mode 100644
index 0000000000..e22bce7099
--- /dev/null
+++ b/usr/src/test/util-tests/tests/iconv/iconv_test.sh
@@ -0,0 +1,111 @@
+#!/bin/sh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
+#
+
+ICONV=${ICONV:-/usr/bin/iconv}
+#ICONV=${ROOT}/usr/bin/iconv
+
+# test name, file a, file b
+check() {
+  if ! cmp -s "$2" "$3" ; then
+    echo "TEST FAIL: $1"
+    exit 1
+  fi
+  echo "TEST PASS: $1"
+}
+
+
+# fromcs, tocs, in, out
+test_conv() {
+  echo "$3" > in
+  echo "$4" > o1
+  $ICONV -f "$1" -t "$2" < in > o2
+  check "${1}:${2}" o1 o2
+  rm in o1 o2
+}
+
+mkmap_one() {
+  echo '<code_set_name> one'
+  echo 'CHARMAP'
+  echo '<NULL>\t\x00'
+  for i in 8 9 a b c d e f
+  do
+    for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+    do
+      echo "<c1-$i$j>\t\x$i$j"
+    done
+  done
+  echo 'END CHARMAP'
+}
+
+mkmap_two() {
+  echo '<code_set_name> two'
+  echo 'CHARMAP'
+  echo '<NULL>\t\x00'
+  for i in 8 9 a b c d e f
+  do
+    for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+    do
+      echo "<c1-$i$j>\t\x20\x$i$j"
+    done
+  done
+  echo 'END CHARMAP'
+}
+
+# write 1023 bytes of space
+wr1023() {
+  n=1023
+  while [[ $n -gt 0 ]]; do
+    echo ' \c'
+   ((n-=1))
+  done
+}
+
+# two-byte utf-8 crossing 1024 byte boundary
+mkbuf_utf8() {
+  wr1023
+  echo '\0303\0240'
+}
+
+# one-byte 8859-1 at 1024 byte boundary
+mkbuf_8859() {
+  wr1023
+  echo '\0340'
+}
+
+# Test some simple, built-in conversions
+
+test_conv ascii utf-8 abcdef abcdef
+test_conv utf-8 ascii abcdef abcdef
+test_conv ascii ucs-2le abc 'a\0b\0c\0\n\0\c'
+test_conv ucs-2le ascii 'a\0b\0c\0\n\0\c' abc
+
+# Test user-provided charmap
+
+mkmap_one > one.cm
+mkmap_two > two.cm
+test_conv ./one.cm ./two.cm '\0200\0201\0202\c' ' \0200 \0201 \0202\c'
+rm one.cm two.cm
+
+# test crossing 1024 byte buffer boundary
+
+mkbuf_utf8 > in
+mkbuf_8859 > o1
+$ICONV -f UTF-8 -t 8859-1 < in > o2
+check "boundary" o1 o2
+rm in o1 o2
+
+exit 0
author	Gordon Ross <gwr@nexenta.com>	2011-05-28 21:30:16 -0400
committer	Dan McDonald <danmcd@omniti.com>	2016-10-24 14:17:22 -0400
commit	48edc7cf07b5dccc3ad84bf2dafe4150bd666d60 (patch)
tree	164586150388a2a388179accfea08d51e34ab040
parent	3db3a6b813432461e8e60af00e9ad6f0bf0d5eaf (diff)
download	illumos-joyent-48edc7cf07b5dccc3ad84bf2dafe4150bd666d60.tar.gz