summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--exception_lists/closed-bins1
-rw-r--r--usr/src/cmd/Makefile2
-rw-r--r--usr/src/cmd/iconv/Makefile66
-rw-r--r--usr/src/cmd/iconv/charmap.c559
-rw-r--r--usr/src/cmd/iconv/charmap.h68
-rw-r--r--usr/src/cmd/iconv/iconv_list.c298
-rw-r--r--usr/src/cmd/iconv/iconv_main.c310
-rw-r--r--usr/src/cmd/iconv/parser.y118
-rw-r--r--usr/src/cmd/iconv/scanner.c682
-rw-r--r--usr/src/pkg/manifests/system-test-utiltest.mf2
-rw-r--r--usr/src/test/util-tests/tests/Makefile3
-rw-r--r--usr/src/test/util-tests/tests/iconv/Makefile49
-rw-r--r--usr/src/test/util-tests/tests/iconv/iconv_test.sh111
13 files changed, 2267 insertions, 2 deletions
diff --git a/exception_lists/closed-bins b/exception_lists/closed-bins
index 3b54696fa2..8a7ecbc066 100644
--- a/exception_lists/closed-bins
+++ b/exception_lists/closed-bins
@@ -81,6 +81,7 @@
./usr/lib/nfs/lockd
./usr/lib/snmp
./usr/lib/snmp/mibiisa
+./usr/bin/iconv
./usr/bin/kbdcomp
./usr/bin/localedef
./usr/bin/od
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 0c8f8fcdac..779f8de7fc 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -197,6 +197,7 @@ COMMON_SUBDIRS= \
hotplugd \
hwdata \
ibd_upgrade \
+ iconv \
id \
idmap \
infocmp \
@@ -595,6 +596,7 @@ MSGSUBDIRS= \
head \
hostname \
hotplug \
+ iconv \
id \
idmap \
isaexec \
diff --git a/usr/src/cmd/iconv/Makefile b/usr/src/cmd/iconv/Makefile
new file mode 100644
index 0000000000..9e4a83cc18
--- /dev/null
+++ b/usr/src/cmd/iconv/Makefile
@@ -0,0 +1,66 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+#
+
+# Name of the command built by "all" and installed by "install".
+PROG=iconv
+
+# Shared command build definitions (contents are not visible from here).
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+# Objects linked into $(PROG). parser.tab.o is compiled from parser.tab.c,
+# which the yacc rule below generates from parser.y.
+OBJS = iconv_main.o iconv_list.o charmap.o parser.tab.o scanner.o
+
+# One .c per object; used only by the lint target.
+SRCS = $(OBJS:%.o=%.c)
+
+C99MODE= $(C99_ENABLE)
+# charmap.c and iconv_list.c use the AVL tree API (<sys/avl.h>);
+# iconv_list.c also uses the list API (<sys/list.h>).
+LDLIBS += -lcmdutils -lavl
+# Flags for $(YACC); the rule below expects parser.tab.c and parser.tab.h
+# to be produced from parser.y.
+YFLAGS = -d -b parser
+CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+# NOTE(review): $(RELEASE_BUILD) is defined outside this file; presumably it
+# makes this line take effect only in release builds -- confirm. -DNDEBUG
+# compiles out every assert() in the sources.
+$(RELEASE_BUILD) CPPFLAGS += -DNDEBUG
+
+# NOTE(review): presumably needed for labels in generated or hand-written
+# code that gcc reports as unused -- confirm which file triggers it.
+CERRWARN += -_gcc=-Wno-unused-label
+
+# Files removed by "clean" (objects and yacc output) and, in addition,
+# those listed for clobber (the binary and the message catalog).
+CLEANFILES = $(OBJS) parser.tab.c parser.tab.h
+CLOBBERFILES = $(PROG) $(POFILE)
+# Inputs to xgettext, one per object.
+PIFILES = $(OBJS:%.o=%.i)
+POFILE = iconv_cmd.po
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+# Every object waits for the generated token header, so yacc runs before
+# any source that includes parser.tab.h is compiled.
+$(OBJS): parser.tab.h
+
+# One yacc run emits both files.
+parser.tab.c parser.tab.h: parser.y
+ $(YACC) $(YFLAGS) parser.y
+
+lint: $(SRCS)
+ $(LINT.c) $(CPPFLAGS) $(SRCS)
+
+clean:
+ $(RM) $(CLEANFILES)
+
+# Build the message catalog: extract strings from the .i files into
+# messages.po, strip the "domain" lines, then remove the intermediates.
+$(POFILE): $(PIFILES)
+ $(RM) $@
+ $(RM) messages.po
+ $(XGETTEXT) -s $(PIFILES)
+ $(SED) -e '/domain/d' messages.po > $@
+ $(RM) $(PIFILES) messages.po
+
+.KEEP_STATE:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/iconv/charmap.c b/usr/src/cmd/iconv/charmap.c
new file mode 100644
index 0000000000..5d510326c6
--- /dev/null
+++ b/usr/src/cmd/iconv/charmap.c
@@ -0,0 +1,559 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * CHARMAP file handling for iconv.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <sys/avl.h>
+#include <stddef.h>
+#include <unistd.h>
+#include "charmap.h"
+#include "parser.tab.h"
+#include <assert.h>
+
+/* Which charmap file is being parsed; selects the add_charmap_impl_*() path. */
+enum cmap_pass cmap_pass;
+/* All entries (including aliases), ordered by cm_name. */
+static avl_tree_t cmap_sym;
+/* Non-alias entries only, ordered by the bytes of cm_frmbs. */
+static avl_tree_t cmap_mbs;
+
+/*
+ * One symbol of the "from" charmap, with the encoding it has in the
+ * "from" map and (once the "to" map has been parsed) in the "to" map.
+ */
+typedef struct charmap {
+ /* Symbolic name; the key of cmap_sym. */
+ const char *cm_name;
+ /* Entry that first claimed the same cm_frmbs bytes, or NULL. */
+ struct charmap *cm_alias_of;
+ /* Linkage in cmap_sym and cmap_mbs respectively. */
+ avl_node_t cm_avl_sym;
+ avl_node_t cm_avl_mbs;
+ /* Non-zero once cm_iconv() has warned that no "to" encoding exists. */
+ int cm_warned;
+ /* Number of valid bytes in cm_frmbs / cm_tombs; 0 means "not set". */
+ int cm_frmbs_len;
+ int cm_tombs_len;
+ char cm_frmbs[MB_LEN_MAX + 1]; /* encoding in the "from" map (input) */
+ char cm_tombs[MB_LEN_MAX + 1]; /* encoding in the "to" map (output) */
+} charmap_t;
+
+/* Per-pass workers called by add_charmap_impl(). */
+static void add_charmap_impl_fr(char *sym, char *mbs, int mbs_len, int nodups);
+static void add_charmap_impl_to(char *sym, char *mbs, int mbs_len, int nodups);
+
+/*
+ * Array of POSIX specific portable characters.
+ */
+static const struct {
+	char	*name;	/* charmap symbol name, without the angle brackets */
+	int	ch;	/* the character it denotes in the C locale */
+} portable_chars[] = {
+	{ "NUL",		'\0' },
+	{ "alert",		'\a' },
+	{ "backspace",		'\b' },
+	{ "tab",		'\t' },
+	{ "carriage-return",	'\r' },
+	{ "newline",		'\n' },
+	{ "vertical-tab",	'\v' },
+	{ "form-feed",		'\f' },
+	{ "space",		' ' },
+	{ "exclamation-mark",	'!' },
+	{ "quotation-mark",	'"' },
+	{ "number-sign",	'#' },
+	{ "dollar-sign",	'$' },
+	{ "percent-sign",	'%' },
+	{ "ampersand",		'&' },
+	{ "apostrophe",		'\'' },
+	{ "left-parenthesis",	'(' },
+	/* Was '(' -- a copy/paste slip that mapped both names to 0x28. */
+	{ "right-parenthesis",	')' },
+	{ "asterisk",		'*' },
+	{ "plus-sign",		'+' },
+	{ "comma",		','},
+	{ "hyphen-minus",	'-' },
+	{ "hyphen",		'-' },
+	{ "full-stop",		'.' },
+	{ "period",		'.' },
+	{ "slash",		'/' },
+	{ "solidus",		'/' },
+	{ "zero",		'0' },
+	{ "one",		'1' },
+	{ "two",		'2' },
+	{ "three",		'3' },
+	{ "four",		'4' },
+	{ "five",		'5' },
+	{ "six",		'6' },
+	{ "seven",		'7' },
+	{ "eight",		'8' },
+	{ "nine",		'9' },
+	{ "colon",		':' },
+	{ "semicolon",		';' },
+	{ "less-than-sign",	'<' },
+	{ "equals-sign",	'=' },
+	{ "greater-than-sign",	'>' },
+	{ "question-mark",	'?' },
+	{ "commercial-at",	'@' },
+	{ "left-square-bracket", '[' },
+	{ "backslash",		'\\' },
+	{ "reverse-solidus",	'\\' },
+	{ "right-square-bracket", ']' },
+	{ "circumflex",		'^' },
+	{ "circumflex-accent",	'^' },
+	{ "low-line",		'_' },
+	{ "underscore",		'_' },
+	{ "grave-accent",	'`' },
+	{ "left-brace",		'{' },
+	{ "left-curly-bracket",	'{' },
+	{ "vertical-line",	'|' },
+	{ "right-brace",	'}' },
+	{ "right-curly-bracket", '}' },
+	{ "tilde",		'~' },
+	{ "A", 'A' },
+	{ "B", 'B' },
+	{ "C", 'C' },
+	{ "D", 'D' },
+	{ "E", 'E' },
+	{ "F", 'F' },
+	{ "G", 'G' },
+	{ "H", 'H' },
+	{ "I", 'I' },
+	{ "J", 'J' },
+	{ "K", 'K' },
+	{ "L", 'L' },
+	{ "M", 'M' },
+	{ "N", 'N' },
+	{ "O", 'O' },
+	{ "P", 'P' },
+	{ "Q", 'Q' },
+	{ "R", 'R' },
+	{ "S", 'S' },
+	{ "T", 'T' },
+	{ "U", 'U' },
+	{ "V", 'V' },
+	{ "W", 'W' },
+	{ "X", 'X' },
+	{ "Y", 'Y' },
+	{ "Z", 'Z' },
+	{ "a", 'a' },
+	{ "b", 'b' },
+	{ "c", 'c' },
+	{ "d", 'd' },
+	{ "e", 'e' },
+	{ "f", 'f' },
+	{ "g", 'g' },
+	{ "h", 'h' },
+	{ "i", 'i' },
+	{ "j", 'j' },
+	{ "k", 'k' },
+	{ "l", 'l' },
+	{ "m", 'm' },
+	{ "n", 'n' },
+	{ "o", 'o' },
+	{ "p", 'p' },
+	{ "q", 'q' },
+	{ "r", 'r' },
+	{ "s", 's' },
+	{ "t", 't' },
+	{ "u", 'u' },
+	{ "v", 'v' },
+	{ "w", 'w' },
+	{ "x", 'x' },
+	{ "y", 'y' },
+	{ "z", 'z' },
+	{ NULL, 0 }	/* terminator; add_charmap_posix() stops on name == NULL */
+};
+
+/*
+ * AVL comparator for cmap_sym: orders entries by symbol name and
+ * normalizes the strcmp() result to -1, 0 or 1.
+ */
+static int
+cmap_compare_sym(const void *n1, const void *n2)
+{
+	const charmap_t *lhs = n1;
+	const charmap_t *rhs = n2;
+	int order = strcmp(lhs->cm_name, rhs->cm_name);
+
+	if (order < 0)
+		return (-1);
+	if (order > 0)
+		return (1);
+	return (0);
+}
+
+/*
+ * AVL comparator for cmap_mbs.
+ *
+ * In order for partial match searches to work, entries are sorted by
+ * the bytes of their "from" encoding; when one encoding is a prefix of
+ * the other, the shorter one sorts first.
+ *
+ * Only the bytes both keys actually have are compared.  The previous
+ * version ran memcmp() over the longer length and so depended on every
+ * key being zero-padded beyond its length; a key with stale tail bytes
+ * could then compare inconsistently with the tree order.  For properly
+ * zero-padded keys this ordering is identical to the old one.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int
+cmap_compare_mbs(const void *n1, const void *n2)
+{
+	const charmap_t *c1 = n1;
+	const charmap_t *c2 = n2;
+	int len, rv;
+
+	/* Length of the common prefix candidate: the shorter of the two. */
+	len = c1->cm_frmbs_len;
+	if (len > c2->cm_frmbs_len)
+		len = c2->cm_frmbs_len;
+	rv = memcmp(c1->cm_frmbs, c2->cm_frmbs, len);
+	if (rv < 0)
+		return (-1);
+	if (rv > 0)
+		return (1);
+	/* They match through the shorter length; shorter sorts first. */
+	if (c1->cm_frmbs_len < c2->cm_frmbs_len)
+		return (-1);
+	if (c2->cm_frmbs_len < c1->cm_frmbs_len)
+		return (1);
+	return (0);
+}
+
+/*
+ * Build the conversion tables from two charmap files.
+ *
+ * to_map and from_map are handed to reset_scanner() as the scanner's
+ * input. The "from" map is parsed first and populates both trees;
+ * the POSIX portable names are then added (without duplicate warnings),
+ * and finally the "to" map is parsed to fill in output encodings.
+ * The yyparse() results are deliberately ignored here.
+ */
+void
+charmap_init(char *to_map, char *from_map)
+{
+ avl_create(&cmap_sym, cmap_compare_sym, sizeof (charmap_t),
+ offsetof(charmap_t, cm_avl_sym));
+
+ avl_create(&cmap_mbs, cmap_compare_mbs, sizeof (charmap_t),
+ offsetof(charmap_t, cm_avl_mbs));
+
+ /* Pass 1: symbols and input encodings. */
+ cmap_pass = CMAP_PASS_FROM;
+ reset_scanner(from_map);
+ (void) yyparse();
+ /* Still in the FROM pass, so these land in the same trees. */
+ add_charmap_posix();
+
+ /* Pass 2: output encodings for symbols found in pass 1. */
+ cmap_pass = CMAP_PASS_TO;
+ reset_scanner(to_map);
+ (void) yyparse();
+}
+
+/*
+ * Print "len" bytes of "mbs" as \xNN escapes, preceded by "label" and
+ * followed by a closing quote and newline.
+ */
+static void
+charmap_dump_bytes(const char *label, const char *mbs, int len)
+{
+	int pos;
+
+	(void) printf("%s", label);
+	for (pos = 0; pos < len; pos++)
+		(void) printf("\\x%02x", mbs[pos] & 0xFF);
+	(void) printf("\"\n");
+}
+
+/*
+ * Debug aid: walk cmap_mbs in order and print each entry's name,
+ * input encoding and output encoding on stdout.
+ */
+void
+charmap_dump()
+{
+	charmap_t *ent;
+
+	for (ent = avl_first(&cmap_mbs); ent != NULL;
+	    ent = AVL_NEXT(&cmap_mbs, ent)) {
+		(void) printf("name=\"%s\"\n", ent->cm_name);
+		charmap_dump_bytes("\timbs=\"", ent->cm_frmbs,
+		    ent->cm_frmbs_len);
+		charmap_dump_bytes("\tombs=\"", ent->cm_tombs,
+		    ent->cm_tombs_len);
+	}
+}
+
+/*
+ * Two charmap files are parsed.  The "from" map comes first: it builds
+ * cmap_mbs and cmap_sym, later used to translate input byte sequences
+ * to symbols.  The "to" map comes second: it fills in the tombs members
+ * of entries in cmap_sym (which must already exist), later used to
+ * write the output encoding.
+ *
+ * This routine validates the entry and hands it to the worker for the
+ * pass currently selected by cmap_pass.
+ */
+static void
+add_charmap_impl(char *sym, char *mbs, int mbs_len, int nodups)
+{
+	/* Both passes need a symbol and an encoding. */
+	if (sym == NULL || mbs == NULL) {
+		errf(_("invalid charmap entry"));
+		return;
+	}
+
+	if (cmap_pass == CMAP_PASS_FROM) {
+		add_charmap_impl_fr(sym, mbs, mbs_len, nodups);
+	} else if (cmap_pass == CMAP_PASS_TO) {
+		add_charmap_impl_to(sym, mbs, mbs_len, nodups);
+	} else {
+		/* cmap_pass holds a value outside the enum. */
+		abort();
+	}
+}
+
+/*
+ * "From" pass: record symbol "sym" with input encoding mbs[0..mbs_len).
+ *
+ * A symbol that already exists is dropped (with a warning when nodups
+ * is set).  A new symbol whose encoding is already owned by another
+ * entry becomes an alias of that entry and is added to the symbol tree
+ * only.  Otherwise the new entry goes into both trees.  The entry keeps
+ * the "sym" pointer; the string is not copied.
+ */
+static void
+add_charmap_impl_fr(char *sym, char *mbs, int mbs_len, int nodups)
+{
+	charmap_t *node, *owner;
+	avl_index_t sym_slot, mbs_slot;
+
+	node = calloc(1, sizeof (*node));
+	if (node == NULL) {
+		errf(_("out of memory"));
+		return;
+	}
+
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+	node->cm_name = sym;
+	node->cm_frmbs_len = mbs_len;
+	(void) memcpy(node->cm_frmbs, mbs, mbs_len);
+
+	/* A symbol we already know about is a duplicate. */
+	if (avl_find(&cmap_sym, node, &sym_slot) != NULL) {
+		if (nodups) {
+			warn(_("%s: duplicate character symbol"), sym);
+		}
+		free(node);
+		return;
+	}
+
+	/*
+	 * If the encoding is already present, the new symbol is an alias
+	 * of its owner and must stay out of the mbs tree.
+	 */
+	owner = avl_find(&cmap_mbs, node, &mbs_slot);
+	if (owner != NULL)
+		node->cm_alias_of = owner;
+	else
+		avl_insert(&cmap_mbs, node, mbs_slot);
+
+	avl_insert(&cmap_sym, node, sym_slot);
+}
+
+/*
+ * "To" pass: attach output encoding mbs[0..mbs_len) to the entry for
+ * "sym" created during the "from" pass.
+ *
+ * Unknown symbols are skipped (with a warning unless sflag is set).
+ * Aliases are resolved to the entry they alias.  The first encoding
+ * seen for an entry wins; a later one is ignored, with a warning only
+ * when the symbol is not an alias and nodups is set.
+ */
+static void
+add_charmap_impl_to(char *sym, char *mbs, int mbs_len, int nodups)
+{
+	charmap_t key = {0};
+	charmap_t *ent;
+	int via_alias;
+
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+
+	key.cm_name = sym;
+	ent = avl_find(&cmap_sym, &key, NULL);
+	if (ent == NULL) {
+		if (sflag == 0)
+			warn(_("%s: symbol not found"), sym);
+		return;
+	}
+
+	via_alias = (ent->cm_alias_of != NULL);
+	if (via_alias)
+		ent = ent->cm_alias_of;
+
+	if (ent->cm_tombs_len != 0) {
+		/* don't warn for dups with aliases */
+		if (!via_alias && nodups) {
+			warn(_("%s: duplicate encoding for"), sym);
+		}
+		return;
+	}
+
+	m_store:
+	ent->cm_tombs_len = mbs_len;
+	(void) memcpy(ent->cm_tombs, mbs, mbs_len);
+}
+
+/*
+ * Parser callback for a single "<symbol> encoding" line.  The encoding
+ * arrives length-prefixed: mbs[0] is the byte count and the bytes
+ * themselves start at mbs[1].
+ */
+void
+add_charmap(char *sym, char *mbs)
+{
+	int mbs_len = mbs[0];
+
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+	add_charmap_impl(sym, mbs + 1, mbs_len, 1);
+}
+
+
+/*
+ * Parser callback for a "<symN>...<symM> encoding" range line.
+ *
+ * ssym and esym are the start/end symbol strings, allocated in the
+ * scanner (T_SYMBOL) and always freed here -- including on the error
+ * paths, which previously leaked them.  mbs is length-prefixed: mbs[0]
+ * is the byte count and the encoding of the first symbol follows.
+ *
+ * Both symbols must share a non-empty, non-numeric prefix followed by
+ * decimal digits only, and the start number must not exceed the end
+ * number.  One entry is added per number, named with the prefix plus
+ * the number zero-padded to the width used in ssym; the last byte of
+ * the encoding is incremented for each successive entry.
+ *
+ * Each generated name is handed to add_charmap_impl(), which may keep
+ * the pointer, so the names are not freed here.
+ */
+void
+add_charmap_range(char *ssym, char *esym, char *mbs)
+{
+	int ls, le;
+	int si;
+	int sn, en;
+	int i;
+	int mbs_len;
+	char tmbs[MB_LEN_MAX+1];
+	char *mb_last;
+
+	static const char *digits = "0123456789";
+
+	/* mbs[0] is the length */
+	mbs_len = *mbs++;
+	assert(0 < mbs_len && mbs_len <= MB_LEN_MAX);
+	(void) memcpy(tmbs, mbs, mbs_len);
+	mb_last = tmbs + mbs_len - 1;
+
+	ls = strlen(ssym);
+	le = strlen(esym);
+
+	/*
+	 * The order of these tests matters: si must be computed first,
+	 * and the prefix match guarantees esym is at least si bytes long
+	 * before esym + si is examined.
+	 */
+	if (((si = strcspn(ssym, digits)) == 0) || (si == ls) ||
+	    (strncmp(ssym, esym, si) != 0) ||
+	    (strspn(ssym + si, digits) != (ls - si)) ||
+	    (strspn(esym + si, digits) != (le - si)) ||
+	    ((sn = atoi(ssym + si)) > ((en = atoi(esym + si))))) {
+		errf(_("malformed charmap range"));
+		goto done;
+	}
+
+	/* Cut ssym down to its prefix; the digits are regenerated below. */
+	ssym[si] = 0;
+	for (i = sn; i <= en; i++) {
+		char *nn = NULL;
+
+		/*
+		 * Check the return value: after a failed asprintf() the
+		 * contents of nn are not something to rely on.
+		 */
+		if (asprintf(&nn, "%s%0*u", ssym, ls - si, i) < 0 ||
+		    nn == NULL) {
+			errf(_("out of memory"));
+			goto done;
+		}
+
+		add_charmap_impl(nn, tmbs, mbs_len, 1);
+		(*mb_last)++;
+	}
+
+done:
+	free(ssym);
+	free(esym);
+}
+
+/*
+ * Add a single-byte entry: symbol "name" encoded as the one byte "c".
+ * Duplicate warnings are suppressed (nodups == 0).
+ */
+void
+add_charmap_char(char *name, int c)
+{
+	char mbs[MB_LEN_MAX+1] = { 0 };
+
+	mbs[0] = c;
+	add_charmap_impl(name, mbs, 1, 0);
+}
+
+/*
+ * POSIX insists that certain entries be present, even when not in the
+ * original charmap file.  Walk the portable_chars table up to its NULL
+ * terminator and add each name as a single-byte entry.
+ */
+void
+add_charmap_posix(void)
+{
+	int idx = 0;
+
+	while (portable_chars[idx].name) {
+		add_charmap_char(portable_chars[idx].name,
+		    portable_chars[idx].ch);
+		idx++;
+	}
+}
+
+/*
+ * This is called with a buffer of (typically) MB_LEN_MAX bytes,
+ * which is potentially a multi-byte symbol, but often contains
+ * extra bytes.  Find and return the longest match in the charmap,
+ * or NULL if no prefix of the buffer is a known encoding.
+ *
+ * "len" must not exceed MB_LEN_MAX (the caller, cm_iconv(), clamps it).
+ *
+ * The search key is kept zero-padded beyond its current length.  The
+ * earlier version re-copied a shorter prefix on each iteration but left
+ * the dropped bytes in place; a comparator that looks past the key's
+ * length then sees stale data and the lookup can miss a shorter match.
+ */
+static charmap_t *
+find_mbs(const char *mbs, int len)
+{
+	charmap_t srch = {0};
+	charmap_t *cm = NULL;
+
+	if (len <= 0)
+		return (NULL);
+
+	/* Copy once; each retry below just trims the last byte. */
+	(void) memcpy(srch.cm_frmbs, mbs, len);
+
+	while (len > 0) {
+		srch.cm_frmbs_len = len;
+		cm = avl_find(&cmap_mbs, &srch, NULL);
+		if (cm != NULL)
+			break;
+		len--;
+		/* Restore the zero padding for the shortened key. */
+		srch.cm_frmbs[len] = '\0';
+	}
+
+	return (cm);
+}
+
+/*
+ * Return B_TRUE if mbs[0..len) is the leading part of (or equal to)
+ * some input encoding known in this charmap, B_FALSE otherwise.
+ *
+ * Because cmap_mbs is sorted by encoding bytes, an entry that begins
+ * with the given bytes, if any, is the one immediately after the spot
+ * where the bytes themselves would be inserted.
+ */
+static boolean_t
+find_mbs_partial(const char *mbs, int len)
+{
+	charmap_t key = {0};
+	charmap_t *next;
+	avl_index_t slot;
+
+	key.cm_frmbs_len = len;
+	(void) memcpy(key.cm_frmbs, mbs, len);
+
+	/* full match - not expected, but OK */
+	if (avl_find(&cmap_mbs, &key, &slot) != NULL)
+		return (B_TRUE);
+
+	next = avl_nearest(&cmap_mbs, slot, AVL_AFTER);
+	if (next == NULL)
+		return (B_FALSE);
+
+	return ((memcmp(next->cm_frmbs, mbs, len) == 0) ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Do like iconv(3), but with charmaps.
+ *
+ * Converts from *iptr (with *ileft bytes available) to *optr (with
+ * *oleft bytes free), advancing all four as it goes -- also when it
+ * stops early with an error.
+ *
+ * Returns 0 when all input was consumed, or when called as a state
+ * reset (iptr or *iptr is NULL; charmaps carry no shift state).
+ * Returns (size_t)-1 with errno set to:
+ *	E2BIG	fewer than MB_LEN_MAX bytes of output space remain and
+ *		there is still input to convert;
+ *	EINVAL	the input ends in what may be the start of a longer
+ *		sequence;
+ *	EILSEQ	the input matches no known sequence, or it matches one
+ *		the to-map cannot encode and cflag is not set.
+ *
+ * The previous version fell out of the loop and returned 0 when output
+ * space ran out part-way through the input; the caller took that as
+ * "everything converted" and dropped the rest of its buffer.
+ */
+size_t
+cm_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
+{
+	charmap_t *cm;
+	int mbs_len;
+
+	/* Ignore state reset requests. */
+	if (iptr == NULL || *iptr == NULL)
+		return (0);
+
+	if (*oleft < MB_LEN_MAX) {
+		errno = E2BIG;
+		return ((size_t)-1);
+	}
+
+	/* Always keep room for one maximum-length output sequence. */
+	while (*ileft > 0 && *oleft >= MB_LEN_MAX) {
+		mbs_len = MB_LEN_MAX;
+		if (*ileft < (size_t)mbs_len)
+			mbs_len = (int)*ileft;
+		cm = find_mbs(*iptr, mbs_len);
+		if (cm == NULL) {
+			if (mbs_len < MB_LEN_MAX &&
+			    find_mbs_partial(*iptr, mbs_len)) {
+				/* incomplete sequence */
+				errno = EINVAL;
+			} else {
+				errno = EILSEQ;
+			}
+			return ((size_t)-1);
+		}
+		assert(cm->cm_frmbs_len > 0);
+		if (cm->cm_tombs_len == 0) {
+			/* Known on input, but the to-map has no encoding. */
+			if (sflag == 0 && cm->cm_warned == 0) {
+				cm->cm_warned = 1;
+				warn(_("To-map does not encode <%s>\n"),
+				    cm->cm_name);
+			}
+			if (cflag == 0) {
+				errno = EILSEQ;
+				return ((size_t)-1);
+			}
+			/* just skip this input seq. */
+			*iptr += cm->cm_frmbs_len;
+			*ileft -= cm->cm_frmbs_len;
+			continue;
+		}
+
+		*iptr += cm->cm_frmbs_len;
+		*ileft -= cm->cm_frmbs_len;
+		(void) memcpy(*optr, cm->cm_tombs, cm->cm_tombs_len);
+		*optr += cm->cm_tombs_len;
+		*oleft -= cm->cm_tombs_len;
+	}
+
+	/* Input left over means the loop stopped for lack of output room. */
+	if (*ileft > 0) {
+		errno = E2BIG;
+		return ((size_t)-1);
+	}
+
+	return (0);
+}
diff --git a/usr/src/cmd/iconv/charmap.h b/usr/src/cmd/iconv/charmap.h
new file mode 100644
index 0000000000..e2c36ea9e7
--- /dev/null
+++ b/usr/src/cmd/iconv/charmap.h
@@ -0,0 +1,68 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet
+ * at http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+
+#ifndef _CHARMAP_H
+#define _CHARMAP_H
+
+/*
+ * CHARMAP file handling for iconv.
+ */
+
+/* Common header files. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <libintl.h>
+
+/* Which of the two charmap files is currently being parsed. */
+enum cmap_pass {
+ CMAP_PASS_FROM,
+ CMAP_PASS_TO
+};
+
+/* Scanner/parser settings; com_char, esc_char and mb_cur_* are set by the grammar. */
+extern int com_char;
+extern int esc_char;
+extern int mb_cur_max;
+extern int mb_cur_min;
+extern int last_kw;
+extern int verbose;
+extern int yydebug;
+extern int lineno;
+/* Command-line state defined in iconv_main.c. */
+extern int debug;
+extern int warnings;
+extern int cflag;
+extern int sflag;
+
+int yyparse(void);
+void yyerror(const char *);
+void errf(const char *, ...);
+void warn(const char *, ...);
+
+void reset_scanner(const char *);
+void scan_to_eol(void);
+
+/* charmap.c - CHARMAP handling */
+/*
+ * NOTE(review): charmap.c defines charmap_init() but no init_charmap();
+ * this prototype looks stale -- confirm and remove.
+ */
+void init_charmap(void);
+void add_charmap(char *, char *);
+void add_charmap_posix(void);
+void add_charmap_range(char *, char *, char *);
+
+void charmap_init(char *to, char *fr);
+size_t cm_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft);
+void charmap_dump(void);
+
+/* Shorthand for marking and translating message strings. */
+#define _(x) gettext(x)
+
+#endif /* _CHARMAP_H */
diff --git a/usr/src/cmd/iconv/iconv_list.c b/usr/src/cmd/iconv/iconv_list.c
new file mode 100644
index 0000000000..4fac3506d8
--- /dev/null
+++ b/usr/src/cmd/iconv/iconv_list.c
@@ -0,0 +1,298 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * implement "iconv -l"
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/param.h>
+#include <stddef.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#define PATH_LIBICONV "/usr/lib/iconv"
+#define PATH_BTABLES "/usr/lib/iconv/geniconvtbl/binarytables"
+#define PATH_ALIASES "/usr/lib/iconv/alias"
+
+/* One code set name, with the list of aliases recorded for it. */
+typedef struct codeset {
+ avl_node_t cs_node; /* linkage in cs_avl */
+ char *cs_name; /* strdup'd name; the key of cs_avl */
+ list_t cs_aliases; /* list of csalias_t */
+} codeset_t;
+
+/* One alias of a code set. */
+typedef struct csalias {
+ list_node_t a_node; /* linkage in codeset_t.cs_aliases */
+ char *a_name; /* strdup'd alias name */
+} csalias_t;
+
+/* All known code sets, ordered by name (see cs_compare). */
+static avl_tree_t cs_avl;
+
+static void alias_destroy(csalias_t *);
+
+/*
+ * codesets
+ */
+
+/*
+ * AVL comparator for cs_avl: orders code sets by name and normalizes
+ * the strcmp() result to -1, 0 or 1.
+ */
+static int
+cs_compare(const void *n1, const void *n2)
+{
+	const codeset_t *lhs = n1;
+	const codeset_t *rhs = n2;
+	int order = strcmp(lhs->cs_name, rhs->cs_name);
+
+	if (order < 0)
+		return (-1);
+	if (order > 0)
+		return (1);
+	return (0);
+}
+
+/*
+ * Add code set "key" to cs_avl unless it is already there.  The name
+ * is copied; the new entry starts with an empty alias list.  Exits the
+ * process if memory cannot be allocated.
+ */
+static void
+cs_insert(char *key)
+{
+	codeset_t probe;
+	codeset_t *fresh;
+	avl_index_t slot;
+
+	(void) memset(&probe, 0, sizeof (probe));
+	probe.cs_name = key;
+
+	if (avl_find(&cs_avl, &probe, &slot) != NULL)
+		return; /* already there */
+
+	if ((fresh = calloc(1, sizeof (*fresh))) == NULL) {
+		perror("cs_insert:calloc");
+		exit(1);
+	}
+	if ((fresh->cs_name = strdup(key)) == NULL) {
+		perror("cs_insert:strdup");
+		exit(1);
+	}
+	list_create(&fresh->cs_aliases, sizeof (csalias_t),
+	    offsetof(csalias_t, a_node));
+
+	avl_insert(&cs_avl, fresh, slot);
+}
+
+/* Heading printed before the code set list. */
+const char topmatter[] =
+	"The following are all supported code set names. All combinations\n"
+	"of those names are not necessarily available for the pair of the\n"
+	"fromcode-tocode. Some of those code set names have aliases, which\n"
+	"are case-insensitive and described in parentheses following the\n"
+	"canonical name:\n";
+
+
+/*
+ * Print the heading, then every code set in name order, one per line,
+ * each followed by its aliases (if any) in parentheses.
+ */
+static void
+cs_dump(void)
+{
+	codeset_t *cs;
+	csalias_t *al;
+	int shown;
+
+	(void) puts(topmatter);
+
+	cs = avl_first(&cs_avl);
+	while (cs != NULL) {
+		(void) printf(" %s", cs->cs_name);
+
+		shown = 0;
+		for (al = list_head(&cs->cs_aliases); al != NULL;
+		    al = list_next(&cs->cs_aliases, al)) {
+			if (shown == 0)
+				(void) printf(" (%s", al->a_name);
+			else
+				(void) printf(", %s", al->a_name);
+			shown = 1;
+		}
+		if (shown != 0)
+			(void) printf(")");
+
+		(void) printf(",\n");
+		cs = AVL_NEXT(&cs_avl, cs);
+	}
+}
+
+/*
+ * Tear down cs_avl: for every code set, free each alias, then the name,
+ * then the node itself; finally destroy the (now empty) tree.
+ */
+static void
+cs_destroy(void)
+{
+ void *cookie = NULL;
+ codeset_t *cs;
+ csalias_t *a;
+
+ /* avl_destroy_nodes() hands back one node per call until none remain. */
+ while ((cs = avl_destroy_nodes(&cs_avl, &cookie)) != NULL) {
+ while ((a = list_remove_head(&cs->cs_aliases)) != NULL) {
+ alias_destroy(a);
+ }
+ free(cs->cs_name);
+ free(cs);
+ }
+ avl_destroy(&cs_avl);
+}
+
+/*
+ * aliases
+ */
+
+/*
+ * Record "alias" as another name for "codeset".  Aliases of code sets
+ * that are not in cs_avl are silently ignored.  The alias string is
+ * copied and appended to the code set's alias list.  Exits the process
+ * if memory cannot be allocated.
+ */
+static void
+alias_insert(char *codeset, char *alias)
+{
+	codeset_t probe;
+	codeset_t *owner;
+	csalias_t *entry;
+
+	/* Look up the code set this alias belongs to. */
+	(void) memset(&probe, 0, sizeof (probe));
+	probe.cs_name = codeset;
+	if ((owner = avl_find(&cs_avl, &probe, NULL)) == NULL)
+		return;
+
+	/* Build the alias entry and hang it off the code set. */
+	if ((entry = calloc(1, sizeof (*entry))) == NULL) {
+		perror("alias_insert:calloc");
+		exit(1);
+	}
+	if ((entry->a_name = strdup(alias)) == NULL) {
+		perror("alias_insert:strdup");
+		exit(1);
+	}
+
+	list_insert_tail(&owner->cs_aliases, entry);
+}
+
+/* Free an alias entry and the name string it owns. */
+static void
+alias_destroy(csalias_t *a)
+{
+ free(a->a_name);
+ free(a);
+}
+
+
+/*
+ * Read every entry of the open directory "dh" and, for each file name
+ * of the form <from><sep><to><suffix>, add both <from> and <to> to the
+ * code set tree. Names that do not fit that shape are skipped.
+ *
+ * sep is the single character separating the two names; suffix is
+ * the trailing extension including its leading character (e.g. ".so").
+ */
+static void
+scan_dir(DIR *dh, char sep, char *suffix)
+{
+ char namebuf[MAXNAMELEN];
+ struct dirent *de;
+
+ while ((de = readdir(dh)) != NULL) {
+ char *p2, *p1;
+
+ /*
+ * We'll modify, so let's copy. If the dirent name is
+ * longer than MAXNAMELEN, then it can't possibly be a
+ * valid pair of codeset names, so just skip it.
+ */
+ if (strlcpy(namebuf, de->d_name, sizeof (namebuf)) >=
+ sizeof (namebuf))
+ continue;
+
+ /* Find suffix (.so | .t) */
+ /* Last occurrence of the suffix's first character... */
+ p2 = strrchr(namebuf, *suffix);
+ if (p2 == NULL)
+ continue;
+ /* ...must be followed by exactly the rest of the suffix. */
+ if (strcmp(p2, suffix) != 0)
+ continue;
+ *p2 = '\0';
+
+ /* Split at the first separator: namebuf = first name, p1 = second. */
+ p1 = strchr(namebuf, sep);
+ if (p1 == NULL)
+ continue;
+ *p1++ = '\0';
+
+ /* More than one sep? */
+ if (strchr(p1, sep) != NULL)
+ continue;
+
+ /* Empty strings? */
+ if (*namebuf == '\0' || *p1 == '\0')
+ continue;
+
+ /* OK, add both to the map. */
+ cs_insert(namebuf);
+ cs_insert(p1);
+ }
+}
+
+/*
+ * Read the alias file "fh" line by line.  Lines starting with '#' are
+ * skipped, as are lines with no space or no terminating newline within
+ * the buffer.  On each remaining line the text before the first space
+ * is the alias and the text after it (up to the newline) is the code
+ * set it refers to.
+ */
+static void
+scan_aliases(FILE *fh)
+{
+	char linebuf[256];
+
+	while (fgets(linebuf, sizeof (linebuf), fh) != NULL) {
+		char *gap, *target, *eol;
+
+		if (linebuf[0] == '#')
+			continue;
+
+		gap = strchr(linebuf, ' ');
+		if (gap == NULL)
+			continue;
+		*gap = '\0';
+		target = gap + 1;
+
+		eol = strchr(target, '\n');
+		if (eol != NULL) {
+			*eol = '\0';
+			alias_insert(target, linebuf);
+		}
+	}
+}
+
+/*
+ * Implement "iconv -l": collect code set names from the conversion
+ * module directories, attach aliases from the alias file, print the
+ * result and free everything.
+ *
+ * Returns 0 on success, or 1 if either directory cannot be opened.
+ * A missing alias file is reported but is not fatal.
+ */
+int
+list_codesets(void)
+{
+ DIR *dh;
+ FILE *fh;
+
+ avl_create(&cs_avl, cs_compare, sizeof (codeset_t),
+ offsetof(codeset_t, cs_node));
+
+ dh = opendir(PATH_LIBICONV);
+ if (dh == NULL) {
+ perror(PATH_LIBICONV);
+ return (1);
+ }
+ /* Two naming schemes live in the same directory: from%to.so and from.to.t */
+ scan_dir(dh, '%', ".so");
+ rewinddir(dh);
+ scan_dir(dh, '.', ".t");
+ (void) closedir(dh);
+
+ dh = opendir(PATH_BTABLES);
+ if (dh == NULL) {
+ perror(PATH_BTABLES);
+ return (1);
+ }
+ scan_dir(dh, '%', ".bt");
+ (void) closedir(dh);
+
+ /* Aliases are only attached to code sets found above. */
+ fh = fopen(PATH_ALIASES, "r");
+ if (fh == NULL) {
+ perror(PATH_ALIASES);
+ /* let's continue */
+ } else {
+ scan_aliases(fh);
+ (void) fclose(fh);
+ }
+
+ cs_dump();
+
+ cs_destroy();
+
+ return (0);
+}
diff --git a/usr/src/cmd/iconv/iconv_main.c b/usr/src/cmd/iconv/iconv_main.c
new file mode 100644
index 0000000000..260d6ba9bc
--- /dev/null
+++ b/usr/src/cmd/iconv/iconv_main.c
@@ -0,0 +1,310 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * iconv(1) command.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <limits.h>
+#include <iconv.h>
+#include <libintl.h>
+#include <langinfo.h>
+#include <locale.h>
+#include "charmap.h"
+
+#include <assert.h>
+
+/* Program name as reported by getprogname(); used in usage(). */
+const char *progname;
+
+/* -f and -t arguments; default to nl_langinfo(CODESET) in main(). */
+char *from_cs;
+char *to_cs;
+int debug; /* -d: extra diagnostics, charmap dump */
+int cflag; /* skip invalid characters */
+int sflag; /* silent */
+int lflag; /* list conversions */
+
+void iconv_file(FILE *, const char *);
+extern int list_codesets(void);
+
+iconv_t ich; /* iconv(3c) lib handle */
+/* Conversion routine chosen in main(): lib_iconv or cm_iconv. */
+size_t (*pconv)(const char **iptr, size_t *ileft,
+ char **optr, size_t *oleft);
+
+/*
+ * Adapter giving iconv(3C) the same signature as cm_iconv(), so either
+ * can be called through pconv. Uses the global handle "ich".
+ */
+size_t
+lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
+{
+ return (iconv(ich, iptr, ileft, optr, oleft));
+}
+
+/* Print the two usage forms on stderr and exit with status 1. */
+void
+usage(void)
+{
+ (void) fprintf(stderr, gettext(
+ "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
+ "[file ...]\n"), progname);
+ (void) fprintf(stderr, gettext("\t%s -l\n"), progname);
+ exit(1);
+}
+
+/*
+ * iconv(1) entry point: parse options, select the conversion back end
+ * (charmap files or iconv(3C)), then convert stdin or each named file
+ * to stdout. Exits 1 on usage errors, open failures or if "warnings"
+ * is non-zero at the end; otherwise exits 0.
+ */
+int
+main(int argc, char **argv)
+{
+ FILE *fp;
+ char *fslash, *tslash;
+ int c;
+
+ yydebug = 0;
+ progname = getprogname();
+
+ (void) setlocale(LC_ALL, "");
+
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+ (void) textdomain(TEXT_DOMAIN);
+
+ /* -d is accepted here although usage() does not advertise it. */
+ while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
+ switch (c) {
+ case 'c':
+ cflag++;
+ break;
+ case 'd':
+ debug++;
+ break;
+ case 'l':
+ lflag++;
+ break;
+ case 's':
+ sflag++;
+ break;
+ case 'f':
+ from_cs = optarg;
+ break;
+ case 't':
+ to_cs = optarg;
+ break;
+ case '?':
+ usage();
+ }
+ }
+
+ /* -l stands alone: no -f, -t or file operands allowed with it. */
+ if (lflag) {
+ if (from_cs != NULL || to_cs != NULL || optind != argc)
+ usage();
+ exit(list_codesets());
+ }
+
+ /* A missing side defaults to the current locale's code set. */
+ if (from_cs == NULL)
+ from_cs = nl_langinfo(CODESET);
+ if (to_cs == NULL)
+ to_cs = nl_langinfo(CODESET);
+
+ /*
+ * If either "from" or "to" contains a slash,
+ * then we're using charmaps.
+ */
+ fslash = strchr(from_cs, '/');
+ tslash = strchr(to_cs, '/');
+ if (fslash != NULL || tslash != NULL) {
+ /* Both names are then handed to the charmap parser. */
+ charmap_init(to_cs, from_cs);
+ pconv = cm_iconv;
+ if (debug)
+ charmap_dump();
+ } else {
+ ich = iconv_open(to_cs, from_cs);
+ if (ich == ((iconv_t)-1)) {
+ switch (errno) {
+ case EINVAL:
+ (void) fprintf(stderr,
+ _("Not supported %s to %s\n"),
+ from_cs, to_cs);
+ break;
+ default:
+ (void) fprintf(stderr,
+ _("iconv_open failed: %s\n"),
+ strerror(errno));
+ break;
+ }
+ exit(1);
+ }
+ pconv = lib_iconv;
+ }
+
+ /* No operands, or a lone "-", means convert standard input. */
+ if (optind == argc ||
+ (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) {
+ iconv_file(stdin, "stdin");
+ exit(warnings ? 1 : 0);
+ }
+
+ /* Otherwise convert each operand in turn; the first open failure is fatal. */
+ for (; optind < argc; optind++) {
+ fp = fopen(argv[optind], "r");
+ if (fp == NULL) {
+ perror(argv[optind]);
+ exit(1);
+ }
+ iconv_file(fp, argv[optind]);
+ (void) fclose(fp);
+ }
+ exit(warnings ? 1 : 0);
+}
+
+/*
+ * Conversion buffer sizes:
+ *
+ * The input buffer has room to prepend one mbs character if needed for
+ * handling a left-over at the end of a previous conversion buffer.
+ *
+ * Conversions may grow or shrink data, so using a larger output buffer
+ * to reduce the likelihood of leftover input buffer data in each pass.
+ */
+#define IBUFSIZ (MB_LEN_MAX + BUFSIZ)
+#define OBUFSIZ (2 * BUFSIZ)
+
+/*
+ * Convert the whole of "fp" through *pconv and write the result to
+ * stdout.  "fname" is used only in diagnostics.
+ *
+ * Input is read in BUFSIZ chunks into ibuf + MB_LEN_MAX; the first
+ * MB_LEN_MAX bytes of ibuf are reserved so that an incomplete sequence
+ * left at the end of one chunk can be moved in front of the next.
+ * Converted output is flushed after every call to the converter.
+ * An invalid sequence is reported (unless sflag), one input byte is
+ * skipped, and a '?' is emitted in its place unless cflag is set.
+ * Exits the process if writing to stdout fails.
+ *
+ * Changes from the earlier version:
+ *  - the generic error message used "$s" where "%s" was meant, so
+ *    fname was consumed by the following %lld conversion;
+ *  - the "bad seq" diagnostic read three bytes even when fewer were
+ *    left, possibly past the end of ibuf; missing bytes now print
+ *    as 00;
+ *  - values printed with %lld are cast to long long.
+ */
+void
+iconv_file(FILE *fp, const char *fname)
+{
+	static char ibuf[IBUFSIZ];
+	static char obuf[OBUFSIZ];
+	const char *iptr;
+	char *optr;
+	off64_t offset;
+	size_t ileft, oleft, ocnt;
+	int iconv_errno;
+	int nr, nw, rc;
+
+	offset = 0;
+	ileft = 0;
+	iptr = ibuf + MB_LEN_MAX;
+
+	while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
+
+		/* Any carry-over sits directly in front of the new data. */
+		assert(iptr <= ibuf+MB_LEN_MAX);
+		assert(ileft <= MB_LEN_MAX);
+		ileft += nr;
+		offset += nr;
+
+		optr = obuf;
+		oleft = OBUFSIZ;
+
+		/*
+		 * Note: the *pconv function is either iconv(3c) or our
+		 * private equivalent when using charmaps.  Both update
+		 * ileft, oleft etc. even when conversion stops due to
+		 * an illegal sequence or whatever, so we need to copy
+		 * the partially converted buffer even on error.
+		 */
+	iconv_again:
+		rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
+		iconv_errno = errno;
+
+		ocnt = OBUFSIZ - oleft;
+		if (ocnt > 0) {
+			nw = fwrite(obuf, 1, ocnt, stdout);
+			if (nw != ocnt) {
+				perror("fwrite");
+				exit(1);
+			}
+		}
+		optr = obuf;
+		oleft = OBUFSIZ;
+
+		if (rc == (size_t)-1) {
+			switch (iconv_errno) {
+
+			case E2BIG:	/* no room in output buffer */
+				goto iconv_again;
+
+			case EINVAL:	/* incomplete sequence on input */
+				if (debug) {
+					(void) fprintf(stderr,
+			_("Incomplete sequence in %s at offset %lld\n"),
+					    fname,
+					    (long long)(offset -
+					    (off64_t)ileft));
+				}
+				/*
+				 * Copy the remainder to the space reserved
+				 * at the start of the input buffer.
+				 */
+				assert(ileft > 0);
+				if (ileft <= MB_LEN_MAX) {
+					char *p = ibuf+MB_LEN_MAX-ileft;
+					(void) memmove(p, iptr, ileft);
+					iptr = p;
+					continue; /* read again */
+				}
+				/*
+				 * Should not see ileft > MB_LEN_MAX,
+				 * but if we do, handle as EILSEQ.
+				 */
+				/* FALLTHROUGH */
+
+			case EILSEQ:	/* invalid sequence on input */
+				if (!sflag) {
+					(void) fprintf(stderr,
+			_("Illegal sequence in %s at offset %lld\n"),
+					    fname,
+					    (long long)(offset -
+					    (off64_t)ileft));
+					/* Only look at bytes that exist. */
+					(void) fprintf(stderr,
+			_("bad seq: \\x%02x\\x%02x\\x%02x\n"),
+					    (ileft > 0) ? (iptr[0] & 0xff) : 0,
+					    (ileft > 1) ? (iptr[1] & 0xff) : 0,
+					    (ileft > 2) ? (iptr[2] & 0xff) : 0);
+				}
+				assert(ileft > 0);
+				/* skip one */
+				iptr++;
+				ileft--;
+				assert(oleft > 0);
+				if (!cflag) {
+					*optr++ = '?';
+					oleft--;
+				}
+				goto iconv_again;
+
+			default:
+				(void) fprintf(stderr,
+			_("iconv error (%s) in file %s at offset %lld\n"),
+				    strerror(iconv_errno), fname,
+				    (long long)(offset - (off64_t)ileft));
+				break;
+			}
+		}
+
+		/* normal iconv return */
+		ileft = 0;
+		iptr = ibuf + MB_LEN_MAX;
+	}
+
+	/*
+	 * End of file
+	 * Flush any shift encodings.
+	 */
+	iptr = NULL;
+	ileft = 0;
+	optr = obuf;
+	oleft = OBUFSIZ;
+	(*pconv)(&iptr, &ileft, &optr, &oleft);
+	ocnt = OBUFSIZ - oleft;
+	if (ocnt > 0) {
+		nw = fwrite(obuf, 1, ocnt, stdout);
+		if (nw != ocnt) {
+			perror("fwrite");
+			exit(1);
+		}
+	}
+}
diff --git a/usr/src/cmd/iconv/parser.y b/usr/src/cmd/iconv/parser.y
new file mode 100644
index 0000000000..5abd7e2024
--- /dev/null
+++ b/usr/src/cmd/iconv/parser.y
@@ -0,0 +1,118 @@
+%{
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * POSIX iconv charmap grammar.
+ */
+
+#include <wchar.h>
+#include <stdio.h>
+#include <limits.h>
+#include "charmap.h"
+
+%}
+%union {
+ char *token;
+ int num;
+ char mbs[MB_LEN_MAX + 2]; /* NB: [0] is length! */
+}
+
+%token T_CODE_SET
+%token T_MB_CUR_MAX
+%token T_MB_CUR_MIN
+%token T_COM_CHAR
+%token T_ESC_CHAR
+%token T_LT
+%token T_GT
+%token T_NL
+%token T_SEMI
+%token T_COMMA
+%token T_ELLIPSIS
+%token T_RPAREN
+%token T_LPAREN
+%token T_QUOTE
+%token T_NULL
+%token T_END
+%token T_CHARMAP
+%token T_WIDTH
+%token T_WIDTH_DEFAULT
+%token <mbs> T_CHAR
+%token <token> T_NAME
+%token <num> T_NUMBER
+%token <token> T_SYMBOL
+
+%%
+
+/* A charmap file: optional settings followed by one CHARMAP section. */
+goal : setting_list charmap
+ | charmap
+ ;
+
+/* A quoted string; its contents are not used by any action below. */
+string : T_QUOTE charlist T_QUOTE
+ | T_QUOTE T_QUOTE
+ ;
+
+charlist : charlist T_CHAR
+ | T_CHAR
+ ;
+
+setting_list : setting_list setting
+ | setting
+ ;
+
+/*
+ * Header settings. T_CHAR values are length-prefixed (see %union),
+ * so $2[1] is the first byte of the character.
+ */
+setting : T_COM_CHAR T_CHAR T_NL
+ {
+ com_char = $2[1];
+ }
+ | T_ESC_CHAR T_CHAR T_NL
+ {
+ esc_char = $2[1];
+ }
+ | T_MB_CUR_MAX T_NUMBER T_NL
+ {
+ mb_cur_max = $2;
+ }
+ | T_MB_CUR_MIN T_NUMBER T_NL
+ {
+ mb_cur_min = $2;
+ }
+ | T_CODE_SET T_NAME T_NL
+ {
+ /* ignore */
+ }
+ | T_CODE_SET string T_NL
+ {
+ /* ignore */
+ }
+ ;
+
+/* The section is bracketed by "CHARMAP" and "END CHARMAP" lines. */
+charmap : T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
+
+charmap_list : charmap_list charmap_entry
+ | charmap_entry
+ ;
+
+/*
+ * One mapping line: a single symbol, or a "first...last" range, followed
+ * by an encoding. scan_to_eol() is called after each entry is recorded.
+ * Blank lines are allowed.
+ */
+charmap_entry : T_SYMBOL T_CHAR
+ {
+ add_charmap($1, $2);
+ scan_to_eol();
+ }
+ | T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
+ {
+ add_charmap_range($1, $3, $4);
+ scan_to_eol();
+ }
+ | T_NL
+ ;
diff --git a/usr/src/cmd/iconv/scanner.c b/usr/src/cmd/iconv/scanner.c
new file mode 100644
index 0000000000..5c53695282
--- /dev/null
+++ b/usr/src/cmd/iconv/scanner.c
@@ -0,0 +1,682 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * This file contains the "scanner", which tokenizes charmap files
+ * for iconv for processing by the higher level grammar processor.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <widec.h>
+#include <sys/types.h>
+#include <assert.h>
+#include "charmap.h"
+#include "parser.tab.h"
+
+/*
+ * Scanner state.  The non-static variables are visible to other files.
+ */
+/* comment and escape characters; overridden by charmap settings */
+int com_char = '#';
+int esc_char = '\\';
+/* multibyte length limits; overridden by charmap settings */
+int mb_cur_min = 1;
+int mb_cur_max = MB_LEN_MAX;
+/* line of the most recently read character, used in diagnostics */
+int lineno = 1;
+/* number of warnings issued via warn() */
+int warnings = 0;
+/* line number that the next character read will belong to */
+static int nextline;
+/* current input stream and the name reported for it in diagnostics */
+static FILE *input = stdin;
+static const char *filename = "<stdin>";
+/* nonzero while inside a double-quoted string */
+static int instring = 0;
+/* nonzero when the previous character was the escape character */
+static int escaped = 0;
+
+/*
+ * Token space ... grows on demand.
+ */
+static char *token = NULL;
+static int tokidx;
+static int toksz = 0;
+/* nonzero once a token has been seen on the current line */
+static int hadtok = 0;
+
+/*
+ * The last keyword seen. This is useful to trigger the special lexer rules
+ * for "copy" and also collating symbols and elements.
+ */
+int last_kw = 0;
+/* current top-level section keyword, or T_END when outside any section */
+static int category = T_END;
+
+/*
+ * Reserved words recognized by consume_token(), terminated by an entry
+ * with a NULL name.
+ */
+static struct token {
+ int id;
+ const char *name;
+} keywords[] = {
+ { T_COM_CHAR, "comment_char" },
+ { T_ESC_CHAR, "escape_char" },
+ { T_END, "END" },
+
+ /*
+ * These are keywords used in the charmap file. Note that
+ * Solaris originally used angle brackets to wrap some of them,
+ * but we removed that to simplify our parser. The first of these
+ * items are "global items."
+ */
+ { T_CHARMAP, "CHARMAP" },
+ { T_WIDTH, "WIDTH" },
+ { T_WIDTH_DEFAULT, "WIDTH_DEFAULT" },
+
+ { -1, NULL },
+};
+
+/*
+ * These special words are only used in a charmap file, enclosed in <>.
+ * get_symbol() consults this table only while no section is open
+ * (category == T_END).  The table ends with an entry whose name is NULL.
+ */
+static struct token symwords[] = {
+ { T_COM_CHAR, "comment_char" },
+ { T_ESC_CHAR, "escape_char" },
+ { T_CODE_SET, "code_set_name" },
+ { T_MB_CUR_MAX, "mb_cur_max" },
+ { T_MB_CUR_MIN, "mb_cur_min" },
+ { -1, NULL },
+};
+
+/*
+ * Keyword ids that open a top-level section; the list ends with 0.
+ * consume_token() records a matching keyword in "category".
+ */
+static int categories[] = {
+ T_CHARMAP,
+ 0
+};
+
+/*
+ * Point the scanner at a new input and restore the initial lexer state.
+ *
+ * fname: path of the charmap file to read, or NULL to read stdin.
+ *
+ * A file opened by an earlier call is closed first, whether the new
+ * input is another file or stdin.  Exits with status 1 (after perror)
+ * if the file cannot be opened.  The string behind fname is kept for
+ * diagnostics, so it must outlive the scan.
+ */
+void
+reset_scanner(const char *fname)
+{
+	/* never close stdin; anything else was opened by a previous call */
+	if (input != stdin) {
+		(void) fclose(input);
+		input = stdin;
+	}
+
+	if (fname == NULL) {
+		filename = "<stdin>";
+	} else {
+		if ((input = fopen(fname, "r")) == NULL) {
+			perror(fname);
+			exit(1);
+		}
+		filename = fname;
+	}
+
+	com_char = '#';
+	esc_char = '\\';
+	instring = 0;
+	escaped = 0;
+	lineno = 1;
+	nextline = 1;
+	tokidx = 0;
+	/* a token left pending on the last line of the old input is gone */
+	hadtok = 0;
+	last_kw = 0;
+	category = T_END;
+}
+
+/*
+ * hex(x):      numeric value of the hexadecimal digit character x
+ *              (caller must have checked isxdigit(x)).
+ * isodigit(x): true if x is an octal digit character.
+ * Both evaluate their argument more than once; pass only simple values.
+ */
+#define	hex(x)	\
+	(isdigit(x) ? ((x) - '0') : \
+	((islower(x) ? ((x) - 'a') : ((x) - 'A')) + 10))
+#define	isodigit(x)	(((x) >= '0') && ((x) <= '7'))
+
+/*
+ * Read the next character from the input, keeping the line counters
+ * current.  lineno becomes the line the returned character is on;
+ * nextline advances once a newline has been read.  Returns EOF at end
+ * of input.
+ */
+static int
+scanc(void)
+{
+	int ch = getc(input);
+
+	lineno = nextline;
+	if (ch == '\n')
+		nextline++;
+	return (ch);
+}
+
+/*
+ * Push a character obtained from scanc() back onto the input, undoing
+ * the line accounting for a newline.
+ *
+ * EOF is not a character and ungetc() always rejects it, so a caller
+ * that peeked and hit end of input is simply left alone; the next read
+ * sees EOF again.  Any other failure is reported via yyerror().
+ */
+static void
+unscanc(int c)
+{
+	if (c == EOF)
+		return;
+	if (c == '\n')
+		nextline--;
+	if (ungetc(c, input) == EOF)
+		yyerror(_("ungetc failed"));
+}
+
+/*
+ * Read exactly two hexadecimal digits and return the byte they encode.
+ * A non-hex character is reported via yyerror(); 0 is returned if that
+ * call comes back.
+ */
+static int
+scan_hex_byte(void)
+{
+	int hi, lo;
+
+	hi = scanc();
+	if (!isxdigit(hi)) {
+		yyerror(_("malformed hex digit"));
+		return (0);
+	}
+
+	lo = scanc();
+	if (!isxdigit(lo)) {
+		yyerror(_("malformed hex digit"));
+		return (0);
+	}
+
+	return ((hex(hi) << 4) | hex(lo));
+}
+
+/*
+ * Read a decimal byte constant of two or three digits and return its
+ * value.  The first two digits are mandatory; a third is consumed only
+ * if it is a digit, otherwise it is pushed back.
+ *
+ * Malformed digits, and values that do not fit in one byte (more than
+ * UCHAR_MAX), are reported via yyerror(); 0 is returned if that call
+ * comes back.
+ */
+static int
+scan_dec_byte(void)
+{
+	int c1, c2, c3;
+	int b;
+
+	c1 = scanc();
+	if (!isdigit(c1)) {
+		yyerror(_("malformed decimal digit"));
+		return (0);
+	}
+	b = c1 - '0';
+
+	c2 = scanc();
+	if (!isdigit(c2)) {
+		yyerror(_("malformed decimal digit"));
+		return (0);
+	}
+	b = (b * 10) + (c2 - '0');
+
+	c3 = scanc();
+	if (isdigit(c3)) {
+		b = (b * 10) + (c3 - '0');
+	} else {
+		unscanc(c3);
+	}
+
+	/* the result is stored into a single byte; do not truncate silently */
+	if (b > UCHAR_MAX) {
+		yyerror(_("decimal value out of range"));
+		return (0);
+	}
+	return (b);
+}
+
+/*
+ * Read an octal byte constant of two or three digits and return its
+ * value.  The first two digits are mandatory; a third is consumed only
+ * if it is an octal digit, otherwise it is pushed back.
+ *
+ * Malformed digits, and values that do not fit in one byte (more than
+ * UCHAR_MAX, e.g. \777), are reported via yyerror(); 0 is returned if
+ * that call comes back.
+ */
+static int
+scan_oct_byte(void)
+{
+	int c1, c2, c3;
+	int b;
+
+	c1 = scanc();
+	if (!isodigit(c1)) {
+		yyerror(_("malformed octal digit"));
+		return (0);
+	}
+	b = c1 - '0';
+
+	c2 = scanc();
+	if (!isodigit(c2)) {
+		yyerror(_("malformed octal digit"));
+		return (0);
+	}
+	b = (b * 8) + (c2 - '0');
+
+	c3 = scanc();
+	if (isodigit(c3)) {
+		b = (b * 8) + (c3 - '0');
+	} else {
+		unscanc(c3);
+	}
+
+	/* the result is stored into a single byte; do not truncate silently */
+	if (b > UCHAR_MAX) {
+		yyerror(_("octal value out of range"));
+		return (0);
+	}
+	return (b);
+}
+
+/*
+ * Append one character to the token under construction, keeping the
+ * buffer NUL terminated.  The buffer grows in 64 byte steps.  If the
+ * allocation fails the error is reported via yyerror() and the token
+ * state is cleared.
+ */
+void
+add_tok(int c)
+{
+	/* room is needed for this character and the terminating NUL */
+	if (tokidx + 1 >= toksz) {
+		toksz += 64;
+		token = realloc(token, toksz);
+		if (token == NULL) {
+			yyerror(_("out of memory"));
+			tokidx = 0;
+			toksz = 0;
+			return;
+		}
+	}
+
+	token[tokidx] = (char)c;
+	tokidx++;
+	token[tokidx] = 0;
+}
+
+/*
+ * Try to read one numerically escaped byte: the escape character
+ * followed by d/D (decimal), x/X (hex) or an octal digit.  Returns the
+ * byte value, or EOF when the input does not start such an escape, in
+ * which case everything read here is pushed back.
+ *
+ * NOTE(review): the fall-through case pushes back two characters; ISO C
+ * only guarantees one character of ungetc() pushback -- confirm the
+ * target stdio allows more.
+ */
+static int
+get_byte(void)
+{
+	int c;
+
+	c = scanc();
+	if (c != esc_char) {
+		unscanc(c);
+		return (EOF);
+	}
+
+	c = scanc();
+	if (c == 'd' || c == 'D')
+		return (scan_dec_byte());
+	if (c == 'x' || c == 'X')
+		return (scan_hex_byte());
+	if (isodigit(c)) {
+		/* the digit is part of the value; let the octal reader see it */
+		unscanc(c);
+		return (scan_oct_byte());
+	}
+
+	/* some other escape: restore both characters */
+	unscanc(c);
+	unscanc(esc_char);
+	return (EOF);
+}
+
+/*
+ * Translate the character following an escape character into the
+ * control character it names (n, r, t, f, v, b, a).  Any other
+ * character is returned unchanged.
+ */
+int
+get_escaped(int c)
+{
+	static const struct {
+		int	name;
+		int	value;
+	} escapes[] = {
+		{ 'n', '\n' },
+		{ 'r', '\r' },
+		{ 't', '\t' },
+		{ 'f', '\f' },
+		{ 'v', '\v' },
+		{ 'b', '\b' },
+		{ 'a', '\a' },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++) {
+		if (escapes[i].name == c)
+			return (escapes[i].value);
+	}
+	return (c);
+}
+
+/*
+ * Collect a run of numerically escaped bytes into yylval.mbs as one
+ * (possibly multibyte) character.  yylval.mbs[0] receives the byte
+ * count, the bytes start at [1] and are NUL terminated.
+ *
+ * Returns T_CHAR on success.  If mb_cur_max exceeds MB_LEN_MAX, or the
+ * run is longer than mb_cur_max allows, the error is reported via
+ * yyerror() and T_NULL is returned.
+ */
+int
+get_wide(void)
+{
+	/* NB: yylval.mbs[0] is the length */
+	char *mbs = &yylval.mbs[1];
+	int mbi = 0;
+	int c;
+
+	mbs[mbi] = 0;
+	if (mb_cur_max > MB_LEN_MAX) {
+		yyerror(_("max multibyte character size too big"));
+		return (T_NULL);
+	}
+	for (;;) {
+		if ((c = get_byte()) == EOF)
+			break;
+		/*
+		 * Use >= rather than ==: mb_cur_max comes from the charmap
+		 * and may be zero or negative, in which case an equality
+		 * test would never fire and the buffer would overflow.
+		 * The decoded byte is not pushed back; it is a value, not
+		 * the input text it was read from.
+		 */
+		if (mbi >= mb_cur_max) {
+			yyerror(_("length > mb_cur_max"));
+			return (T_NULL);
+		}
+		mbs[mbi++] = c;
+		mbs[mbi] = 0;
+	}
+
+	/* result in yylval.mbs */
+	mbs[-1] = mbi;
+	return (T_CHAR);
+}
+
+/*
+ * Scan a symbolic name; the opening '<' has already been consumed.
+ * Characters up to the closing '>' form the name.  An escaped newline
+ * is dropped and any other escaped character is translated with
+ * get_escaped().
+ *
+ * Returns:
+ *   - a keyword id from symwords[] when no section is open
+ *     (category == T_END) and the name matches one; last_kw is updated;
+ *   - T_SYMBOL otherwise, with yylval.token set to a heap copy of the
+ *     name;
+ *   - T_NULL for an empty name ("<>"), after yyerror();
+ *   - EOF if the input ends before '>', after yyerror().
+ */
+int
+get_symbol(void)
+{
+	int c;
+
+	while ((c = scanc()) != EOF) {
+		if (escaped) {
+			escaped = 0;
+			if (c == '\n')
+				continue;
+			add_tok(get_escaped(c));
+			continue;
+		}
+		if (c == esc_char) {
+			escaped = 1;
+			continue;
+		}
+		if (c == '\n') {	/* well that's strange! */
+			yyerror(_("unterminated symbolic name"));
+			continue;
+		}
+		if (c == '>') {		/* end of symbol */
+			int i;
+
+			/*
+			 * Nothing was collected.  The buffer may still hold
+			 * the text of an earlier keyword, so the index must
+			 * be checked as well as the pointer.
+			 */
+			if (token == NULL || tokidx == 0) {
+				yyerror(_("missing symbolic name"));
+				return (T_NULL);
+			}
+
+			/*
+			 * This restarts the token from the beginning
+			 * the next time we scan a character. (This
+			 * token is complete.)
+			 */
+			tokidx = 0;
+
+			/*
+			 * A few symbols are handled as keywords outside
+			 * of the normal categories.
+			 */
+			if (category == T_END) {
+				for (i = 0; symwords[i].name != NULL; i++) {
+					if (strcmp(token,
+					    symwords[i].name) == 0) {
+						last_kw = symwords[i].id;
+						return (last_kw);
+					}
+				}
+			}
+
+			/* it's an undefined symbol */
+			yylval.token = strdup(token);
+			if (yylval.token == NULL) {
+				perror("malloc");
+				exit(1);
+			}
+			/* the copy is handed to the parser; drop our buffer */
+			free(token);
+			token = NULL;
+			toksz = 0;
+			tokidx = 0;
+			return (T_SYMBOL);
+		}
+		add_tok(c);
+	}
+
+	yyerror(_("unterminated symbolic name"));
+	return (EOF);
+}
+
+
+/*
+ * Classify the token accumulated in the token buffer and return its
+ * token id.  In order of precedence:
+ *   - "..."                      -> T_ELLIPSIS (last_kw is left alone)
+ *   - a word from keywords[]     -> its id; last_kw and category updated
+ *   - a decimal number           -> T_NUMBER, value in yylval.num
+ *   - a single character         -> T_CHAR, value in yylval.mbs
+ *   - anything else              -> T_NAME, heap copy in yylval.token
+ * Returns T_NULL if there is no token buffer.  Exits with status 1 if
+ * the copy for T_NAME cannot be allocated.
+ */
+static int
+consume_token(void)
+{
+	int len = tokidx;
+	int i;
+
+	tokidx = 0;
+	if (token == NULL)
+		return (T_NULL);
+
+	/*
+	 * this one is special, because we don't want it to alter the
+	 * last_kw field.
+	 */
+	if (strcmp(token, "...") == 0) {
+		return (T_ELLIPSIS);
+	}
+
+	/* search for reserved words first */
+	for (i = 0; keywords[i].name; i++) {
+		int j;
+		if (strcmp(keywords[i].name, token) != 0) {
+			continue;
+		}
+
+		last_kw = keywords[i].id;
+
+		/* clear the top level category if we're done with it */
+		if (last_kw == T_END) {
+			category = T_END;
+		}
+
+		/* set the top level category if we're changing */
+		for (j = 0; categories[j]; j++) {
+			if (categories[j] != last_kw)
+				continue;
+			category = last_kw;
+		}
+
+		return (keywords[i].id);
+	}
+
+	/* maybe it's a numeric constant? */
+	if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
+		char *eptr;
+		yylval.num = strtol(token, &eptr, 10);
+		if (*eptr != 0)
+			yyerror(_("malformed number"));
+		return (T_NUMBER);
+	}
+
+	/*
+	 * A single lone character is treated as a character literal.
+	 * To avoid duplication of effort, we stick in the charmap.
+	 */
+	if (len == 1) {
+		yylval.mbs[0] = 1;	/* length */
+		yylval.mbs[1] = token[0];
+		yylval.mbs[2] = '\0';
+		return (T_CHAR);
+	}
+
+	/* anything else is treated as a symbolic name */
+	yylval.token = strdup(token);
+	if (yylval.token == NULL) {
+		perror("malloc");
+		exit(1);
+	}
+	/* the copy is handed to the parser; drop our buffer */
+	free(token);
+	token = NULL;
+	toksz = 0;
+	tokidx = 0;
+	return (T_NAME);
+}
+
+/*
+ * Discard input up to and including the next newline.  If the input
+ * ends first, the problem is reported through errf().
+ */
+void
+scan_to_eol(void)
+{
+	int c;
+
+	for (;;) {
+		c = scanc();
+		if (c == '\n')
+			break;
+		if (c == EOF) {
+			/* end of file without newline! */
+			errf(_("missing newline"));
+			return;
+		}
+	}
+	assert(c == '\n');
+}
+
+/*
+ * The lexer entry point called by the generated parser.  Reads
+ * characters with scanc() and returns the next token id, or EOF at end
+ * of input.  Token values, where present, are left in yylval.
+ *
+ * State carried between calls: "instring" (inside a quoted string),
+ * "escaped" (previous character was the escape character), "hadtok"
+ * (a token has been seen on the current line, so its newline is
+ * significant) and the token buffer filled through add_tok().
+ */
+int
+yylex(void)
+{
+ int c;
+
+ while ((c = scanc()) != EOF) {
+
+ /* special handling for quoted string */
+ if (instring) {
+ if (escaped) {
+ escaped = 0;
+
+ /* if newline, just eat and forget it */
+ if (c == '\n')
+ continue;
+
+ /* numeric escape: re-read it as a byte sequence */
+ if (strchr("xXd01234567", c)) {
+ unscanc(c);
+ unscanc(esc_char);
+ return (get_wide());
+ }
+ yylval.mbs[0] = 1; /* length */
+ yylval.mbs[1] = get_escaped(c);
+ yylval.mbs[2] = '\0';
+ return (T_CHAR);
+ }
+ if (c == esc_char) {
+ escaped = 1;
+ continue;
+ }
+ switch (c) {
+ case '<':
+ return (get_symbol());
+ case '>':
+ /* oops! should generate syntax error */
+ return (T_GT);
+ case '"':
+ instring = 0;
+ return (T_QUOTE);
+ default:
+ yylval.mbs[0] = 1; /* length */
+ yylval.mbs[1] = c;
+ yylval.mbs[2] = '\0';
+ return (T_CHAR);
+ }
+ }
+
+ /* escaped characters first */
+ if (escaped) {
+ escaped = 0;
+ if (c == '\n') {
+ /* eat the newline */
+ continue;
+ }
+ hadtok = 1;
+ if (tokidx) {
+ /* an escape mid-token is nonsense */
+ return (T_NULL);
+ }
+
+ /* numeric escapes are treated as wide characters */
+ if (strchr("xXd01234567", c)) {
+ unscanc(c);
+ unscanc(esc_char);
+ return (get_wide());
+ }
+
+ add_tok(get_escaped(c));
+ continue;
+ }
+
+ /* if it is the escape character itself note it */
+ if (c == esc_char) {
+ escaped = 1;
+ continue;
+ }
+
+ /* remove from the comment char to end of line */
+ if (c == com_char) {
+ while (c != '\n') {
+ if ((c = scanc()) == EOF) {
+ /* end of file without newline! */
+ return (EOF);
+ }
+ }
+ assert(c == '\n');
+ if (!hadtok) {
+ /*
+ * If there were no tokens on this line,
+ * then just pretend it didn't exist at all.
+ */
+ continue;
+ }
+ hadtok = 0;
+ return (T_NL);
+ }
+
+ if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
+ /*
+ * These are all token delimiters. If there
+ * is a token already in progress, we need to
+ * process it.
+ */
+ unscanc(c);
+ return (consume_token());
+ }
+
+ /* no token in progress: act on the character itself */
+ switch (c) {
+ case '\n':
+ if (!hadtok) {
+ /*
+ * If the line was completely devoid of tokens,
+ * then just ignore it.
+ */
+ continue;
+ }
+ /* we're starting a new line, reset the token state */
+ hadtok = 0;
+ return (T_NL);
+ case ',':
+ hadtok = 1;
+ return (T_COMMA);
+ case ';':
+ hadtok = 1;
+ return (T_SEMI);
+ case '(':
+ hadtok = 1;
+ return (T_LPAREN);
+ case ')':
+ hadtok = 1;
+ return (T_RPAREN);
+ case '>':
+ hadtok = 1;
+ return (T_GT);
+ case '<':
+ /* symbol start! */
+ hadtok = 1;
+ return (get_symbol());
+ case ' ':
+ case '\t':
+ /* whitespace, just ignore it */
+ continue;
+ case '"':
+ hadtok = 1;
+ instring = 1;
+ return (T_QUOTE);
+ default:
+ /* ordinary character: start or extend a token */
+ hadtok = 1;
+ add_tok(c);
+ continue;
+ }
+ }
+ return (EOF);
+}
+
+/*
+ * Report a fatal error at the current file and line on stderr and
+ * terminate the program with exit status 1.  Does not return.
+ */
+void
+yyerror(const char *msg)
+{
+	const char *layout = _("%s: %d: error: %s\n");
+
+	(void) fprintf(stderr, layout, filename, lineno, msg);
+	exit(1);
+}
+
+/*
+ * printf-style variant of yyerror(): format the message, report it
+ * with the current file and line on stderr, and exit with status 1.
+ *
+ * If the message cannot be formatted (vasprintf() fails, leaving the
+ * result pointer unspecified), the raw format string is printed
+ * instead so the diagnostic is not lost.
+ */
+void
+errf(const char *fmt, ...)
+{
+	char *msg = NULL;
+	va_list va;
+
+	va_start(va, fmt);
+	if (vasprintf(&msg, fmt, va) < 0)
+		msg = NULL;
+	va_end(va);
+
+	(void) fprintf(stderr, _("%s: %d: error: %s\n"),
+	    filename, lineno, (msg != NULL) ? msg : fmt);
+	free(msg);
+	exit(1);
+}
+
+/*
+ * Format a printf-style warning, report it with the current file and
+ * line on stderr, and bump the global warning counter.  Unlike errf()
+ * this returns to the caller.
+ *
+ * If the message cannot be formatted (vasprintf() fails, leaving the
+ * result pointer unspecified), the raw format string is printed
+ * instead so the diagnostic is not lost.
+ */
+void
+warn(const char *fmt, ...)
+{
+	char *msg = NULL;
+	va_list va;
+
+	va_start(va, fmt);
+	if (vasprintf(&msg, fmt, va) < 0)
+		msg = NULL;
+	va_end(va);
+
+	(void) fprintf(stderr, _("%s: %d: warning: %s\n"),
+	    filename, lineno, (msg != NULL) ? msg : fmt);
+	free(msg);
+	warnings++;
+}
diff --git a/usr/src/pkg/manifests/system-test-utiltest.mf b/usr/src/pkg/manifests/system-test-utiltest.mf
index 6d0ec62ca3..9a4ddd4efa 100644
--- a/usr/src/pkg/manifests/system-test-utiltest.mf
+++ b/usr/src/pkg/manifests/system-test-utiltest.mf
@@ -30,6 +30,7 @@ file path=opt/util-tests/bin/print_json mode=0555
file path=opt/util-tests/bin/utiltest mode=0555
file path=opt/util-tests/runfiles/default.run mode=0444
file path=opt/util-tests/tests/allowed-ips mode=0555
+file path=opt/util-tests/tests/iconv_test mode=0555
file path=opt/util-tests/tests/libnvpair_json/json_00_blank mode=0555
file path=opt/util-tests/tests/libnvpair_json/json_01_boolean mode=0555
file path=opt/util-tests/tests/libnvpair_json/json_02_numbers mode=0555
@@ -42,4 +43,5 @@ file path=opt/util-tests/tests/libnvpair_json/json_common mode=0555
file path=opt/util-tests/tests/printf_test mode=0555
file path=opt/util-tests/tests/xargs_test mode=0555
license lic_CDDL license=lic_CDDL
+depend fmri=system/library/iconv/utf-8 type=require
depend fmri=system/test/testrunner type=require
diff --git a/usr/src/test/util-tests/tests/Makefile b/usr/src/test/util-tests/tests/Makefile
index 4709c7adcd..e12ab73c58 100644
--- a/usr/src/test/util-tests/tests/Makefile
+++ b/usr/src/test/util-tests/tests/Makefile
@@ -14,7 +14,6 @@
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
#
-SUBDIRS = dladm printf xargs
-SUBDIRS = dladm libnvpair_json printf xargs
+SUBDIRS = dladm iconv libnvpair_json printf xargs
include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/util-tests/tests/iconv/Makefile b/usr/src/test/util-tests/tests/iconv/Makefile
new file mode 100644
index 0000000000..c0fb8a9940
--- /dev/null
+++ b/usr/src/test/util-tests/tests/iconv/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+# Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+# Test script to generate; NAME is produced by copying NAME.sh.
+SHFILES = iconv_test
+
+# Where the generated script is installed.
+ROOTBIN = $(ROOT)/opt/util-tests
+TESTDIR = $(ROOTBIN)/tests
+
+CMDS = $(SHFILES:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+# PROG is not defined in this makefile, so build the script itself.
+all: $(SHFILES)
+
+# Note: this rule assumes SHFILES names a single script.
+$(SHFILES): $(SHFILES).sh
+	-$(RM) $(SHFILES)
+	$(CP) $(SHFILES).sh $(SHFILES)
+
+install: all $(CMDS)
+
+lint:
+
+clobber: clean
+	-$(RM) $(SHFILES)
+
+clean:
+
+$(CMDS): $(TESTDIR)
+
+$(TESTDIR):
+	$(INS.dir)
+
+$(TESTDIR)/%: %
+	$(INS.file)
diff --git a/usr/src/test/util-tests/tests/iconv/iconv_test.sh b/usr/src/test/util-tests/tests/iconv/iconv_test.sh
new file mode 100644
index 0000000000..e22bce7099
--- /dev/null
+++ b/usr/src/test/util-tests/tests/iconv/iconv_test.sh
@@ -0,0 +1,111 @@
+#!/bin/sh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+#
+
+ICONV=${ICONV:-/usr/bin/iconv}
+#ICONV=${ROOT}/usr/bin/iconv
+
+# check NAME FILE_A FILE_B
+# Print "TEST PASS: NAME" when the two files are byte-identical.
+# Otherwise print "TEST FAIL: NAME" and exit the script with status 1.
+check() {
+	if cmp -s "$2" "$3" ; then
+		echo "TEST PASS: $1"
+		return
+	fi
+	echo "TEST FAIL: $1"
+	exit 1
+}
+
+
+# fromcs, tocs, in, out
+# Write the input and the expected output to scratch files in the
+# current directory, run $ICONV from codeset $1 to codeset $2 on the
+# input, and compare the result with the expectation via check().
+# The scratch files (in, o1, o2) are removed afterwards.
+# NOTE(review): the callers pass backslash escapes in $3/$4, so this
+# depends on how the shell's echo treats them -- confirm for the
+# target /bin/sh.
+test_conv() {
+ echo "$3" > in
+ echo "$4" > o1
+ $ICONV -f "$1" -t "$2" < in > o2
+ check "${1}:${2}" o1 o2
+ rm in o1 o2
+}
+
+# Emit a charmap named "one" on stdout: <NULL> plus one entry <c1-XY>
+# for every XY from 80 to ff, each given the single-byte escape \xXY.
+mkmap_one() {
+ echo '<code_set_name> one'
+ echo 'CHARMAP'
+ echo '<NULL>\t\x00'
+ # high nibble 8..f, low nibble 0..f
+ for i in 8 9 a b c d e f
+ do
+ for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ do
+ echo "<c1-$i$j>\t\x$i$j"
+ done
+ done
+ echo 'END CHARMAP'
+}
+
+# Emit a charmap named "two" on stdout: the same symbols as mkmap_one,
+# but each <c1-XY> is given the two-byte escape sequence \x20\xXY.
+mkmap_two() {
+ echo '<code_set_name> two'
+ echo 'CHARMAP'
+ echo '<NULL>\t\x00'
+ # high nibble 8..f, low nibble 0..f
+ for i in 8 9 a b c d e f
+ do
+ for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ do
+ echo "<c1-$i$j>\t\x20\x$i$j"
+ done
+ done
+ echo 'END CHARMAP'
+}
+
+# write 1023 bytes of space
+wr1023() {
+ n=1023
+ while [[ $n -gt 0 ]]; do
+ echo ' \c'
+ ((n-=1))
+ done
+}
+
+# two-byte utf-8 crossing 1024 byte boundary
+# 1023 spaces from wr1023 followed by the echo of octal escapes 303 240.
+# NOTE(review): relies on echo expanding \0NNN escapes -- confirm for
+# the target /bin/sh.
+mkbuf_utf8() {
+ wr1023
+ echo '\0303\0240'
+}
+
+# one-byte 8859-1 at 1024 byte boundary
+# 1023 spaces from wr1023 followed by the echo of octal escape 340.
+# NOTE(review): relies on echo expanding \0NNN escapes -- confirm for
+# the target /bin/sh.
+mkbuf_8859() {
+ wr1023
+ echo '\0340'
+}
+
+# Test some simple, built-in conversions
+
+test_conv ascii utf-8 abcdef abcdef
+test_conv utf-8 ascii abcdef abcdef
+test_conv ascii ucs-2le abc 'a\0b\0c\0\n\0\c'
+test_conv ucs-2le ascii 'a\0b\0c\0\n\0\c' abc
+
+# Test user-provided charmap
+
+mkmap_one > one.cm
+mkmap_two > two.cm
+test_conv ./one.cm ./two.cm '\0200\0201\0202\c' ' \0200 \0201 \0202\c'
+rm one.cm two.cm
+
+# test crossing 1024 byte buffer boundary
+
+mkbuf_utf8 > in
+mkbuf_8859 > o1
+$ICONV -f UTF-8 -t 8859-1 < in > o2
+check "boundary" o1 o2
+rm in o1 o2
+
+exit 0