summaryrefslogtreecommitdiff
path: root/usr/src/cmd/ast/msgcc
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/ast/msgcc')
-rw-r--r--usr/src/cmd/ast/msgcc/Makefile75
-rw-r--r--usr/src/cmd/ast/msgcc/NOTES65
-rw-r--r--usr/src/cmd/ast/msgcc/PROMO.mm22
-rw-r--r--usr/src/cmd/ast/msgcc/msgcc.sh405
-rw-r--r--usr/src/cmd/ast/msgcc/msgcc.tst28
-rw-r--r--usr/src/cmd/ast/msgcc/msgcpp.c289
-rw-r--r--usr/src/cmd/ast/msgcc/msgcvt.c691
-rw-r--r--usr/src/cmd/ast/msgcc/msggen.c522
-rw-r--r--usr/src/cmd/ast/msgcc/msgget.c109
9 files changed, 2206 insertions, 0 deletions
diff --git a/usr/src/cmd/ast/msgcc/Makefile b/usr/src/cmd/ast/msgcc/Makefile
new file mode 100644
index 0000000000..b11651278a
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/Makefile
@@ -0,0 +1,75 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+SHELL=/usr/bin/ksh
+
+include ../../Makefile.cmd
+
+.KEEP_STATE:
+
+# Set common AST build flags (e.g., needed to support the math stuff).
+include ../../../Makefile.ast
+
+# build rules
+CPPFLAGS = \
+ $(DTEXTDOM) $(DTS_ERRNO) \
+ -I$(ROOT)/usr/include/ast \
+ -D_PACKAGE_ast \
+ '-DUSAGE_LICENSE=\
+ "[-author?Glenn Fowler <gsf@research.att.com>]"\
+ "[-copyright?Copyright (c) 2000-2007 AT&T Knowledge Ventures]"\
+ "[-license?http://www.opensource.org/licenses/cpl1.0.txt]"\
+ "[--catalog?msgcc]"'
+
+CFLAGS += \
+ $(CCVERBOSE) \
+ -xstrconst
+
+LDLIBS += -last
+msgcpp := LDLIBS += -lpp
+
+msgcc: msgcc.sh
+ rm -f msgcc ; \
+ ( \
+ print "#!/usr/bin/ksh93" ; \
+ print "export PATH=/usr/ast/bin:/usr/xpg6/bin:/usr/xpg4/bin:/usr/bin:\$${PATH}" ; \
+ print "builtin date" ; \
+ cat "msgcc.sh" ; \
+ ) >msgcc ; \
+ chmod a+rx msgcc
+
+ROOTCMDDIR=$(ROOT)/usr/ast/bin
+
+PROG= msgcvt msggen msgget msgcpp msgcc
+
+all: $(PROG)
+
+install: all $(ROOTCMD)
+
+clean lint:
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/ast/msgcc/NOTES b/usr/src/cmd/ast/msgcc/NOTES
new file mode 100644
index 0000000000..864a3a206c
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/NOTES
@@ -0,0 +1,65 @@
+2000-04-01 ast message catalog plan
+
+(1) error_info.dictionary should be error_info.catalog
+ to match xopen and the internal naming in our implementation
+ and also to more closely match the webster definition
+ (catalog == enumerated list)
+
+ DONE
+
+(2) nmake by default will
+ CATALOG = $(ID:N=+([A-Za-z0-9_]):?$(PWD:N=*/lib/*:Y,lib,,)$(ID)?$(PWD:B)?)
+ ERROR_CATALOG == "$(CATALOG)"
+ e.g., commands in src/cmd/std will use the "std" catalog; all of the
+ commands in src/lib/libcmd will use the "libcmd" catalog
+
+ nmake will add "[--catalog?$(CATALOG)]" to USAGE_LICENSE
+ optget() will set error_info.catalog if not defined on the first call
+ commands that don't emit messages before optget() need not change
+ otherwise the command should
+
+ error_info.id = "foo";
+ error_info.catalog = ERROR_CATALOG;
+
+ undefined references to { USAGE_LICENSE ERROR_CATALOG } are hard
+ compile time errors
+
+ DONE
+
+(3) add catalog argument to libcmd <cmd.h> cmdinit(argv, context, catalog)
+
+ DONE
+
+(4) msgcat global target build msgs/*.mso and $(CATALOG).msg
+ each Makefile will generate one catalog $(CATALOG).msg where
+
+ the *.msg files are weird -- we need to build them viewed over an
+ architecture specific tree, even though they will be eventually used
+ as architecture independent source
+
+ $(CATALOG).msg will be the "C" locale
+
+ debug will be a debugging locale that will translate each message to
+ (CATALOG-NAME:MESSAGE-INDEX)\n
+ this will make it easy to locate text that escaped translation (in what
+ should be translated output); it will also be a way for us to do
+ regression tests in the face of typo fixes -- presumably typos can be
+ fixed without changing the message index
+
+ see msgadmin(1)
+
+ DONE
+
+(5) once all this is working I'll do catopen(3) and msggen(1)
+
+ DONE
+
+(6) the makerules "all" action will
+ catgen $(CATALOG).cat
+ catgen $(CATALOG)-*.cat
+ and the makerules "install" action will copy the catgen output to
+ $(LOCALEDIR)/$(LOCALE)/LC_MESSAGES/$(CATALOG)*
+ where
+ LOCALEDIR = $(INSTALLROOT)/lib/locale
+
+ NOTE: still under consideration
diff --git a/usr/src/cmd/ast/msgcc/PROMO.mm b/usr/src/cmd/ast/msgcc/PROMO.mm
new file mode 100644
index 0000000000..d39264185e
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/PROMO.mm
@@ -0,0 +1,22 @@
+.H 1 msgcc
+.B msgcc
+and
+.B msgcpp
+extract message text from C source for
+.BR gencat (1)
+message catalogs.
+.BR msggen (1)
+is a
+.BR gencat (1)
+replacement that generates machine independent binary message
+catalogs that are compatible with the
+.B ast
+.BR catgets (3)
+implementation.
+.B catgets
+also supports native message catalogs where available.
+.BR msgcvt (1)
+and
+.BR msgadmin (1)
+are administrative commands that support machine translation
+of C locale message catalogs.
diff --git a/usr/src/cmd/ast/msgcc/msgcc.sh b/usr/src/cmd/ast/msgcc/msgcc.sh
new file mode 100644
index 0000000000..aad98f7c1f
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/msgcc.sh
@@ -0,0 +1,405 @@
+########################################################################
+# #
+# This software is part of the ast package #
+# Copyright (c) 2000-2007 AT&T Knowledge Ventures #
+# and is licensed under the #
+# Common Public License, Version 1.0 #
+# by AT&T Knowledge Ventures #
+# #
+# A copy of the License is available at #
+# http://www.opensource.org/licenses/cpl1.0.txt #
+# (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) #
+# #
+# Information and Software Systems Research #
+# AT&T Research #
+# Florham Park NJ #
+# #
+# Glenn Fowler <gsf@research.att.com> #
+# #
+########################################################################
+: C language message catalog compiler
+
+# NOTE: all variable names match __*__ to avoid clash with msgcpp def vars
+
+__command__=msgcc
+integer __similar__=30
+
+case `(getopts '[-][123:xyz]' opt --xyz; echo 0$opt) 2>/dev/null` in
+0123) ARGV0="-a $__command__"
+ USAGE=$'
+[-?
+@(#)$Id: msgcc (AT&T Labs Research) 2002-09-15 $
+]
+'$USAGE_LICENSE$'
+[+NAME?msgcc - C language message catalog compiler]
+[+DESCRIPTION?\bmsgcc\b is a C language message catalog compiler. It accepts
+ \bcc\b(1) style options and arguments. A \bmsgcpp\b(1) \b.mso\b file
+ is generated for each input \b.c\b file. If the \b-c\b option is not
+ specified then a \bgencat\b(1) format \b.msg\b file is generated from
+ the input \b.mso\b and \b.msg\b files. If \b-c\b is not specified then
+ a \b.msg\b suffix is appended to the \b-o\b \afile\a if it doesn\'t
+ already have a suffix. The default output is \ba.out.msg\b if \b-c\b
+ and \b-o\b are not specified.]
+[+?If \b-M-new\b is not specified then messages are merged with those in the
+ pre-existing \b-o\b file.]
+[M?Set a \bmsgcc\b specific \aoption\a. \aoption\a may be:]:[-option]{
+ [+mkmsgs?The \b-o\b file is assumed to be in \bmkmsgs\b(1) format.]
+ [+new?Create a new \b-o\b file.]
+ [+preserve?Messages in the \b-o\b file that are not in new
+ \b.msg\b file arguments are preserved. The default is to
+ either reuse the message numbers with new message text that
+ is similar to the old or to delete the message text, leaving
+ an unused message number.]
+ [+set=\anumber\a?Set the message set number to \anumber\a. The default
+ is \b1\b.]
+ [+similar=\anumber\a?The message text similarity measure threshold.
+ The similarity measure between \aold\a and \anew\a message
+ text is 100*(2*gzip(\aold\a+\anew\a)/(gzip(\aold\a)+gzip(\anew\a))-1),
+ where gzip(\ax\a) is the size of text \ax\a when compressed by
+ \bgzip\b(1). The default threshold is '$__similar__$'. A
+ threshold of \b0\b turns off message replacement, but unused
+ old messages are still deleted. Use \b-M-preserve\b to preserve
+ all old messages.]
+ [+verbose?Trace similar message replacements on the standard error.]
+}
+
+file ...
+
+[+SEE ALSO?\bcc\b(1), \bcpp\b(1), \bgencat\b(1), \bmsggen\b(1),
+ \bmsgcpp\b(1), \bmsgcvt\b(1)]
+'
+ ;;
+*) ARGV0=""
+ USAGE="M:[-option] [ cc-options ] file ..."
+ ;;
+esac
+# usage: have getopts process the single argument '-?' against $USAGE (in ksh93 this emits the usage message), then exit with status 2
+usage()
+{
+ OPTIND=0
+ getopts $ARGV0 "$USAGE" OPT '-?' # $ARGV0 is either empty or "-a msgcc"; it is left unquoted so it splits into separate words
+ exit 2
+}
+# keys: run the command named by $1 with "--??keys --" and keep only the output lines that consist of one double-quoted string
+keys()
+{
+ $1 --??keys -- 2>&1 | grep '^".*"$' # 2>&1 merges the command's standard error into what grep filters
+}
+
+typeset -A __index__
+typeset __keep__ __text__ __drop__ __oz__ __nz__ __z__ __hit__ __hit_i__
+typeset __compile__ __debug__ __mkmsgs__ __preprocess__
+typeset __merge__=1 __preserve__ __verbose__
+integer __i__=0 __args__=0 __code__=0 __files__=0 __max__=0 __num__=0 __skip__=0
+integer __set__=1 __sources__=0 __cmds__=0 __ndrop__=0 __new__=0 __old__=0
+__out__=a.out.msg
+__OUT__=
+
+case " $* " in
+*" --"*|*" -?"*)
+ while getopts $ARGV0 "$USAGE" OPT
+ do case $OPT in
+ *) break ;;
+ esac
+ done
+ ;;
+esac
+while :
+do case $# in
+ 0) break ;;
+ esac
+ __arg__=$1
+ case $__arg__ in
+ -c) __compile__=1
+ ;;
+ -[DIU]*)__argv__[__args__]=$__arg__
+ (( __args__++ ))
+ ;;
+ -E) __preprocess__=1
+ ;;
+ -M-debug)
+ __debug__=1
+ ;;
+ -M-mkmsgs)
+ __mkmsgs__=1
+ ;;
+ -M-new) __merge__=
+ ;;
+ -M-perserve)
+ __preserve__=1
+ ;;
+ -M-set=*)
+ __set__=$(msggen -s ${__arg__#*=}.1)
+ ;;
+ -M-similar=*)
+ __similar__=${__arg__#*=}
+ ;;
+ -M-verbose)
+ __verbose__=1
+ ;;
+ -o) case $# in
+ 1) print -u2 $"$__command__: output argument expected"
+ exit 1
+ ;;
+ esac
+ shift
+ __out__=${1%.*}.msg
+ __OUT__=$1
+ ;;
+ [-+]*|*.[aAlLsS]*)
+ ;;
+ *.[cCiI]*|*.[oO]*)
+ case $__arg__ in
+ *.[oO]*);;
+ *) __srcv__[__files__]=$__arg__
+ (( __sources__++ ))
+ ;;
+ esac
+ __arg__=${__arg__##*/}
+ __arg__=${__arg__%.*}.mso
+ __objv__[__files__]=$__arg__
+ (( __files__++ ))
+ ;;
+ *.ms[go])
+ __objv__[__files__]=$__arg__
+ (( __files__++ ))
+ ;;
+ *) __cmdv__[__cmds__]=$__arg__
+ (( __cmds__++ ))
+ ;;
+ esac
+ shift
+done
+__cmdv__[__cmds__]=${__out__%.msg}
+(( __cmds__++ ))
+
+# generate the .mso files
+
+if [[ $__OUT__ && $__compile__ ]]
+then __objv__[0]=$__OUT__
+fi
+
+if (( __sources__ )) # only when at least one source file was named
+then for (( __i__=0; __i__<=__files__; __i__++ )) # __srcv__ is indexed by file position, so slots for non-source files are empty
+ do if [[ ${__srcv__[__i__]} ]]
+ then if (( __sources__ > 1 ))
+ then print "${__srcv__[__i__]}:" # name each source on stdout when there is more than one
+ fi
+ if [[ $__preprocess__ ]]
+ then msgcpp "${__argv__[@]}" "${__srcv__[__i__]}" # -E: msgcpp output is left on standard output
+ else msgcpp "${__argv__[@]}" "${__srcv__[__i__]}" > "${__objv__[__i__]}" # otherwise it is written to the matching .mso file
+ fi
+ fi
+ done
+fi
+
+# combine the .mso and .msg files
+
+if [[ ! $__compile__ && ! $__preprocess__ ]] # the catalog is only built when neither -c nor -E was given
+then if [[ $__merge__ && -r $__out__ ]] # merging (no -M-new) and a readable previous catalog exists
+ then __tmp__=$__out__.tmp # the new catalog is built in a temporary file first
+ trap '__code__=$?; rm -f ${__tmp__}*; exit $__code__' 0 1 2 # remove the temporary files on exit, keeping the exit status
+ while read -r __line__ # load the previous catalog: __index__ maps text to number, __text__ maps number to text
+ do if (( $__skip__ ))
+ then if [[ $__line__ == '%}'* ]]
+ then __skip__=0
+ fi
+ continue
+ fi
+ if [[ $__mkmsgs__ && $__line__ == '%{'* ]] # mkmsgs format: ignore everything from a %{ line through the next %} line
+ then __skip__=1
+ continue
+ fi
+ if [[ $__mkmsgs__ ]]
+ then if [[ $__line__ == '%#'*';;'* ]] # "%#N;;" carries the message number; the message text is the following line
+ then __line__=${__line__#'%#'}
+ __num__=${__line__%';;'*}
+ read -r __line__
+ elif [[ $__line__ == %* ]]
+ then continue
+ else print -u2 $"$__command__: unrecognized line=$__line__"
+ __code__=1
+ fi
+ else case $__line__ in
+ +([0-9])' '*)
+ __num__=${__line__%%' '*}
+ __line__=${__line__#*'"'}
+ __line__=${__line__%'"'}
+ ;;
+ *) continue
+ ;;
+ esac
+ fi
+ __index__["$__line__"]=$__num__
+ __text__[$__num__]=$__line__
+ if (( __max__ < __num__ ))
+ then (( __max__=__num__ ))
+ fi
+ done < $__out__
+ (( __new__=__max__+1 )) # message numbers from __new__ upward were not in the previous catalog
+ else __tmp__=$__out__
+ (( __new__=1 ))
+ fi
+ if (( __code__ ))
+ then exit $__code__
+ fi
+ exec 1>$__tmp__ 9>&1 # standard output now writes the catalog; fd 9 is a duplicate of it
+ print -r -- '$'" ${__out__%.msg} message catalog"
+ print -r -- '$translation'" $__command__ $(date +%Y-%m-%d)"
+ print -r -- '$set'" $__set__"
+ print -r -- '$quote "'
+ sort -u "${__objv__[@]}" | {
+ while read -r __line__
+ do __op__=${__line__%% *}
+ __line__=${__line__#* }
+ case $__op__ in
+ cmd) __a1__=${__line__%% *}
+ case $__a1__ in
+ dot_cmd) __a1__=. ;;
+ esac
+ keys $__a1__
+ ;;
+ def) __a1__=${__line__%% *}
+ __a2__=${__line__#* }
+ eval $__a1__='$'__a2__ # remember the def value in a shell variable named after the C variable
+ ;;
+ str) print -r -- "$__line__" # candidate message text goes to the next pipeline stage
+ ;;
+ var) __a1__=${__line__%% *}
+ __a2__=${__line__#* }
+ case $__a1__ in
+ [[:digit:]]*)
+ eval __v__='$'$__a2__
+ __v__='"'${__v__:__a1__+1} # "var N name": drop the first N characters of the recorded string (after its quote)
+ ;;
+ *) eval __v__='$'$__a1__
+ ;;
+ esac
+ if [[ $__v__ == '"'*'"' ]]
+ then print -r -- "$__v__"
+ fi
+ ;;
+ [[:digit:]]*)
+ [[ $__preserve__ ]] && print -r -- "$__line__" # lines whose first word is a number pass through only when __preserve__ is set
+ ;;
+ '$') print -r -u9 $__op__ include $__line__
+ ;;
+ esac
+ done
+ for (( __i__=0; __i__ < __cmds__; __i__++ )) # also collect the key strings of the commands named on the command line
+ do keys ${__cmdv__[__i__]}
+ done
+ } | {
+ __num__=1
+ while read -r __line__
+ do case $__line__ in
+ '$'[\ \ ]*)
+ print -r -- "$__line__"
+ continue
+ ;;
+ '$'*|*"@(#)"*|*"<"*([[:word:] .-])"@"*([[:word:] .-])">"*([ ])'"'|"http://"*)
+ continue # other '$' lines, @(#) ident strings, <...@...> address-like text and lines starting with http:// are not catalogued
+ ;;
+ *[[:alpha:]][[:alpha:]]*)
+ __line__=${__line__#*'"'} # only lines containing two adjacent letters are considered; strip the surrounding quotes
+ __line__=${__line__%'"'}
+ if [[ $__line__ ]]
+ then if [[ ${__index__["$__line__"]} ]] # text already has a number
+ then if [[ ! $__preserve__ ]]
+ then __num__=${__index__["$__line__"]}
+ __keep__[$__num__]=1
+ fi
+ else while [[ ${__text__[$__num__]} ]] # new text: advance to the next unused message number
+ do (( __num__++ ))
+ done
+ if (( __max__ < __num__ ))
+ then (( __max__=__num__ ))
+ fi
+ if [[ ! $__preserve__ ]]
+ then __keep__[$__num__]=1
+ fi
+ __text__[$__num__]=$__line__
+ __index__["$__line__"]=$__num__
+ (( __num__++ ))
+ fi
+ fi
+ ;;
+ esac
+ done
+ if (( __max__ < __num__ ))
+ then (( __max__=__num__ ))
+ fi
+ if [[ $__debug__ ]] # -M-debug: list every message on standard error tagged HUH, NEW, OLD or XXX, then exit
+ then for (( __num__=1; __num__<=__max__; __num__++ ))
+ do if [[ ${__text__[$__num__]} ]]
+ then if (( __num__ > __new__ ))
+ then if [[ ! ${__keep__[$__num__]} ]]
+ then print -r -u2 -- $__num__ HUH '"'"${__text__[$__num__]}"'"'
+ else print -r -u2 -- $__num__ NEW '"'"${__text__[$__num__]}"'"'
+ fi
+ elif [[ ${__keep__[$__num__]} ]]
+ then print -r -u2 -- $__num__ OLD '"'"${__text__[$__num__]}"'"'
+ else print -r -u2 -- $__num__ XXX '"'"${__text__[$__num__]}"'"'
+ fi
+ fi
+ done
+ exit 0
+ fi
+ # check for replacements
+ if [[ ! $__preserve__ ]]
+ then for (( __num__=1; __num__<__new__; __num__++ )) # collect the old numbers whose text was not seen again
+ do if [[ ${__text__[$__num__]} && ! ${__keep__[$__num__]} ]]
+ then (( __ndrop__++ ))
+ __drop__[__ndrop__]=$__num__
+ fi
+ done
+ [[ $__verbose__ ]] && print -u2 $__command__: old:1-$((__new__-1)) new:$__new__-$__max__ drop $__ndrop__ add $((__max__-__new__+1))
+ if (( __ndrop__ ))
+ then for (( __i__=1; __i__<=__ndrop__; __i__++ ))
+ do (( __old__=${__drop__[$__i__]} ))
+ __oz__[__i__]=$(print -r -- "\"${__text__[$__old__]}\"" | gzip | wc -c) # gzip size of each dropped message, computed once
+ done
+ for (( __num__=__new__; __num__<=__max__; __num__++ ))
+ do [[ ${__text__[$__num__]} ]] || continue
+ __nz__=$(print -r -- "\"${__text__[$__num__]}\"" | gzip | wc -c)
+ __hit__=0
+ (( __bz__=__similar__ ))
+ for (( __i__=1; __i__<=__ndrop__; __i__++ ))
+ do if (( __old__=${__drop__[$__i__]} )) # entries set to 0 have already been reused
+ then __z__=$(print -r -- "\"${__text__[$__old__]}\"""\"${__text__[$__num__]}\"" | gzip | wc -c)
+ (( __z__ = (__z__ * 200 / (${__oz__[__i__]} + $__nz__)) - 100 )) # the similarity measure described in the usage text
+ if (( __z__ < __bz__ )) # smallest measure below the threshold wins
+ then (( __bz__=__z__ ))
+ (( __hit__=__old__ ))
+ (( __hit_i__=__i__ ))
+ fi
+ fi
+ done
+ if (( __hit__ ))
+ then [[ $__verbose__ ]] && print -u2 $__command__: $__hit__ $__num__ $__bz__
+ __text__[$__hit__]=${__text__[$__num__]} # the new text takes over the old message number
+ __keep__[$__hit__]=1
+ __drop__[$__hit_i__]=0
+ __text__[$__num__]=
+ __keep__[$__num__]=
+ fi
+ done
+ fi
+ fi
+ # final output
+ for (( __num__=1; __num__<=__max__; __num__++ ))
+ do if [[ ${__text__[$__num__]} && ( $__preserve__ || ${__keep__[$__num__]} ) ]]
+ then print -r -- $__num__ "\"${__text__[$__num__]}\""
+ fi
+ done
+ }
+ if [[ $__tmp__ != $__out__ ]] # a previous catalog was merged: install the new one only if it differs
+ then grep -v '^\$' $__tmp__ > ${__tmp__}n
+ [[ -f $__out__ ]] && grep -v '^\$' $__out__ > ${__tmp__}o
+ cmp -s ${__tmp__}n ${__tmp__}o || { # the comparison ignores lines starting with '$'
+ [[ -f $__out__ ]] && mv $__out__ $__out__.old
+ mv $__tmp__ $__out__
+ }
+ fi
+fi
+exit $__code__
diff --git a/usr/src/cmd/ast/msgcc/msgcc.tst b/usr/src/cmd/ast/msgcc/msgcc.tst
new file mode 100644
index 0000000000..101b05de5b
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/msgcc.tst
@@ -0,0 +1,28 @@
+# regression tests for the msgcc utility
+
+TEST 01 'basics'
+ EXEC -c t.c
+ NOTE 'pp:allpossible'
+ INPUT t.c $'
+ #include <foo-bar.h>
+ void f(void)
+ {
+ #if 0
+ error(1, "foo bar");
+ #else
+ errormsg(locale, 2, "%s: bar foo");
+ #endif
+ }
+ '
+ OUTPUT t.mso $'str "foo bar"\nstr "%s: bar foo"'
+ OUTPUT -
+ EXEC -Dfprintf=_STDIO_ -c t.c
+ NOTE 'ignore readonly redefinitions'
+ INPUT t.c $'
+ #define stderr foo
+ void f(void)
+ {
+ fprintf(stderr, "foo bar");
+ }
+ '
+ OUTPUT t.mso $'str "foo bar"'
diff --git a/usr/src/cmd/ast/msgcc/msgcpp.c b/usr/src/cmd/ast/msgcc/msgcpp.c
new file mode 100644
index 0000000000..677065f6cf
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/msgcpp.c
@@ -0,0 +1,289 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 2000-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ *
+ * C message catalog preprocessor
+ */
+
+static const char usage[] =
+"[-?\n@(#)$Id: msgcpp (AT&T Research) 2002-03-11 $\n]"
+USAGE_LICENSE
+"[+NAME?msgcpp - C language message catalog preprocessor]"
+"[+DESCRIPTION?\bmsgcpp\b is a C language message catalog preprocessor."
+" It accepts \bcpp\b(1) style options and arguments. \bmsgcpp\b"
+" preprocesses an input C source file and emits keyed lines to the"
+" output, usually for further processing by \bmsgcc\b(1). \bmsgcc\b"
+" output is in the \bgencat\b(1) syntax. Candidate message text is"
+" determined by arguments to the \bast\b \b<error.h>\b and"
+" \b<option.h>\b functions. The \bmsgcpp\b keyed output lines are:]{"
+" [+cmd \acommand\a?\acommand\a is a candidate for \b--??keys\b"
+" option string generation. Triggered by"
+" \bb_\b\acommand\a\b(int argc,\b in the input.]"
+" [+def \aname\a \astring\a?\aname\a is a candidate variable with"
+" string value \astring\a.]"
+" [+str \astring\a?\astring\a should be entered into the catalog.]"
+" [+var \aname\a?If \bdef\b \aname\a occurs then its \astring\a value"
+" should be entered into the catalog.]"
+" }"
+"[+?The input source file is preprocessed with the \bpp:allpossible\b"
+" option on. This enables non-C semantics; all source should first"
+" be compiled error-free with a real compiler before running \bmsgcpp\b."
+" The following changes are enabled for the top level files (i.e.,"
+" included file behavior is not affected):]{"
+" [+(1)?All \b#if\b, \b#ifdef\b and \b#ifndef\b branches"
+" are enabled.]"
+" [+(2)?The first definition for a macro is retained, even when"
+" subsequent \b#define\b statements would normally"
+" redefine the macro. \b#undef\b must be used to"
+" redefine a macro.]"
+" [+(3)?Macro calls with an improper number of arguments are"
+" silently ignored.]"
+" [+(4)?\b#include\b on non-existent headers are silently"
+" ignored.]"
+" [+(5)?Invalid C source characters are silently ignored.]"
+" }"
+"[+?\b\"msgcat.h\"\b is included if it exists. This file may contain macro"
+" definitions for functions that translate string arguments. If \afoo\a"
+" is a function that translates its string arguments then include the"
+" line \b#define \b\afoo\a\b _TRANSLATE_\b in \bmsgcat.h\b or specify"
+" the option \b-D\b\afoo\a\b=_TRANSLATE_\b. If \abar\a is a function"
+" that translates string arguments if the first argument is \bstderr\b"
+" then use either \b#define \b\abar\a\b _STDIO_\b or"
+" \b-D\b\abar\a\b=_STDIO_\b.]"
+"[+?The macro \b_BLD_msgcat\b is defined to be \b1\b. As an alternative to"
+" \bmsgcat.h\b, \b_TRANSLATE_\b definitions could be placed inside"
+" \b#ifdef _BLD_msgcat\b ... \b#endif\b.]"
+
+"\n"
+"\n[ input [ output ] ]\n"
+"\n"
+
+"[+SEE ALSO?\bcc\b(1), \bcpp\b(1), \bgencat\b(1), \bmsggen\b(1),"
+" \bmsgcc\b(1), \bmsgcvt\b(1)]"
+;
+
+#include <ast.h>
+#include <error.h>
+
+#include "pp.h"
+#include "ppkey.h"
+
+#define T_STDERR (T_KEYWORD+1)
+#define T_STDIO (T_KEYWORD+2)
+#define T_TRANSLATE (T_KEYWORD+3)
+
+#define OMIT "*@(\\[[-+]*\\?*\\]|\\@\\(#\\)|Copyright \\(c\\)|\\\\000|\\\\00[!0-9]|\\\\0[!0-9])*"
+
+static struct ppkeyword keys[] =
+{
+ "char", T_CHAR,
+ "int", T_INT,
+ "sfstderr", T_STDERR,
+ "stderr", T_STDERR,
+ "_STDIO_", T_STDIO,
+ "_TRANSLATE_", T_TRANSLATE,
+ 0, 0
+};
+/*
+ * msgcpp option parser; main() passes it to optjoin() ahead of ppargs.
+ * It runs optget() over argv with this command's usage string.  When
+ * last is zero, any '?', ':' or option result sets opt_info.again and
+ * returns 1 without handling it (presumably so that optjoin() can offer
+ * the argument to the next parser -- confirm against the optjoin
+ * documentation).  When last is non-zero, usage requests and errors are
+ * reported through error() and any other option is skipped.  Once the
+ * loop ends the result is non-zero iff argv[opt_info.index] is not the
+ * terminating null pointer.
+ */
+static int
+msgppargs(char** argv, int last)
+{
+ for (;;)
+ {
+ switch (optget(argv, usage))
+ {
+ case 0:
+ break; /* leaves the switch; the break after it ends the loop */
+ case '?':
+ if (!last)
+ {
+ opt_info.again = 1;
+ return 1;
+ }
+ error(ERROR_USAGE|4, "%s", opt_info.arg);
+ break;
+ case ':':
+ if (!last)
+ {
+ opt_info.again = 1;
+ return 1;
+ }
+ error(2, "%s", opt_info.arg);
+ continue;
+ default:
+ if (!last)
+ {
+ opt_info.again = 1;
+ return 1;
+ }
+ continue; /* last parser: the option is not acted on here */
+ }
+ break;
+ }
+ return argv[opt_info.index] != 0;
+}
+/*
+ * msgcpp entry point.  Configures the pp preprocessor library with the
+ * keys[] keyword table and a set of predefined macros that rewrite the
+ * common message functions (error, errormsg, gettext, ...) in terms of
+ * _TRANSLATE_, then reads tokens from pplex() and writes "str", "var",
+ * "cmd" and "def" lines to sfstdout.  x is the parenthesis depth inside
+ * a call whose strings are to be catalogued; 0 means outside any such
+ * call.  Returns non-zero if error_info.errors is non-zero.
+ */
+int
+main(int argc, char** argv)
+{
+ register char* s;
+ register int x;
+ register int c;
+ Sfio_t* tmp;
+
+ NoP(argc);
+ if (s = strrchr(*argv, '/')) /* s = base name of argv[0] */
+ s++;
+ else
+ s = *argv;
+ error_info.id = s;
+ ppop(PP_DEFAULT, PPDEFAULT);
+ optjoin(argv, msgppargs, ppargs, NiL);
+ if (strlen(s) >= 5 && *(s + 3) != 'c') /* NOTE(review): presumably selects C++ mode from the command name (4th character not 'c') -- confirm */
+ {
+ ppop(PP_PLUSPLUS, 1);
+ ppop(PP_NOHASH, 1);
+ ppop(PP_PROBE, "CC");
+ }
+ ppop(PP_SPACEOUT, 0);
+ ppop(PP_COMPILE, keys);
+ ppop(PP_OPTION, "allpossible");
+ ppop(PP_OPTION, "catliteral");
+ ppop(PP_OPTION, "modern");
+ ppop(PP_OPTION, "readonly"); /* switched off again by "noreadonly" below, after the built-in definitions */
+ ppop(PP_DEFINE, "_BLD_msgcat=1");
+ ppop(PP_DEFINE, "const=");
+ ppop(PP_DEFINE, "errorf=_TRANSLATE_");
+ ppop(PP_DEFINE, "register=");
+ ppop(PP_DEFINE, "sfstderr=sfstderr");
+ ppop(PP_DEFINE, "stderr=stderr");
+ ppop(PP_DEFINE, "_(m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "__(m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "gettxt(i,m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "gettext(m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "dgettext(d,m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "dcgettext(d,m,c)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "ERROR_catalog(m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "ERROR_dictionary(m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "ERROR_translate(l,i,c,m)=_TRANSLATE_(m)");
+ ppop(PP_DEFINE, "error(l,f,...)=_TRANSLATE_(f)");
+ ppop(PP_DEFINE, "errormsg(t,l,f,...)=_TRANSLATE_(f)");
+ ppop(PP_DIRECTIVE, "include \"msgcat.h\"");
+ ppop(PP_OPTION, "noreadonly");
+ ppop(PP_INIT);
+ if (!(tmp = sfstropen())) /* string stream used to build the #define text fed back through ppinput() */
+ error(ERROR_SYSTEM|3, "out of space");
+ x = 0;
+ for (;;)
+ {
+ c = pplex();
+ again: /* re-dispatch on a token that a failed lookahead has already read into c */
+ switch (c)
+ {
+ case 0:
+ break; /* token 0 ends the loop (presumably end of input) */
+ case T_TRANSLATE:
+ switch (c = pplex())
+ {
+ case '(':
+ x = 1;
+ break;
+ case ')':
+ if ((c = pplex()) != '(')
+ {
+ x = 0;
+ goto again;
+ }
+ x = 1;
+ break;
+ default:
+ x = 0;
+ goto again;
+ }
+ continue;
+ case '(':
+ if (x > 0)
+ x++;
+ continue;
+ case ')':
+ if (x > 0)
+ x--;
+ continue;
+ case T_STDIO:
+ if ((c = pplex()) != '(' || (c = pplex()) != T_STDERR || (c = pplex()) != ',') /* only calls of the form name(stderr, or name(sfstderr, qualify */
+ {
+ x = 0;
+ goto again;
+ }
+ x = 1;
+ continue;
+ case T_STRING:
+ if (x > 0 && !strmatch(pp.token, OMIT)) /* strings matching OMIT are never emitted */
+ sfprintf(sfstdout, "str \"%s\"\n", pp.token);
+ continue;
+ case T_ID:
+ s = pp.symbol->name;
+ if (x > 0)
+ {
+ if ((c = pplex()) == '+' && ppisinteger(c = pplex())) /* identifier + integer: emit the integer followed by the name */
+ sfprintf(sfstdout, "var %s %s\n", pp.token, s);
+ else
+ sfprintf(sfstdout, "var %s\n", s); /* NOTE(review): the lookahead token in c is not re-dispatched on this path */
+ }
+ else if (s[0] == 'b' && s[1] == '_' && s[2])
+ {
+ if ((c = pplex()) == '(' && (c = pplex()) == T_INT && (c = pplex()) == T_ID && (c = pplex()) == ',' && (c = pplex()) == T_CHAR && (c = pplex()) == '*') /* b_name(int id, char* */
+ sfprintf(sfstdout, "cmd %s\n", s + 2);
+ else
+ goto again;
+ }
+ else
+ {
+ if ((c = pplex()) == '[') /* accept name[] and name[integer] as well as plain name */
+ {
+ if (ppisinteger(c = pplex()))
+ c = pplex();
+ if (c != ']')
+ goto again;
+ c = pplex();
+ }
+ if (c == '=' && (c = pplex()) == T_STRING && !strmatch(pp.token, OMIT))
+ {
+ sfprintf(sfstdout, "def %s \"%s\"\n", s, pp.token);
+ sfprintf(tmp, "#define %s \"%s\"\n", s, pp.token);
+ if (!(s = sfstruse(tmp)))
+ error(ERROR_SYSTEM|3, "out of space");
+ ppinput(s, "string", 0); /* hand the generated #define line to the preprocessor as input */
+ }
+ else
+ goto again;
+ }
+ continue;
+ default:
+ continue;
+ }
+ break;
+ }
+ ppop(PP_DONE);
+ return error_info.errors != 0;
+}
diff --git a/usr/src/cmd/ast/msgcc/msgcvt.c b/usr/src/cmd/ast/msgcc/msgcvt.c
new file mode 100644
index 0000000000..89dd3e848d
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/msgcvt.c
@@ -0,0 +1,691 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 2000-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ */
+
+static const char usage[] =
+"[-?\n@(#)$Id: msgcvt (AT&T Research) 2000-05-01 $\n]"
+USAGE_LICENSE
+"[+NAME?msgcvt - convert message file to/from html]"
+"[+DESCRIPTION?\bmsgcvt\b reads a \bgencat\b(1) format file on the standard"
+" input and converts it to \bhtml\b on the standard output. The input"
+" file must contain the control statement \b$quote \"\b and use the \""
+" character to quote message text. The output is in a form suitable for"
+" automatic translation by web sites like"
+" \bhttp://babelfish.altavista.com/\b or filters like"
+" \btranslate\b(1).]"
+"[h:html?Generate \bhtml\b from \bgencat\b(1) input. This is the default.]"
+"[m:msg?Generate a \bgencat\b(1) message file from (presumably translated)"
+" \bhtml\b. Wide characters are UTF-8 encoded.]"
+"[r:raw?The message file is raw message text, one message per line, with no"
+" quoting or line numbering.]"
+"[+SEE ALSO?\bgencat\b(1), \bmsgcc\b(1), \bmsggen\b(1), \btranslate\b(1)]"
+;
+
+#include <ast.h>
+#include <ctype.h>
+#include <error.h>
+
+#define MSG_RAW (1<<0)
+#define MSG_SPLICE (1<<1)
+
+#define SPACE(s) (isspace(*s)&&(s+=1)||*s=='\\'&&(*(s+1)=='n'||*(s+1)=='t')&&(s+=2))
+
+typedef void (*Convert_f)(Sfio_t*, Sfio_t*, int);
+
+typedef struct
+{
+ const char* name;
+ int code;
+} Code_t;
+
+static const Code_t codes[] =
+{
+ "aacute", 225,
+ "Aacute", 193,
+ "acirc", 226,
+ "Acirc", 194,
+ "aelig", 230,
+ "AElig", 198,
+ "agrave", 224,
+ "Agrave", 192,
+ "amp", '&',
+ "aring", 229,
+ "Aring", 197,
+ "atilde", 227,
+ "Atilde", 195,
+ "auml", 228,
+ "Auml", 196,
+ "ccedil", 231,
+ "Ccedil", 199,
+ "copy", 169,
+ "eacute", 233,
+ "Eacute", 201,
+ "ecirc", 234,
+ "Ecirc", 202,
+ "egrave", 232,
+ "Egrave", 200,
+ "euml", 235,
+ "Euml", 203,
+ "gt", '>',
+ "iacute", 237,
+ "Iacute", 205,
+ "icirc", 238,
+ "Icirc", 206,
+ "igrave", 236,
+ "Igrave", 204,
+ "iuml", 239,
+ "Iuml", 207,
+ "lt", '<',
+ "nbsp", ' ',
+ "ntilde", 241,
+ "Ntilde", 209,
+ "oacute", 243,
+ "Oacute", 211,
+ "ocirc", 244,
+ "Ocirc", 212,
+ "ograve", 242,
+ "Ograve", 210,
+ "oslash", 248,
+ "Oslash", 216,
+ "otilde", 245,
+ "Otilde", 213,
+ "ouml", 246,
+ "Ouml", 214,
+ "quot", '"',
+ "reg", 174,
+ "szlig", 223,
+ "uacute", 250,
+ "Uacute", 218,
+ "ucirc", 251,
+ "Ucirc", 219,
+ "ugrave", 249,
+ "Ugrave", 217,
+ "uuml", 252,
+ "Uuml", 220,
+ "yuml", 255,
+};
+
+/*
+ * Decode one HTML character reference read from ip.  The caller has
+ * already consumed the leading '&'.
+ *
+ * The reference name is collected up to ';' (or EOF).  "#digits" yields
+ * the decimal value and any other name is looked up in codes[].  On
+ * success the character code is returned.  If the name is malformed,
+ * too long or not in codes[], a warning is issued, the collected name
+ * bytes are pushed back on ip so that they are read again as ordinary
+ * text, and '&' is returned.  The ';' that ended an unknown name is not
+ * pushed back.  '&' is also returned when EOF directly follows the '&'.
+ */
+static int
+decode(Sfio_t* ip)
+{
+	register int	c;
+	register int	i;	/* number of bytes collected in name[] */
+	register int	k;	/* index into codes[] */
+	char		name[32];
+
+	if ((c = sfgetc(ip)) == EOF)
+		return '&';
+	name[0] = c;
+	i = 1;
+	if (c != '#' && !isalpha(c))
+		goto bad;
+	while ((c = sfgetc(ip)) != EOF && c != ';')
+	{
+		if (c == '&')
+			i = 0;	/* a new '&' restarts the name */
+		else
+		{
+			name[i++] = c;
+			/* '#' is only valid as the first byte; keep room for the terminating 0 */
+			if (!isalnum(c) && (i > 1 || c != '#') || i >= (elementsof(name) - 1))
+				goto bad;
+		}
+	}
+	name[i] = 0;
+	if (name[0] == '#')
+	{
+		switch (c = strtol(name + 1, NiL, 10))
+		{
+		case 91:
+			c = '[';
+			break;
+		case 93:
+			c = ']';
+			break;
+		}
+	}
+	else
+	{
+		/*
+		 * The table is searched with its own index.  Reusing i here
+		 * left it at elementsof(codes) for an unknown name, and the
+		 * code at bad: then wrote and read name[] far out of bounds.
+		 */
+		for (k = 0; k < elementsof(codes); k++)
+			if (streq(codes[k].name, name))
+				return codes[k].code;
+		goto bad;
+	}
+	return c;
+ bad:
+	name[i] = 0;
+	if (c == ';')
+		error(1, "&%s: unknown HTML special character -- & assumed", name);
+	else
+		error(1, "&%s: invalid HTML special character -- & assumed", name);
+	/* push back in reverse so that the bytes are re-read in their original order */
+	while (i--)
+		sfungetc(ip, name[i]);
+	return '&';
+}
+
+/*
+ * Write the character code w to op encoded as UTF-8.
+ *
+ * Codes up to 0x7F are written as one byte, up to 0x7FF as two bytes
+ * and up to 0xFFFF as three bytes; anything else is written as '?'.
+ * Returns the value of the last sfputc() call.
+ */
+static int
+sfpututf(Sfio_t* op, register int w)
+{
+	if (!(w & ~0x7F))
+		return sfputc(op, w);
+	if (!(w & ~0x7FF))
+		sfputc(op, 0xC0 | (w >> 6));
+	else if (!(w & ~0xFFFF))
+	{
+		sfputc(op, 0xE0 | (w >> 12));
+		/*
+		 * The mask must be applied before the 0x80 marker is added:
+		 * "0x80 + (w >> 6) & 0x3F" parses as "(0x80 + (w >> 6)) & 0x3F"
+		 * and so produced a byte without the continuation marker.
+		 */
+		sfputc(op, 0x80 | ((w >> 6) & 0x3F));
+	}
+	else
+		return sfputc(op, '?');
+	/* the final continuation byte is common to the 2 and 3 byte forms */
+	return sfputc(op, 0x80 | (w & 0x3F));
+}
+
+/*
+ * Read from ip until a character that is not white space is found and
+ * return it.  The result is EOF if the input ends first.
+ */
+static int
+sfnext(Sfio_t* ip)
+{
+	register int	ch;
+
+	do
+	{
+		ch = sfgetc(ip);
+	} while (isspace(ch));
+	return ch;
+}
+/*
+ * Convert (presumably translated) HTML on ip back into message text on
+ * op.
+ *
+ * Input is skipped until an <OL START="550717"> tag is seen; that is the
+ * tag msg2html() writes at the top of its list.  After it:
+ *   <LI>digits       starts a message: newline, the number, a space and
+ *                    an opening double quote (closing any open quote)
+ *   <P>              becomes a newline
+ *   <P CLASS="text"  the attribute text is decoded and copied to op
+ *   <BR>             becomes a space
+ *   </OL>            closes any open quote and resumes the tag search
+ *   other tags       are skipped through the closing '>'
+ *   character refs   are decoded by decode() and written as UTF-8
+ *   white space      runs are normally reduced to one space
+ *   \r [ ]           are dropped
+ * A literal '"' is preceded by a backslash and a newline is treated as
+ * white space unless flags is non-zero, in which case both are copied
+ * unchanged.
+ */
+static void
+html2msg(register Sfio_t* ip, register Sfio_t* op, int flags)
+{
+ register int c;
+ register int q;
+
+ again:
+ while ((c = sfgetc(ip)) != EOF)
+ if (c == '<')
+ {
+ if ((c = sfnext(ip)) == 'O' && /* match OL START="550717" one character at a time */
+ (c = sfnext(ip)) == 'L' &&
+ isspace(c = sfgetc(ip)) &&
+ (c = sfnext(ip)) == 'S' &&
+ (c = sfnext(ip)) == 'T' &&
+ (c = sfnext(ip)) == 'A' &&
+ (c = sfnext(ip)) == 'R' &&
+ (c = sfnext(ip)) == 'T' &&
+ (c = sfnext(ip)) == '=' &&
+ (c = sfnext(ip)) == '"' &&
+ (c = sfnext(ip)) == '5' &&
+ (c = sfnext(ip)) == '5' &&
+ (c = sfnext(ip)) == '0' &&
+ (c = sfnext(ip)) == '7' &&
+ (c = sfnext(ip)) == '1' &&
+ (c = sfnext(ip)) == '7' &&
+ (c = sfnext(ip)) == '"' &&
+ (c = sfnext(ip)) == '>')
+ break;
+ while (c != EOF && c != '>') /* some other tag: skip to its end */
+ c = sfgetc(ip);
+ }
+ if ((c = sfnext(ip)) != EOF) /* drop white space that follows the start tag */
+ sfungetc(ip, c);
+ q = 0; /* q is the pending closing quote character, 0 when no message is open */
+ for (;;)
+ {
+ switch (c = sfgetc(ip))
+ {
+ case EOF:
+ break;
+ case '&':
+ c = decode(ip);
+ sfpututf(op, c);
+ if (isspace(c)) /* a reference that decoded to white space absorbs the white space after it */
+ {
+ while (isspace(c = sfgetc(ip)));
+ if (c == EOF)
+ break;
+ sfungetc(ip, c);
+ }
+ continue;
+ case '<':
+ switch (c = sfnext(ip))
+ {
+ case '/':
+ if ((c = sfnext(ip)) == 'O' &&
+ (c = sfgetc(ip)) == 'L' &&
+ (c = sfnext(ip)) == '>')
+ {
+ if (q)
+ {
+ sfputc(op, q);
+ q = '"';
+ }
+ goto again; /* end of list: look for the next start tag */
+ }
+ break;
+ case 'B':
+ if ((c = sfgetc(ip)) == 'R' &&
+ (c = sfnext(ip)) == '>')
+ sfputc(op, ' ');
+ break;
+ case 'L':
+ if ((c = sfgetc(ip)) == 'I' &&
+ (c = sfnext(ip)) == '>' &&
+ isdigit(c = sfnext(ip)))
+ {
+ if (q)
+ sfputc(op, q); /* close the previous message */
+ else
+ q = '"';
+ sfputc(op, '\n');
+ do
+ {
+ sfputc(op, c); /* copy the message number */
+ } while (isdigit(c = sfgetc(ip)));
+ if (c == EOF)
+ break;
+ sfputc(op, ' ');
+ sfputc(op, '"');
+ if (isspace(c))
+ c = sfnext(ip);
+ if (c == '<' && /* an <LI> directly after the number is consumed */
+ (c = sfnext(ip)) == 'L' &&
+ (c = sfgetc(ip)) == 'I' &&
+ (c = sfnext(ip)) == '>')
+ /* great */;
+ continue;
+ }
+ break;
+ case 'P':
+ if ((c = sfnext(ip)) == '>')
+ sfputc(op, '\n');
+ else if (c == 'C' &&
+ (c = sfgetc(ip)) == 'L' &&
+ (c = sfgetc(ip)) == 'A' &&
+ (c = sfgetc(ip)) == 'S' &&
+ (c = sfgetc(ip)) == 'S' &&
+ (c = sfnext(ip)) == '=' &&
+ (c = sfnext(ip)) == '"')
+ for (;;) /* copy the attribute value up to its closing quote */
+ {
+ switch (c = sfgetc(ip))
+ {
+ case EOF:
+ case '"':
+ break;
+ case '&':
+ c = decode(ip);
+ sfpututf(op, c);
+ continue;
+ default:
+ sfpututf(op, c);
+ continue;
+ }
+ break;
+ }
+ break;
+ }
+ while (c != EOF && c != '>') /* skip whatever remains of the tag */
+ c = sfgetc(ip);
+ if (c == EOF || (c = sfgetc(ip)) == EOF)
+ break;
+ sfungetc(ip, c);
+ continue;
+ case '"':
+ if (!flags)
+ sfputc(op, '\\');
+ sfputc(op, c);
+ continue;
+ case '\n':
+ if (flags)
+ {
+ sfputc(op, c);
+ continue;
+ }
+ /*FALLTHROUGH*/
+ case ' ':
+ case '\t':
+ while ((c = sfgetc(ip)) != EOF) /* swallow the rest of the white space run */
+ if (c == '&')
+ {
+ c = decode(ip);
+ if (!isspace(c))
+ sfputc(op, ' ');
+ sfpututf(op, c);
+ break;
+ }
+ else if (!isspace(c))
+ {
+ if (c == '<')
+ {
+ c = sfgetc(ip);
+ if (c == EOF)
+ break;
+ sfungetc(ip, c);
+ sfungetc(ip, '<');
+ if (c != 'L' && c != '/') /* no space is written ahead of a tag starting with L or / */
+ sfputc(op, ' ');
+ }
+ else
+ {
+ if (c != EOF)
+ sfungetc(ip, c);
+ sfputc(op, ' ');
+ }
+ break;
+ }
+ continue;
+ case '\r':
+ case '[':
+ case ']':
+ continue;
+ default:
+ sfpututf(op, c);
+ continue;
+ }
+ break;
+ }
+ if (q) /* close the last message if one is still open */
+ sfputc(op, q);
+ sfputc(op, '\n');
+}
+
+/*
+ * write the character c to op
+ * the characters < > " & [ ] are written as html character
+ * references, every other character is written unchanged
+ */
+
+static void
+encode(Sfio_t* op, register int c)
+{
+	switch (c)
+	{
+	case '<':
+		sfprintf(op, "&lt;");
+		break;
+	case '>':
+		sfprintf(op, "&gt;");
+		break;
+	case '"':
+		sfprintf(op, "&quot;");
+		break;
+	case '&':
+		sfprintf(op, "&amp;");
+		break;
+	case '[':
+		sfprintf(op, "&#091;");
+		break;
+	case ']':
+		sfprintf(op, "&#093;");
+		break;
+	default:
+		sfputc(op, c);
+		break;
+	}
+}
+
+/*
+ * convert the message text read from ip to html written on op
+ *
+ * the output is one html document: a fixed header, an
+ * <OL START="550717"> list holding the converted lines and a fixed
+ * trailer
+ *
+ * each input line is read with sfgetr() and error_info.line is
+ * incremented per line and reset to 0 on return
+ *
+ * if flags is non-zero when a line starts (MSG_RAW passed by the caller
+ * or MSG_SPLICE left set by the previous line ending in a backslash)
+ * the line is prefixed with <P> and converted from its first character
+ * otherwise
+ *	a line starting with $ is copied, encode()d, into <P CLASS="...">
+ *	a line not starting with a digit is dropped
+ *	a line starting with digits becomes <LI>digits<LI> followed by
+ *	the converted text after the first " on the line
+ *
+ * within the text, sequences that are not plain words (printf style
+ * % formats, $ names, backslash escapes other than \n and \t and some
+ * punctuation) are placed inside <P CLASS="..."> attribute values
+ */
+
+static void
+msg2html(register Sfio_t* ip, register Sfio_t* op, register int flags)
+{
+	register char*	s;
+	register int	c;
+	register int	q;	/* non-zero while a <P CLASS=" value is open */
+	register int	p;	/* non-zero if the previous line was a $ line */
+
+	sfprintf(op, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"><HTML><HEAD><!-- text massaged for external translation --></HEAD><BODY>\n");
+	sfprintf(op, "<OL START=\"550717\">\n");
+	p = q = 0;
+	while (s = sfgetr(ip, '\n', 1))
+	{
+		error_info.line++;
+		if (flags)
+			sfprintf(op, "<P>");
+		else
+		{
+			if (*s == '$')
+			{
+				/* separate consecutive $ lines with an empty <P> */
+				if (p)
+					sfprintf(op, "<P>");
+				else
+					p = 1;
+				/* the format has no conversions so no extra argument is passed */
+				sfprintf(op, "<P CLASS=\"");
+				while (c = *s++)
+					encode(op, c);
+				sfprintf(op, "\">\n");
+				continue;
+			}
+			p = 0;
+			if (!isdigit(*s))
+				continue;
+			sfprintf(op, "<LI>");
+			while (isdigit(c = *s++))
+				sfputc(op, c);
+			sfprintf(op, "<LI>");
+			/* skip to just past the opening quote, if any */
+			while (c && c != '"')
+				c = *s++;
+			if (!c)
+				s--;
+			else if (isspace(*s))
+			{
+				s++;
+				sfprintf(op, "<BR>");
+			}
+		}
+		for (;;)
+		{
+			/*
+			 * continue => next character of this line
+			 * break    => this line is done
+			 */
+
+			switch (c = *s++)
+			{
+			case 0:
+				flags &= ~MSG_SPLICE;
+				if (q)
+				{
+					q = 0;
+					sfprintf(op, "\">");
+				}
+				sfputc(op, '\n');
+				break;
+			case '<':
+				sfprintf(op, "&lt;");
+				continue;
+			case '>':
+				sfprintf(op, "&gt;");
+				continue;
+			case '&':
+				sfprintf(op, "&amp;");
+				continue;
+			case '[':
+				sfprintf(op, "&#091;");
+				continue;
+			case ']':
+				sfprintf(op, "&#093;");
+				continue;
+			case '$':
+				/* $ followed by alphanumerics is copied into the CLASS value */
+				if (!q)
+				{
+					q = 1;
+					sfprintf(op, "<P CLASS=\"");
+				}
+				sfputc(op, c);
+				while (isalnum(c = *s++))
+					sfputc(op, c);
+				s--;
+				continue;
+			case '%':
+				/* %% or a % format specification is copied into the CLASS value */
+				if (!q)
+				{
+					q = 1;
+					sfprintf(op, "<P CLASS=\"");
+				}
+				sfputc(op, c);
+				if (*s == '%')
+					sfputc(op, *s++);
+				else
+					do
+					{
+						if (!(c = *s++) || c == '"')
+						{
+							s--;
+							break;
+						}
+						encode(op, c);
+					} while (!isalpha(c) || (!islower(c) || c == 'h' || c == 'l') && isalpha(*s));
+				if (SPACE(s))
+					sfprintf(op, "&nbsp;");
+				continue;
+			case '"':
+				if (!(flags & MSG_RAW))
+				{
+					/* the closing quote ends the text: force case 0 next */
+					s = "";
+					continue;
+				}
+				/*FALLTHROUGH*/
+			case '\'':
+			case ':':
+			case '/':
+			case '+':
+			case '@':
+				if (!q)
+				{
+					q = 1;
+					sfprintf(op, "<P CLASS=\"");
+				}
+				/*FALLTHROUGH*/
+			case '.':
+			case ',':
+				sfputc(op, c);
+				if (SPACE(s))
+					sfprintf(op, "&nbsp;");
+				continue;
+			case '\\':
+				if (!(c = *s++))
+				{
+					/* trailing backslash: the next line continues this one */
+					flags |= MSG_SPLICE;
+					break;
+				}
+				if (c != 'n' && c != 't')
+				{
+					if (!q)
+					{
+						q = 1;
+						sfprintf(op, "<P CLASS=\"");
+					}
+					sfputc(op, '\\');
+					encode(op, c);
+					if (c == 'b')
+					{
+						/* copy what follows \b up to a terminating \a \b \0 escape, lone ? or " */
+						for (;;)
+						{
+							if (!(c = *s++) || c == '"')
+							{
+								s--;
+								break;
+							}
+							if (c == '?')
+							{
+								if (*s != '?')
+								{
+									s--;
+									break;
+								}
+								sfputc(op, c);
+								sfputc(op, *s++);
+								continue;
+							}
+							if (c == '\\')
+							{
+								if (!*s)
+									break;
+								sfputc(op, c);
+								if (*s == 'a' || *s == 'b' || *s == '0')
+								{
+									sfputc(op, *s++);
+									break;
+								}
+								c = *s++;
+							}
+							encode(op, c);
+						}
+					}
+					else if (isdigit(c) && isdigit(*s))
+					{
+						/* up to two more digits of a numeric escape */
+						sfputc(op, *s++);
+						if (isdigit(*s))
+							sfputc(op, *s++);
+					}
+					if (SPACE(s))
+						sfprintf(op, "&nbsp;");
+					continue;
+				}
+				/* \n and \t are handled as white space */
+				/*FALLTHROUGH*/
+			case ' ':
+			case '\t':
+				/* collapse a run of white space, \n and \t escapes */
+				while (isspace(*s) || *s == '\\' && (*(s + 1) == 'n' || *(s + 1) == 't') && s++)
+					s++;
+				if (*s == '"')
+				{
+					if (q)
+					{
+						q = 0;
+						sfprintf(op, " \">");
+					}
+					else
+						sfprintf(op, "<BR>");
+					continue;
+				}
+				c = ' ';
+				/*FALLTHROUGH*/
+			default:
+				if (q)
+				{
+					q = 0;
+					sfprintf(op, "\">");
+				}
+				sfputc(op, c);
+				continue;
+			}
+			break;
+		}
+	}
+	sfprintf(op, "</OL>\n");
+	sfprintf(op, "</BODY></HTML>\n");
+	error_info.line = 0;
+}
+
+/*
+ * msgcvt entry point
+ *
+ * -h selects msg2html (the default), -m selects html2msg and -r adds
+ * MSG_RAW to the flags; the selected converter is then run from the
+ * standard input to the standard output
+ *
+ * the exit status is non-zero if any errors were counted
+ */
+
+int
+main(int argc, char** argv)
+{
+	Convert_f	convert = msg2html;
+	int		flags = 0;
+	int		done = 0;
+
+	NoP(argc);
+	error_info.id = "msgcvt";
+	while (!done)
+		switch (optget(argv, usage))
+		{
+		case 'h':
+			convert = msg2html;
+			break;
+		case 'm':
+			convert = html2msg;
+			break;
+		case 'r':
+			flags |= MSG_RAW;
+			break;
+		case '?':
+			error(ERROR_USAGE|4, "%s", opt_info.arg);
+			break;
+		case ':':
+			error(2, "%s", opt_info.arg);
+			break;
+		default:
+			/* anything else ends option processing */
+			done = 1;
+			break;
+		}
+	argv += opt_info.index;
+	if (error_info.errors)
+		error(ERROR_USAGE|4, "%s", optusage(NiL));
+	(*convert)(sfstdin, sfstdout, flags);
+	return error_info.errors != 0;
+}
diff --git a/usr/src/cmd/ast/msgcc/msggen.c b/usr/src/cmd/ast/msgcc/msggen.c
new file mode 100644
index 0000000000..5c34d20c40
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/msggen.c
@@ -0,0 +1,522 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 2000-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ */
+
+/*
+ * option and operand description handed to optget() by main()
+ * it declares the f (format), l (list) and s (set) options and the
+ * "catfile [ msgfile ]" operands
+ * the text is emitted at run time and must not be reworded casually
+ */
+
+static const char usage[] =
+"[-?\n@(#)$Id: msggen (AT&T Research) 2002-03-11 $\n]"
+USAGE_LICENSE
+"[+NAME?msggen - generate a machine independent formatted message catalog]"
+"[+DESCRIPTION?\bmsggen\b merges the message text source files \amsgfile\a"
+" into a machine independent formatted message catalog \acatfile\a."
+" The file \acatfile\a will be created if it does not already exist."
+" If \acatfile\a does exist, its messages will be included in the new"
+" \acatfile\a. If set and message numbers collide, the new message"
+" text defined in \amsgfile\a will replace the old message text"
+" currently contained in \acatfile\a. Non-ASCII characters must be"
+" UTF-8 encoded. \biconv\b(1) can be used to convert to/from UTF-8.]"
+"[f:format?List the \bprintf\b(3) format signature for each message in"
+" \acatfile\a. A format signature is one line containing one character"
+" per format specification:]{"
+" [c?char]"
+" [d?double]"
+" [D?long double]"
+" [f?float]"
+" [h?short]"
+" [i?int]"
+" [j?long long]"
+" [l?long]"
+" [p?void*]"
+" [s?string]"
+" [t?ptrdiff_t]"
+" [z?size_t]"
+" [???unknown]"
+"}"
+"[l:list?List \acatfile\a in UTF-8 \amsgfile\a form.]"
+"[s:set?Convert the \acatfile\a operand to a message set number and"
+" print the number on the standard output.]"
+"[+EXTENDED DESCRIPTION?Message text source files are in \bgencat\b(1)"
+" format, defined as follows. Note that the fields of a message text"
+" source line are separated by a single blank character. Any other"
+" blank characters are considered as being part of the subsequent"
+" field. The \bNL_*\b constants are defined in one or both of"
+" \b<limits.h>\b and \b<nl_types.h>\b.]{"
+" [+$ \acomment\a?A line beginning with \b$\b followed by a"
+" blank character is treated as a comment.]"
+" [+$delset \an\a \acomment\a?This line deletes message set"
+" \an\a from an existing message catalog. \an\a"
+" denotes the set number [1, \bNL_SETMAX\b]]. Any"
+" text following the set number is treated as a"
+" comment.]"
+" [+$quote \ac\a?This line specifies an optional quote"
+" character \ac\a, which can be used to surround"
+" \amessage-text\a so that trailing spaces or"
+" empty messages are visible in a message source"
+" line. By default, or if an empty \b$quote\b"
+" directive is supplied, no quoting of \amessage-text\a"
+" will be recognized.]"
+" [+$set \an\a \acomment\a?This line specifies the set"
+" identifier of the following messages until the next"
+" \b$set\b or end-of-file appears. \an\a denotes the set"
+" identifier, which is defined as a number in the range"
+" [1, \bNL_SETMAX\b]]. Set numbers need not be"
+" contiguous. Any text following the set identifier is"
+" treated as a comment. If no \b$set\b directive is"
+" specified in a message text source file, all messages"
+" will be located in message set \b1\b.]"
+" [+$translation \aidentification\a \aYYYY-MM-DD\a[,...]]?Append"
+" translation info to the message catalog header. Only"
+" the newest date for a given \aidentification\a"
+" is retained in the catalog. Multiple translation lines"
+" are combined into a single \b,\b separated list.]"
+" [+\am\a \amessage-text\a?\am\a denotes the message identifier,"
+" which is defined as a number in the range"
+" [1, \bNL_MSGMAX\b]]. The message-text is stored in the"
+" message catalogue with the set identifier specified by"
+" the last \b$set\b directive, and with message"
+" identifier \am\a. If the \amessage-text\a is empty,"
+" and a blank character field separator is present, an"
+" empty string is stored in the message catalogue. If a"
+" message source line has a message number, but neither"
+" a field separator nor \amessage-text\a, the existing"
+" message with that number (if any) is deleted from the"
+" catalogue. Message identifiers need not be contiguous."
+" There are no \amessage-text\a length restrictions.]"
+"}"
+
+"\n"
+"\ncatfile [ msgfile ]\n"
+"\n"
+
+"[+SEE ALSO?\bgencat\b(1), \biconv\b(1), \bmsgcc\b(1), \btranslation\b(1),"
+" \bfmtfmt\b(3)]"
+;
+
+#include <ast.h>
+#include <ctype.h>
+#include <ccode.h>
+#include <error.h>
+#include <mc.h>
+
+/*
+ * one entry of the translation list built by translation()
+ * entries are singly linked, with new entries pushed on the front
+ */
+
+typedef struct Xl_s
+{
+ struct Xl_s* next; /* next entry, 0 at the end of the list */
+ char* date; /* strdup() copy of the date field; main() sets it to 0 once the entry has been written */
+ char name[1]; /* identification string; translation() allocates strlen(name) extra bytes with the struct */
+} Xl_t;
+
+/*
+ * merge the translation entries in s into the list xp and return
+ * the new list head
+ *
+ * s is a `,' separated sequence of "identification date" entries
+ * and is modified in place: each `,' and the last space character of
+ * each entry is overwritten with 0
+ *
+ * entries with no space character are ignored
+ * for an identification already on the list the date is replaced
+ * only if the new one collates after the old one, otherwise a new
+ * entry is pushed on the front of the list
+ */
+
+static Xl_t*
+translation(Xl_t* xp, register char* s)
+{
+	register Xl_t*	node;
+	register char*	scan;
+	char*		date;
+	char*		rest;
+
+	for (;;)
+	{
+		while (isspace(*s))
+			s++;
+
+		/*
+		 * find the end of this entry and its last space
+		 */
+
+		date = 0;
+		rest = 0;
+		for (scan = s; *scan; scan++)
+		{
+			if (*scan == ',')
+			{
+				*scan = 0;
+				rest = scan + 1;
+				break;
+			}
+			if (isspace(*scan))
+				date = scan;
+		}
+		if (date)
+		{
+			*date++ = 0;
+			node = xp;
+			while (node && !streq(node->name, s))
+				node = node->next;
+			if (node)
+			{
+				if (strcoll(node->date, date) < 0)
+				{
+					free(node->date);
+					if (!(node->date = strdup(date)))
+						error(ERROR_SYSTEM|3, "out of space [translation]");
+				}
+			}
+			else
+			{
+				if (!(node = newof(0, Xl_t, 1, strlen(s))) || !(node->date = strdup(date)))
+					error(ERROR_SYSTEM|3, "out of space [translation]");
+				strcpy(node->name, s);
+				node->next = xp;
+				xp = node;
+			}
+		}
+		if (!(s = rest))
+			break;
+	}
+	return xp;
+}
+
+/*
+ * sfprintf() with ccmaps(from,to)
+ *
+ * if from==to the formatted text is written directly to sp
+ * otherwise it is formatted into a temporary string stream, mapped
+ * in place by ccmaps(), and then written to sp with sfwrite()
+ *
+ * returns the sfvprintf() or sfwrite() result, or -1 if the
+ * temporary string stream could not be opened
+ */
+
+static int
+ccsfprintf(int from, int to, Sfio_t* sp, const char* format, ...)
+{
+	va_list		ap;
+	Sfio_t*		tp;
+	char*		s;
+	int		n;
+
+	va_start(ap, format);
+	if (from == to)
+		n = sfvprintf(sp, format, ap);
+	else if (tp = sfstropen())
+	{
+		/* only map and write what was actually formatted */
+		if ((n = sfvprintf(tp, format, ap)) > 0)
+		{
+			s = sfstrbase(tp);
+			ccmaps(s, n, from, to);
+			n = sfwrite(sp, s, n);
+		}
+		sfstrclose(tp);
+	}
+	else
+		n = -1;
+	/* every va_start() must be paired with va_end() before returning */
+	va_end(ap);
+	return n;
+}
+
+/*
+ * msggen entry point
+ *
+ *	-s	print mcindex() of the catfile operand and exit
+ *	-l	list the catalog as message text source and exit
+ *	-f	list the fmtfmt() signature of each message and exit
+ *
+ * otherwise exactly one msgfile operand is required: the existing
+ * catalog (if any) is loaded, the directives and messages in msgfile
+ * are applied to it, the translation record is rebuilt, and the
+ * result is dumped to a temporary file that is renamed to catfile
+ *
+ * the exit status is non-zero if any errors were counted
+ */
+
+int
+main(int argc, char** argv)
+{
+	register Mc_t*	mc;
+	register char*	s;
+	register char*	t;
+	register int	c;
+	register int	q;
+	register int	i;
+	int		num;
+	char*		b;
+	char*		e;
+	char*		catfile;
+	char*		msgfile;
+	Sfio_t*		sp;
+	Sfio_t*		mp;
+	Sfio_t*		tp;
+	Xl_t*		px;
+	Xl_t*		bp;
+
+	Xl_t*		xp = 0;
+	int		format = 0;
+	int		list = 0;
+	int		set = 0;
+
+	NoP(argc);
+	error_info.id = "msggen";
+	for (;;)
+	{
+		switch (optget(argv, usage))
+		{
+		case 'f':
+			format = list = 1;
+			continue;
+		case 'l':
+			list = 1;
+			continue;
+		case 's':
+			set = 1;
+			continue;
+		case '?':
+			error(ERROR_USAGE|4, "%s", opt_info.arg);
+			continue;
+		case ':':
+			error(2, "%s", opt_info.arg);
+			continue;
+		}
+		break;
+	}
+	argv += opt_info.index;
+	if (error_info.errors || !(catfile = *argv++))
+		error(ERROR_USAGE|4, "%s", optusage(NiL));
+
+	/*
+	 * set and list only need catfile
+	 */
+
+	if (set)
+	{
+		sfprintf(sfstdout, "%d\n", mcindex(catfile, NiL, NiL, NiL));
+		return error_info.errors != 0;
+	}
+	else if (list)
+	{
+		if (!(sp = sfopen(NiL, catfile, "r")))
+			error(ERROR_SYSTEM|3, "%s: cannot read catalog", catfile);
+		if (!(mc = mcopen(sp)))
+			error(ERROR_SYSTEM|3, "%s: catalog content error", catfile);
+		sfclose(sp);
+		if (format)
+		{
+			for (set = 1; set <= mc->num; set++)
+				if (mc->set[set].num)
+				{
+					sfprintf(sfstdout, "$set %d\n", set);
+					for (num = 1; num <= mc->set[set].num; num++)
+						if (s = mc->set[set].msg[num])
+							sfprintf(sfstdout, "%d \"%s\"\n", num, fmtfmt(s));
+				}
+		}
+		else
+		{
+			/*
+			 * the generated directive text is mapped from the
+			 * native code set to ascii; the catalog text itself
+			 * is written as stored
+			 */
+
+			if (*mc->translation)
+			{
+				ccsfprintf(CC_NATIVE, CC_ASCII, sfstdout, "$translation ");
+				sfprintf(sfstdout, "%s", mc->translation);
+				ccsfprintf(CC_NATIVE, CC_ASCII, sfstdout, "\n");
+			}
+			ccsfprintf(CC_NATIVE, CC_ASCII, sfstdout, "$quote \"\n");
+			for (set = 1; set <= mc->num; set++)
+				if (mc->set[set].num)
+				{
+					ccsfprintf(CC_NATIVE, CC_ASCII, sfstdout, "$set %d\n", set);
+					for (num = 1; num <= mc->set[set].num; num++)
+						if (s = mc->set[set].msg[num])
+						{
+							ccsfprintf(CC_NATIVE, CC_ASCII, sfstdout, "%d \"", num);
+							while (c = *s++)
+							{
+								/*INDENT...*/
+
+			/*
+			 * numeric (ascii) codes are used for both the
+			 * characters matched and the escapes emitted
+			 */
+
+			switch (c)
+			{
+			case 0x22: /* " */
+			case 0x5C: /* \ */
+				sfputc(sfstdout, 0x5C);
+				break;
+			case 0x07: /* \a */
+				c = 0x61;
+				sfputc(sfstdout, 0x5C);
+				break;
+			case 0x08: /* \b */
+				c = 0x62;
+				sfputc(sfstdout, 0x5C);
+				break;
+			case 0x0A: /* \n */
+				c = 0x6E;
+				sfputc(sfstdout, 0x5C);
+				break;
+			case 0x0B: /* \v */
+				c = 0x76;
+				sfputc(sfstdout, 0x5C);
+				break;
+			case 0x0C: /* \f */
+				c = 0x66;
+				sfputc(sfstdout, 0x5C);
+				break;
+			case 0x0D: /* \r */
+				c = 0x72;
+				sfputc(sfstdout, 0x5C);
+				break;
+			}
+
+								/*...UNDENT*/
+								sfputc(sfstdout, c);
+							}
+							ccsfprintf(CC_NATIVE, CC_ASCII, sfstdout, "\"\n");
+						}
+				}
+		}
+		mcclose(mc);
+		return error_info.errors != 0;
+	}
+	else if (!(msgfile = *argv++) || *argv)
+		error(3, "exactly one message file must be specified");
+
+	/*
+	 * open the files and handles
+	 * a missing catfile is not an error: mcopen() is called with
+	 * whatever sfopen() returned
+	 */
+
+	if (!(tp = sfstropen()))
+		error(ERROR_SYSTEM|3, "out of space [string stream]");
+	if (!(mp = sfopen(NiL, msgfile, "r")))
+		error(ERROR_SYSTEM|3, "%s: cannot read message file", msgfile);
+	sp = sfopen(NiL, catfile, "r");
+	if (!(mc = mcopen(sp)))
+		error(ERROR_SYSTEM|3, "%s: catalog content error", catfile);
+	if (sp)
+		sfclose(sp);
+	xp = translation(xp, mc->translation);
+
+	/*
+	 * read the message file
+	 */
+
+	q = 0;
+	set = 1;
+	error_info.file = msgfile;
+	while (s = sfgetr(mp, '\n', 1))
+	{
+		error_info.line++;
+		if (!*s)
+			continue;
+		if (*s == '$')
+		{
+			/* $ followed by nothing or by space is a comment */
+			if (!*++s || isspace(*s))
+				continue;
+			for (t = s; *s && !isspace(*s); s++);
+			if (*s)
+				*s++ = 0;
+			if (streq(t, "delset"))
+			{
+				while (isspace(*s))
+					s++;
+				num = (int)strtol(s, NiL, 0);
+
+				/*
+				 * sets are indexed 1..mc->num inclusive
+				 * anything else must not index mc->set[]
+				 */
+
+				if (num > 0 && num <= mc->num && mc->set[num].num)
+					for (i = 1; i <= mc->set[num].num; i++)
+						mcput(mc, num, i, NiL);
+			}
+			else if (streq(t, "quote"))
+				q = *s ? *s : 0;
+			else if (streq(t, "set"))
+			{
+				while (isspace(*s))
+					s++;
+				num = (int)strtol(s, &e, 0);
+				if (e != s)
+					set = num;
+				else
+					error(2, "set number expected");
+			}
+			else if (streq(t, "translation"))
+				xp = translation(xp, s);
+		}
+		else
+		{
+			/* t is one past the 0 that replaced the newline */
+			t = s + sfvalue(mp);
+			num = (int)strtol(s, &e, 0);
+			if (e != s)
+			{
+				s = e;
+				if (!*s)
+				{
+					/* number only: delete the message */
+					if (mcput(mc, set, num, NiL))
+						error(2, "(%d,%d): cannot delete message", set, num);
+				}
+				else if (isspace(*s++))
+				{
+					if (t > (s + 1) && *(t -= 2) == '\\')
+					{
+						/*
+						 * trailing \ continues the message on
+						 * the next line(s): collect the pieces,
+						 * minus the \ and newline, in tp
+						 */
+
+						sfwrite(tp, s, t - s);
+						while (s = sfgetr(mp, '\n', 0))
+						{
+							error_info.line++;
+							t = s + sfvalue(mp);
+							if (t <= (s + 1))
+								break;
+							if (*(t -= 2) != '\\')
+							{
+								/*
+								 * last line of the message:
+								 * it is part of the text too
+								 */
+
+								sfwrite(tp, s, t - s + 1);
+								break;
+							}
+							sfwrite(tp, s, t - s);
+						}
+						if (!(s = sfstruse(tp)))
+							error(ERROR_SYSTEM|3, "out of space");
+					}
+					if (q)
+					{
+						if (*s++ != q)
+						{
+							error(2, "(%d,%d): %c quote expected", set, num, q);
+							continue;
+						}
+
+						/*
+						 * unquote and expand escapes in place
+						 */
+
+						b = t = s;
+						while (c = *s++)
+						{
+							if (c == '\\')
+							{
+								c = chresc(s - 1, &e);
+								s = e;
+								if (c)
+									*t++ = c;
+								else
+									error(1, "nul character ignored");
+							}
+							else if (c == q)
+								break;
+							else
+								*t++ = c;
+						}
+						if (!c)
+						{
+							/*
+							 * the text ended before the closing
+							 * quote: s is now past the terminating
+							 * 0 and must not be dereferenced
+							 */
+
+							error(2, "(%d,%d): %c quote expected", set, num, q);
+							continue;
+						}
+						if (*s)
+						{
+							error(2, "(%d,%d): characters after quote not expected", set, num);
+							continue;
+						}
+						*t = 0;
+						s = b;
+					}
+					if (mcput(mc, set, num, s))
+						error(2, "(%d,%d): cannot add message", set, num);
+				}
+				else
+					error(2, "message text expected");
+			}
+			else
+				error(2, "message number expected");
+		}
+	}
+	error_info.file = 0;
+	error_info.line = 0;
+
+	/*
+	 * fix up the translation record
+	 * entries are emitted newest date first; an emitted entry is
+	 * marked by clearing its date
+	 */
+
+	if (xp)
+	{
+		t = "";
+		for (;;)
+		{
+			for (bp = 0, px = xp; px; px = px->next)
+				if (px->date && (!bp || strcoll(bp->date, px->date) < 0))
+					bp = px;
+			if (!bp)
+				break;
+			sfprintf(tp, "%s%s %s", t, bp->name, bp->date);
+			t = ", ";
+			bp->date = 0;
+		}
+		if (!(mc->translation = sfstruse(tp)))
+			error(ERROR_SYSTEM|3, "out of space");
+	}
+
+	/*
+	 * dump the catalog to a local temporary
+	 * rename if no errors
+	 */
+
+	if (!(s = pathtemp(NiL, 0, "", error_info.id, NiL)) || !(sp = sfopen(NiL, s, "w")))
+		error(ERROR_SYSTEM|3, "%s: cannot write catalog file", catfile);
+	if (mcdump(mc, sp) || mcclose(mc) || sfclose(sp))
+	{
+		remove(s);
+		error(ERROR_SYSTEM|3, "%s: temporary catalog file write error", s);
+	}
+	remove(catfile);
+	if (rename(s, catfile))
+		error(ERROR_SYSTEM|3, "%s: cannot rename from temporary catalog file %s", catfile, s);
+	return error_info.errors != 0;
+}
diff --git a/usr/src/cmd/ast/msgcc/msgget.c b/usr/src/cmd/ast/msgcc/msgget.c
new file mode 100644
index 0000000000..bd06c6d343
--- /dev/null
+++ b/usr/src/cmd/ast/msgcc/msgget.c
@@ -0,0 +1,109 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 2000-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ */
+
+/*
+ * option and operand description handed to optget() by main()
+ * no options are declared; the operands are
+ * "locale [command:]catalog [set.]number [ text ]"
+ * the text is emitted at run time and must not be reworded casually
+ */
+
+static const char usage[] =
+"[-?\n@(#)$Id: msgget (AT&T Research) 2001-04-21 $\n]"
+USAGE_LICENSE
+"[+NAME?msgget - get a message from a message catalog]"
+"[+DESCRIPTION?\bmsgget\b gets the message corresponding to the parameters."
+" If \alocale\a is \b-\b then the current locale is used. \acommand\a"
+" may be specified for command specific messages. \acatalog\a specifies"
+" the message catalog name. [\aset\a.]]\anumber\a identifies the message"
+" by message \anumber\a and an optional message \aset\a; if specified as"
+" \b-\b then the message set and number are determined by looking up"
+" \atext\a in the corresponding \bC\b locale message catalog.]"
+
+"\n"
+"\nlocale [command:]catalog [set.]number [ text ]\n"
+"\n"
+
+"[+SEE ALSO?\biconv\b(1), \bmsgcc\b(1), \bmsggen\b(1)]"
+;
+
+#include <ast.h>
+#include <error.h>
+#include <mc.h>
+
+/*
+ * msgget entry point
+ *
+ * operands: locale [command:]catalog [set.]number [ text ]
+ *
+ * the catalog is located with mcfind(), first by the name before
+ * the `:' and then, if that fails, by the name after it
+ * if the number operand yields a non-zero set the message is fetched
+ * with mcget(), otherwise it is looked up with errorx()
+ * the result is written as one line on the standard output
+ *
+ * the exit status is non-zero if any errors were counted
+ */
+
+int
+main(int argc, char** argv)
+{
+	register Mc_t*	mc;
+	register char*	id;
+	char*		locale;
+	char*		command;
+	char*		catalog;
+	char*		text;
+	int		set;
+	int		num;
+	int		done;
+	Sfio_t*		sp;
+	char		path[PATH_MAX];
+
+	NoP(argc);
+	error_info.id = "msgget";
+	for (done = 0; !done;)
+		switch (optget(argv, usage))
+		{
+		case '?':
+			error(ERROR_USAGE|4, "%s", opt_info.arg);
+			break;
+		case ':':
+			error(2, "%s", opt_info.arg);
+			break;
+		default:
+			/* anything else ends option processing */
+			done = 1;
+			break;
+		}
+	argv += opt_info.index;
+	if (error_info.errors || !(locale = *argv++) || !(command = *argv++) || !(id = *argv++))
+		error(ERROR_USAGE|4, "%s", optusage(NiL));
+
+	/*
+	 * "-" defers the set and number to the text lookup below
+	 */
+
+	if (!streq(id, "-"))
+		mcindex(id, NiL, &set, &num);
+	else
+		set = num = 0;
+	text = *argv++;
+	if (!text)
+		text = "";
+	else if (*argv)
+		error(ERROR_USAGE|4, "%s", optusage(NiL));
+	if (streq(locale, "-"))
+		locale = 0;
+
+	/*
+	 * split command:catalog in place
+	 */
+
+	catalog = strchr(command, ':');
+	if (catalog)
+		*catalog++ = 0;
+	if (!mcfind(path, locale, command, LC_MESSAGES, 0) && (!catalog || !mcfind(path, locale, catalog, LC_MESSAGES, 0)))
+	{
+		/* restore the operand as typed for the diagnostic */
+		if (catalog)
+			*--catalog = ':';
+		error(3, "%s: cannot locate message catalog", command);
+	}
+	if (!(sp = sfopen(NiL, path, "r")))
+		error(ERROR_SYSTEM|3, "%s: cannot read message catalog", path);
+	if (!(mc = mcopen(sp)))
+		error(3, "%s: invalid message catalog", path);
+	if (!set)
+		id = errorx(locale, command, catalog, text);
+	else
+		id = mcget(mc, set, num, text);
+	sfputr(sfstdout, id, '\n');
+	return error_info.errors != 0;
+}