diff options
Diffstat (limited to 'usr/src/cmd/ast/msgcc/msgcvt.c')
-rw-r--r-- | usr/src/cmd/ast/msgcc/msgcvt.c | 691 |
1 files changed, 691 insertions, 0 deletions
diff --git a/usr/src/cmd/ast/msgcc/msgcvt.c b/usr/src/cmd/ast/msgcc/msgcvt.c new file mode 100644 index 0000000000..89dd3e848d --- /dev/null +++ b/usr/src/cmd/ast/msgcc/msgcvt.c @@ -0,0 +1,691 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 2000-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * Glenn Fowler + * AT&T Research + */ + +static const char usage[] = +"[-?\n@(#)$Id: msgcvt (AT&T Research) 2000-05-01 $\n]" +USAGE_LICENSE +"[+NAME?msgcvt - convert message file to/from html]" +"[+DESCRIPTION?\bmsgcvt\b reads a \bgencat\b(1) format file on the standard" +" input and converts it to \bhtml\b on the standard output. The input" +" file must contain the control statement \b$quote \"\b and use the \"" +" character to quote message text. The output is in a form suitable for" +" automatic translation by web sites like" +" \bhttp://babelfish.altavista.com/\b or filters like" +" \btranslate\b(1).]" +"[h:html?Generate \bhtml\b from \bgencat\b(1) input. This is the default.]" +"[m:msg?Generate a \bgencat\b(1) message file from (presumably translated)" +" \bhtml\b. Wide characters are UTF-8 encoded.]" +"[r:raw?The message file is raw message text, one message per line, with no" +" quoting or line numbering.]" +"[+SEE ALSO?\bgencat\b(1), \bmsgcc\b(1), \bmsggen\b(1), \btranslate\b(1)]" +; + +#include <ast.h> +#include <ctype.h> +#include <error.h> + +#define MSG_RAW (1<<0) +#define MSG_SPLICE (1<<1) + +#define SPACE(s) (isspace(*s)&&(s+=1)||*s=='\\'&&(*(s+1)=='n'||*(s+1)=='t')&&(s+=2)) + +typedef void (*Convert_f)(Sfio_t*, Sfio_t*, int); + +typedef struct +{ + const char* name; + int code; +} Code_t; + +static const Code_t codes[] = +{ + "aacute", 225, + "Aacute", 193, + "acirc", 226, + "Acirc", 194, + "aelig", 230, + "AElig", 198, + "agrave", 224, + "Agrave", 192, + "amp", '&', + "aring", 229, + "Aring", 197, + "atilde", 227, + "Atilde", 195, + "auml", 228, + "Auml", 196, + "ccedil", 231, + "Ccedil", 199, + "copy", 169, + "eacute", 233, + "Eacute", 201, + "ecirc", 234, + "Ecirc", 202, + "egrave", 232, + "Egrave", 200, + "euml", 235, + "Euml", 203, + "gt", '>', + "iacute", 237, + "Iacute", 205, + "icirc", 238, + "Icirc", 206, + "igrave", 236, + "Igrave", 204, + "iuml", 239, + "Iuml", 207, + "lt", '<', + "nbsp", ' ', + "ntilde", 241, + "Ntilde", 209, + "oacute", 243, + "Oacute", 211, + "ocirc", 244, + "Ocirc", 212, + "ograve", 242, + "Ograve", 210, + "oslash", 248, + "Oslash", 216, + "otilde", 245, + "Otilde", 213, + "ouml", 246, + "Ouml", 214, + "quot", '"', + "reg", 174, + "szlig", 223, + "uacute", 250, + "Uacute", 218, + "ucirc", 251, + "Ucirc", 219, + "ugrave", 249, + "Ugrave", 217, + "uuml", 252, + "Uuml", 220, + "yuml", 255, +}; + +static int +decode(Sfio_t* ip) +{ + register int c; + register int i; + char name[32]; + + if ((c = sfgetc(ip)) == EOF) + return '&'; + name[0] = c; + i = 1; + if (c != '#' && !isalpha(c)) + goto bad; + while ((c = sfgetc(ip)) != EOF && c != ';') + { + if (c == '&') + i = 0; + else + { + name[i++] = c; + if (!isalnum(c) && (i > 1 || c != '#') || i >= (elementsof(name) - 1)) + goto bad; + } + } + name[i] = 0; + if (name[0] == '#') + { + switch (c = strtol(name + 1, NiL, 10)) + { + case 91: + c = '['; + break; + case 93: + c = ']'; + break; + } + } + else + { + for (i = 0; i < elementsof(codes); i++) + if (streq(codes[i].name, name)) + { + c = codes[i].code; + break; + } + if (i >= elementsof(codes)) + goto bad; + } + return c; + bad: + name[i] = 0; + if (c == ';') + error(1, "&%s: unknown HTML special character -- & assumed", name); + else + error(1, "&%s: invalid HTML special character -- & assumed", name); + while (i--) + sfungetc(ip, name[i]); + return '&'; +} + +static int +sfpututf(Sfio_t* op, register int w) +{ + if (!(w & ~0x7F)) + return sfputc(op, w); + else if (!(w & ~0x7FF)) + sfputc(op, 0xC0 + (w >> 6)); + else if (!(w & ~0xFFFF)) + { + sfputc(op, 0xE0 + (w >> 12)); + sfputc(op, 0x80 + (w >> 6 ) & 0x3F); + } + else + return sfputc(op, '?'); + return sfputc(op, 0x80 + (w & 0x3F)); +} + +static int +sfnext(Sfio_t* ip) +{ + register int c; + + while (isspace(c = sfgetc(ip))); + return c; +} + +static void +html2msg(register Sfio_t* ip, register Sfio_t* op, int flags) +{ + register int c; + register int q; + + again: + while ((c = sfgetc(ip)) != EOF) + if (c == '<') + { + if ((c = sfnext(ip)) == 'O' && + (c = sfnext(ip)) == 'L' && + isspace(c = sfgetc(ip)) && + (c = sfnext(ip)) == 'S' && + (c = sfnext(ip)) == 'T' && + (c = sfnext(ip)) == 'A' && + (c = sfnext(ip)) == 'R' && + (c = sfnext(ip)) == 'T' && + (c = sfnext(ip)) == '=' && + (c = sfnext(ip)) == '"' && + (c = sfnext(ip)) == '5' && + (c = sfnext(ip)) == '5' && + (c = sfnext(ip)) == '0' && + (c = sfnext(ip)) == '7' && + (c = sfnext(ip)) == '1' && + (c = sfnext(ip)) == '7' && + (c = sfnext(ip)) == '"' && + (c = sfnext(ip)) == '>') + break; + while (c != EOF && c != '>') + c = sfgetc(ip); + } + if ((c = sfnext(ip)) != EOF) + sfungetc(ip, c); + q = 0; + for (;;) + { + switch (c = sfgetc(ip)) + { + case EOF: + break; + case '&': + c = decode(ip); + sfpututf(op, c); + if (isspace(c)) + { + while (isspace(c = sfgetc(ip))); + if (c == EOF) + break; + sfungetc(ip, c); + } + continue; + case '<': + switch (c = sfnext(ip)) + { + case '/': + if ((c = sfnext(ip)) == 'O' && + (c = sfgetc(ip)) == 'L' && + (c = sfnext(ip)) == '>') + { + if (q) + { + sfputc(op, q); + q = '"'; + } + goto again; + } + break; + case 'B': + if ((c = sfgetc(ip)) == 'R' && + (c = sfnext(ip)) == '>') + sfputc(op, ' '); + break; + case 'L': + if ((c = sfgetc(ip)) == 'I' && + (c = sfnext(ip)) == '>' && + isdigit(c = sfnext(ip))) + { + if (q) + sfputc(op, q); + else + q = '"'; + sfputc(op, '\n'); + do + { + sfputc(op, c); + } while (isdigit(c = sfgetc(ip))); + if (c == EOF) + break; + sfputc(op, ' '); + sfputc(op, '"'); + if (isspace(c)) + c = sfnext(ip); + if (c == '<' && + (c = sfnext(ip)) == 'L' && + (c = sfgetc(ip)) == 'I' && + (c = sfnext(ip)) == '>') + /* great */; + continue; + } + break; + case 'P': + if ((c = sfnext(ip)) == '>') + sfputc(op, '\n'); + else if (c == 'C' && + (c = sfgetc(ip)) == 'L' && + (c = sfgetc(ip)) == 'A' && + (c = sfgetc(ip)) == 'S' && + (c = sfgetc(ip)) == 'S' && + (c = sfnext(ip)) == '=' && + (c = sfnext(ip)) == '"') + for (;;) + { + switch (c = sfgetc(ip)) + { + case EOF: + case '"': + break; + case '&': + c = decode(ip); + sfpututf(op, c); + continue; + default: + sfpututf(op, c); + continue; + } + break; + } + break; + } + while (c != EOF && c != '>') + c = sfgetc(ip); + if (c == EOF || (c = sfgetc(ip)) == EOF) + break; + sfungetc(ip, c); + continue; + case '"': + if (!flags) + sfputc(op, '\\'); + sfputc(op, c); + continue; + case '\n': + if (flags) + { + sfputc(op, c); + continue; + } + /*FALLTHROUGH*/ + case ' ': + case '\t': + while ((c = sfgetc(ip)) != EOF) + if (c == '&') + { + c = decode(ip); + if (!isspace(c)) + sfputc(op, ' '); + sfpututf(op, c); + break; + } + else if (!isspace(c)) + { + if (c == '<') + { + c = sfgetc(ip); + if (c == EOF) + break; + sfungetc(ip, c); + sfungetc(ip, '<'); + if (c != 'L' && c != '/') + sfputc(op, ' '); + } + else + { + if (c != EOF) + sfungetc(ip, c); + sfputc(op, ' '); + } + break; + } + continue; + case '\r': + case '[': + case ']': + continue; + default: + sfpututf(op, c); + continue; + } + break; + } + if (q) + sfputc(op, q); + sfputc(op, '\n'); +} + +static void +encode(Sfio_t* op, register int c) +{ + if (c == '<') + sfprintf(op, "<"); + else if (c == '>') + sfprintf(op, ">"); + else if (c == '"') + sfprintf(op, """); + else if (c == '&') + sfprintf(op, "&"); + else if (c == '[') + sfprintf(op, "["); + else if (c == ']') + sfprintf(op, "]"); + else + sfputc(op, c); +} + +static void +msg2html(register Sfio_t* ip, register Sfio_t* op, register int flags) +{ + register char* s; + register int c; + register int q; + register int p; + + sfprintf(op, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"><HTML><HEAD><!-- text massaged for external translation --></HEAD><BODY>\n"); + sfprintf(op, "<OL START=\"550717\">\n"); + p = q = 0; + while (s = sfgetr(ip, '\n', 1)) + { + error_info.line++; + if (flags) + sfprintf(op, "<P>"); + else + { + if (*s == '$') + { + if (p) + sfprintf(op, "<P>"); + else + p = 1; + sfprintf(op, "<P CLASS=\"", s); + while (c = *s++) + encode(op, c); + sfprintf(op, "\">\n"); + continue; + } + p = 0; + if (!isdigit(*s)) + continue; + sfprintf(op, "<LI>"); + while (isdigit(c = *s++)) + sfputc(op, c); + sfprintf(op, "<LI>"); + while (c && c != '"') + c = *s++; + if (!c) + s--; + else if (isspace(*s)) + { + s++; + sfprintf(op, "<BR>"); + } + } + for (;;) + { + switch (c = *s++) + { + case 0: + flags &= ~MSG_SPLICE; + if (q) + { + q = 0; + sfprintf(op, "\">"); + } + sfputc(op, '\n'); + break; + case '<': + sfprintf(op, "<"); + continue; + case '>': + sfprintf(op, ">"); + continue; + case '&': + sfprintf(op, "&"); + continue; + case '[': + sfprintf(op, "["); + continue; + case ']': + sfprintf(op, "]"); + continue; + case '$': + if (!q) + { + q = 1; + sfprintf(op, "<P CLASS=\""); + } + sfputc(op, c); + while (isalnum(c = *s++)) + sfputc(op, c); + s--; + continue; + case '%': + if (!q) + { + q = 1; + sfprintf(op, "<P CLASS=\""); + } + sfputc(op, c); + if (*s == '%') + sfputc(op, *s++); + else + do + { + if (!(c = *s++) || c == '"') + { + s--; + break; + } + encode(op, c); + } while (!isalpha(c) || (!islower(c) || c == 'h' || c == 'l') && isalpha(*s)); + if (SPACE(s)) + sfprintf(op, " "); + continue; + case '"': + if (!(flags & MSG_RAW)) + { + s = ""; + continue; + } + /*FALLTHROUGH*/ + case '\'': + case ':': + case '/': + case '+': + case '@': + if (!q) + { + q = 1; + sfprintf(op, "<P CLASS=\""); + } + /*FALLTHROUGH*/ + case '.': + case ',': + sfputc(op, c); + if (SPACE(s)) + sfprintf(op, " "); + continue; + case '\\': + if (!(c = *s++)) + { + flags |= MSG_SPLICE; + break; + } + if (c != 'n' && c != 't') + { + if (!q) + { + q = 1; + sfprintf(op, "<P CLASS=\""); + } + sfputc(op, '\\'); + encode(op, c); + if (c == 'b') + { + for (;;) + { + if (!(c = *s++) || c == '"') + { + s--; + break; + } + if (c == '?') + { + if (*s != '?') + { + s--; + break; + } + sfputc(op, c); + sfputc(op, *s++); + continue; + } + if (c == '\\') + { + if (!*s) + break; + sfputc(op, c); + if (*s == 'a' || *s == 'b' || *s == '0') + { + sfputc(op, *s++); + break; + } + c = *s++; + } + encode(op, c); + } + } + else if (isdigit(c) && isdigit(*s)) + { + sfputc(op, *s++); + if (isdigit(*s)) + sfputc(op, *s++); + } + if (SPACE(s)) + sfprintf(op, " "); + continue; + } + /*FALLTHROUGH*/ + case ' ': + case '\t': + while (isspace(*s) || *s == '\\' && (*(s + 1) == 'n' || *(s + 1) == 't') && s++) + s++; + if (*s == '"') + { + if (q) + { + q = 0; + sfprintf(op, " \">"); + } + else + sfprintf(op, "<BR>"); + continue; + } + c = ' '; + /*FALLTHROUGH*/ + default: + if (q) + { + q = 0; + sfprintf(op, "\">"); + } + sfputc(op, c); + continue; + } + break; + } + } + sfprintf(op, "</OL>\n"); + sfprintf(op, "</BODY></HTML>\n"); + error_info.line = 0; +} + +int +main(int argc, char** argv) +{ + int flags = 0; + Convert_f convert = msg2html; + + NoP(argc); + error_info.id = "msgcvt"; + for (;;) + { + switch (optget(argv, usage)) + { + case 'h': + convert = msg2html; + continue; + case 'm': + convert = html2msg; + continue; + case 'r': + flags |= MSG_RAW; + continue; + case '?': + error(ERROR_USAGE|4, "%s", opt_info.arg); + continue; + case ':': + error(2, "%s", opt_info.arg); + continue; + } + break; + } + argv += opt_info.index; + if (error_info.errors) + error(ERROR_USAGE|4, "%s", optusage(NiL)); + (*convert)(sfstdin, sfstdout, flags); + return error_info.errors != 0; +} |