summary refs log tree commit diff
path: root/encoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'encoding.c')
-rw-r--r-- encoding.c 1502
1 files changed, 1502 insertions, 0 deletions
diff --git a/encoding.c b/encoding.c
new file mode 100644
index 0000000..bf99dca
--- /dev/null
+++ b/encoding.c
@@ -0,0 +1,1502 @@
+/* Copyright (c) 1993-2002
+ * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de)
+ * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de)
+ * Copyright (c) 1987 Oliver Laumann
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (see the file COPYING); if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ ****************************************************************
+ */
+
+#include "rcs.h"
+RCS_ID("$Id: utf8.c,v 1.5 2001/04/25 12:32:47 mlschroe Exp $ FAU")
+
+#include <sys/types.h>
+
+#include "config.h"
+#include "screen.h"
+#include "extern.h"
+
+#ifdef ENCODINGS
+
+extern unsigned char *null;
+extern struct display *display, *displays;
+extern struct layer *flayer;
+
+extern char *screenencodings;
+
+static int encmatch __P((char *, char *));
+
+struct encoding { /* describes one encoding; its index in encodings[] is the encoding number */
+ char *name; /* name compared against the user's string in FindEncoding() */
+ char *charsets; /* if non-null, handed to SetCharsets() by ResetEncoding() */
+ int deffont; /* font applied to cells stored with font 0 (see recode_mchar); -1 in the UTF-8 entry */
+ int usegr; /* non-zero: ResetEncoding() sets w_gr */
+ int noc1; /* non-zero: ResetEncoding() clears w_c1 */
+ char *fontlist; /* if non-null, NUL-terminated font ids tried in order by recode_char_to_encoding() */
+};
+
+/* big5 font: ^X */
+/* KOI8-R font: 96 ! */
+/* CP1251 font: 96 ? */
+
+struct encoding encodings[] = { /* columns: name, charsets, deffont, usegr, noc1, fontlist */
+ { "C", 0, 0, 0, 0, 0 }, /* index 0: also what FindEncoding() returns for "off" / "iso8859-1" */
+ { "eucJP", "B\002I\004\00204",0, 1, 0, "\002\004I" },
+ { "SJIS", "BIBBI04", 0, 1, 1, "\002I" },
+ { "eucKR", "B\003BB\00304",0, 1, 0, "\003" },
+ { "eucCN", "B\001BB\00104",0, 1, 0, "\001" },
+ { "Big5", "B\030BB\03004",0, 1, 0, "\030" },
+ { "KOI8-R", 0, 0x80|'!', 0, 1, 0 },
+ { "CP1251", 0, 0x80|'?', 0, 1, 0 },
+ { "UTF-8", 0, -1, 0, 0, 0 }, /* deffont -1: never loaded, LoadFontTranslationsForEncoding() wants > 0 */
+ { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 },
+ { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 },
+ { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 },
+ { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 },
+ { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 },
+ { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 },
+ { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 },
+ { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 },
+ { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 },
+ { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 },
+ { "jis", 0, 0, 0, 0, "\002\004I" },
+}; /* NOTE(review): row order presumably has to match the UTF8/SJIS/EUC_xx/BIG5 constants defined elsewhere - confirm */
+
+#ifdef UTF8
+/*
+ * Built-in font translation tables, stored back to back.
+ *
+ * Each table starts with a header row { font id, 0 }, is followed by
+ * rows of { character code in that font, Unicode value } and ends
+ * with a { 0, 0 } row.  A header row with font id 0 ends the whole
+ * list (see InitBuiltinTabs).
+ *
+ * A row whose first element has bit 0x8000 set closes a range that
+ * starts at the row before it: recode_char() maps every code between
+ * the two rows linearly.
+ */
+static unsigned short builtin_tabs[][2] = {
+ { 0x30, 0 }, /* 0: special graphics (line drawing) */
+ { 0x005f, 0x25AE },
+ { 0x0060, 0x25C6 },
+ { 0x0061, 0x2592 },
+ { 0x0062, 0x2409 },
+ { 0x0063, 0x240C },
+ { 0x0064, 0x240D },
+ { 0x0065, 0x240A },
+ { 0x0066, 0x00B0 },
+ { 0x0067, 0x00B1 },
+ { 0x0068, 0x2424 },
+ { 0x0069, 0x240B },
+ { 0x006a, 0x2518 },
+ { 0x006b, 0x2510 },
+ { 0x006c, 0x250C },
+ { 0x006d, 0x2514 },
+ { 0x006e, 0x253C },
+ { 0x006f, 0x23BA },
+ { 0x0070, 0x23BB },
+ { 0x0071, 0x2500 },
+ { 0x0072, 0x23BC },
+ { 0x0073, 0x23BD },
+ { 0x0074, 0x251C },
+ { 0x0075, 0x2524 },
+ { 0x0076, 0x2534 },
+ { 0x0077, 0x252C },
+ { 0x0078, 0x2502 },
+ { 0x0079, 0x2264 },
+ { 0x007a, 0x2265 },
+ { 0x007b, 0x03C0 },
+ { 0x007c, 0x2260 },
+ { 0x007d, 0x00A3 },
+ { 0x007e, 0x00B7 },
+ { 0, 0},
+
+ { 0x34, 0 }, /* 4: Dutch */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00be },
+ { 0x005b, 0x00ff },
+ { 0x005c, 0x00bd },
+ { 0x005d, 0x007c },
+ { 0x007b, 0x00a8 },
+ { 0x007c, 0x0066 },
+ { 0x007d, 0x00bc },
+ { 0x007e, 0x00b4 },
+ { 0, 0},
+
+ { 0x35, 0 }, /* 5: Finnish */
+ { 0x005b, 0x00c4 },
+ { 0x005c, 0x00d6 },
+ { 0x005d, 0x00c5 },
+ { 0x005e, 0x00dc },
+ { 0x0060, 0x00e9 },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00e5 },
+ { 0x007e, 0x00fc },
+ { 0, 0},
+
+ { 0x36, 0 }, /* 6: Norwegian/Danish */
+ { 0x0040, 0x00c4 },
+ { 0x005b, 0x00c6 },
+ { 0x005c, 0x00d8 },
+ { 0x005d, 0x00c5 },
+ { 0x005e, 0x00dc },
+ { 0x0060, 0x00e4 },
+ { 0x007b, 0x00e6 },
+ { 0x007c, 0x00f8 },
+ { 0x007d, 0x00e5 },
+ { 0x007e, 0x00fc },
+ { 0, 0},
+
+ { 0x37, 0 }, /* 7: Swedish */
+ { 0x0040, 0x00c9 },
+ { 0x005b, 0x00c4 },
+ { 0x005c, 0x00d6 },
+ { 0x005d, 0x00c5 },
+ { 0x005e, 0x00dc },
+ { 0x0060, 0x00e9 },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00e5 },
+ { 0x007e, 0x00fc },
+ { 0, 0},
+
+ { 0x3d, 0}, /* =: Swiss */
+ { 0x0023, 0x00f9 },
+ { 0x0040, 0x00e0 },
+ { 0x005b, 0x00e9 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00ea },
+ { 0x005e, 0x00ee },
+ { 0x005f, 0x00e8 },
+ { 0x0060, 0x00f4 },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00fc },
+ { 0x007e, 0x00fb },
+ { 0, 0},
+
+ { 0x41, 0}, /* A: UK */
+ { 0x0023, 0x00a3 },
+ { 0, 0},
+
+ { 0x4b, 0}, /* K: German */
+ { 0x0040, 0x00a7 },
+ { 0x005b, 0x00c4 },
+ { 0x005c, 0x00d6 },
+ { 0x005d, 0x00dc },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00fc },
+ { 0x007e, 0x00df },
+ { 0, 0},
+
+ { 0x51, 0}, /* Q: French Canadian */
+ { 0x0040, 0x00e0 },
+ { 0x005b, 0x00e2 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00ea },
+ { 0x005e, 0x00ee },
+ { 0x0060, 0x00f4 },
+ { 0x007b, 0x00e9 },
+ { 0x007c, 0x00f9 },
+ { 0x007d, 0x00e8 },
+ { 0x007e, 0x00fb },
+ { 0, 0},
+
+ { 0x52, 0}, /* R: French */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00e0 },
+ { 0x005b, 0x00b0 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00a7 },
+ { 0x007b, 0x00e9 },
+ { 0x007c, 0x00f9 },
+ { 0x007d, 0x00e8 },
+ { 0x007e, 0x00a8 },
+ { 0, 0},
+
+ { 0x59, 0}, /* Y: Italian */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00a7 },
+ { 0x005b, 0x00b0 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00e9 },
+ { 0x0060, 0x00f9 },
+ { 0x007b, 0x00e0 },
+ { 0x007c, 0x00f2 },
+ { 0x007d, 0x00e8 },
+ { 0x007e, 0x00ec },
+ { 0, 0},
+
+ { 0x5a, 0}, /* Z: Spanish */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00a7 },
+ { 0x005b, 0x00a1 },
+ { 0x005c, 0x00d1 },
+ { 0x005d, 0x00bf },
+ { 0x007b, 0x00b0 },
+ { 0x007c, 0x00f1 },
+ { 0x007d, 0x00e7 },
+ { 0, 0},
+
+ { 0xe2, 0}, /* 96-b: ISO-8859-15*/
+ { 0x00a4, 0x20ac },
+ { 0x00a6, 0x0160 },
+ { 0x00a8, 0x0161 },
+ { 0x00b4, 0x017D },
+ { 0x00b8, 0x017E },
+ { 0x00bc, 0x0152 },
+ { 0x00bd, 0x0153 },
+ { 0x00be, 0x0178 },
+ { 0, 0},
+
+ { 0x4a, 0}, /* J: JIS 0201 Roman */
+ { 0x005c, 0x00a5 },
+ { 0x007e, 0x203e },
+ { 0, 0},
+
+ { 0x49, 0}, /* I: halfwidth katakana */
+ { 0x0021, 0xff61 },
+ { 0x005f|0x8000, 0xff9f },
+ { 0, 0},
+
+ { 0, 0}
+};
+/*
+ * Registry of translation tables, one slot per font id (0..255).
+ * tab points at rows of { code in font, Unicode value } ending with
+ * a { 0, 0 } row, or is null if no table is known for the font.
+ */
+struct recodetab
+{
+ unsigned short (*tab)[2];
+ int flags;
+};
+
+#define RECODETAB_ALLOCED 1 /* tab was malloc()ed by LoadFontTranslation() */
+#define RECODETAB_BUILTIN 2 /* tab points into builtin_tabs[] */
+#define RECODETAB_TRIED 4 /* a default-path load was attempted and left no table */
+
+static struct recodetab recodetabs[256];
+
+/*
+ * Enter every table of builtin_tabs[] into recodetabs[].
+ * The header row of a table supplies the font id; the table itself
+ * starts on the row after the header.
+ */
+void
+InitBuiltinTabs()
+{
+  unsigned short (*row)[2];
+  int font;
+
+  row = builtin_tabs;
+  while ((font = (*row)[0]) != 0)
+    {
+      recodetabs[font].tab = row + 1;
+      recodetabs[font].flags = RECODETAB_BUILTIN;
+      /* walk over the data rows up to the { 0, 0 } terminator */
+      for (row++; (*row)[0]; row++)
+        ;
+      /* step past the terminator onto the next header row */
+      row++;
+    }
+}
+
+/*
+ * Look up Unicode value c in one translation table.
+ * Returns the matching character code of the table's font, or -1.
+ * A row flagged with 0x8000 closes a range opened by the row before it.
+ */
+static int
+recode_tab_from_ucs(p, c)
+unsigned short (*p)[2];
+int c;
+{
+  for (; (*p)[1]; p++)
+    {
+      if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
+        return c - p[-1][1] + p[-1][0];
+      if ((*p)[1] == c)
+        return (*p)[0];
+    }
+  return -1;
+}
+
+/*
+ * Recode one single-width character between a font and Unicode.
+ *
+ * c       to_utf != 0: character code in bits 0-7, font id in bits 8-15
+ *         to_utf == 0: Unicode value
+ * to_utf  non-zero: font -> Unicode, zero: Unicode -> font
+ * font    only used when to_utf == 0: the font to search, or -1 to
+ *         search every font >= 32 that already has a table
+ *
+ * Values below 256 are returned unchanged in both directions.
+ *
+ * Returns
+ *   to_utf:      the Unicode value; the bare character code if the
+ *                font has no mapping for it (treated as latin1)
+ *   font == -1:  code | font << 8, or '?' if no table knows c
+ *   otherwise:   code | font << 8, or -1 if the font cannot show c
+ *
+ * The font is part of the result because all callers take it from
+ * bits 8-15 of the returned value.
+ */
+int
+recode_char(c, to_utf, font)
+int c, to_utf, font;
+{
+  int f;
+  unsigned short (*p)[2];
+
+  if (c < 256)
+    return c;
+  if (to_utf)
+    {
+      f = (c >> 8) & 0xff;
+      c &= 0xff;
+      /*
+       * map aliases to keep the table small.  This has to look at
+       * the font: c was just cut down to 8 bits, so c >> 8 is 0.
+       */
+      switch (f)
+        {
+        case 'C':
+          f = '5';
+          break;
+        case 'E':
+          f = '6';
+          break;
+        case 'H':
+          f = '7';
+          break;
+        default:
+          break;
+        }
+      p = recodetabs[f].tab;
+      if (p == 0 && recodetabs[f].flags == 0)
+        {
+          /* nothing known yet and never tried: load from disk */
+          LoadFontTranslation(f, 0);
+          p = recodetabs[f].tab;
+        }
+      if (p)
+        for (; (*p)[0]; p++)
+          {
+            if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0])
+              return c - p[-1][0] + p[-1][1];
+            if ((*p)[0] == c)
+              return (*p)[1];
+          }
+      return c & 0xff;	/* map to latin1 */
+    }
+  if (font == -1)
+    {
+      /* any font will do: take the first table that contains c */
+      for (font = 32; font < 256; font++)
+        {
+          p = recodetabs[font].tab;
+          if (p && (f = recode_tab_from_ucs(p, c)) != -1)
+            return f | (font << 8);
+        }
+      return '?';
+    }
+  if (font >= 32)
+    {
+      p = recodetabs[font].tab;
+      if (p == 0 && recodetabs[font].flags == 0)
+        {
+          LoadFontTranslation(font, 0);
+          p = recodetabs[font].tab;
+        }
+      if (p && (f = recode_tab_from_ucs(p, c)) != -1)
+        return f | (font << 8);
+    }
+  return -1;
+}
+
+
+#ifdef DW_CHARS
+/*
+ * Double-width counterpart of recode_char().
+ *
+ * to_utf != 0: c holds the first byte (bits 0-7) and the font
+ * (bits 8-15), *c2p the second byte.  Returns the Unicode value, or
+ * UCS_REPL_DW if there is no mapping.  *c2p is set to 0xffff, or to
+ * ' ' if the result is not a double-width character.
+ *
+ * to_utf == 0: c is a Unicode value.  font selects the table, -1
+ * searches all fonts below 32 that already have a table.  Returns
+ * first byte | font << 8 and stores second byte | font << 8 | 0x8000
+ * in *c2p.  Without a match the result is '?' (and *c2p = '?') for
+ * font == -1 and -1 otherwise.
+ */
+int
+recode_char_dw(c, c2p, to_utf, font)
+int c, *c2p, to_utf, font;
+{
+  unsigned short (*row)[2];
+  int srcfont;
+
+  if (to_utf)
+    {
+      srcfont = (c >> 8) & 0xff;
+      c = ((c & 255) << 8) | (*c2p & 255);
+      *c2p = 0xffff;
+      row = recodetabs[srcfont].tab;
+      if (row == 0 && recodetabs[srcfont].flags == 0)
+        {
+          LoadFontTranslation(srcfont, 0);
+          row = recodetabs[srcfont].tab;
+        }
+      if (row == 0)
+        return UCS_REPL_DW;
+      for (; (*row)[0]; row++)
+        {
+          if ((*row)[0] != c)
+            continue;
+          if (!utf8_isdouble((*row)[1]))
+            *c2p = ' ';
+          return (*row)[1];
+        }
+      return UCS_REPL_DW;
+    }
+
+  if (font == -1)
+    {
+      for (font = 0; font < 32; font++)
+        {
+          row = recodetabs[font].tab;
+          if (row == 0)
+            continue;
+          for (; (*row)[1]; row++)
+            {
+              if ((*row)[1] != c)
+                continue;
+              *c2p = ((*row)[0] & 255) | (font << 8) | 0x8000;
+              return ((*row)[0] >> 8) | (font << 8);
+            }
+        }
+      *c2p = '?';
+      return '?';
+    }
+
+  if (font >= 32)
+    return -1;
+
+  row = recodetabs[font].tab;
+  if (row == 0 && recodetabs[font].flags == 0)
+    {
+      LoadFontTranslation(font, 0);
+      row = recodetabs[font].tab;
+    }
+  if (row == 0)
+    return -1;
+  for (; (*row)[1]; row++)
+    {
+      if ((*row)[1] != c)
+        continue;
+      *c2p = ((*row)[0] & 255) | (font << 8) | 0x8000;
+      return ((*row)[0] >> 8) | (font << 8);
+    }
+  return -1;
+}
+#endif
+
+/*
+ * Recode single-width character c for the given target encoding.
+ * For UTF8 this is a plain font -> Unicode conversion.  For all
+ * other encodings the fonts of the encoding's fontlist are tried in
+ * order, then its default font, and finally any font with a table.
+ */
+int
+recode_char_to_encoding(c, encoding)
+int c, encoding;
+{
+  struct encoding *enc;
+  unsigned char *fl;
+  int r;
+
+  if (encoding == UTF8)
+    return recode_char(c, 1, -1);
+
+  enc = &encodings[encoding];
+  if (enc->fontlist != 0)
+    {
+      for (fl = (unsigned char *)enc->fontlist; *fl; fl++)
+        {
+          r = recode_char(c, 0, *fl);
+          if (r != -1)
+            return r;
+        }
+    }
+  if (enc->deffont)
+    {
+      r = recode_char(c, 0, enc->deffont);
+      if (r != -1)
+        return r;
+    }
+  return recode_char(c, 0, -1);
+}
+
+#ifdef DW_CHARS
+/*
+ * Recode the double-width character c / *c2p for the given target
+ * encoding.  Same search order as recode_char_to_encoding(): the
+ * encoding's fontlist, its default font, then any font.
+ */
+int
+recode_char_dw_to_encoding(c, c2p, encoding)
+int c, *c2p, encoding;
+{
+  struct encoding *enc;
+  unsigned char *fl;
+  int r;
+
+  if (encoding == UTF8)
+    return recode_char_dw(c, c2p, 1, -1);
+
+  enc = &encodings[encoding];
+  if (enc->fontlist != 0)
+    {
+      for (fl = (unsigned char *)enc->fontlist; *fl; fl++)
+        {
+          r = recode_char_dw(c, c2p, 0, *fl);
+          if (r != -1)
+            return r;
+        }
+    }
+  if (enc->deffont)
+    {
+      r = recode_char_dw(c, c2p, 0, enc->deffont);
+      if (r != -1)
+        return r;
+    }
+  return recode_char_dw(c, c2p, 0, -1);
+}
+#endif
+
+
+/*
+ * Recode a single cell from encoding `from' to encoding `to'.
+ * Returns mc itself if nothing has to be done (same encoding, no
+ * UTF8 involved, or a font 0 character), otherwise a pointer to a
+ * static copy that is overwritten by the next call.
+ */
+struct mchar *
+recode_mchar(mc, from, to)
+struct mchar *mc;
+int from, to;
+{
+  static struct mchar out;
+  int code;
+
+  debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to);
+  if (from == to)
+    return mc;
+  if (from != UTF8 && to != UTF8)
+    return mc;
+
+  out = *mc;
+  /* cells without a font use the default font of their encoding */
+  if (from != UTF8 && out.font == 0)
+    out.font = encodings[from].deffont;
+  if (out.font == 0)
+    return mc;		/* latin1 is the same in unicode */
+
+  code = out.image | (out.font << 8);
+#ifdef DW_CHARS
+  if (out.mbcs)
+    {
+      int second = out.mbcs;
+      code = recode_char_dw_to_encoding(code, &second, to);
+      out.mbcs = second;
+    }
+  else
+    code = recode_char_to_encoding(code, to);
+#else
+  code = recode_char_to_encoding(code, to);
+#endif
+  out.image = code & 255;
+  out.font = (code >> 8) & 255;
+  return &out;
+}
+
+/*
+ * Make *bufp at least len bytes big, allocating it if it is null.
+ * Returns 0 on success; on failure -1 is returned and *bufp still
+ * points to the old, valid buffer.
+ */
+static int
+recode_mline_grow(bufp, len)
+unsigned char **bufp;
+int len;
+{
+  unsigned char *nbuf;
+
+  if (*bufp == 0)
+    nbuf = malloc(len);
+  else
+    nbuf = realloc(*bufp, len);
+  if (nbuf == 0)
+    return -1;
+  *bufp = nbuf;
+  return 0;
+}
+
+/*
+ * Recode the first w cells of line ml from encoding `from' to
+ * encoding `to'.
+ *
+ * Returns ml itself if nothing has to be done (same encoding, no
+ * UTF8 involved, w == 0, a line without fonts in an encoding without
+ * default font) or if memory is short.  Otherwise the result lives
+ * in one of two static lines that are used alternately, so it stays
+ * valid for exactly one further call.  Only image and font are
+ * recoded; attr (and the color arrays) are shared with ml.
+ */
+struct mline *
+recode_mline(ml, w, from, to)
+struct mline *ml;
+int w;
+int from, to;
+{
+  static int maxlen;
+  static int last;
+  static struct mline rml[2], *rl;
+  int i, c;
+
+  if (from == to || (from != UTF8 && to != UTF8) || w == 0)
+    return ml;
+  if (ml->font == null && encodings[from].deffont == 0)
+    return ml;
+  if (w > maxlen)
+    {
+      for (i = 0; i < 2; i++)
+        {
+          /*
+           * Never assign the result of realloc() directly: on
+           * failure that would lose the only pointer to the old
+           * buffer.
+           */
+          if (recode_mline_grow(&rml[i].image, w) || recode_mline_grow(&rml[i].font, w))
+            {
+              maxlen = 0;	/* sizes are inconsistent now, regrow next time */
+              return ml;	/* sorry */
+            }
+        }
+      maxlen = w;
+    }
+
+  debug("recode_mline: from\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->image[i]     ) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->font[i]     ) & 15]);
+  debug("\n");
+
+  rl = rml + last;
+  rl->attr = ml->attr;
+#ifdef COLOR
+  rl->color = ml->color;
+# ifdef COLORS256
+  rl->colorx = ml->colorx;
+# endif
+#endif
+  for (i = 0; i < w; i++)
+    {
+      c = ml->image[i] | (ml->font[i] << 8);
+      if (from != UTF8 && c < 256)
+        c |= encodings[from].deffont << 8;
+#ifdef DW_CHARS
+      if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c)))
+        {
+          if (i + 1 == w)
+            c = '?';	/* second half is cut off */
+          else
+            {
+              int c2;
+              i++;
+              c2 = ml->image[i] | (ml->font[i] << 8);
+              c = recode_char_dw_to_encoding(c, &c2, to);
+              rl->font[i - 1]  = c >> 8 & 255;
+              rl->image[i - 1] = c      & 255;
+              c = c2;
+            }
+        }
+      else
+#endif
+        c = recode_char_to_encoding(c, to);
+      rl->image[i] = c & 255;
+      rl->font[i] = c >> 8 & 255;
+    }
+  last ^= 1;
+  debug("recode_mline: to\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->image[i]     ) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->font[i]     ) & 15]);
+  debug("\n");
+  return rl;
+}
+
+/*
+ * Send the (16 bit) Unicode character c to the current display as a
+ * one, two or three byte UTF-8 sequence.
+ */
+void
+AddUtf8(c)
+int c;
+{
+  ASSERT(D_encoding == UTF8);
+  if (c >= 0x800)
+    {
+      /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
+      AddChar(((c >> 12) & 0x0f) | 0xe0);
+      AddChar(((c >> 6) & 0x3f) | 0x80);
+      AddChar((c & 0x3f) | 0x80);
+    }
+  else if (c >= 0x80)
+    {
+      /* two bytes: 110xxxxx 10xxxxxx */
+      AddChar((c >> 6) | 0xc0);
+      AddChar((c & 0x3f) | 0x80);
+    }
+  else
+    AddChar(c);
+}
+
+/*
+ * Store the (16 bit) Unicode character c as UTF-8 in p.
+ * p may be null, in which case only the length is computed.
+ * Returns the number of bytes of the sequence (1 to 3); no
+ * terminating NUL is written.
+ */
+int
+ToUtf8(p, c)
+char *p;
+int c;
+{
+  int len;
+
+  if (c >= 0x800)
+    len = 3;
+  else if (c >= 0x80)
+    len = 2;
+  else
+    len = 1;
+  if (p == 0)
+    return len;
+
+  switch (len)
+    {
+    case 3:
+      p[0] = ((c >> 12) & 0x0f) | 0xe0;
+      p[1] = ((c >> 6) & 0x3f) | 0x80;
+      p[2] = (c & 0x3f) | 0x80;
+      break;
+    case 2:
+      p[0] = (c >> 6) | 0xc0;
+      p[1] = (c & 0x3f) | 0x80;
+      break;
+    default:
+      p[0] = c;
+      break;
+    }
+  return len;
+}
+
+/*
+ * Incremental UTF-8 decoder, fed with one input byte per call.
+ *
+ * c          the next input byte
+ * utf8charp  decoder state kept by the caller between calls;
+ *            0 means that no sequence is in progress
+ *
+ * returns:
+ * -1: need more bytes, sequence not finished
+ * -2: corrupt sequence found, redo last char
+ * >= 0: decoded character
+ *
+ * Only 16 bit values are produced: larger values, the range
+ * 0xd800-0xdfff and 0xfffe/0xffff are replaced by UCS_REPL.
+ *
+ * State encoding: bit 31 set means that more bytes have to follow
+ * (see the test before "return -1").  Bit 30 is clear only in the
+ * state stored by a lead byte that announces two or more following
+ * bytes, so the overlong check runs on the first continuation byte
+ * only.
+ */
+int
+FromUtf8(c, utf8charp)
+int c, *utf8charp;
+{
+ int utf8char = *utf8charp;
+ if (utf8char)
+ {
+ if ((c & 0xc0) != 0x80) /* not a continuation byte */
+ {
+ *utf8charp = 0;
+ return -2; /* corrupt sequence! */
+ }
+ else
+ c = (c & 0x3f) | (utf8char << 6); /* shift in the next six bits */
+ if (!(utf8char & 0x40000000))
+ {
+ /* check for overlong sequences */
+ if ((c & 0x820823e0) == 0x80000000)
+ c = 0xfdffffff;
+ else if ((c & 0x020821f0) == 0x02000000)
+ c = 0xfff7ffff;
+ else if ((c & 0x000820f8) == 0x00080000)
+ c = 0xffffd000;
+ else if ((c & 0x0000207c) == 0x00002000)
+ c = 0xffffff70;
+ }
+ }
+ else
+ {
+ /* new sequence */
+ if (c >= 0xfe)
+ c = UCS_REPL;
+ else if (c >= 0xfc)
+ c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */
+ else if (c >= 0xf8)
+ c = (c & 0x03) | 0xbfffff00; /* 4 */
+ else if (c >= 0xf0)
+ c = (c & 0x07) | 0xbfffc000; /* 3 */
+ else if (c >= 0xe0)
+ c = (c & 0x0f) | 0xbff00000; /* 2 */
+ else if (c >= 0xc2)
+ c = (c & 0x1f) | 0xfc000000; /* 1 */
+ else if (c >= 0xc0)
+ c = 0xfdffffff; /* overlong */
+ else if (c >= 0x80)
+ c = UCS_REPL; /* continuation byte without a lead byte */
+ }
+ *utf8charp = utf8char = (c & 0x80000000) ? c : 0; /* keep the state only while bytes are missing */
+ if (utf8char)
+ return -1;
+ if (c & 0xffff0000)
+ c = UCS_REPL; /* sorry, only know 16bit Unicode */
+ if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff))
+ c = UCS_REPL; /* illegal code */
+ return c;
+}
+
+/*
+ * Change the encoding of window p to `encoding'.
+ *
+ * If the switch is not between UTF8 and a non-UTF8 encoding only
+ * w_encoding is updated.  Otherwise the layers on top of the window
+ * are removed on all displays and every cell of the window (screen
+ * lines, plus history lines with COPY_PASTE) is recoded in place.
+ */
+void
+WinSwitchEncoding(p, encoding)
+struct win *p;
+int encoding;
+{
+ int i, j, c;
+ struct mline *ml;
+ struct display *d;
+ struct canvas *cv;
+ struct layer *oldflayer;
+
+ if ((p->w_encoding == UTF8) == (encoding == UTF8)) /* UTF8-ness does not change: cells stay as they are */
+ {
+ p->w_encoding = encoding;
+ return;
+ }
+ oldflayer = flayer;
+ for (d = displays; d; d = d->d_next)
+ for (cv = d->d_cvlist; cv; cv = cv->c_next)
+ if (p == Layer2Window(cv->c_layer))
+ {
+ flayer = cv->c_layer;
+ while(flayer->l_next) /* NOTE(review): presumably ExitOverlayPage() unlinks flayer, which ends this loop - confirm */
+ {
+ if (oldflayer == flayer)
+ oldflayer = flayer->l_next; /* do not restore a layer that is about to go away */
+ ExitOverlayPage();
+ }
+ }
+ flayer = oldflayer;
+ for (j = 0; j < p->w_height + p->w_histheight; j++)
+ {
+#ifdef COPY_PASTE
+ ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height];
+#else
+ ml = &p->w_mlines[j];
+#endif
+ if (ml->font == null && encodings[p->w_encoding].deffont == 0) /* no fonts at all in this line: nothing to recode */
+ continue;
+ for (i = 0; i < p->w_width; i++)
+ {
+ c = ml->image[i] | (ml->font[i] << 8);
+ if (p->w_encoding != UTF8 && c < 256)
+ c |= encodings[p->w_encoding].deffont << 8;
+ if (c < 256)
+ continue;
+ if (ml->font == null) /* line shares the empty font array: give it one of its own */
+ {
+ if ((ml->font = (unsigned char *)malloc(p->w_width + 1)) == 0)
+ {
+ ml->font = null;
+ break; /* out of memory: leave the rest of this line alone */
+ }
+ bzero(ml->font, p->w_width + 1);
+ }
+#ifdef DW_CHARS
+ if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c)))
+ {
+ if (i + 1 == p->w_width)
+ c = '?'; /* second half is cut off */
+ else
+ {
+ int c2;
+ i++;
+ c2 = ml->image[i] | (ml->font[i] << 8);
+ c = recode_char_dw_to_encoding(c, &c2, encoding);
+ ml->font[i - 1] = c >> 8 & 255;
+ ml->image[i - 1] = c & 255;
+ c = c2;
+ }
+ }
+ else
+#endif
+ c = recode_char_to_encoding(c, encoding);
+ ml->image[i] = c & 255;
+ ml->font[i] = c >> 8 & 255;
+ }
+ }
+ p->w_encoding = encoding;
+ return;
+}
+
+#ifdef DW_CHARS
+/*
+ * Return 1 if the Unicode character c takes up two cells on the
+ * screen, 0 otherwise.
+ */
+int
+utf8_isdouble(c)
+int c;
+{
+  if (c < 0x1100)
+    return 0;
+  if (c <= 0x115f)			/* Hangul Jamo init. consonants */
+    return 1;
+  if (c >= 0x2e80 && c <= 0xa4cf)	/* CJK ... Yi */
+    {
+      /* 0x300a, 0x300b, 0x301a, 0x301b and 0x303f are single width */
+      if ((c & ~0x0011) == 0x300a || c == 0x303f)
+        return 0;
+      return 1;
+    }
+  if (c >= 0xac00 && c <= 0xd7a3)	/* Hangul Syllables */
+    return 1;
+  if (c >= 0xf900 && c <= 0xfaff)	/* CJK Compatibility Ideographs */
+    return 1;
+  if (c >= 0xfe30 && c <= 0xfe6f)	/* CJK Compatibility Forms */
+    return 1;
+  if (c >= 0xff00 && c <= 0xff5f)	/* Fullwidth Forms */
+    return 1;
+  if (c >= 0xffe0 && c <= 0xffe6)
+    return 1;
+  if (c >= 0x20000 && c <= 0x2ffff)
+    return 1;
+  return 0;
+}
+#endif
+
+#else /* !UTF8 */
+
+/*
+ * Version for builds without UTF8: just remember the new encoding
+ * of window p, no cell is recoded.
+ */
+void
+WinSwitchEncoding(p, encoding)
+struct win *p;
+int encoding;
+{
+  p->w_encoding = encoding;
+}
+
+#endif /* UTF8 */
+
+/*
+ * Fold a character for encoding name comparison: letters are
+ * returned in lower case, digits as they are, everything else
+ * (including NUL) as 0.
+ */
+static int
+encfold(c)
+int c;
+{
+  if (c >= 'A' && c <= 'Z')
+    c += 'a' - 'A';
+  if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
+    return c;
+  return 0;
+}
+
+/*
+ * Loosely compare the encoding name s1 with s2: case is ignored and
+ * everything but letters and digits is skipped, so "UTF-8" matches
+ * "utf8".
+ *
+ * Returns 1 on a match and 0 otherwise.  The comparison stops with a
+ * match as soon as s1 is used up, i.e. s1 also matches if it is a
+ * prefix of s2.  If s2 is used up first the names differ; the scan
+ * never goes past the terminating NUL of either string.
+ */
+static int
+encmatch(s1, s2)
+char *s1;
+char *s2;
+{
+  int c1, c2;
+
+  for (;;)
+    {
+      /* skip what does not count, but stop at the end of the string */
+      while (*s1 && encfold((unsigned char)*s1) == 0)
+        s1++;
+      if (*s1 == 0)
+        return 1;
+      while (*s2 && encfold((unsigned char)*s2) == 0)
+        s2++;
+      if (*s2 == 0)
+        return 0;	/* s1 has more significant characters than s2 */
+      c1 = encfold((unsigned char)*s1);
+      c2 = encfold((unsigned char)*s2);
+      if (c1 != c2)
+        return 0;
+      s1++;
+      s2++;
+    }
+}
+
+/*
+ * Translate an encoding name into an encoding number.
+ * Names are compared with encmatch().  A null or empty name, "off"
+ * and "iso8859-1" give 0, "euc" stands for "eucJP".  Returns -1 for
+ * unknown names (and for UTF-8 if screen was built without UTF8).
+ * With UTF8 the font translations of the encoding are loaded, too.
+ */
+int
+FindEncoding(name)
+char *name;
+{
+  int n;
+
+  if (!name || !*name)
+    return 0;
+  if (encmatch(name, "euc"))
+    name = "eucJP";
+  if (encmatch(name, "off"))
+    return 0;
+  if (encmatch(name, "iso8859-1"))
+    return 0;
+#ifndef UTF8
+  if (encmatch(name, "UTF-8"))
+    return -1;
+#endif
+  for (n = 0; n < sizeof(encodings)/sizeof(*encodings); n++)
+    {
+      if (!encmatch(name, encodings[n].name))
+        continue;
+#ifdef UTF8
+      LoadFontTranslationsForEncoding(n);
+#endif
+      return n;
+    }
+  return -1;
+}
+
+/*
+ * Return the name of the encoding with the given number, or a null
+ * pointer if the number is not a valid index into encodings[].
+ */
+char *
+EncodingName(encoding)
+int encoding;
+{
+  int count = (int)(sizeof(encodings)/sizeof(*encodings));
+
+  if (encoding < 0 || encoding >= count)
+    return 0;
+  return encodings[encoding].name;
+}
+
+/*
+ * Return the default font of the encoding with the given number.
+ * The number is not checked; it must be a valid encodings[] index.
+ */
+int
+EncodingDefFont(encoding)
+int encoding;
+{
+  struct encoding *enc = &encodings[encoding];
+
+  return enc->deffont;
+}
+
+/*
+ * Bring window p into the initial state of its encoding: designate
+ * the encoding's charsets, load its font translations (UTF8 builds
+ * only) and apply the usegr / noc1 settings of the encoding.
+ */
+void
+ResetEncoding(p)
+struct win *p;
+{
+  int encoding = p->w_encoding;
+  struct encoding *enc = &encodings[encoding];
+
+  if (enc->charsets)
+    SetCharsets(p, enc->charsets);
+#ifdef UTF8
+  LoadFontTranslationsForEncoding(encoding);
+#endif
+  if (enc->usegr)
+    p->w_gr = 1;
+  if (enc->noc1)
+    p->w_c1 = 0;
+}
+
+/*
+ * Decode the next input byte c of a text in the given encoding.
+ *
+ * c         the input byte
+ * encoding  encoding number of the input
+ * statep    decoder state kept by the caller between calls, must
+ *           start out as 0
+ *
+ * Returns -1 if more bytes are needed, -2 (UTF8 only, see FromUtf8)
+ * if the byte has to be fed again, or the decoded character with
+ * the font in bits 16 and up.  For UTF8 the result is the Unicode
+ * value.
+ */
+int
+DecodeChar(c, encoding, statep)
+int c;
+int encoding;
+int *statep;
+{
+  int t;
+
+  debug2("Decoding char %02x for encoding %d\n", c, encoding);
+#ifdef UTF8
+  if (encoding == UTF8)
+    return FromUtf8(c, statep);
+#endif
+  if (encoding == SJIS)
+    {
+      if (!*statep)
+        {
+          if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef))
+            {
+              /* first byte of a kanji */
+              *statep = c;
+              return -1;
+            }
+          /* plain 7 bit characters must not get the KANA font */
+          if (c < 0x80)
+            return c;
+          return c | (KANA << 16);
+        }
+      t = c;
+      c = *statep;
+      *statep = 0;
+      if (0x40 <= t && t <= 0xfc && t != 0x7f)
+        {
+          /* convert the Shift-JIS byte pair to JIS row/column */
+          if (c <= 0x9f)
+            c = (c - 0x81) * 2 + 0x21;
+          else
+            c = (c - 0xc1) * 2 + 0x21;
+          if (t <= 0x7e)
+            t -= 0x1f;
+          else if (t <= 0x9e)
+            t -= 0x20;
+          else
+            t -= 0x7e, c++;
+          return (c << 8) | t | (KANJI << 16);
+        }
+      return t;		/* illegal second byte */
+    }
+  if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN)
+    {
+      if (!*statep)
+        {
+          if (c & 0x80)
+            {
+              *statep = c;
+              return -1;
+            }
+          return c;
+        }
+      t = c;
+      c = *statep;
+      *statep = 0;
+      if (encoding == EUC_JP)
+        {
+          if (c == 0x8e)	/* single shift 2: one kana byte follows */
+            return t | (KANA << 16);
+          if (c == 0x8f)	/* single shift 3: two more bytes follow */
+            {
+              /* remember the font in the state, it ends up in bits 16.. */
+              *statep = t | (KANJI0212 << 8);
+              return -1;
+            }
+        }
+      c &= 0xff7f;
+      t &= 0x7f;
+      c = c << 8 | t;
+      if (encoding == EUC_KR)
+        return c | (3 << 16);
+      if (encoding == EUC_CN)
+        return c | (1 << 16);
+      if (c & (KANJI0212 << 16))
+        return c;
+      else
+        return c | (KANJI << 16);
+    }
+  if (encoding == BIG5)
+    {
+      if (!*statep)
+        {
+          if (c & 0x80)
+            {
+              *statep = c;
+              return -1;
+            }
+          return c;
+        }
+      t = c;
+      c = *statep;
+      *statep = 0;
+      c &= 0x7f;
+      return c << 8 | t | (030 << 16);
+    }
+  return c | (encodings[encoding].deffont << 16);
+}
+/*
+ * Encode one character for a text in the given encoding.
+ *
+ * bp        output buffer, or null to compute the length only
+ * c         the character: font in bits 16 and up, code below.
+ *           -1 together with a non-null fontp ends the text: if
+ *           *fontp is not 0, ESC ( B is emitted
+ * encoding  encoding number of the output
+ * fontp     if non-null, the font that is currently designated in
+ *           the output; a designation sequence is emitted and
+ *           *fontp updated whenever the font changes
+ *
+ * Returns the number of bytes that were (or would be) stored in bp.
+ */
+int
+EncodeChar(bp, c, encoding, fontp)
+char *bp;
+int c;
+int encoding;
+int *fontp;
+{
+ int t, f, l;
+
+ debug2("Encoding char %02x for encoding %d\n", c, encoding);
+ if (c == -1 && fontp)
+ {
+ if (*fontp == 0)
+ return 0;
+ if (bp)
+ {
+ *bp++ = 033;
+ *bp++ = '(';
+ *bp++ = 'B';
+ }
+ return 3;
+ }
+ f = c >> 16;
+
+#ifdef UTF8
+ if (encoding == UTF8)
+ {
+ if (f) /* character has a font: convert to Unicode first */
+ {
+# ifdef DW_CHARS
+ if (is_dw_font(f))
+ {
+ int c2 = c >> 8 & 0xff;
+ c = (c & 0xff) | (f << 8);
+ c = recode_char_dw_to_encoding(c, &c2, encoding);
+ }
+ else
+# endif
+ {
+ c = (c & 0xff) | (f << 8);
+ c = recode_char_to_encoding(c, encoding);
+ }
+ }
+ return ToUtf8(bp, c);
+ }
+ if ((c & 0xff00) && f == 0) /* no font but more than 8 bits: a Unicode value, map it to the target encoding */
+ {
+# ifdef DW_CHARS
+ if (utf8_isdouble(c))
+ {
+ int c2 = 0xffff;
+ c = recode_char_dw_to_encoding(c, &c2, encoding);
+ c = (c << 8) | (c2 & 0xff);
+ }
+ else
+# endif
+ {
+ c = recode_char_to_encoding(c, encoding);
+ c = ((c & 0xff00) << 8) | (c & 0xff); /* move the font from bits 8-15 to bits 16-23 */
+ }
+ debug1("Encode: char mapped from utf8 to %x\n", c);
+ f = c >> 16;
+ }
+#endif
+
+ if (f & 0x80) /* map special 96-fonts to latin1 */
+ f = 0;
+
+ if (encoding == SJIS)
+ {
+ if (f == KANA)
+ c = (c & 0xff) | 0x80;
+ else if (f == KANJI)
+ {
+ if (!bp)
+ return 2;
+ t = c & 0xff;
+ c = (c >> 8) & 0xff;
+ t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
+ c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
+ *bp++ = c;
+ *bp++ = t;
+ return 2;
+ }
+ }
+ if (encoding == EUC)
+ {
+ if (f == KANA)
+ {
+ if (bp)
+ {
+ *bp++ = 0x8e;
+ *bp++ = c;
+ }
+ return 2;
+ }
+ if (f == KANJI)
+ {
+ if (bp)
+ {
+ *bp++ = (c >> 8) | 0x80;
+ *bp++ = c | 0x80;
+ }
+ return 2;
+ }
+ if (f == KANJI0212)
+ {
+ if (bp)
+ {
+ *bp++ = 0x8f;
+ *bp++ = c >> 8;
+ *bp++ = c;
+ }
+ return 3;
+ }
+ }
+ if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
+ {
+ if (bp)
+ {
+ *bp++ = (c >> 8) | 0x80;
+ *bp++ = c | 0x80;
+ }
+ return 2;
+ }
+ if (encoding == BIG5 && f == 030)
+ {
+ if (bp)
+ {
+ *bp++ = (c >> 8) | 0x80;
+ *bp++ = c;
+ }
+ return 2;
+ }
+
+ l = 0;
+ if (fontp && f != *fontp) /* font changed: emit a designation sequence */
+ {
+ *fontp = f;
+ if (f && f < ' ') /* fonts below ' ': ESC $ F for fonts 1 and 2, ESC $ ( F above */
+ {
+ if (bp)
+ {
+ *bp++ = 033;
+ *bp++ = '$';
+ if (f > 2)
+ *bp++ = '(';
+ *bp++ = '@' + f;
+ }
+ l += f > 2 ? 4 : 3;
+ }
+ else if (f < 128) /* other fonts: ESC ( F, with font 0 sent as 'B' */
+ {
+ if (f == 0)
+ f = 'B';
+ if (bp)
+ {
+ *bp++ = 033;
+ *bp++ = '(';
+ *bp++ = f;
+ }
+ l += 3;
+ }
+ }
+ if (c & 0xff00) /* two byte character: high byte first */
+ {
+ if (bp)
+ *bp++ = c >> 8;
+ l++;
+ }
+ if (bp)
+ *bp++ = c;
+ return l + 1;
+}
+
+/*
+ * Return 1 if characters of font f can be expressed directly in the
+ * given encoding, 0 if not.
+ */
+int
+CanEncodeFont(encoding, f)
+int encoding, f;
+{
+#ifdef UTF8
+  if (encoding == UTF8)
+    return 1;
+#endif
+  if (encoding == SJIS)
+    return f == KANJI || f == KANA;
+  if (encoding == EUC)
+    return f == KANJI || f == KANA || f == KANJI0212;
+  if (encoding == EUC_KR)
+    return f == 3;
+  if (encoding == EUC_CN)
+    return f == 1;
+  if (encoding == BIG5)
+    return f == 030;
+  return 0;
+}
+
+#ifdef DW_CHARS
+/*
+ * Convert the character c, which is about to be sent to the current
+ * display in font D_rend.font, to the display's encoding.
+ * Returns the byte to send for c.  The pending second byte D_mbcs
+ * is converted as well; a shift byte that has to precede the
+ * character in EUC is sent right away with AddChar().
+ */
+int
+PrepareEncodedChar(c)
+int c;
+{
+  int enc, font, second;
+
+  enc = D_encoding;
+  font = D_rend.font;
+  second = D_mbcs;
+
+  if (enc == SJIS)
+    {
+      if (font == KANA)
+        return c | 0x80;
+      if (font == KANJI)
+        {
+          /* JIS row/column to Shift-JIS byte pair */
+          if (c & 1)
+            second += (second <= 0x5f) ? 0x1f : 0x20;
+          else
+            second += 0x7e;
+          c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
+          D_mbcs = second;
+        }
+      return c;
+    }
+  if (enc == EUC)
+    {
+      if (font == KANA)
+        {
+          AddChar(0x8e);
+          return c | 0x80;
+        }
+      if (font == KANJI || font == KANJI0212)
+        {
+          if (font == KANJI0212)
+            AddChar(0x8f);
+          D_mbcs = second | 0x80;
+          return c | 0x80;
+        }
+    }
+  if ((enc == EUC_KR && font == 3) || (enc == EUC_CN && font == 1))
+    {
+      D_mbcs = second | 0x80;
+      return c | 0x80;
+    }
+  if (enc == BIG5 && font == 030)
+    return c | 0x80;
+  return c;
+}
+#endif
+
+/*
+ * Convert the flen bytes at fbuf from encoding fenc to encoding
+ * tenc.  The result is stored in tbuf; if tbuf is null only the
+ * length is computed.  Returns the number of bytes of the result,
+ * which includes the sequence that resets the font at the end.
+ */
+int
+RecodeBuf(fbuf, flen, fenc, tenc, tbuf)
+unsigned char *fbuf;
+int flen;
+int fenc, tenc;
+unsigned char *tbuf;
+{
+  int ch;
+  int in = 0, out = 0;
+  int decstate = 0, font = 0;
+
+  while (in < flen)
+    {
+      ch = DecodeChar(fbuf[in], fenc, &decstate);
+      /* -2: the byte was not consumed and has to be decoded again */
+      if (ch != -2)
+        in++;
+      if (ch < 0)
+        continue;
+      out += EncodeChar(tbuf ? (char *)tbuf + out : 0, ch, tenc, &font);
+    }
+  out += EncodeChar(tbuf ? (char *)tbuf + out : 0, -1, tenc, &font);
+  return out;
+}
+
+#ifdef UTF8
+/*
+ * Check whether the cells xs..xe (inclusive) of line ml contain a
+ * character without a font that looks different in Unicode once the
+ * default font of the encoding is applied.
+ * Returns 1 if there is such a character, 0 if not (always 0 for
+ * UTF8 and for encodings without default font).
+ */
+int
+ContainsSpecialDeffont(ml, xs, xe, encoding)
+struct mline *ml;
+int xs, xe;
+int encoding;
+{
+  int c, u, x;
+
+  if (encoding == UTF8 || encodings[encoding].deffont == 0)
+    return 0;
+  /*
+   * Index image and font with the same column, so that a cell that
+   * is skipped because of its font cannot shift the two arrays
+   * against each other.
+   */
+  for (x = xs; x <= xe; x++)
+    {
+      if (ml->font[x])
+        continue;
+      c = ml->image[x];
+      u = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8);
+      if (c != u)
+        {
+          debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, u);
+          return 1;
+        }
+    }
+  debug("ContainsSpecialDeffont: no\n");
+  return 0;
+}
+
+
+/*
+ * Load font translation tables from a file into recodetabs[].
+ *
+ * font  font id of the table to load, or -1 to accept every table
+ *       found in the file
+ * file  file to read; if null, "<screenencodings>/<font as 2 hex
+ *       digits>" is used
+ *
+ * The file consists of one or more tables.  Each one starts with
+ * the magic "ScreenI2UTF8", a format byte (0), the font id, the
+ * number of entries (2 bytes, high byte first) and the font name
+ * (NUL terminated, padded to 2 bytes), followed by the entries of
+ * 2 + 2 bytes (code in font, Unicode value; high byte first).
+ *
+ * Returns 0 if the whole file could be read and -1 otherwise;
+ * tables that were complete before an error stay installed.
+ */
+int
+LoadFontTranslation(font, file)
+int font;
+char *file;
+{
+  char buf[1024], *myfile;
+  FILE *f;
+  int i;
+  int fo;
+  int x, u, c, ok;
+  unsigned short (*p)[2], (*tab)[2];
+
+  myfile = file;
+  if (myfile == 0)
+    {
+      if (font == 0 || screenencodings == 0)
+        return -1;
+      if (strlen(screenencodings) > sizeof(buf) - 10)
+        return -1;
+      sprintf(buf, "%s/%02x", screenencodings, font & 0xff);
+      myfile = buf;
+    }
+  debug1("LoadFontTranslation: trying %s\n", myfile);
+  /*
+   * NOTE(review): a file that cannot be opened does not set
+   * RECODETAB_TRIED, so the open is retried on every lookup.
+   * Confirm whether this is intended.
+   */
+  if ((f = secfopen(myfile, "r")) == 0)
+    return -1;
+  i = ok = 0;
+  for (;;)
+    {
+      /* i is 1 here if the 'S' of the magic was already consumed */
+      for (; i < 12; i++)
+        if (getc(f) != "ScreenI2UTF8"[i])
+          break;
+      if (i < 12)		/* bad magic: not a translation table */
+        break;
+      if (getc(f) != 0)		/* format */
+        break;
+      fo = getc(f);		/* id */
+      if (fo == EOF)
+        break;
+      if (font != -1 && font != fo)
+        break;
+      i = getc(f);		/* number of entries */
+      x = getc(f);
+      if (x == EOF)
+        break;
+      i = i << 8 | x;
+      getc(f);
+      while ((x = getc(f)) && x != EOF)
+        getc(f);		/* skip font name (padded to 2 bytes) */
+      if ((p = malloc(sizeof(*p) * (i + 1))) == 0)
+        break;
+      tab = p;
+      while (i > 0)
+        {
+          x = getc(f);
+          x = x << 8 | getc(f);
+          u = getc(f);
+          c = getc(f);
+          u = u << 8 | c;
+          if (c == EOF)
+            break;
+          (*p)[0] = x;
+          (*p)[1] = u;
+          p++;
+          i--;
+        }
+      (*p)[0] = 0;
+      (*p)[1] = 0;
+      /*
+       * Reject truncated tables and tables that start with the end
+       * of a range: the range code looks at the previous row.
+       */
+      if (i || (tab[0][0] & 0x8000))
+        {
+          free(tab);
+          break;
+        }
+      if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0)
+        free(recodetabs[fo].tab);
+      recodetabs[fo].tab = tab;
+      recodetabs[fo].flags = RECODETAB_ALLOCED;
+      debug1("Successful load of recodetab %02x\n", fo);
+      c = getc(f);
+      if (c == EOF)
+        {
+          ok = 1;
+          break;
+        }
+      if (c != 'S')
+        break;
+      i = 1;			/* first character of the next magic is read */
+    }
+  fclose(f);
+  /* remember the failed attempt for the default file of a font */
+  if (font != -1 && file == 0 && recodetabs[font].flags == 0)
+    recodetabs[font].flags = RECODETAB_TRIED;
+  return ok ? 0 : -1;
+}
+
+/*
+ * Load the translation tables of all fonts that belong to the given
+ * encoding (its fontlist and its default font), unless a table is
+ * already known or loading was tried before.
+ */
+void
+LoadFontTranslationsForEncoding(encoding)
+int encoding;
+{
+  struct encoding *enc = &encodings[encoding];
+  unsigned char *fl;
+  int font;
+
+  debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding);
+  if (enc->fontlist != 0)
+    {
+      for (fl = (unsigned char *)enc->fontlist; *fl; fl++)
+        {
+          font = *fl;
+          if (recodetabs[font].flags == 0)
+            LoadFontTranslation(font, 0);
+        }
+    }
+  font = enc->deffont;
+  if (font > 0 && recodetabs[font].flags == 0)
+    LoadFontTranslation(font, 0);
+}
+
+#endif /* UTF8 */
+
+#else /* !ENCODINGS */
+
+/*
+ * Simple version of EncodeChar for builds without ENCODINGS: it
+ * only encodes the font changes needed for copy/paste mode.
+ *
+ * bp        output buffer, or null to compute the length only
+ * c         character with the font in bits 16 and up, or -1 to
+ *           switch back to font 0 at the end of the text
+ * encoding  not used
+ * fontp     if non-null, the currently designated font; updated
+ *           whenever a designation sequence is emitted
+ *
+ * Returns the number of bytes that were (or would be) stored.
+ */
+int
+EncodeChar(bp, c, encoding, fontp)
+char *bp;
+int c;
+int encoding;
+int *fontp;
+{
+  int font, len;
+
+  len = 0;
+  font = 0;
+  if (c != -1)
+    font = c >> 16;
+
+  if (fontp != 0 && *fontp != font)
+    {
+      *fontp = font;
+      if (font != 0 && font < ' ')
+        {
+          /* ESC $ F for fonts 1 and 2, ESC $ ( F for the others */
+          int seqlen = (font > 2) ? 4 : 3;
+
+          if (bp)
+            {
+              *bp++ = 033;
+              *bp++ = '$';
+              if (font > 2)
+                *bp++ = '(';
+              *bp++ = '@' + font;
+            }
+          len += seqlen;
+        }
+      else if (font < 128)
+        {
+          /* ESC ( F, where font 0 is sent as 'B' */
+          if (font == 0)
+            font = 'B';
+          if (bp)
+            {
+              *bp++ = 033;
+              *bp++ = '(';
+              *bp++ = font;
+            }
+          len += 3;
+        }
+    }
+  if (c == -1)
+    return len;
+
+  if (c & 0xff00)
+    {
+      /* two byte character: high byte first */
+      if (bp)
+        *bp++ = c >> 8;
+      len++;
+    }
+  if (bp)
+    *bp++ = c;
+  len++;
+  return len;
+}
+
+#endif /* ENCODINGS */