diff options
Diffstat (limited to 'encoding.c')
-rw-r--r-- | encoding.c | 1502 |
1 files changed, 1502 insertions, 0 deletions
diff --git a/encoding.c b/encoding.c new file mode 100644 index 0000000..bf99dca --- /dev/null +++ b/encoding.c @@ -0,0 +1,1502 @@ +/* Copyright (c) 1993-2002 + * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de) + * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de) + * Copyright (c) 1987 Oliver Laumann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (see the file COPYING); if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + **************************************************************** + */ + +#include "rcs.h" +RCS_ID("$Id: utf8.c,v 1.5 2001/04/25 12:32:47 mlschroe Exp $ FAU") + +#include <sys/types.h> + +#include "config.h" +#include "screen.h" +#include "extern.h" + +#ifdef ENCODINGS + +extern unsigned char *null; +extern struct display *display, *displays; +extern struct layer *flayer; + +extern char *screenencodings; + +static int encmatch __P((char *, char *)); + +struct encoding { + char *name; + char *charsets; + int deffont; + int usegr; + int noc1; + char *fontlist; +}; + +/* big5 font: ^X */ +/* KOI8-R font: 96 ! */ +/* CP1251 font: 96 ? */ + +struct encoding encodings[] = { + { "C", 0, 0, 0, 0, 0 }, + { "eucJP", "B\002I\004\00204",0, 1, 0, "\002\004I" }, + { "SJIS", "BIBBI04", 0, 1, 1, "\002I" }, + { "eucKR", "B\003BB\00304",0, 1, 0, "\003" }, + { "eucCN", "B\001BB\00104",0, 1, 0, "\001" }, + { "Big5", "B\030BB\03004",0, 1, 0, "\030" }, + { "KOI8-R", 0, 0x80|'!', 0, 1, 0 }, + { "CP1251", 0, 0x80|'?', 0, 1, 0 }, + { "UTF-8", 0, -1, 0, 0, 0 }, + { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 }, + { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 }, + { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 }, + { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 }, + { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 }, + { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 }, + { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 }, + { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 }, + { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 }, + { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 }, + { "jis", 0, 0, 0, 0, "\002\004I" }, +}; + +#ifdef UTF8 + +static unsigned short builtin_tabs[][2] = { + { 0x30, 0 }, /* 0: special graphics (line drawing) */ + { 0x005f, 0x25AE }, + { 0x0060, 0x25C6 }, + { 0x0061, 0x2592 }, + { 0x0062, 0x2409 }, + { 0x0063, 0x240C }, + { 0x0064, 0x240D }, + { 0x0065, 0x240A }, + { 0x0066, 0x00B0 }, + { 0x0067, 0x00B1 }, + { 0x0068, 0x2424 }, + { 0x0069, 0x240B }, + { 0x006a, 0x2518 }, + { 0x006b, 0x2510 }, + { 0x006c, 0x250C }, + { 0x006d, 0x2514 }, + { 0x006e, 0x253C }, + { 0x006f, 0x23BA }, + { 0x0070, 0x23BB }, + { 0x0071, 0x2500 }, + { 0x0072, 0x23BC }, + { 0x0073, 0x23BD }, + { 0x0074, 0x251C }, + { 0x0075, 0x2524 }, + { 0x0076, 0x2534 }, + { 0x0077, 0x252C }, + { 0x0078, 0x2502 }, + { 0x0079, 0x2264 }, + { 0x007a, 0x2265 }, + { 0x007b, 0x03C0 }, + { 0x007c, 0x2260 }, + { 0x007d, 0x00A3 }, + { 0x007e, 0x00B7 }, + { 0, 0}, + + { 0x34, 0 }, /* 4: Dutch */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00be }, + { 0x005b, 0x00ff }, + { 0x005c, 0x00bd }, + { 0x005d, 0x007c }, + { 0x007b, 0x00a8 }, + { 0x007c, 0x0066 }, + { 0x007d, 0x00bc }, + { 0x007e, 0x00b4 }, + { 0, 0}, + + { 0x35, 0 }, /* 5: Finnish */ + { 0x005b, 0x00c4 }, + { 0x005c, 0x00d6 }, + { 0x005d, 0x00c5 }, + { 0x005e, 0x00dc }, + { 0x0060, 0x00e9 }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00e5 }, + { 0x007e, 0x00fc }, + { 0, 0}, + + { 0x36, 0 }, /* 6: Norwegian/Danish */ + { 0x0040, 0x00c4 }, + { 0x005b, 0x00c6 }, + { 0x005c, 0x00d8 }, + { 0x005d, 0x00c5 }, + { 0x005e, 0x00dc }, + { 0x0060, 0x00e4 }, + { 0x007b, 0x00e6 }, + { 0x007c, 0x00f8 }, + { 0x007d, 0x00e5 }, + { 0x007e, 0x00fc }, + { 0, 0}, + + { 0x37, 0 }, /* 7: Swedish */ + { 0x0040, 0x00c9 }, + { 0x005b, 0x00c4 }, + { 0x005c, 0x00d6 }, + { 0x005d, 0x00c5 }, + { 0x005e, 0x00dc }, + { 0x0060, 0x00e9 }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00e5 }, + { 0x007e, 0x00fc }, + { 0, 0}, + + { 0x3d, 0}, /* =: Swiss */ + { 0x0023, 0x00f9 }, + { 0x0040, 0x00e0 }, + { 0x005b, 0x00e9 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00ea }, + { 0x005e, 0x00ee }, + { 0x005f, 0x00e8 }, + { 0x0060, 0x00f4 }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00fc }, + { 0x007e, 0x00fb }, + { 0, 0}, + + { 0x41, 0}, /* A: UK */ + { 0x0023, 0x00a3 }, + { 0, 0}, + + { 0x4b, 0}, /* K: German */ + { 0x0040, 0x00a7 }, + { 0x005b, 0x00c4 }, + { 0x005c, 0x00d6 }, + { 0x005d, 0x00dc }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00fc }, + { 0x007e, 0x00df }, + { 0, 0}, + + { 0x51, 0}, /* Q: French Canadian */ + { 0x0040, 0x00e0 }, + { 0x005b, 0x00e2 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00ea }, + { 0x005e, 0x00ee }, + { 0x0060, 0x00f4 }, + { 0x007b, 0x00e9 }, + { 0x007c, 0x00f9 }, + { 0x007d, 0x00e8 }, + { 0x007e, 0x00fb }, + { 0, 0}, + + { 0x52, 0}, /* R: French */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00e0 }, + { 0x005b, 0x00b0 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00a7 }, + { 0x007b, 0x00e9 }, + { 0x007c, 0x00f9 }, + { 0x007d, 0x00e8 }, + { 0x007e, 0x00a8 }, + { 0, 0}, + + { 0x59, 0}, /* Y: Italian */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00a7 }, + { 0x005b, 0x00b0 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00e9 }, + { 0x0060, 0x00f9 }, + { 0x007b, 0x00e0 }, + { 0x007c, 0x00f2 }, + { 0x007d, 0x00e8 }, + { 0x007e, 0x00ec }, + { 0, 0}, + + { 0x5a, 0}, /* Z: Spanish */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00a7 }, + { 0x005b, 0x00a1 }, + { 0x005c, 0x00d1 }, + { 0x005d, 0x00bf }, + { 0x007b, 0x00b0 }, + { 0x007c, 0x00f1 }, + { 0x007d, 0x00e7 }, + { 0, 0}, + + { 0xe2, 0}, /* 96-b: ISO-8859-15*/ + { 0x00a4, 0x20ac }, + { 0x00a6, 0x0160 }, + { 0x00a8, 0x0161 }, + { 0x00b4, 0x017D }, + { 0x00b8, 0x017E }, + { 0x00bc, 0x0152 }, + { 0x00bd, 0x0153 }, + { 0x00be, 0x0178 }, + { 0, 0}, + + { 0x4a, 0}, /* J: JIS 0201 Roman */ + { 0x005c, 0x00a5 }, + { 0x007e, 0x203e }, + { 0, 0}, + + { 0x49, 0}, /* I: halfwidth katakana */ + { 0x0021, 0xff61 }, + { 0x005f|0x8000, 0xff9f }, + { 0, 0}, + + { 0, 0} +}; + +struct recodetab +{ + unsigned short (*tab)[2]; + int flags; +}; + +#define RECODETAB_ALLOCED 1 +#define RECODETAB_BUILTIN 2 +#define RECODETAB_TRIED 4 + +static struct recodetab recodetabs[256]; + +void +InitBuiltinTabs() +{ + unsigned short (*p)[2]; + for (p = builtin_tabs; (*p)[0]; p++) + { + recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN; + recodetabs[(*p)[0]].tab = p + 1; + p++; + while((*p)[0]) + p++; + } +} + +int +recode_char(c, to_utf, font) +int c, to_utf, font; +{ + int f; + unsigned short (*p)[2]; + + if (c < 256) + return c; + if (to_utf) + { + f = (c >> 8) & 0xff; + c &= 0xff; + /* map aliases to keep the table small */ + switch (c >> 8) + { + case 'C': + f ^= ('C' ^ '5'); + break; + case 'E': + f ^= ('E' ^ '6'); + break; + case 'H': + f ^= ('H' ^ '7'); + break; + default: + break; + } + p = recodetabs[f].tab; + if (p == 0 && recodetabs[f].flags == 0) + { + LoadFontTranslation(f, 0); + p = recodetabs[f].tab; + } + if (p) + for (; (*p)[0]; p++) + { + if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0]) + return c - p[-1][0] + p[-1][1]; + if ((*p)[0] == c) + return (*p)[1]; + } + return c & 0xff; /* map to latin1 */ + } + if (font == -1) + { + for (font = 32; font < 256; font++) + { + p = recodetabs[font].tab; + if (p) + for (; (*p)[1]; p++) + { + if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1]) + return c - p[-1][1] + p[-1][0]; + if ((*p)[1] == c) + return (*p)[0]; + } + } + return '?'; + } + if (font >= 32) + { + p = recodetabs[font].tab; + if (p == 0 && recodetabs[font].flags == 0) + { + LoadFontTranslation(font, 0); + p = recodetabs[font].tab; + } + if (p) + for (; (*p)[1]; p++) + if ((*p)[1] == c) + return (*p)[0]; + } + return -1; +} + + +#ifdef DW_CHARS +int +recode_char_dw(c, c2p, to_utf, font) +int c, *c2p, to_utf, font; +{ + int f; + unsigned short (*p)[2]; + + if (to_utf) + { + f = (c >> 8) & 0xff; + c = (c & 255) << 8 | (*c2p & 255); + *c2p = 0xffff; + p = recodetabs[f].tab; + if (p == 0 && recodetabs[f].flags == 0) + { + LoadFontTranslation(f, 0); + p = recodetabs[f].tab; + } + if (p) + for (; (*p)[0]; p++) + if ((*p)[0] == c) + { +#ifdef DW_CHARS + if (!utf8_isdouble((*p)[1])) + *c2p = ' '; +#endif + return (*p)[1]; + } + return UCS_REPL_DW; + } + if (font == -1) + { + for (font = 0; font < 32; font++) + { + p = recodetabs[font].tab; + if (p) + for (; (*p)[1]; p++) + if ((*p)[1] == c) + { + *c2p = ((*p)[0] & 255) | font << 8 | 0x8000; + return ((*p)[0] >> 8) | font << 8; + } + } + *c2p = '?'; + return '?'; + } + if (font < 32) + { + p = recodetabs[font].tab; + if (p == 0 && recodetabs[font].flags == 0) + { + LoadFontTranslation(font, 0); + p = recodetabs[font].tab; + } + if (p) + for (; (*p)[1]; p++) + if ((*p)[1] == c) + { + *c2p = ((*p)[0] & 255) | font << 8 | 0x8000; + return ((*p)[0] >> 8) | font << 8; + } + } + return -1; +} +#endif + +int +recode_char_to_encoding(c, encoding) +int c, encoding; +{ + char *fp; + int x; + + if (encoding == UTF8) + return recode_char(c, 1, -1); + if ((fp = encodings[encoding].fontlist) != 0) + while(*fp) + if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1) + return x; + if (encodings[encoding].deffont) + if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1) + return x; + return recode_char(c, 0, -1); +} + +#ifdef DW_CHARS +int +recode_char_dw_to_encoding(c, c2p, encoding) +int c, *c2p, encoding; +{ + char *fp; + int x; + + if (encoding == UTF8) + return recode_char_dw(c, c2p, 1, -1); + if ((fp = encodings[encoding].fontlist) != 0) + while(*fp) + if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1) + return x; + if (encodings[encoding].deffont) + if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1) + return x; + return recode_char_dw(c, c2p, 0, -1); +} +#endif + + +struct mchar * +recode_mchar(mc, from, to) +struct mchar *mc; +int from, to; +{ + static struct mchar rmc; + int c; + + debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to); + if (from == to || (from != UTF8 && to != UTF8)) + return mc; + rmc = *mc; + if (rmc.font == 0 && from != UTF8) + rmc.font = encodings[from].deffont; + if (rmc.font == 0) /* latin1 is the same in unicode */ + return mc; + c = rmc.image | (rmc.font << 8); +#ifdef DW_CHARS + if (rmc.mbcs) + { + int c2 = rmc.mbcs; + c = recode_char_dw_to_encoding(c, &c2, to); + rmc.mbcs = c2; + } + else +#endif + c = recode_char_to_encoding(c, to); + rmc.image = c & 255; + rmc.font = c >> 8 & 255; + return &rmc; +} + +struct mline * +recode_mline(ml, w, from, to) +struct mline *ml; +int w; +int from, to; +{ + static int maxlen; + static int last; + static struct mline rml[2], *rl; + int i, c; + + if (from == to || (from != UTF8 && to != UTF8) || w == 0) + return ml; + if (ml->font == null && encodings[from].deffont == 0) + return ml; + if (w > maxlen) + { + for (i = 0; i < 2; i++) + { + if (rml[i].image == 0) + rml[i].image = malloc(w); + else + rml[i].image = realloc(rml[i].image, w); + if (rml[i].font == 0) + rml[i].font = malloc(w); + else + rml[i].font = realloc(rml[i].font, w); + if (rml[i].image == 0 || rml[i].font == 0) + { + maxlen = 0; + return ml; /* sorry */ + } + } + maxlen = w; + } + + debug("recode_mline: from\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]); + debug("\n"); + + rl = rml + last; + rl->attr = ml->attr; +#ifdef COLOR + rl->color = ml->color; +# ifdef COLORS256 + rl->colorx = ml->colorx; +# endif +#endif + for (i = 0; i < w; i++) + { + c = ml->image[i] | (ml->font[i] << 8); + if (from != UTF8 && c < 256) + c |= encodings[from].deffont << 8; +#ifdef DW_CHARS + if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c))) + { + if (i + 1 == w) + c = '?'; + else + { + int c2; + i++; + c2 = ml->image[i] | (ml->font[i] << 8); + c = recode_char_dw_to_encoding(c, &c2, to); + rl->font[i - 1] = c >> 8 & 255; + rl->image[i - 1] = c & 255; + c = c2; + } + } + else +#endif + c = recode_char_to_encoding(c, to); + rl->image[i] = c & 255; + rl->font[i] = c >> 8 & 255; + } + last ^= 1; + debug("recode_mline: to\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]); + debug("\n"); + return rl; +} + +void +AddUtf8(c) +int c; +{ + ASSERT(D_encoding == UTF8); + if (c >= 0x800) + { + AddChar((c & 0xf000) >> 12 | 0xe0); + c = (c & 0x0fff) | 0x1000; + } + if (c >= 0x80) + { + AddChar((c & 0x1fc0) >> 6 ^ 0xc0); + c = (c & 0x3f) | 0x80; + } + AddChar(c); +} + +int +ToUtf8(p, c) +char *p; +int c; +{ + int l = 1; + if (c >= 0x800) + { + if (p) + *p++ = (c & 0xf000) >> 12 | 0xe0; + l++; + c = (c & 0x0fff) | 0x1000; + } + if (c >= 0x80) + { + if (p) + *p++ = (c & 0x1fc0) >> 6 ^ 0xc0; + l++; + c = (c & 0x3f) | 0x80; + } + if (p) + *p++ = c; + return l; +} + +/* + * returns: + * -1: need more bytes, sequence not finished + * -2: corrupt sequence found, redo last char + * >= 0: decoded character + */ +int +FromUtf8(c, utf8charp) +int c, *utf8charp; +{ + int utf8char = *utf8charp; + if (utf8char) + { + if ((c & 0xc0) != 0x80) + { + *utf8charp = 0; + return -2; /* corrupt sequence! */ + } + else + c = (c & 0x3f) | (utf8char << 6); + if (!(utf8char & 0x40000000)) + { + /* check for overlong sequences */ + if ((c & 0x820823e0) == 0x80000000) + c = 0xfdffffff; + else if ((c & 0x020821f0) == 0x02000000) + c = 0xfff7ffff; + else if ((c & 0x000820f8) == 0x00080000) + c = 0xffffd000; + else if ((c & 0x0000207c) == 0x00002000) + c = 0xffffff70; + } + } + else + { + /* new sequence */ + if (c >= 0xfe) + c = UCS_REPL; + else if (c >= 0xfc) + c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */ + else if (c >= 0xf8) + c = (c & 0x03) | 0xbfffff00; /* 4 */ + else if (c >= 0xf0) + c = (c & 0x07) | 0xbfffc000; /* 3 */ + else if (c >= 0xe0) + c = (c & 0x0f) | 0xbff00000; /* 2 */ + else if (c >= 0xc2) + c = (c & 0x1f) | 0xfc000000; /* 1 */ + else if (c >= 0xc0) + c = 0xfdffffff; /* overlong */ + else if (c >= 0x80) + c = UCS_REPL; + } + *utf8charp = utf8char = (c & 0x80000000) ? c : 0; + if (utf8char) + return -1; + if (c & 0xffff0000) + c = UCS_REPL; /* sorry, only know 16bit Unicode */ + if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff)) + c = UCS_REPL; /* illegal code */ + return c; +} + + +void +WinSwitchEncoding(p, encoding) +struct win *p; +int encoding; +{ + int i, j, c; + struct mline *ml; + struct display *d; + struct canvas *cv; + struct layer *oldflayer; + + if ((p->w_encoding == UTF8) == (encoding == UTF8)) + { + p->w_encoding = encoding; + return; + } + oldflayer = flayer; + for (d = displays; d; d = d->d_next) + for (cv = d->d_cvlist; cv; cv = cv->c_next) + if (p == Layer2Window(cv->c_layer)) + { + flayer = cv->c_layer; + while(flayer->l_next) + { + if (oldflayer == flayer) + oldflayer = flayer->l_next; + ExitOverlayPage(); + } + } + flayer = oldflayer; + for (j = 0; j < p->w_height + p->w_histheight; j++) + { +#ifdef COPY_PASTE + ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height]; +#else + ml = &p->w_mlines[j]; +#endif + if (ml->font == null && encodings[p->w_encoding].deffont == 0) + continue; + for (i = 0; i < p->w_width; i++) + { + c = ml->image[i] | (ml->font[i] << 8); + if (p->w_encoding != UTF8 && c < 256) + c |= encodings[p->w_encoding].deffont << 8; + if (c < 256) + continue; + if (ml->font == null) + { + if ((ml->font = (unsigned char *)malloc(p->w_width + 1)) == 0) + { + ml->font = null; + break; + } + bzero(ml->font, p->w_width + 1); + } +#ifdef DW_CHARS + if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c))) + { + if (i + 1 == p->w_width) + c = '?'; + else + { + int c2; + i++; + c2 = ml->image[i] | (ml->font[i] << 8); + c = recode_char_dw_to_encoding(c, &c2, encoding); + ml->font[i - 1] = c >> 8 & 255; + ml->image[i - 1] = c & 255; + c = c2; + } + } + else +#endif + c = recode_char_to_encoding(c, encoding); + ml->image[i] = c & 255; + ml->font[i] = c >> 8 & 255; + } + } + p->w_encoding = encoding; + return; +} + +#ifdef DW_CHARS +int +utf8_isdouble(c) +int c; +{ + return + (c >= 0x1100 && + (c <= 0x115f || /* Hangul Jamo init. consonants */ + (c >= 0x2e80 && c <= 0xa4cf && (c & ~0x0011) != 0x300a && + c != 0x303f) || /* CJK ... Yi */ + (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ + (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ + (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ + (c >= 0xff00 && c <= 0xff5f) || /* Fullwidth Forms */ + (c >= 0xffe0 && c <= 0xffe6) || + (c >= 0x20000 && c <= 0x2ffff))); +} +#endif + +#else /* !UTF8 */ + +void +WinSwitchEncoding(p, encoding) +struct win *p; +int encoding; +{ + p->w_encoding = encoding; + return; +} + +#endif /* UTF8 */ + +static int +encmatch(s1, s2) +char *s1; +char *s2; +{ + int c1, c2; + do + { + c1 = (unsigned char)*s1; + if (c1 >= 'A' && c1 <= 'Z') + c1 += 'a' - 'A'; + if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9')) + { + s1++; + continue; + } + c2 = (unsigned char)*s2; + if (c2 >= 'A' && c2 <= 'Z') + c2 += 'a' - 'A'; + if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9')) + { + s2++; + continue; + } + if (c1 != c2) + return 0; + s1++; + s2++; + } + while(c1); + return 1; +} + +int +FindEncoding(name) +char *name; +{ + int encoding; + + if (name == 0 || *name == 0) + return 0; + if (encmatch(name, "euc")) + name = "eucJP"; + if (encmatch(name, "off") || encmatch(name, "iso8859-1")) + return 0; +#ifndef UTF8 + if (encmatch(name, "UTF-8")) + return -1; +#endif + for (encoding = 0; encoding < sizeof(encodings)/sizeof(*encodings); encoding++) + if (encmatch(name, encodings[encoding].name)) + { +#ifdef UTF8 + LoadFontTranslationsForEncoding(encoding); +#endif + return encoding; + } + return -1; +} + +char * +EncodingName(encoding) +int encoding; +{ + if (encoding >= sizeof(encodings)/sizeof(*encodings)) + return 0; + return encodings[encoding].name; +} + +int +EncodingDefFont(encoding) +int encoding; +{ + return encodings[encoding].deffont; +} + +void +ResetEncoding(p) +struct win *p; +{ + char *c; + int encoding = p->w_encoding; + + c = encodings[encoding].charsets; + if (c) + SetCharsets(p, c); +#ifdef UTF8 + LoadFontTranslationsForEncoding(encoding); +#endif + if (encodings[encoding].usegr) + p->w_gr = 1; + if (encodings[encoding].noc1) + p->w_c1 = 0; +} + +int +DecodeChar(c, encoding, statep) +int c; +int encoding; +int *statep; +{ + int t; + + debug2("Decoding char %02x for encoding %d\n", c, encoding); +#ifdef UTF8 + if (encoding == UTF8) + return FromUtf8(c, statep); +#endif + if (encoding == SJIS) + { + if (!*statep) + { + if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef)) + { + *statep = c; + return -1; + } + return c | (KANA << 16); + } + t = c; + c = *statep; + *statep = 0; + if (0x40 <= t && t <= 0xfc && t != 0x7f) + { + if (c <= 0x9f) c = (c - 0x81) * 2 + 0x21; + else c = (c - 0xc1) * 2 + 0x21; + if (t <= 0x7e) t -= 0x1f; + else if (t <= 0x9e) t -= 0x20; + else t -= 0x7e, c++; + return (c << 8) | t | (KANJI << 16); + } + return t; + } + if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN) + { + if (!*statep) + { + if (c & 0x80) + { + *statep = c; + return -1; + } + return c; + } + t = c; + c = *statep; + *statep = 0; + if (encoding == EUC_JP) + { + if (c == 0x8e) + return t | (KANA << 16); + if (c == 0x8f) + { + *statep = t | (KANJI0212 << 8); + return -1; + } + } + c &= 0xff7f; + t &= 0x7f; + c = c << 8 | t; + if (encoding == EUC_KR) + return c | (3 << 16); + if (encoding == EUC_CN) + return c | (1 << 16); + if (c & (KANJI0212 << 16)) + return c; + else + return c | (KANJI << 16); + } + if (encoding == BIG5) + { + if (!*statep) + { + if (c & 0x80) + { + *statep = c; + return -1; + } + return c; + } + t = c; + c = *statep; + *statep = 0; + c &= 0x7f; + return c << 8 | t | (030 << 16); + } + return c | (encodings[encoding].deffont << 16); +} + +int +EncodeChar(bp, c, encoding, fontp) +char *bp; +int c; +int encoding; +int *fontp; +{ + int t, f, l; + + debug2("Encoding char %02x for encoding %d\n", c, encoding); + if (c == -1 && fontp) + { + if (*fontp == 0) + return 0; + if (bp) + { + *bp++ = 033; + *bp++ = '('; + *bp++ = 'B'; + } + return 3; + } + f = c >> 16; + +#ifdef UTF8 + if (encoding == UTF8) + { + if (f) + { +# ifdef DW_CHARS + if (is_dw_font(f)) + { + int c2 = c >> 8 & 0xff; + c = (c & 0xff) | (f << 8); + c = recode_char_dw_to_encoding(c, &c2, encoding); + } + else +# endif + { + c = (c & 0xff) | (f << 8); + c = recode_char_to_encoding(c, encoding); + } + } + return ToUtf8(bp, c); + } + if ((c & 0xff00) && f == 0) + { +# ifdef DW_CHARS + if (utf8_isdouble(c)) + { + int c2 = 0xffff; + c = recode_char_dw_to_encoding(c, &c2, encoding); + c = (c << 8) | (c2 & 0xff); + } + else +# endif + { + c = recode_char_to_encoding(c, encoding); + c = ((c & 0xff00) << 8) | (c & 0xff); + } + debug1("Encode: char mapped from utf8 to %x\n", c); + f = c >> 16; + } +#endif + + if (f & 0x80) /* map special 96-fonts to latin1 */ + f = 0; + + if (encoding == SJIS) + { + if (f == KANA) + c = (c & 0xff) | 0x80; + else if (f == KANJI) + { + if (!bp) + return 2; + t = c & 0xff; + c = (c >> 8) & 0xff; + t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e; + c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1); + *bp++ = c; + *bp++ = t; + return 2; + } + } + if (encoding == EUC) + { + if (f == KANA) + { + if (bp) + { + *bp++ = 0x8e; + *bp++ = c; + } + return 2; + } + if (f == KANJI) + { + if (bp) + { + *bp++ = (c >> 8) | 0x80; + *bp++ = c | 0x80; + } + return 2; + } + if (f == KANJI0212) + { + if (bp) + { + *bp++ = 0x8f; + *bp++ = c >> 8; + *bp++ = c; + } + return 3; + } + } + if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1)) + { + if (bp) + { + *bp++ = (c >> 8) | 0x80; + *bp++ = c | 0x80; + } + return 2; + } + if (encoding == BIG5 && f == 030) + { + if (bp) + { + *bp++ = (c >> 8) | 0x80; + *bp++ = c; + } + return 2; + } + + l = 0; + if (fontp && f != *fontp) + { + *fontp = f; + if (f && f < ' ') + { + if (bp) + { + *bp++ = 033; + *bp++ = '$'; + if (f > 2) + *bp++ = '('; + *bp++ = '@' + f; + } + l += f > 2 ? 4 : 3; + } + else if (f < 128) + { + if (f == 0) + f = 'B'; + if (bp) + { + *bp++ = 033; + *bp++ = '('; + *bp++ = f; + } + l += 3; + } + } + if (c & 0xff00) + { + if (bp) + *bp++ = c >> 8; + l++; + } + if (bp) + *bp++ = c; + return l + 1; +} + +int +CanEncodeFont(encoding, f) +int encoding, f; +{ + switch(encoding) + { +#ifdef UTF8 + case UTF8: + return 1; +#endif + case SJIS: + return f == KANJI || f == KANA; + case EUC: + return f == KANJI || f == KANA || f == KANJI0212; + case EUC_KR: + return f == 3; + case EUC_CN: + return f == 1; + case BIG5: + return f == 030; + default: + break; + } + return 0; +} + +#ifdef DW_CHARS +int +PrepareEncodedChar(c) +int c; +{ + int encoding; + int t = 0; + int f; + + encoding = D_encoding; + f = D_rend.font; + t = D_mbcs; + if (encoding == SJIS) + { + if (f == KANA) + return c | 0x80; + else if (f == KANJI) + { + t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e; + c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1); + D_mbcs = t; + } + return c; + } + if (encoding == EUC) + { + if (f == KANA) + { + AddChar(0x8e); + return c | 0x80; + } + if (f == KANJI) + { + D_mbcs = t | 0x80; + return c | 0x80; + } + if (f == KANJI0212) + { + AddChar(0x8f); + D_mbcs = t | 0x80; + return c | 0x80; + } + } + if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1)) + { + D_mbcs = t | 0x80; + return c | 0x80; + } + if (encoding == BIG5 && f == 030) + return c | 0x80; + return c; +} +#endif + +int +RecodeBuf(fbuf, flen, fenc, tenc, tbuf) +unsigned char *fbuf; +int flen; +int fenc, tenc; +unsigned char *tbuf; +{ + int c, i, j; + int decstate = 0, font = 0; + + for (i = j = 0; i < flen; i++) + { + c = fbuf[i]; + c = DecodeChar(c, fenc, &decstate); + if (c == -2) + i--; + if (c < 0) + continue; + j += EncodeChar(tbuf ? (char *)tbuf + j : 0, c, tenc, &font); + } + j += EncodeChar(tbuf ? (char *)tbuf + j : 0, -1, tenc, &font); + return j; +} + +#ifdef UTF8 +int +ContainsSpecialDeffont(ml, xs, xe, encoding) +struct mline *ml; +int xs, xe; +int encoding; +{ + unsigned char *f, *i; + int c, x, dx; + + if (encoding == UTF8 || encodings[encoding].deffont == 0) + return 0; + i = ml->image + xs; + f = ml->font + xs; + dx = xe - xs + 1; + while (dx-- > 0) + { + if (*f++) + continue; + c = *i++; + x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8); + if (c != x) + { + debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x); + return 1; + } + } + debug("ContainsSpecialDeffont: no\n"); + return 0; +} + + +int +LoadFontTranslation(font, file) +int font; +char *file; +{ + char buf[1024], *myfile; + FILE *f; + int i; + int fo; + int x, u, c, ok; + unsigned short (*p)[2], (*tab)[2]; + + myfile = file; + if (myfile == 0) + { + if (font == 0 || screenencodings == 0) + return -1; + if (strlen(screenencodings) > sizeof(buf) - 10) + return -1; + sprintf(buf, "%s/%02x", screenencodings, font & 0xff); + myfile = buf; + } + debug1("LoadFontTranslation: trying %s\n", myfile); + if ((f = secfopen(myfile, "r")) == 0) + return -1; + i = ok = 0; + for (;;) + { + for(; i < 12; i++) + if (getc(f) != "ScreenI2UTF8"[i]) + break; + if (getc(f) != 0) /* format */ + break; + fo = getc(f); /* id */ + if (fo == EOF) + break; + if (font != -1 && font != fo) + break; + i = getc(f); + x = getc(f); + if (x == EOF) + break; + i = i << 8 | x; + getc(f); + while ((x = getc(f)) && x != EOF) + getc(f); /* skip font name (padded to 2 bytes) */ + if ((p = malloc(sizeof(*p) * (i + 1))) == 0) + break; + tab = p; + while(i > 0) + { + x = getc(f); + x = x << 8 | getc(f); + u = getc(f); + c = getc(f); + u = u << 8 | c; + if (c == EOF) + break; + (*p)[0] = x; + (*p)[1] = u; + p++; + i--; + } + (*p)[0] = 0; + (*p)[1] = 0; + if (i || (tab[0][0] & 0x8000)) + { + free(tab); + break; + } + if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0) + free(recodetabs[fo].tab); + recodetabs[fo].tab = tab; + recodetabs[fo].flags = RECODETAB_ALLOCED; + debug1("Successful load of recodetab %02x\n", fo); + c = getc(f); + if (c == EOF) + { + ok = 1; + break; + } + if (c != 'S') + break; + i = 1; + } + fclose(f); + if (font != -1 && file == 0 && recodetabs[font].flags == 0) + recodetabs[font].flags = RECODETAB_TRIED; + return ok ? 0 : -1; +} + +void +LoadFontTranslationsForEncoding(encoding) +int encoding; +{ + char *c; + int f; + + debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding); + if ((c = encodings[encoding].fontlist) != 0) + while ((f = (unsigned char)*c++) != 0) + if (recodetabs[f].flags == 0) + LoadFontTranslation(f, 0); + f = encodings[encoding].deffont; + if (f > 0 && recodetabs[f].flags == 0) + LoadFontTranslation(f, 0); +} + +#endif /* UTF8 */ + +#else /* !ENCODINGS */ + +/* Simple version of EncodeChar to encode font changes for + * copy/paste mode + */ +int +EncodeChar(bp, c, encoding, fontp) +char *bp; +int c; +int encoding; +int *fontp; +{ + int f, l; + f = (c == -1) ? 0 : c >> 16; + l = 0; + if (fontp && f != *fontp) + { + *fontp = f; + if (f && f < ' ') + { + if (bp) + { + *bp++ = 033; + *bp++ = '$'; + if (f > 2) + *bp++ = '('; + *bp++ = '@' + f; + } + l += f > 2 ? 4 : 3; + } + else if (f < 128) + { + if (f == 0) + f = 'B'; + if (bp) + { + *bp++ = 033; + *bp++ = '('; + *bp++ = f; + } + l += 3; + } + } + if (c == -1) + return l; + if (c & 0xff00) + { + if (bp) + *bp++ = c >> 8; + l++; + } + if (bp) + *bp++ = c; + return l + 1; +} + +#endif /* ENCODINGS */ |