summaryrefslogtreecommitdiff
path: root/usr/src/lib/libast/common/misc/magic.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libast/common/misc/magic.c')
-rw-r--r--usr/src/lib/libast/common/misc/magic.c2419
1 files changed, 2419 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/misc/magic.c b/usr/src/lib/libast/common/misc/magic.c
new file mode 100644
index 0000000000..91a9b05e00
--- /dev/null
+++ b/usr/src/lib/libast/common/misc/magic.c
@@ -0,0 +1,2419 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ *
+ * library interface to file
+ *
+ * the sum of the hacks {s5,v10,planix} is _____ than the parts
+ */
+
+static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2007-01-08 $\0\n"; /* embedded "@(#)$Id: ... $" identification string; not referenced elsewhere in the visible code */
+
+static const char lib[] = "libast:magic"; /* library identifier handed to ERROR_translate() by the T() macro */
+
+#include <ast.h>
+#include <ctype.h>
+#include <ccode.h>
+#include <dt.h>
+#include <modex.h>
+#include <error.h>
+#include <regex.h>
+#include <swap.h>
+
+#define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) /* pass non-empty message m through ERROR_translate() with lib as the library id; an empty string is returned unchanged */
+
+#define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) /* strgrpmatch() of s against pattern p, no sub-match capture; flag names suggest anchored at both ends and case-insensitive -- confirm against strgrpmatch() docs */
+
+#define MAXNEST 10 /* { ... } nesting limit; also sizes the ckmagic keep/cap/msg/ret stacks */
+#define MINITEM 4 /* magic buffer rounding */
+
+typedef struct /* identifier dictionary entry (used by both dict[] and info[]) */
+{
+ const char name[16]; /* identifier name (initialized from string literals in dict[]/info[]) */
+ int value; /* identifier value: an ID_* or INFO_* code */
+ Dtlink_t link; /* dictionary link */
+} Info_t;
+
+typedef struct Edit /* edit substitution (singly linked list node) */
+{
+ struct Edit* next; /* next in list */
+ regex_t* from; /* from pattern (regex_t handle) */
+} Edit_t;
+
+struct Entry;
+
+typedef struct /* loop info (referenced by Entry_t.value.loop for op 'l') */
+{
+ struct Entry* lab; /* call this function */
+ int start; /* start here (becomes the data offset on the first iteration) */
+ int size; /* increment by this amount (added to the data offset on each later iteration) */
+ int count; /* dynamic loop count (0 means the loop is not currently active) */
+ int offset; /* dynamic offset (caller offset saved on entry, restored when the loop ends) */
+} Loop_t;
+
+typedef struct Entry /* magic file entry */
+{
+ struct Entry* next; /* next in list */
+ char* expr; /* offset expression (0 means use the fixed offset field) */
+ union
+ {
+ unsigned long num; /* numeric comparison value */
+ char* str; /* string / match pattern value (types s, S, m, M) */
+ struct Entry* lab; /* function entry to call (cont '$') */
+ regex_t* sub; /* regex with substitution (types e, E) */
+ Loop_t* loop; /* loop control (op 'l') */
+ } value; /* comparison value */
+ char* desc; /* file description */
+ char* mime; /* file mime type */
+ unsigned long offset; /* offset in bytes; with expr set, 0 or an INFO_* selector */
+ unsigned long mask; /* mask before compare; doubles as the string length for string types */
+ char cont; /* continuation operation */
+ char type; /* datum type */
+ char op; /* comparison operation */
+ char nest; /* { or } nesting operation */
+ char swap; /* forced swap order */
+} Entry_t;
+
+#define CC_BIT 5 /* bits of CC_* type flags stored per code set in a Cctype_t */
+
+#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) /* pick the smallest type that holds CC_BIT bits for each of the CC_MAPS code sets */
+typedef unsigned short Cctype_t;
+#else
+typedef unsigned long Cctype_t;
+#endif
+
+#define CC_text 0x01 /* printable text byte */
+#define CC_control 0x02 /* control byte */
+#define CC_latin 0x04 /* byte in 0200..0240 (see CCTYPE) */
+#define CC_binary 0x08 /* byte above 0240 (see CCTYPE) */
+#define CC_utf_8 0x10 /* set by cklang() when the data parses as utf-8 sequences */
+
+#define CC_notext CC_text /* CC_text is flipped before checking */
+
+#define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) /* the per-code-set classification bits (excludes CC_utf_8) */
+
+#define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) /* classify byte value c; BEL, TAB, LF, VT and CR count as text rather than control */
+
+#define ID_NONE 0 /* ID_*: identifier classes counted in mp->identifier[] by cklang() */
+#define ID_ASM 1
+#define ID_C 2
+#define ID_COBOL 3
+#define ID_COPYBOOK 4
+#define ID_CPLUSPLUS 5
+#define ID_FORTRAN 6
+#define ID_HTML 7
+#define ID_INCL1 8
+#define ID_INCL2 9
+#define ID_INCL3 10
+#define ID_MAM1 11
+#define ID_MAM2 12
+#define ID_MAM3 13
+#define ID_NOTEXT 14
+#define ID_PL1 15
+#define ID_YACC 16
+
+#define ID_MAX ID_YACC /* largest ID_* value; sizes the identifier[] counter array */
+
+#define INFO_atime 1 /* INFO_*: stat/file attribute selectors switched on by ckmagic() when an entry has an expression */
+#define INFO_blocks 2
+#define INFO_ctime 3
+#define INFO_fstype 4
+#define INFO_gid 5
+#define INFO_mode 6
+#define INFO_mtime 7
+#define INFO_name 8
+#define INFO_nlink 9
+#define INFO_size 10
+#define INFO_uid 11
+
+#define _MAGIC_PRIVATE_ \
+ Magicdisc_t* disc; /* discipline */ \
+ Vmalloc_t* vm; /* vmalloc region */ \
+ Entry_t* magic; /* parsed magic table */ \
+ Entry_t* magiclast; /* last entry in magic */ \
+ char* mime; /* MIME type */ \
+ unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \
+ char fbuf[SF_BUFSIZE + 1]; /* file data */ \
+ char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \
+ char nbuf[256]; /* !CC_NATIVE data */ \
+ char mbuf[64]; /* mime string */ \
+ char sbuf[64]; /* type suffix string */ \
+ char tbuf[2 * PATH_MAX]; /* type string */ \
+ Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \
+ unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \
+ unsigned int multi[UCHAR_MAX + 1]; /* multi char count */ \
+ int keep[MAXNEST]; /* ckmagic nest stack */ \
+ char* cap[MAXNEST]; /* ckmagic mime stack */ \
+ char* msg[MAXNEST]; /* ckmagic text stack */ \
+ Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \
+ int fbsz; /* fbuf size */ \
+ int fbmx; /* fbuf max size */ \
+ int xbsz; /* xbuf size */ \
+ int swap; /* swap() operation */ \
+ unsigned long flags; /* disc+open flags */ \
+ long xoff; /* xbuf offset */ \
+ int identifier[ID_MAX + 1]; /* Info_t identifier */ \
+ Sfio_t* fp; /* fbuf fp */ \
+ Sfio_t* tmp; /* tmp string */ \
+ regdisc_t redisc; /* regex discipline */ \
+ Dtdisc_t dtdisc; /* dict discipline */ \
+ Dt_t* idtab; /* identifier dict */ \
+ Dt_t* infotab; /* info keyword dict */
+
+#include <magic.h>
+
+static Info_t dict[] = /* keyword dictionary: identifier name => ID_* class; inserted into mp->idtab by cklang() */
+{
+ { "COMMON", ID_FORTRAN },
+ { "COMPUTE", ID_COBOL },
+ { "COMP", ID_COPYBOOK },
+ { "COMPUTATIONAL",ID_COPYBOOK },
+ { "DCL", ID_PL1 },
+ { "DEFINED", ID_PL1 },
+ { "DIMENSION", ID_FORTRAN },
+ { "DIVISION", ID_COBOL },
+ { "FILLER", ID_COPYBOOK },
+ { "FIXED", ID_PL1 },
+ { "FUNCTION", ID_FORTRAN },
+ { "HTML", ID_HTML },
+ { "INTEGER", ID_FORTRAN },
+ { "MAIN", ID_PL1 },
+ { "OPTIONS", ID_PL1 },
+ { "PERFORM", ID_COBOL },
+ { "PIC", ID_COPYBOOK },
+ { "REAL", ID_FORTRAN },
+ { "REDEFINES", ID_COPYBOOK },
+ { "S9", ID_COPYBOOK },
+ { "SECTION", ID_COBOL },
+ { "SELECT", ID_COBOL },
+ { "SUBROUTINE", ID_FORTRAN },
+ { "TEXT", ID_ASM },
+ { "VALUE", ID_COPYBOOK },
+ { "attr", ID_MAM3 },
+ { "binary", ID_YACC },
+ { "block", ID_FORTRAN },
+ { "bss", ID_ASM },
+ { "byte", ID_ASM },
+ { "char", ID_C },
+ { "class", ID_CPLUSPLUS },
+ { "clr", ID_NOTEXT },
+ { "comm", ID_ASM },
+ { "common", ID_FORTRAN },
+ { "data", ID_ASM },
+ { "dimension", ID_FORTRAN },
+ { "done", ID_MAM2 },
+ { "double", ID_C },
+ { "even", ID_ASM },
+ { "exec", ID_MAM3 },
+ { "extern", ID_C },
+ { "float", ID_C },
+ { "function", ID_FORTRAN },
+ { "globl", ID_ASM },
+ { "h", ID_INCL3 },
+ { "html", ID_HTML },
+ { "include", ID_INCL1 },
+ { "int", ID_C },
+ { "integer", ID_FORTRAN },
+ { "jmp", ID_NOTEXT },
+ { "left", ID_YACC },
+ { "libc", ID_INCL2 },
+ { "long", ID_C },
+ { "make", ID_MAM1 },
+ { "mov", ID_NOTEXT },
+ { "private", ID_CPLUSPLUS },
+ { "public", ID_CPLUSPLUS },
+ { "real", ID_FORTRAN },
+ { "register", ID_C },
+ { "right", ID_YACC },
+ { "sfio", ID_INCL2 },
+ { "static", ID_C },
+ { "stdio", ID_INCL2 },
+ { "struct", ID_C },
+ { "subroutine", ID_FORTRAN },
+ { "sys", ID_NOTEXT },
+ { "term", ID_YACC },
+ { "text", ID_ASM },
+ { "tst", ID_NOTEXT },
+ { "type", ID_YACC },
+ { "typedef", ID_C },
+ { "u", ID_INCL2 },
+ { "union", ID_YACC },
+ { "void", ID_C },
+};
+
+static Info_t info[] = /* file attribute keyword dictionary: keyword name => INFO_* selector */
+{
+ { "atime", INFO_atime },
+ { "blocks", INFO_blocks },
+ { "ctime", INFO_ctime },
+ { "fstype", INFO_fstype },
+ { "gid", INFO_gid },
+ { "mode", INFO_mode },
+ { "mtime", INFO_mtime },
+ { "name", INFO_name },
+ { "nlink", INFO_nlink },
+ { "size", INFO_size },
+ { "uid", INFO_uid },
+};
+
+/*
+ * locate siz bytes of file data starting at byte offset off
+ *
+ * data that lies within the first mp->fbsz bytes is served from
+ * mp->fbuf; anything else is served from the mp->xbuf window, which is
+ * refilled from mp->fp (aligned down to a SF_BUFSIZE/2 boundary) when
+ * the request falls outside the bytes it currently holds
+ *
+ * returns a pointer into fbuf or xbuf, or 0 if off is negative, the
+ * request extends past mp->fbmx, or the seek/read fails or comes up
+ * short
+ */
+
+static char*
+getdata(register Magic_t* mp, register long off, register int siz)
+{
+	const long	half = SF_BUFSIZE / 2;	/* window alignment */
+	long		end;			/* one past the last byte wanted */
+	long		base;			/* aligned window start */
+
+	if (off < 0)
+		return 0;
+	end = off + siz;
+
+	/* already in the primary buffer */
+	if (end <= mp->fbsz)
+		return mp->fbuf + off;
+
+	/* already in the current indirect window */
+	if (off >= mp->xoff && end <= mp->xoff + mp->xbsz)
+		return mp->xbuf + off - mp->xoff;
+
+	/* beyond the known file size: nothing to read */
+	if (end > mp->fbmx)
+		return 0;
+
+	/* reposition and refill the indirect window */
+	base = (off / half) * half;
+	if (sfseek(mp->fp, base, SEEK_SET) != base)
+		return 0;
+	mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1);
+	if (mp->xbsz < 0)
+	{
+		/* read error: mark the window empty */
+		mp->xoff = 0;
+		mp->xbsz = 0;
+		return 0;
+	}
+	mp->xbuf[mp->xbsz] = 0;
+	mp->xoff = base;
+
+	/* a short read may still not cover the request */
+	if (end > mp->xoff + mp->xbsz)
+		return 0;
+	return mp->xbuf + off - mp->xoff;
+}
+
+/*
+ * @... evaluator for strexpr()
+ *
+ * cs		expression text, or 0 when strexpr() reports an error
+ *		(the error text is then found in *e)
+ * e		set to the first unconsumed char; reset to cs when the
+ *		indirect datum cannot be fetched
+ * handle	the Magic_t handle
+ *
+ * "@offset" or "@(expr)" may be followed by a width letter:
+ * b/B = 1 byte, h/H = 2 bytes, q/Q = 8 bytes; any other alphanumeric
+ * char is consumed and selects the 4 byte default
+ *
+ * returns the value read from the file data at the computed offset,
+ * or the offset itself when the fetch failed, or 0 when cs does not
+ * start with '@'
+ */
+
+static long
+indirect(const char* cs, char** e, void* handle)
+{
+	Magic_t*	mp = (Magic_t*)handle;
+	char*		s = (char*)cs;
+	char*		data;
+	long		n = 0;
+	int		width;
+
+	if (!s)
+	{
+		/* strexpr() error callback */
+		if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
+			(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
+		return n;
+	}
+	if (*s == '@')
+	{
+		/* evaluate the offset: parenthesized expression or plain number */
+		s++;
+		if (*s == '(')
+			n = strexpr(s, e, indirect, mp);
+		else
+			n = strtol(s, e, 0);
+		s = *e;
+
+		/* pick the datum width from the optional suffix */
+		switch (*s)
+		{
+		case 'b':
+		case 'B':
+			width = 1;
+			s++;
+			break;
+		case 'h':
+		case 'H':
+			width = 2;
+			s++;
+			break;
+		case 'q':
+		case 'Q':
+			width = 8;
+			s++;
+			break;
+		default:
+			width = 4;
+			if (isalnum(*s))
+				s++;
+			break;
+		}
+
+		/* fetch the datum; on failure rewind to signal no progress */
+		data = getdata(mp, n, width);
+		if (!data)
+			s = (char*)cs;
+		else if (width == 1)
+			n = *(unsigned char*)data;
+		else
+			n = swapget(mp->swap, data, width);
+	}
+	*e = s;
+	return n;
+}
+
+/*
+ * report the regex error code for re through the discipline error
+ * function at level 3
+ *
+ * nothing is emitted unless MAGIC_VERBOSE is set in mp->flags and the
+ * discipline supplies an errorf
+ */
+
+static void
+regmessage(Magic_t* mp, regex_t* re, int code)
+{
+	char	text[128];
+
+	if (!(mp->flags & MAGIC_VERBOSE) || !mp->disc->errorf)
+		return;
+	regerror(code, re, text, sizeof(text));
+	(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", text);
+}
+
+/*
+ * decompose vcodex(3) method composition
+ *
+ * the encoded data in [m,x) is read as a sequence of records, each a
+ * 0-terminated name followed by a length (7 bits per byte, high bit
+ * set on all but the last byte) and that many bytes, which are skipped
+ *
+ * each name is copied into [b,e), mapped CC_ASCII => CC_NATIVE when
+ * a map exists, with '^' between consecutive names
+ *
+ * returns the updated output pointer; the output is not 0-terminated
+ */
+
+static char*
+vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
+{
+	unsigned char*	map = CCMAP(CC_ASCII, CC_NATIVE);
+	int		first = 1;
+	int		c;
+	int		n;
+
+	for (;;)
+	{
+		/* separator before every name but the first */
+		if (!first)
+			*b++ = '^';
+		first = 0;
+
+		/* copy the 0-terminated method name */
+		for (;;)
+		{
+			if (b >= e || m >= x)
+				break;
+			c = *m++;
+			if (!c)
+				break;
+			*b++ = map ? map[c] : c;
+		}
+		if (b >= e)
+			break;
+
+		/* decode the variable length size of the method data */
+		n = 0;
+		while (m < x)
+		{
+			unsigned char	byte = *m++;
+
+			n = (n<<7) | (byte & 0x7f);
+			if (!(byte & 0x80))
+				break;
+		}
+
+		/* stop when the data runs to (or past) the end, else skip it */
+		if (n >= (x - m))
+			break;
+		m += n;
+	}
+	return b;
+}
+
+/*
+ * check for magic table match in buf
+ *
+ * walks the parsed magic table (mp->magic), evaluating each entry
+ * against the file data (fetched with getdata()) or against the stat
+ * info in st, and appends the description of each matching entry to
+ * buf; mp->mime is set from matching entries that carry a mime type
+ *
+ * mp	magic handle; swap, mime and the keep/msg/cap/ret stacks
+ *	are reset and used as scratch state
+ * file	file name, used for the INFO_name test (and _UWIN lookup)
+ * buf	description output buffer; writes are bounded by PATH_MAX
+ * st	stat info for the INFO_* tests
+ * off	base offset added to each entry offset
+ *
+ * returns buf if a description was produced, 0 otherwise
+ *
+ * NOTE: INFO_* tests rewrite ep->type in the table entry itself
+ */
+
+static char*
+ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
+{
+ register Entry_t* ep;
+ register char* p;
+ register char* b; /* current end of the description in buf */
+ register int level = 0; /* current { } nesting depth */
+ int call = -1; /* top of the mp->ret[] return stack, -1 when empty */
+ int c;
+ char* q;
+ char* t;
+ char* base = 0; /* cached basename of file for INFO_name */
+ unsigned long num;
+ unsigned long mask;
+ regmatch_t matches[10];
+
+ mp->swap = 0;
+ b = mp->msg[0] = buf;
+ mp->mime = mp->cap[0] = 0;
+ mp->keep[0] = 0;
+ for (ep = mp->magic; ep; ep = ep->next)
+ {
+ fun:
+ if (ep->nest == '{')
+ {
+ if (++level >= MAXNEST) /* nesting too deep: drop back to the top level state */
+ {
+ call = -1;
+ level = 0;
+ mp->keep[0] = 0;
+ b = mp->msg[0];
+ mp->mime = mp->cap[0];
+ continue;
+ }
+ mp->keep[level] = mp->keep[level - 1] != 0; /* inherit whether the enclosing level matched */
+ mp->msg[level] = b; /* remember text and mime so a failed group can be undone */
+ mp->cap[level] = mp->mime;
+ }
+ switch (ep->cont)
+ {
+ case '#': /* start of a new top level test */
+ if (mp->keep[level] && b > buf) /* the previous test matched: done */
+ {
+ *b = 0;
+ return buf;
+ }
+ mp->swap = 0;
+ b = mp->msg[0] = buf;
+ mp->mime = mp->cap[0] = 0;
+ if (ep->type == ' ')
+ continue;
+ break;
+ case '$': /* function call: push this entry and jump to value.lab */
+ if (mp->keep[level] && call < (MAXNEST - 1))
+ {
+ mp->ret[++call] = ep;
+ ep = ep->value.lab;
+ goto fun;
+ }
+ continue;
+ case ':': /* function return: pop the calling entry */
+ ep = mp->ret[call--]; /* NOTE(review): assumes call >= 0, i.e. ':' entries are only reached through a call -- confirm against load() */
+ if (ep->op == 'l') /* returning into a loop entry re-evaluates it */
+ goto fun;
+ continue;
+ case '|':
+ if (mp->keep[level] > 1)
+ goto checknest;
+ /*FALLTHROUGH*/
+ default:
+ if (!mp->keep[level]) /* nothing matched so far at this level: undo and skip */
+ {
+ b = mp->msg[level];
+ mp->mime = mp->cap[level];
+ goto checknest;
+ }
+ break;
+ }
+ if (!ep->expr)
+ num = ep->offset + off;
+ else
+ switch (ep->offset) /* with an expression, offset is 0 or an INFO_* selector */
+ {
+ case 0:
+ num = strexpr(ep->expr, NiL, indirect, mp) + off;
+ break;
+ case INFO_atime:
+ num = st->st_atime;
+ ep->type = 'D';
+ break;
+ case INFO_blocks:
+ num = iblocks(st);
+ ep->type = 'N';
+ break;
+ case INFO_ctime:
+ num = st->st_ctime;
+ ep->type = 'D';
+ break;
+ case INFO_fstype:
+ p = fmtfs(st);
+ ep->type = toupper(ep->type);
+ break;
+ case INFO_gid:
+ if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
+ {
+ p = fmtgid(st->st_gid);
+ ep->type = toupper(ep->type);
+ }
+ else
+ {
+ num = st->st_gid;
+ ep->type = 'N';
+ }
+ break;
+ case INFO_mode:
+ if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
+ {
+ p = fmtmode(st->st_mode, 0);
+ ep->type = toupper(ep->type);
+ }
+ else
+ {
+ num = modex(st->st_mode);
+ ep->type = 'N';
+ }
+ break;
+ case INFO_mtime:
+ num = st->st_mtime; /* modification time (previously read st_ctime by mistake) */
+ ep->type = 'D';
+ break;
+ case INFO_name:
+ if (!base)
+ {
+ if (base = strrchr(file, '/'))
+ base++;
+ else
+ base = (char*)file;
+ }
+ p = base;
+ ep->type = toupper(ep->type);
+ break;
+ case INFO_nlink:
+ num = st->st_nlink;
+ ep->type = 'N';
+ break;
+ case INFO_size:
+ num = st->st_size;
+ ep->type = 'N';
+ break;
+ case INFO_uid:
+ if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
+ {
+ p = fmtuid(st->st_uid);
+ ep->type = toupper(ep->type);
+ }
+ else
+ {
+ num = st->st_uid;
+ ep->type = 'N';
+ }
+ break;
+ }
+ switch (ep->type) /* lower case types fetch the datum at offset num; upper case types already have it in p or num */
+ {
+
+ case 'b':
+ if (!(p = getdata(mp, num, 1)))
+ goto next;
+ num = *(unsigned char*)p;
+ break;
+
+ case 'h':
+ if (!(p = getdata(mp, num, 2)))
+ goto next;
+ num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
+ break;
+
+ case 'd':
+ case 'l':
+ case 'v':
+ if (!(p = getdata(mp, num, 4)))
+ goto next;
+ num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
+ break;
+
+ case 'q':
+ if (!(p = getdata(mp, num, 8)))
+ goto next;
+ num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
+ break;
+
+ case 'e':
+ if (!(p = getdata(mp, num, 0)))
+ goto next;
+ /*FALLTHROUGH*/
+ case 'E':
+ if (!ep->value.sub)
+ goto next;
+ if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
+ {
+ c = mp->fbsz; /* no match on the raw data: retry on a copy mapped through mp->x2n */
+ if (c >= sizeof(mp->nbuf))
+ c = sizeof(mp->nbuf) - 1;
+ p = (char*)memcpy(mp->nbuf, p, c);
+ p[c] = 0;
+ ccmapstr(mp->x2n, p, c);
+ if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
+ {
+ if (c != REG_NOMATCH)
+ regmessage(mp, ep->value.sub, c);
+ goto next;
+ }
+ }
+ p = ep->value.sub->re_sub->re_buf; /* substitution result */
+ q = T(ep->desc);
+ t = *q ? q : p;
+ if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
+ *b++ = ' ';
+ b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
+ if (ep->mime)
+ mp->mime = ep->mime;
+ goto checknest;
+
+ case 's':
+ if (!(p = getdata(mp, num, ep->mask))) /* for string types mask holds the string length */
+ goto next;
+ goto checkstr;
+ case 'm':
+ if (!(p = getdata(mp, num, 0)))
+ goto next;
+ /*FALLTHROUGH*/
+ case 'M':
+ case 'S':
+ checkstr:
+ for (;;)
+ {
+ if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) /* lone "*" matches any printable start */
+ break;
+ if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
+ break;
+ if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) /* already retried on the mapped copy, or too long to copy */
+ goto next;
+ p = (char*)memcpy(mp->nbuf, p, ep->mask);
+ p[ep->mask] = 0;
+ ccmapstr(mp->x2n, p, ep->mask);
+ }
+ q = T(ep->desc);
+ if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
+ *b++ = ' ';
+ for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
+ *t = 0; /* temporarily terminate the printable prefix for formatting */
+ b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
+ *t = c;
+ if (ep->mime)
+ mp->mime = ep->mime;
+ goto checknest;
+
+ }
+ if (mask = ep->mask)
+ num &= mask;
+ switch (ep->op)
+ {
+
+ case '=':
+ case '@':
+ if (num == ep->value.num)
+ break;
+ if (ep->cont != '#')
+ goto next;
+ if (!mask)
+ mask = ~mask;
+ if (ep->type == 'h') /* top level mismatch: retry with other byte orders and remember the one that works */
+ {
+ if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
+ {
+ if (!(mp->swap & (mp->swap + 1)))
+ mp->swap = 7;
+ goto swapped;
+ }
+ }
+ else if (ep->type == 'l')
+ {
+ for (c = 1; c < 4; c++)
+ if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
+ {
+ if (!(mp->swap & (mp->swap + 1)))
+ mp->swap = 7;
+ goto swapped;
+ }
+ }
+ else if (ep->type == 'q')
+ {
+ for (c = 1; c < 8; c++)
+ if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
+ goto swapped;
+ }
+ goto next;
+
+ case '!':
+ if (num != ep->value.num)
+ break;
+ goto next;
+
+ case '^':
+ if (num ^ ep->value.num)
+ break;
+ goto next;
+
+ case '>':
+ if (num > ep->value.num)
+ break;
+ goto next;
+
+ case '<':
+ if (num < ep->value.num)
+ break;
+ goto next;
+
+ case 'l': /* loop: call value.loop->lab num times, stepping off by loop->size */
+ if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
+ {
+ if (!ep->value.loop->count)
+ {
+ ep->value.loop->count = num;
+ ep->value.loop->offset = off;
+ off = ep->value.loop->start;
+ }
+ else if (!--ep->value.loop->count)
+ {
+ off = ep->value.loop->offset;
+ goto next;
+ }
+ else
+ off += ep->value.loop->size;
+ mp->ret[++call] = ep;
+ ep = ep->value.loop->lab;
+ goto fun;
+ }
+ goto next;
+
+ case 'm': /* recursive magic check with num as the base offset */
+ c = mp->swap;
+ t = ckmagic(mp, file, b + (b > buf), st, num);
+ mp->swap = c;
+ if (!t)
+ goto next;
+ if (b > buf)
+ *b = ' ';
+ b += strlen(b);
+ break;
+
+ case 'r':
+#if _UWIN
+ {
+ char* e;
+ Sfio_t* rp;
+ Sfio_t* gp;
+
+ if (!(t = strrchr(file, '.')))
+ goto next;
+ sfprintf(mp->tmp, "/reg/classes_root/%s", t);
+ if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
+ goto next;
+ *ep->desc = 0;
+ *ep->mime = 0;
+ gp = 0;
+ while (t = sfgetr(rp, '\n', 1))
+ {
+ if (strneq(t, "Content Type=", 13))
+ {
+ ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
+ strcpy(ep->mime, t + 13);
+ if (gp)
+ break;
+ }
+ else
+ {
+ sfprintf(mp->tmp, "/reg/classes_root/%s", t);
+ if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
+ {
+ ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
+ strcpy(ep->desc, t);
+ if (*ep->mime)
+ break;
+ }
+ }
+ }
+ sfclose(rp);
+ if (!gp)
+ goto next;
+ if (!*ep->mime)
+ {
+ t = T(ep->desc);
+ if (!strncasecmp(t, "microsoft", 9))
+ t += 9;
+ while (isspace(*t))
+ t++;
+ e = "application/x-ms-";
+ ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
+ e = strcopy(ep->mime, e);
+ while ((c = *t++) && c != '.' && c != ' ')
+ *e++ = isupper(c) ? tolower(c) : c;
+ *e = 0;
+ }
+ while (t = sfgetr(gp, '\n', 1))
+ if (*t && !streq(t, "\"\""))
+ {
+ ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
+ strcpy(ep->desc, t);
+ break;
+ }
+ sfclose(gp);
+ if (!*ep->desc)
+ goto next;
+ if (!t)
+ for (t = T(ep->desc); *t; t++)
+ if (*t == '.')
+ *t = ' ';
+ if (!mp->keep[level])
+ mp->keep[level] = 2;
+ mp->mime = ep->mime;
+ break;
+ }
+#else
+ if (ep->cont == '#' && !mp->keep[level])
+ mp->keep[level] = 1;
+ goto next;
+#endif
+
+ case 'v': /* vcodex method list: a variable length size followed by the encoded methods */
+ if (!(p = getdata(mp, num, 4)))
+ goto next;
+ c = 0;
+ do
+ {
+ num++;
+ c = (c<<7) | (*p & 0x7f);
+ } while (*p++ & 0x80); /* NOTE(review): only 4 bytes were requested above; a longer run of continuation bytes reads further -- confirm */
+ if (!(p = getdata(mp, num, c)))
+ goto next;
+ if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
+ {
+ *b++ = ',';
+ *b++ = ' ';
+ }
+ b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
+ goto checknest;
+
+ }
+ swapped:
+ q = T(ep->desc);
+ if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') /* separate from the previous text unless the description starts with punctuation or \b */
+ *b++ = ' ';
+ if (ep->type == 'd' || ep->type == 'D')
+ b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
+ else if (ep->type == 'v')
+ b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
+ else
+ b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
+ if (ep->mime && *ep->mime)
+ mp->mime = ep->mime;
+ checknest:
+ if (ep->nest == '}')
+ {
+ if (!mp->keep[level]) /* group failed: discard its text and mime */
+ {
+ b = mp->msg[level];
+ mp->mime = mp->cap[level];
+ }
+ else if (level > 0)
+ mp->keep[level - 1] = mp->keep[level];
+ if (--level < 0)
+ {
+ level = 0;
+ mp->keep[0] = 0;
+ }
+ }
+ continue;
+ next:
+ if (ep->cont == '&') /* a failed '&' entry fails the whole level */
+ mp->keep[level] = 0;
+ goto checknest;
+ }
+ if (mp->keep[level] && b > buf)
+ {
+ *b = 0;
+ return buf;
+ }
+ return 0;
+}
+
+/*
+ * check english language stats
+ *
+ * uses the byte frequency counts in mp->count[] plus the punctuation
+ * totals gathered by the caller
+ *
+ * pun		number of punctuation chars seen
+ * badpun	punctuation chars not followed by space or newline
+ *
+ * returns non-zero if the data looks like english text
+ */
+
+static int
+ckenglish(register Magic_t* mp, int pun, int badpun)
+{
+	/* letter classes: vowels, frequent letters, rare letters */
+	static const char* const	letters[] = { "aeiou", "etaion", "vjkqxz" };
+
+	const char*	p;
+	int		sum[3];
+	int		k;
+
+	/* too much punctuation that is not followed by white space */
+	if (5 * badpun > pun)
+		return 0;
+
+	/* too many ';' or markup/path chars relative to the letter e */
+	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
+		return 0;
+	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
+		return 0;
+
+	/* total upper + lower case occurrences for each letter class */
+	for (k = 0; k < 3; k++)
+	{
+		sum[k] = 0;
+		for (p = letters[k]; *p; p++)
+			sum[k] += mp->count[toupper(*p)] + mp->count[*p];
+	}
+
+	/* enough vowels among the non-space bytes and few rare letters */
+	return 5 * sum[0] >= mp->fbsz - mp->count[' '] && sum[1] >= 10 * sum[2];
+}
+
+/*
+ * check programming language stats
+ *
+ * fallback classifier used by type() when no magic table entry
+ * matches: the bytes in mp->fbuf are classified by character code
+ * type, then (for text) scanned for "#!" lines, identifiers from
+ * dict[], quoting, comments and punctuation, and the resulting
+ * counts, together with the file name suffix, select a description
+ *
+ * mp	magic handle; count[], multi[] and identifier[] are cleared
+ *	and refilled, mp->mime is set, and fbuf may be converted to
+ *	the native code set in place
+ * file	file name, used for base name and suffix tests
+ * buf	output buffer (writes bounded by PATH_MAX), used only when
+ *	the description needs a code set or "dos " qualifier
+ * st	stat info, used for the execute permission bits
+ *
+ * returns the description: a translated literal, mp->sbuf, or buf
+ */
+
+static char*
+cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
+{
+ register int c;
+ register unsigned char* b;
+ register unsigned char* e;
+ register int q;
+ register char* s;
+ char* t;
+ char* base;
+ char* suff;
+ char* t1;
+ char* t2;
+ char* t3;
+ int n;
+ int badpun;
+ int code;
+ int pun;
+ Cctype_t flags;
+ Info_t* ip;
+
+ b = (unsigned char*)mp->fbuf;
+ e = b + mp->fbsz;
+ memzero(mp->count, sizeof(mp->count));
+ memzero(mp->multi, sizeof(mp->multi));
+ memzero(mp->identifier, sizeof(mp->identifier));
+
+ /*
+ * check character coding
+ */
+
+ flags = 0;
+ while (b < e)
+ flags |= mp->cctype[*b++]; /* union of the per code set type bits of every byte */
+ b = (unsigned char*)mp->fbuf;
+ code = 0;
+ q = CC_ASCII;
+ n = CC_MASK;
+ for (c = 0; c < CC_MAPS; c++) /* pick the code set whose (text-flipped) type mask is smallest */
+ {
+ flags ^= CC_text;
+ if ((flags & CC_MASK) < n)
+ {
+ n = flags & CC_MASK;
+ q = c;
+ }
+ flags >>= CC_BIT;
+ }
+ flags = n;
+ if (!(flags & (CC_binary|CC_notext))) /* data is text in code set q */
+ {
+ if (q != CC_NATIVE)
+ {
+ code = q;
+ ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); /* convert the buffer to the native code set in place */
+ }
+ if (b[0] == '#' && b[1] == '!') /* interpreter line */
+ {
+ for (b += 2; b < e && isspace(*b); b++);
+ for (s = (char*)b; b < e && isprint(*b); b++);
+ c = *b; /* saved so the terminator written next can be undone */
+ *b = 0;
+ if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
+ {
+ if (t = strrchr(s, '/'))
+ s = t + 1;
+ for (t = s; *t; t++)
+ if (isspace(*t))
+ {
+ *t = 0;
+ break;
+ }
+ sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
+ mp->mime = mp->mbuf;
+ if (match(s, "*sh"))
+ {
+ t1 = T("command");
+ if (streq(s, "sh"))
+ *s = 0;
+ else
+ {
+ *b++ = ' ';
+ *b = 0;
+ }
+ }
+ else
+ {
+ t1 = T("interpreter");
+ *b++ = ' ';
+ *b = 0;
+ }
+ sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
+ s = mp->sbuf;
+ goto qualify;
+ }
+ *b = c;
+ b = (unsigned char*)mp->fbuf;
+ }
+ badpun = 0;
+ pun = 0;
+ q = 0; /* pending quote/comment terminator char, 0 when outside */
+ s = 0; /* start of the identifier being scanned, 0 when none */
+ t = 0;
+ while (b < e)
+ {
+ c = *b++;
+ mp->count[c]++;
+ if (c == q && (q != '*' || *b == '/' && b++)) /* end of quote or comment */
+ {
+ mp->multi[q]++;
+ q = 0;
+ }
+ else if (c == '\\') /* skip the escaped char */
+ {
+ s = 0;
+ b++;
+ }
+ else if (!q)
+ {
+ if (isalpha(c) || c == '_')
+ {
+ if (!s)
+ s = (char*)b - 1;
+ }
+ else if (!isdigit(c))
+ {
+ if (s) /* c ends the identifier starting at s */
+ {
+ if (s > mp->fbuf)
+ switch (*(s - 1)) /* context from the char before the identifier */
+ {
+ case ':':
+ if (*b == ':')
+ mp->multi[':']++;
+ break;
+ case '.':
+ if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n')) /* ".xx" at line start */
+ mp->multi['.']++;
+ break;
+ case '\n':
+ case '\\':
+ if (*b == '{')
+ t = (char*)b + 1;
+ break;
+ case '{':
+ if (s == t && *b == '}')
+ mp->multi['X']++;
+ break;
+ }
+ if (!mp->idtab) /* build the identifier dictionary on first use */
+ {
+ if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
+ for (q = 0; q < elementsof(dict); q++)
+ dtinsert(mp->idtab, &dict[q]);
+ else if (mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
+ q = 0;
+ }
+ if (mp->idtab)
+ {
+ *(b - 1) = 0; /* temporarily terminate the identifier for the lookup */
+ if (ip = (Info_t*)dtmatch(mp->idtab, s))
+ mp->identifier[ip->value]++;
+ *(b - 1) = c;
+ }
+ s = 0;
+ }
+ switch (c)
+ {
+ case '\t':
+ if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n') /* tab at line start */
+ mp->multi['\t']++;
+ break;
+ case '"':
+ case '\'':
+ q = c;
+ break;
+ case '/':
+ if (*b == '*')
+ q = *b++;
+ else if (*b == '/')
+ q = '\n';
+ break;
+ case '$':
+ if (*b == '(' && *(b + 1) != ' ')
+ mp->multi['$']++;
+ break;
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case '(':
+ mp->multi[c]++;
+ break;
+ case ')':
+ mp->multi[c]++;
+ goto punctuation;
+ case ':':
+ if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
+ mp->multi[':']++;
+ goto punctuation;
+ case '.':
+ case ',':
+ case '%':
+ case ';':
+ case '?':
+ punctuation:
+ pun++;
+ if (*b != ' ' && *b != '\n')
+ badpun++;
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ while (b < e)
+ mp->count[*b++]++;
+ base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
+ suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
+ if (!flags) /* plain text: try the file name first, then the content statistics */
+ {
+ if (match(suff, "*sh|bat|cmd"))
+ goto id_sh;
+ if (match(base, "*@(mkfile)"))
+ goto id_mk;
+ if (match(base, "*@(makefile|.mk)"))
+ goto id_make;
+ if (match(base, "*@(mamfile|.mam)"))
+ goto id_mam;
+ if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
+ goto id_c;
+ if (match(suff, "f"))
+ goto id_fortran;
+ if (match(suff, "htm+(l)"))
+ goto id_html;
+ if (match(suff, "cpy"))
+ goto id_copybook;
+ if (match(suff, "cob|cbl|cb2"))
+ goto id_cobol;
+ if (match(suff, "pl[1i]"))
+ goto id_pl1;
+ if (match(suff, "tex"))
+ goto id_tex;
+ if (match(suff, "asm|s"))
+ goto id_asm;
+ if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.'))) /* NOTE(review): suff is never 0 here (it defaults to "") */
+ {
+ id_sh:
+ s = T("command script");
+ mp->mime = "application/sh";
+ goto qualify;
+ }
+ if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
+ {
+ s = T("mail message");
+ mp->mime = "message/rfc822";
+ goto qualify;
+ }
+ if (match(base, "*@(mkfile)"))
+ {
+ id_mk:
+ s = "mkfile";
+ mp->mime = "application/mk";
+ goto qualify;
+ }
+ if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
+ {
+ id_make:
+ s = "makefile";
+ mp->mime = "application/make";
+ goto qualify;
+ }
+ if (mp->multi['.'] >= 3)
+ {
+ s = T("nroff input");
+ mp->mime = "application/x-troff";
+ goto qualify;
+ }
+ if (mp->multi['X'] >= 3)
+ {
+ s = T("TeX input");
+ mp->mime = "application/x-tex";
+ goto qualify;
+ }
+ if (mp->fbsz < SF_BUFSIZE && /* whole file seen: brackets must balance; otherwise opens may exceed closes */
+ (mp->multi['('] == mp->multi[')'] &&
+ mp->multi['{'] == mp->multi['}'] &&
+ mp->multi['['] == mp->multi[']']) ||
+ mp->fbsz >= SF_BUFSIZE &&
+ (mp->multi['('] >= mp->multi[')'] &&
+ mp->multi['{'] >= mp->multi['}'] &&
+ mp->multi['['] >= mp->multi[']']))
+ {
+ c = mp->identifier[ID_INCL1];
+ if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
+ mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
+ mp->count['='] >= 20 && mp->count[';'] >= 20)
+ {
+ id_c:
+ t1 = "";
+ t2 = "c ";
+ t3 = T("program");
+ switch (*suff)
+ {
+ case 'c':
+ case 'C':
+ mp->mime = "application/x-cc";
+ break;
+ case 'l':
+ case 'L':
+ t1 = "lex ";
+ mp->mime = "application/x-lex";
+ break;
+ default:
+ t3 = T("header");
+ if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
+ {
+ mp->mime = "application/x-cc";
+ break;
+ }
+ /*FALLTHROUGH*/
+ case 'y':
+ case 'Y':
+ t1 = "yacc ";
+ mp->mime = "application/x-yacc";
+ break;
+ }
+ if (mp->identifier[ID_CPLUSPLUS] >= 3)
+ {
+ t2 = "c++ ";
+ mp->mime = "application/x-c++";
+ }
+ sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
+ s = mp->sbuf;
+ goto qualify;
+ }
+ }
+ if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
+ (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
+ mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
+ {
+ id_mam:
+ s = T("mam program");
+ mp->mime = "application/x-mam";
+ goto qualify;
+ }
+ if (mp->identifier[ID_FORTRAN] >= 8)
+ {
+ id_fortran:
+ s = T("fortran program");
+ mp->mime = "application/x-fortran";
+ goto qualify;
+ }
+ if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
+ {
+ id_html:
+ s = T("html input");
+ mp->mime = "text/html";
+ goto qualify;
+ }
+ if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
+ {
+ id_copybook:
+ s = T("cobol copybook");
+ mp->mime = "application/x-cobol";
+ goto qualify;
+ }
+ if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
+ {
+ id_cobol:
+ s = T("cobol program");
+ mp->mime = "application/x-cobol";
+ goto qualify;
+ }
+ if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
+ {
+ id_pl1:
+ s = T("pl1 program");
+ mp->mime = "application/x-pl1";
+ goto qualify;
+ }
+ if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
+ {
+ id_tex:
+ s = T("TeX input");
+ mp->mime = "text/tex";
+ goto qualify;
+ }
+ if (mp->identifier[ID_ASM] >= 4)
+ {
+ id_asm:
+ s = T("as program");
+ mp->mime = "application/x-as";
+ goto qualify;
+ }
+ if (ckenglish(mp, pun, badpun))
+ {
+ s = T("english text");
+ mp->mime = "text/plain";
+ goto qualify;
+ }
+ }
+ else if (streq(base, "core"))
+ {
+ mp->mime = "x-system/core";
+ return T("core dump");
+ }
+ if (flags & (CC_binary|CC_notext)) /* not text in any code set: see whether the bytes form utf-8 sequences */
+ {
+ b = (unsigned char*)mp->fbuf;
+ e = b + mp->fbsz;
+ n = 0;
+ for (;;)
+ {
+ c = *b++;
+ q = 0;
+ while (c & 0x80) /* count the leading 1 bits of the lead byte */
+ {
+ c <<= 1;
+ q++;
+ }
+ switch (q)
+ {
+ case 4:
+ if (b < e && (*b++ & 0xc0) != 0x80) /* each continuation byte must be 10xxxxxx */
+ break;
+ case 3:
+ if (b < e && (*b++ & 0xc0) != 0x80)
+ break;
+ case 2:
+ if (b < e && (*b++ & 0xc0) != 0x80)
+ break;
+ n = 1; /* saw at least one complete multi-byte sequence */
+ case 0:
+ if (b >= e)
+ {
+ if (n)
+ {
+ flags &= ~(CC_binary|CC_notext);
+ flags |= CC_utf_8;
+ }
+ break;
+ }
+ continue;
+ }
+ break;
+ }
+ }
+ if (flags & (CC_binary|CC_notext))
+ {
+ unsigned long d = 0;
+
+ if ((q = mp->fbsz / UCHAR_MAX) >= 2)
+ {
+ /*
+ * compression/encryption via standard deviation
+ */
+
+
+ for (c = 0; c < UCHAR_MAX; c++) /* NOTE(review): byte value UCHAR_MAX itself is not included -- confirm intent */
+ {
+ pun = mp->count[c] - q;
+ d += pun * pun;
+ }
+ d /= mp->fbsz;
+ }
+ if (d <= 0)
+ s = T("binary");
+ else if (d < 4)
+ s = T("encrypted");
+ else if (d < 16)
+ s = T("packed");
+ else if (d < 64)
+ s = T("compressed");
+ else if (d < 256)
+ s = T("delta");
+ else
+ s = T("data");
+ mp->mime = "application/octet-stream";
+ return s;
+ }
+ mp->mime = "text/plain";
+ if (flags & CC_utf_8)
+ s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
+ else if (flags & CC_latin)
+ s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
+ else
+ s = (flags & CC_control) ? T("text with control characters") : T("text");
+ qualify:
+ if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) /* nearly every newline is paired with a carriage return */
+ {
+ t = "dos ";
+ mp->mime = "text/dos";
+ }
+ else
+ t = "";
+ if (code) /* data was converted from a non-native code set: prefix its name */
+ {
+ if (code == CC_ASCII)
+ sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
+ else
+ {
+ sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
+ mp->mime = "text/ebcdic";
+ }
+ s = buf;
+ }
+ else if (*t)
+ {
+ sfsprintf(buf, PATH_MAX, "%s%s", t, s);
+ s = buf;
+ }
+ return s;
+}
+
+/*
+ * return the basic magic string for file,st in buf,size
+ *
+ * non-regular files are described from st->st_mode alone; otherwise
+ * up to sizeof(mp->fbuf)-1 bytes are read from mp->fp into mp->fbuf
+ * and classified by ckmagic(), falling back to cklang()
+ *
+ * mp->mime is always set on return: "application/unknown" when no
+ * classifier set it, and a mime type ending in "%s" is expanded in
+ * mp->fbuf using words taken from the description
+ *
+ * the returned string is buf, a literal, or another static buffer
+ *
+ * NOTE(review): several sfsprintf() calls here bound their output by
+ * PATH_MAX rather than by size -- confirm callers pass a buffer of at
+ * least PATH_MAX bytes
+ */
+
+static char*
+type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
+{
+ register char* s;
+ register char* t;
+
+ mp->mime = 0;
+ if (!S_ISREG(st->st_mode))
+ {
+ if (S_ISDIR(st->st_mode))
+ {
+ mp->mime = "x-system/dir";
+ return T("directory");
+ }
+ if (S_ISLNK(st->st_mode))
+ {
+ mp->mime = "x-system/lnk";
+ s = buf;
+ s += sfsprintf(s, PATH_MAX, T("symbolic link to "));
+ if (pathgetlink(file, s, size - (s - buf)) < 0) /* link text is appended after the prefix */
+ return T("cannot read symbolic link text");
+ return buf;
+ }
+ if (S_ISBLK(st->st_mode))
+ {
+ mp->mime = "x-system/blk";
+ sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
+ return buf;
+ }
+ if (S_ISCHR(st->st_mode))
+ {
+ mp->mime = "x-system/chr";
+ sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st));
+ return buf;
+ }
+ if (S_ISFIFO(st->st_mode))
+ {
+ mp->mime = "x-system/fifo";
+ return "fifo";
+ }
+#ifdef S_ISSOCK
+ if (S_ISSOCK(st->st_mode))
+ {
+ mp->mime = "x-system/sock";
+ return "socket";
+ }
+#endif
+ }
+ if (!(mp->fbmx = st->st_size)) /* fbmx bounds the offsets getdata() will try to read */
+ s = T("empty");
+ else if (!mp->fp)
+ s = T("cannot read");
+ else
+ {
+ mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
+ if (mp->fbsz < 0)
+ s = fmterror(errno);
+ else if (mp->fbsz == 0)
+ s = T("empty");
+ else
+ {
+ mp->fbuf[mp->fbsz] = 0;
+ mp->xoff = 0; /* invalidate the indirect data window */
+ mp->xbsz = 0;
+ if (!(s = ckmagic(mp, file, buf, st, 0)))
+ s = cklang(mp, file, buf, st);
+ }
+ }
+ if (!mp->mime)
+ mp->mime = "application/unknown";
+ else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) /* mime type ends in "%s": fill it in from the description */
+ {
+ register char* b;
+ register char* be;
+ register char* m;
+ register char* me;
+
+ b = mp->mime;
+ me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; /* the expanded mime type is built in fbuf, overwriting the file data */
+ while (m < me && b < t) /* copy the part before "%s" */
+ *m++ = *b++;
+ b = t = s;
+ for (;;) /* select the span [b,be) of the description to substitute */
+ {
+ if (!(be = strchr(t, ' ')))
+ {
+ be = b + strlen(b);
+ break;
+ }
+ if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) /* stop at a word ending in ',' or followed by "data"/"file" */
+ break;
+ b = t;
+ t = be + 1;
+ }
+ while (m < me && b < be) /* append the span with spaces turned into '-' */
+ if ((*m++ = *b++) == ' ')
+ *(m - 1) = '-';
+ *m = 0;
+ }
+ return s;
+}
+
+/*
+ * low level for magicload()
+ *
+ * reads the magic table on fp one line at a time and builds a chain of
+ * Entry_t allocated from mp->vm; on return the chain is appended to
+ * mp->magic/mp->magiclist (see the end of the function)
+ * file is only recorded in error_info for diagnostics
+ *
+ * each table line is parsed in this order:
+ * nesting and function definition/call, continuation operator,
+ * offset (number or expression), type, comparison op and value,
+ * description with optional catalog index and optional mime type
+ *
+ * diagnostics go through mp->disc->errorf when it is set
+ * always returns 0
+ */
+
+static int
+load(register Magic_t* mp, char* file, register Sfio_t* fp)
+{
+ register Entry_t* ep;
+ register char* p;
+ register char* p2;
+ char* p3;
+ char* next;
+ int n;
+ int lge;
+ int lev;
+ int ent;
+ int old;
+ int cont;
+ Info_t* ip;
+ Entry_t* ret;
+ Entry_t* first;
+ Entry_t* last = 0;
+ Entry_t* fun['z' - 'a' + 1]; /* function bodies indexed by name - 'a' */
+
+ memzero(fun, sizeof(fun));
+ cont = '$';
+ ent = 0; /* nesting level of the open function body, 0 if none */
+ lev = 0; /* current { ... } nesting level */
+ old = 0; /* old style >> nesting state */
+ ret = 0; /* [FUNCTION] entry still waiting for its return */
+ error_info.file = file;
+ error_info.line = 0;
+ first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); /* ep is always the next unused entry */
+ while (p = sfgetr(fp, '\n', 1))
+ {
+ error_info.line++;
+ for (; isspace(*p); p++);
+
+ /*
+ * nesting
+ * blank and # lines, { and } lines, and single letter
+ * function definitions x{ and calls x() are consumed here
+ */
+
+ switch (*p)
+ {
+ case 0:
+ case '#':
+ cont = '#';
+ continue;
+ case '{':
+ if (++lev < MAXNEST)
+ ep->nest = *p;
+ else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
+ continue;
+ case '}':
+ if (!last || lev <= 0)
+ {
+ if (mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
+ }
+ else if (lev-- == ent) /* this } closes the function body opened at level ent */
+ {
+ ent = 0;
+ ep->cont = ':';
+ ep->offset = ret->offset;
+ ep->nest = ' ';
+ ep->type = ' ';
+ ep->op = ' ';
+ ep->desc = "[RETURN]";
+ last = ep;
+ ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); /* the [FUNCTION] entry, not [RETURN], links to what follows */
+ ret = 0;
+ }
+ else
+ last->nest = *p;
+ continue;
+ default:
+ if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
+ {
+ n = *p++;
+ if (n >= 'a' && n <= 'z')
+ n -= 'a';
+ else
+ {
+ if (mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
+ n = 0;
+ }
+ if (ret && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
+ if (*p == '{')
+ {
+ ent = ++lev;
+ ret = ep;
+ ep->desc = "[FUNCTION]";
+ }
+ else
+ {
+ if (*(p + 1) != ')' && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
+ ep->desc = "[CALL]";
+ }
+ ep->cont = cont;
+ ep->offset = n;
+ ep->nest = ' ';
+ ep->type = ' ';
+ ep->op = ' ';
+ last = ep;
+ ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
+ if (ret)
+ fun[n] = last->value.lab = ep; /* the body starts at the next entry */
+ else if (!(last->value.lab = fun[n]) && mp->disc->errorf) /* value.lab stays 0 for an undefined function */
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
+ continue;
+ }
+ if (!ep->nest)
+ ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
+ break;
+ }
+
+ /*
+ * continuation
+ */
+
+ cont = '$';
+ switch (*p)
+ {
+ case '>':
+ old = 1;
+ if (*(p + 1) == *p)
+ {
+ /*
+ * old style nesting push
+ */
+
+ p++;
+ old = 2;
+ if (!lev && last)
+ {
+ lev = 1;
+ last->nest = '{';
+ if (last->cont == '>')
+ last->cont = '&';
+ ep->nest = '1';
+ }
+ }
+ /*FALLTHROUGH*/
+ case '+':
+ case '&':
+ case '|':
+ ep->cont = *p++;
+ break;
+ default:
+ if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
+ /*FALLTHROUGH*/
+ case '*':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ ep->cont = (lev > 0) ? '&' : '#';
+ break;
+ }
+ switch (old)
+ {
+ case 1:
+ old = 0;
+ if (lev)
+ {
+ /*
+ * old style nesting pop
+ */
+
+ lev = 0;
+ if (last)
+ last->nest = '}';
+ ep->nest = ' ';
+ if (ep->cont == '&')
+ ep->cont = '#';
+ }
+ break;
+ case 2:
+ old = 1;
+ break;
+ }
+ if (isdigit(*p))
+ {
+ /*
+ * absolute offset
+ */
+
+ ep->offset = strton(p, &next, NiL, 0);
+ p2 = next;
+ }
+ else
+ {
+ for (p2 = p; *p2 && !isspace(*p2); p2++);
+ if (!*p2)
+ {
+ if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
+ continue;
+ }
+
+ /*
+ * offset expression
+ * an alphabetic expression is looked up in mp->infotab
+ */
+
+ *p2++ = 0;
+ ep->expr = vmstrdup(mp->vm, p);
+ if (isalpha(*p))
+ ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
+ else if (*p == '(' && ep->cont == '>')
+ {
+ /*
+ * convert old style indirection to @
+ */
+
+ p = ep->expr + 1;
+ for (;;)
+ {
+ switch (*p++)
+ {
+ case 0:
+ case '@':
+ case '(':
+ break;
+ case ')':
+ break;
+ default:
+ continue;
+ }
+ break;
+ }
+ if (*--p == ')')
+ {
+ *p = 0;
+ *ep->expr = '@';
+ }
+ }
+ }
+ for (; isspace(*p2); p2++);
+ for (p = p2; *p2 && !isspace(*p2); p2++);
+ if (!*p2)
+ {
+ if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
+ continue;
+ }
+ *p2++ = 0;
+
+ /*
+ * type
+ * an optional be/le prefix selects the byte order
+ * s* and a* types are stored as 's' except sh* which is 'h'
+ */
+
+ if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
+ {
+ ep->swap = ~(*p == 'l' ? 7 : 0); /* le => ~7, be => ~0 */
+ p += 2;
+ }
+ if (*p == 's')
+ {
+ if (*(p + 1) == 'h')
+ ep->type = 'h';
+ else
+ ep->type = 's';
+ }
+ else if (*p == 'a')
+ ep->type = 's';
+ else
+ ep->type = *p;
+ if (p = strchr(p, '&'))
+ {
+ /*
+ * old style mask
+ */
+
+ ep->mask = strton(++p, NiL, NiL, 0);
+ }
+ for (; isspace(*p2); p2++);
+ if (ep->mask)
+ *--p2 = '='; /* a type mask forces an explicit = in front of the value field */
+
+ /*
+ * comparison operation
+ * the field ends at the first tab if there is one
+ */
+
+ p = p2;
+ if (p2 = strchr(p, '\t'))
+ *p2++ = 0;
+ else
+ {
+ int qe = 0;
+ int qn = 0;
+
+ /*
+ * assume balanced {}[]()\\""'' field
+ * qe is the closing delimiter being waited for
+ * qn counts openers of that delimiter
+ */
+
+ for (p2 = p;;)
+ {
+ switch (n = *p2++)
+ {
+ case 0:
+ break;
+ case '{':
+ if (!qe)
+ qe = '}';
+ if (qe == '}')
+ qn++;
+ continue;
+ case '(':
+ if (!qe)
+ qe = ')';
+ if (qe == ')')
+ qn++;
+ continue;
+ case '[':
+ if (!qe)
+ qe = ']';
+ if (qe == ']')
+ qn++;
+ continue;
+ case '}':
+ case ')':
+ case ']':
+ if (qe == n && qn > 0)
+ qn--;
+ continue;
+ case '"':
+ case '\'':
+ if (!qe)
+ qe = n;
+ else if (qe == n)
+ qe = 0;
+ continue;
+ case '\\':
+ if (*p2)
+ p2++;
+ continue;
+ default:
+ if (!qe && isspace(n))
+ break;
+ continue;
+ }
+ if (n)
+ *(p2 - 1) = 0;
+ else
+ p2--;
+ break;
+ }
+ }
+ lge = 0; /* -1 for >=, +1 for <=, added to the numeric value below */
+ if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
+ ep->op = '=';
+ else
+ {
+ if (*p == '&')
+ {
+ ep->mask = strton(++p, &next, NiL, 0);
+ p = next;
+ }
+ switch (*p)
+ {
+ case '=':
+ case '>':
+ case '<':
+ case '*':
+ ep->op = *p++;
+ if (*p == '=')
+ {
+ p++;
+ switch (ep->op)
+ {
+ case '>':
+ lge = -1;
+ break;
+ case '<':
+ lge = 1;
+ break;
+ }
+ }
+ break;
+ case '!':
+ case '@':
+ ep->op = *p++;
+ if (*p == '=')
+ p++;
+ break;
+ case 'x':
+ p++;
+ ep->op = '*';
+ break;
+ default:
+ ep->op = '=';
+ if (ep->mask)
+ ep->value.num = ep->mask;
+ break;
+ }
+ }
+ if (ep->op != '*' && !ep->value.num) /* a value still has to be parsed from p */
+ {
+ if (ep->type == 'e')
+ {
+ if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
+ {
+ ep->value.sub->re_disc = &mp->redisc;
+ if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
+ {
+ p += ep->value.sub->re_npat;
+ if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
+ p += ep->value.sub->re_npat;
+ }
+ if (n)
+ {
+ regmessage(mp, ep->value.sub, n);
+ ep->value.sub = 0;
+ }
+ else if (*p && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
+ }
+ }
+ else if (ep->type == 'm')
+ {
+ ep->mask = stresc(p) + 1; /* for m and s types mask holds the value length */
+ ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
+ memcpy(ep->value.str, p, ep->mask);
+ if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
+ ep->value.str[ep->mask - 1] = '*';
+ }
+ else if (ep->type == 's')
+ {
+ ep->mask = stresc(p);
+ ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
+ memcpy(ep->value.str, p, ep->mask);
+ }
+ else if (*p == '\'')
+ {
+ stresc(p);
+ ep->value.num = *(unsigned char*)(p + 1) + lge;
+ }
+ else if (strmatch(p, "+([a-z])\\(*\\)"))
+ {
+ char* t;
+
+ t = p;
+ ep->type = 'V';
+ ep->op = *p; /* the first letter of the name selects the builtin */
+ while (*p && *p++ != '(');
+ switch (ep->op)
+ {
+ case 'l':
+ n = *p++;
+ if (n < 'a' || n > 'z')
+ {
+ if (mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
+ }
+ else if (!fun[n -= 'a'])
+ {
+ if (mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
+ }
+ else
+ {
+ ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); /* only allocated on this path: value.loop is 0 after either error above */
+ ep->value.loop->lab = fun[n];
+ while (*p && *p++ != ',');
+ ep->value.loop->start = strton(p, &t, NiL, 0);
+ while (*t && *t++ != ',');
+ ep->value.loop->size = strton(t, &t, NiL, 0);
+ }
+ break;
+ case 'm':
+ case 'r':
+ ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
+ ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
+ break;
+ case 'v':
+ break;
+ default:
+ if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
+ break;
+ }
+ }
+ else
+ {
+ ep->value.num = strton(p, NiL, NiL, 0) + lge;
+ if (ep->op == '@')
+ ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
+ }
+ }
+
+ /*
+ * file description
+ * tab separated: [catalog:index] description [mime]
+ */
+
+ if (p2)
+ {
+ for (; isspace(*p2); p2++);
+ if (p = strchr(p2, '\t'))
+ {
+ /*
+ * check for message catalog index
+ */
+
+ *p++ = 0;
+ if (isalpha(*p2))
+ {
+ for (p3 = p2; isalnum(*p3); p3++);
+ if (*p3++ == ':')
+ {
+ for (; isdigit(*p3); p3++);
+ if (!*p3)
+ {
+ for (p2 = p; isspace(*p2); p2++); /* skip the index: the description is the next field */
+ if (p = strchr(p2, '\t'))
+ *p++ = 0;
+ }
+ }
+ }
+ }
+ stresc(p2);
+ ep->desc = vmstrdup(mp->vm, p2);
+ if (p)
+ {
+ for (; isspace(*p); p++);
+ if (*p)
+ ep->mime = vmstrdup(mp->vm, p);
+ }
+ }
+ else
+ ep->desc = "";
+
+ /*
+ * get next entry
+ */
+
+ last = ep;
+ ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
+ }
+ if (last)
+ {
+ last->next = 0;
+ if (mp->magiclast)
+ mp->magiclast->next = first; /* append to entries from earlier loads */
+ else
+ mp->magic = first;
+ mp->magiclast = last;
+ }
+ vmfree(mp->vm, ep); /* discard the trailing unused entry */
+ if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
+ {
+ if (lev < 0)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
+ else if (lev > 0)
+ (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
+ if (ret)
+ (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
+ }
+ error_info.file = 0;
+ error_info.line = 0;
+ return 0;
+}
+
+/*
+ * load a magic file into mp
+ *
+ * file is a single path, or one of 0, "", "-", "." to select the
+ * ':' separated list in the MAGIC_FILE_ENV environment variable,
+ * falling back to MAGIC_FILE when that is unset or empty
+ *
+ * list elements that cannot be opened directly are looked up with
+ * pathpath(), and elements with no '/' are retried under MAGIC_DIR;
+ * elements that still cannot be opened are skipped
+ *
+ * flags are or'd with mp->disc->flags into mp->flags
+ *
+ * returns 0 if at least one file was loaded, -1 otherwise
+ */
+
+int
+magicload(register Magic_t* mp, const char* file, unsigned long flags)
+{
+	register char*	s;
+	register char*	e;
+	register char*	t;
+	int		n;
+	int		found;
+	int		list;
+	Sfio_t*		fp;
+
+	mp->flags = mp->disc->flags | flags;
+	found = 0;
+	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
+	{
+		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
+			s = MAGIC_FILE;
+	}
+	for (;;)
+	{
+		if (!list)
+			e = 0;
+		else if (e = strchr(s, ':'))
+		{
+			/*
+			 * ok, so ~ won't work for the last list element
+			 * we do it for MAGIC_FILE_ENV anyway
+			 */
+
+			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
+			{
+				sfputr(mp->tmp, t, -1);
+				s += n - 1;
+			}
+			sfwrite(mp->tmp, s, e - s);
+			if (!(s = sfstruse(mp->tmp)))
+				goto nospace;
+		}
+		if (!*s || streq(s, "-"))
+			s = MAGIC_FILE;
+		if (!(fp = sfopen(NiL, s, "r")))
+		{
+			if (list)
+			{
+				if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
+				{
+					/*
+					 * s may live in mp->tmp which is about
+					 * to be reused, so park it in mp->fbuf
+					 * the element comes from the environment
+					 * so the copy must be bounded
+					 */
+
+					sfsprintf(mp->fbuf, sizeof(mp->fbuf), "%s", s);
+					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
+					if (!(s = sfstruse(mp->tmp)))
+						goto nospace;
+					if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
+						goto next;
+				}
+
+				/*
+				 * t is 0 here when pathpath() failed for a
+				 * name that contains '/'
+				 */
+
+				if (!t || !(fp = sfopen(NiL, t, "r")))
+					goto next;
+			}
+			else
+			{
+				if (mp->disc->errorf)
+					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
+				return -1;
+			}
+		}
+		found = 1;
+		n = load(mp, s, fp);
+		sfclose(fp);
+		if (n && !list)
+			return -1;
+	next:
+		if (!e)
+			break;
+		s = e + 1;
+	}
+	if (!found)
+	{
+		if (mp->flags & MAGIC_VERBOSE)
+		{
+			if (mp->disc->errorf)
+				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
+		}
+		return -1;
+	}
+	return 0;
+ nospace:
+	if (mp->disc->errorf)
+		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
+	return -1;
+}
+
+/*
+ * open a magic session
+ *
+ * the session handle and everything hanging off it is allocated from
+ * a private vmalloc region so that magicclose() can release it all
+ * returns 0 on allocation failure
+ */
+
+Magic_t*
+magicopen(Magicdisc_t* disc)
+{
+	register Magic_t*	mp;
+	register Vmalloc_t*	region;
+	register int		ch;
+	register int		k;
+	register int		bits;
+	register int		x;
+	unsigned char*		map[CC_MAPS + 1];
+
+	region = vmopen(Vmdcheap, Vmbest, 0);
+	if (!region)
+		return 0;
+	mp = vmnewof(region, 0, Magic_t, 1, 0);
+	if (!mp)
+	{
+		vmclose(region);
+		return 0;
+	}
+
+	/*
+	 * session identity and disciplines
+	 */
+
+	mp->id = lib;
+	mp->disc = disc;
+	mp->vm = region;
+	mp->flags = disc->flags;
+	mp->redisc.re_version = REG_VERSION;
+	mp->redisc.re_flags = REG_NOFREE;
+	mp->redisc.re_errorf = (regerror_t)disc->errorf;
+	mp->redisc.re_resizef = (regresize_t)vmgetmem;
+	mp->redisc.re_resizehandle = (void*)mp->vm;
+	mp->dtdisc.key = offsetof(Info_t, name);
+	mp->dtdisc.link = offsetof(Info_t, link);
+
+	/*
+	 * scratch stream and identifier dictionary
+	 */
+
+	mp->tmp = sfstropen();
+	if (!mp->tmp)
+	{
+		magicclose(mp);
+		return 0;
+	}
+	mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash);
+	if (!mp->infotab)
+	{
+		magicclose(mp);
+		return 0;
+	}
+	for (k = 0; k < elementsof(info); k++)
+		dtinsert(mp->infotab, &info[k]);
+
+	/*
+	 * per byte character class bits: CC_BIT bits for each of the
+	 * CC_MAPS code sets, with code set 0 in the low order bits
+	 */
+
+	for (k = 0; k < CC_MAPS; k++)
+		map[k] = ccmap(k, CC_ASCII);
+	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
+	for (ch = 0; ch <= UCHAR_MAX; ch++)
+	{
+		bits = 0;
+		for (k = CC_MAPS - 1; k >= 0; k--)
+		{
+			x = ccmapchr(map[k], ch);
+			bits = (bits << CC_BIT) | CCTYPE(x);
+		}
+		mp->cctype[ch] = bits;
+	}
+	return mp;
+}
+
+/*
+ * close a magicopen() session
+ *
+ * closes the scratch stream and then the vmalloc region, each only
+ * if it is set
+ * returns -1 if mp is 0, otherwise 0
+ */
+
+int
+magicclose(register Magic_t* mp)
+{
+	if (mp)
+	{
+		if (mp->tmp)
+			sfstrclose(mp->tmp);
+		if (mp->vm)
+			vmclose(mp->vm);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * return the magic string for file with optional stat info st
+ *
+ * fp, if not 0, is the open stream for file; its seek offset is
+ * saved before and restored after the data is sampled
+ * with MAGIC_MIME set the mime type is returned, otherwise the
+ * description, decorated with "short" and the mode attributes
+ * the result is "error" (translated) if nothing else is available
+ */
+
+char*
+magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
+{
+	off_t	here;
+	char*	desc;
+
+	mp->flags = mp->disc->flags;
+	mp->mime = 0;
+	if (st)
+	{
+		mp->fp = fp;
+		if (mp->fp)
+			here = sfseek(mp->fp, (off_t)0, SEEK_CUR);
+		desc = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf));
+		if (mp->fp)
+			sfseek(mp->fp, here, SEEK_SET);
+		if (!(mp->flags & MAGIC_MIME))
+		{
+			/*
+			 * build the decorated description in mp->tmp
+			 */
+
+			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
+				sfprintf(mp->tmp, "%s ", T("short"));
+			sfprintf(mp->tmp, "%s", desc);
+			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
+			{
+				if (S_ISDIR(st->st_mode))
+					sfprintf(mp->tmp, ", %s", T("searchable"));
+				else
+					sfprintf(mp->tmp, ", %s", T("executable"));
+			}
+			if (st->st_mode & S_ISUID)
+				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
+			if (st->st_mode & S_ISGID)
+				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
+			if (st->st_mode & S_ISVTX)
+				sfprintf(mp->tmp, ", sticky");
+			desc = sfstruse(mp->tmp);
+			if (!desc)
+				desc = T("out of space");
+		}
+	}
+	else
+		desc = T("cannot stat");
+	if (mp->flags & MAGIC_MIME)
+		desc = mp->mime;
+	if (desc)
+		return desc;
+	return T("error");
+}
+
+/*
+ * list the magic table in mp on sp
+ *
+ * one line is written per entry after a header line
+ * a function body is entered from a '$' entry whose value.lab has not
+ * been listed yet; the ':' return entry resumes after that '$' entry
+ * and marks the body listed by setting the mask of its first entry
+ *
+ * load() leaves value.lab 0 for a call to an undefined function and
+ * value.loop 0 for a loop() on an undefined function, so neither is
+ * followed unless it is set
+ *
+ * always returns 0
+ */
+
+int
+magiclist(register Magic_t* mp, register Sfio_t* sp)
+{
+	register Entry_t*	ep = mp->magic;
+	register Entry_t*	rp = 0;
+
+	mp->flags = mp->disc->flags;
+	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
+	while (ep)
+	{
+		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
+		if (ep->expr)
+			sfprintf(sp, "%s", ep->expr);
+		else
+			sfprintf(sp, "%ld", ep->offset);
+
+		/*
+		 * load() stores ~7 for le types and ~0 for be types
+		 */
+
+		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~7 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
+		switch (ep->type)
+		{
+		case 'm':
+		case 's':
+			sfputr(sp, fmtesc(ep->value.str), -1);
+			break;
+		case 'V':
+			if (ep->op == 'l' && ep->value.loop)
+				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
+			else if (ep->op == 'v')
+				sfprintf(sp, "vcodex()");
+			else
+				sfprintf(sp, "%p", ep->value.str);
+			break;
+		default:
+			sfprintf(sp, "%lo", ep->value.num);
+			break;
+		}
+		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
+		if (ep->cont == '$' && ep->value.lab && !ep->value.lab->mask)
+		{
+			/*
+			 * descend into the function body
+			 */
+
+			rp = ep;
+			ep = ep->value.lab;
+		}
+		else
+		{
+			if (ep->cont == ':' && rp)
+			{
+				/*
+				 * rp was only set with value.lab != 0
+				 */
+
+				ep = rp;
+				ep->value.lab->mask = 1;
+			}
+			ep = ep->next;
+		}
+	}
+	return 0;
+}