diff options
Diffstat (limited to 'usr/src/lib/libast/common/misc/magic.c')
-rw-r--r-- | usr/src/lib/libast/common/misc/magic.c | 2462 |
1 files changed, 0 insertions, 2462 deletions
diff --git a/usr/src/lib/libast/common/misc/magic.c b/usr/src/lib/libast/common/misc/magic.c deleted file mode 100644 index d451db78e2..0000000000 --- a/usr/src/lib/libast/common/misc/magic.c +++ /dev/null @@ -1,2462 +0,0 @@ -/*********************************************************************** -* * -* This software is part of the ast package * -* Copyright (c) 1985-2010 AT&T Intellectual Property * -* and is licensed under the * -* Common Public License, Version 1.0 * -* by AT&T Intellectual Property * -* * -* A copy of the License is available at * -* http://www.opensource.org/licenses/cpl1.0.txt * -* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * -* * -* Information and Software Systems Research * -* AT&T Research * -* Florham Park NJ * -* * -* Glenn Fowler <gsf@research.att.com> * -* David Korn <dgk@research.att.com> * -* Phong Vo <kpv@research.att.com> * -* * -***********************************************************************/ -#pragma prototyped -/* - * Glenn Fowler - * AT&T Research - * - * library interface to file - * - * the sum of the hacks {s5,v10,planix} is _____ than the parts - */ - -static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n"; - -static const char lib[] = "libast:magic"; - -#include <ast.h> -#include <ctype.h> -#include <ccode.h> -#include <dt.h> -#include <modex.h> -#include <error.h> -#include <regex.h> -#include <swap.h> - -#define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) - -#define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) - -#define MAXNEST 10 /* { ... } nesting limit */ -#define MINITEM 4 /* magic buffer rounding */ - -typedef struct /* identifier dictionary entry */ -{ - const char name[16]; /* identifier name */ - int value; /* identifier value */ - Dtlink_t link; /* dictionary link */ -} Info_t; - -typedef struct Edit /* edit substitution */ -{ - struct Edit* next; /* next in list */ - regex_t* from; /* from pattern */ -} Edit_t; - -struct Entry; - -typedef struct /* loop info */ -{ - struct Entry* lab; /* call this function */ - int start; /* start here */ - int size; /* increment by this amount */ - int count; /* dynamic loop count */ - int offset; /* dynamic offset */ -} Loop_t; - -typedef struct Entry /* magic file entry */ -{ - struct Entry* next; /* next in list */ - char* expr; /* offset expression */ - union - { - unsigned long num; - char* str; - struct Entry* lab; - regex_t* sub; - Loop_t* loop; - } value; /* comparison value */ - char* desc; /* file description */ - char* mime; /* file mime type */ - unsigned long offset; /* offset in bytes */ - unsigned long mask; /* mask before compare */ - char cont; /* continuation operation */ - char type; /* datum type */ - char op; /* comparison operation */ - char nest; /* { or } nesting operation */ - char swap; /* forced swap order */ -} Entry_t; - -#define CC_BIT 5 - -#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) -typedef unsigned short Cctype_t; -#else -typedef unsigned long Cctype_t; -#endif - -#define CC_text 0x01 -#define CC_control 0x02 -#define CC_latin 0x04 -#define CC_binary 0x08 -#define CC_utf_8 0x10 - -#define CC_notext CC_text /* CC_text is flipped before checking */ - -#define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) - -#define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) - -#define ID_NONE 0 -#define ID_ASM 1 -#define ID_C 2 -#define ID_COBOL 3 -#define ID_COPYBOOK 4 -#define ID_CPLUSPLUS 5 -#define ID_FORTRAN 6 -#define ID_HTML 7 -#define ID_INCL1 8 -#define ID_INCL2 9 -#define ID_INCL3 10 -#define ID_MAM1 11 -#define ID_MAM2 12 -#define ID_MAM3 13 -#define ID_NOTEXT 14 -#define ID_PL1 15 -#define ID_YACC 16 - -#define ID_MAX ID_YACC - -#define INFO_atime 1 -#define INFO_blocks 2 -#define INFO_ctime 3 -#define INFO_fstype 4 -#define INFO_gid 5 -#define INFO_mode 6 -#define INFO_mtime 7 -#define INFO_name 8 -#define INFO_nlink 9 -#define INFO_size 10 -#define INFO_uid 11 - -#define _MAGIC_PRIVATE_ \ - Magicdisc_t* disc; /* discipline */ \ - Vmalloc_t* vm; /* vmalloc region */ \ - Entry_t* magic; /* parsed magic table */ \ - Entry_t* magiclast; /* last entry in magic */ \ - char* mime; /* MIME type */ \ - unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \ - char fbuf[SF_BUFSIZE + 1]; /* file data */ \ - char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ - char nbuf[256]; /* !CC_NATIVE data */ \ - char mbuf[64]; /* mime string */ \ - char sbuf[64]; /* type suffix string */ \ - char tbuf[2 * PATH_MAX]; /* type string */ \ - Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ - unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ - unsigned int multi[UCHAR_MAX + 1]; /* muti char count */ \ - int keep[MAXNEST]; /* ckmagic nest stack */ \ - char* cap[MAXNEST]; /* ckmagic mime stack */ \ - char* msg[MAXNEST]; /* ckmagic text stack */ \ - Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ - int fbsz; /* fbuf size */ \ - int fbmx; /* fbuf max size */ \ - int xbsz; /* xbuf size */ \ - int swap; /* swap() operation */ \ - unsigned long flags; /* disc+open flags */ \ - long xoff; /* xbuf offset */ \ - int identifier[ID_MAX + 1]; /* Info_t identifier */ \ - Sfio_t* fp; /* fbuf fp */ \ - Sfio_t* tmp; /* tmp string */ \ - regdisc_t redisc; /* regex discipline */ \ - Dtdisc_t dtdisc; /* dict discipline */ \ - Dt_t* idtab; /* identifier dict */ \ - Dt_t* infotab; /* info keyword dict */ - -#include <magic.h> - -static Info_t dict[] = /* keyword dictionary */ -{ - { "COMMON", ID_FORTRAN }, - { "COMPUTE", ID_COBOL }, - { "COMP", ID_COPYBOOK }, - { "COMPUTATIONAL",ID_COPYBOOK }, - { "DCL", ID_PL1 }, - { "DEFINED", ID_PL1 }, - { "DIMENSION", ID_FORTRAN }, - { "DIVISION", ID_COBOL }, - { "FILLER", ID_COPYBOOK }, - { "FIXED", ID_PL1 }, - { "FUNCTION", ID_FORTRAN }, - { "HTML", ID_HTML }, - { "INTEGER", ID_FORTRAN }, - { "MAIN", ID_PL1 }, - { "OPTIONS", ID_PL1 }, - { "PERFORM", ID_COBOL }, - { "PIC", ID_COPYBOOK }, - { "REAL", ID_FORTRAN }, - { "REDEFINES", ID_COPYBOOK }, - { "S9", ID_COPYBOOK }, - { "SECTION", ID_COBOL }, - { "SELECT", ID_COBOL }, - { "SUBROUTINE", ID_FORTRAN }, - { "TEXT", ID_ASM }, - { "VALUE", ID_COPYBOOK }, - { "attr", ID_MAM3 }, - { "binary", ID_YACC }, - { "block", ID_FORTRAN }, - { "bss", ID_ASM }, - { "byte", ID_ASM }, - { "char", ID_C }, - { "class", ID_CPLUSPLUS }, - { "clr", ID_NOTEXT }, - { "comm", ID_ASM }, - { "common", ID_FORTRAN }, - { "data", ID_ASM }, - { "dimension", ID_FORTRAN }, - { "done", ID_MAM2 }, - { "double", ID_C }, - { "even", ID_ASM }, - { "exec", ID_MAM3 }, - { "extern", ID_C }, - { "float", ID_C }, - { "function", ID_FORTRAN }, - { "globl", ID_ASM }, - { "h", ID_INCL3 }, - { "html", ID_HTML }, - { "include", ID_INCL1 }, - { "int", ID_C }, - { "integer", ID_FORTRAN }, - { "jmp", ID_NOTEXT }, - { "left", ID_YACC }, - { "libc", ID_INCL2 }, - { "long", ID_C }, - { "make", ID_MAM1 }, - { "mov", ID_NOTEXT }, - { "private", ID_CPLUSPLUS }, - { "public", ID_CPLUSPLUS }, - { "real", ID_FORTRAN }, - { "register", ID_C }, - { "right", ID_YACC }, - { "sfio", ID_INCL2 }, - { "static", ID_C }, - { "stdio", ID_INCL2 }, - { "struct", ID_C }, - { "subroutine", ID_FORTRAN }, - { "sys", ID_NOTEXT }, - { "term", ID_YACC }, - { "text", ID_ASM }, - { "tst", ID_NOTEXT }, - { "type", ID_YACC }, - { "typedef", ID_C }, - { "u", ID_INCL2 }, - { "union", ID_YACC }, - { "void", ID_C }, -}; - -static Info_t info[] = -{ - { "atime", INFO_atime }, - { "blocks", INFO_blocks }, - { "ctime", INFO_ctime }, - { "fstype", INFO_fstype }, - { "gid", INFO_gid }, - { "mode", INFO_mode }, - { "mtime", INFO_mtime }, - { "name", INFO_name }, - { "nlink", INFO_nlink }, - { "size", INFO_size }, - { "uid", INFO_uid }, -}; - -/* - * return pointer to data at offset off and size siz - */ - -static char* -getdata(register Magic_t* mp, register long off, register int siz) -{ - register long n; - - if (off < 0) - return 0; - if (off + siz <= mp->fbsz) - return mp->fbuf + off; - if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) - { - if (off + siz > mp->fbmx) - return 0; - n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); - if (sfseek(mp->fp, n, SEEK_SET) != n) - return 0; - if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) - { - mp->xoff = 0; - mp->xbsz = 0; - return 0; - } - mp->xbuf[mp->xbsz] = 0; - mp->xoff = n; - if (off + siz > mp->xoff + mp->xbsz) - return 0; - } - return mp->xbuf + off - mp->xoff; -} - -/* - * @... evaluator for strexpr() - */ - -static long -indirect(const char* cs, char** e, void* handle) -{ - register char* s = (char*)cs; - register Magic_t* mp = (Magic_t*)handle; - register long n = 0; - register char* p; - - if (s) - { - if (*s == '@') - { - n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0); - switch (*(s = *e)) - { - case 'b': - case 'B': - s++; - if (p = getdata(mp, n, 1)) - n = *(unsigned char*)p; - else - s = (char*)cs; - break; - case 'h': - case 'H': - s++; - if (p = getdata(mp, n, 2)) - n = swapget(mp->swap, p, 2); - else - s = (char*)cs; - break; - case 'q': - case 'Q': - s++; - if (p = getdata(mp, n, 8)) - n = swapget(mp->swap, p, 8); - else - s = (char*)cs; - break; - default: - if (isalnum(*s)) - s++; - if (p = getdata(mp, n, 4)) - n = swapget(mp->swap, p, 4); - else - s = (char*)cs; - break; - } - } - *e = s; - } - else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); - return n; -} - -/* - * emit regex error message - */ - -static void -regmessage(Magic_t* mp, regex_t* re, int code) -{ - char buf[128]; - - if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - { - regerror(code, re, buf, sizeof(buf)); - (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); - } -} - -/* - * decompose vcodex(3) method composition - */ - -static char* -vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) -{ - unsigned char* map; - const char* o; - int c; - int n; - int i; - int a; - - map = CCMAP(CC_ASCII, CC_NATIVE); - a = 0; - i = 1; - for (;;) - { - if (i) - i = 0; - else - *b++ = '^'; - if (m < (x - 1) && !*(m + 1)) - { - /* - * obsolete indices - */ - - if (!a) - { - a = 1; - o = "old, "; - while (b < e && (c = *o++)) - *b++ = c; - } - switch (*m) - { - case 0: o = "delta"; break; - case 1: o = "huffman"; break; - case 2: o = "huffgroup"; break; - case 3: o = "arith"; break; - case 4: o = "bwt"; break; - case 5: o = "rle"; break; - case 6: o = "mtf"; break; - case 7: o = "transpose"; break; - case 8: o = "table"; break; - case 9: o = "huffpart"; break; - case 50: o = "map"; break; - case 100: o = "recfm"; break; - case 101: o = "ss7"; break; - default: o = "UNKNOWN"; break; - } - m += 2; - while (b < e && (c = *o++)) - *b++ = c; - } - else - while (b < e && m < x && (c = *m++)) - { - if (map) - c = map[c]; - *b++ = c; - } - if (b >= e) - break; - n = 0; - while (m < x) - { - n = (n<<7) | (*m & 0x7f); - if (!(*m++ & 0x80)) - break; - } - if (n >= (x - m)) - break; - m += n; - } - return b; -} - -/* - * check for magic table match in buf - */ - -static char* -ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off) -{ - register Entry_t* ep; - register char* p; - register char* b; - register int level = 0; - int call = -1; - int c; - char* q; - char* t; - char* base = 0; - unsigned long num; - unsigned long mask; - regmatch_t matches[10]; - - mp->swap = 0; - b = mp->msg[0] = buf; - mp->mime = mp->cap[0] = 0; - mp->keep[0] = 0; - for (ep = mp->magic; ep; ep = ep->next) - { - fun: - if (ep->nest == '{') - { - if (++level >= MAXNEST) - { - call = -1; - level = 0; - mp->keep[0] = 0; - b = mp->msg[0]; - mp->mime = mp->cap[0]; - continue; - } - mp->keep[level] = mp->keep[level - 1] != 0; - mp->msg[level] = b; - mp->cap[level] = mp->mime; - } - switch (ep->cont) - { - case '#': - if (mp->keep[level] && b > buf) - { - *b = 0; - return buf; - } - mp->swap = 0; - b = mp->msg[0] = buf; - mp->mime = mp->cap[0] = 0; - if (ep->type == ' ') - continue; - break; - case '$': - if (mp->keep[level] && call < (MAXNEST - 1)) - { - mp->ret[++call] = ep; - ep = ep->value.lab; - goto fun; - } - continue; - case ':': - ep = mp->ret[call--]; - if (ep->op == 'l') - goto fun; - continue; - case '|': - if (mp->keep[level] > 1) - goto checknest; - /*FALLTHROUGH*/ - default: - if (!mp->keep[level]) - { - b = mp->msg[level]; - mp->mime = mp->cap[level]; - goto checknest; - } - break; - } - p = ""; - num = 0; - if (!ep->expr) - num = ep->offset + off; - else - switch (ep->offset) - { - case 0: - num = strexpr(ep->expr, NiL, indirect, mp) + off; - break; - case INFO_atime: - num = st->st_atime; - ep->type = 'D'; - break; - case INFO_blocks: - num = iblocks(st); - ep->type = 'N'; - break; - case INFO_ctime: - num = st->st_ctime; - ep->type = 'D'; - break; - case INFO_fstype: - p = fmtfs(st); - ep->type = toupper(ep->type); - break; - case INFO_gid: - if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') - { - p = fmtgid(st->st_gid); - ep->type = toupper(ep->type); - } - else - { - num = st->st_gid; - ep->type = 'N'; - } - break; - case INFO_mode: - if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') - { - p = fmtmode(st->st_mode, 0); - ep->type = toupper(ep->type); - } - else - { - num = modex(st->st_mode); - ep->type = 'N'; - } - break; - case INFO_mtime: - num = st->st_ctime; - ep->type = 'D'; - break; - case INFO_name: - if (!base) - { - if (base = strrchr(file, '/')) - base++; - else - base = (char*)file; - } - p = base; - ep->type = toupper(ep->type); - break; - case INFO_nlink: - num = st->st_nlink; - ep->type = 'N'; - break; - case INFO_size: - num = st->st_size; - ep->type = 'N'; - break; - case INFO_uid: - if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') - { - p = fmtuid(st->st_uid); - ep->type = toupper(ep->type); - } - else - { - num = st->st_uid; - ep->type = 'N'; - } - break; - } - switch (ep->type) - { - - case 'b': - if (!(p = getdata(mp, num, 1))) - goto next; - num = *(unsigned char*)p; - break; - - case 'h': - if (!(p = getdata(mp, num, 2))) - goto next; - num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); - break; - - case 'd': - case 'l': - case 'v': - if (!(p = getdata(mp, num, 4))) - goto next; - num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); - break; - - case 'q': - if (!(p = getdata(mp, num, 8))) - goto next; - num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8); - break; - - case 'e': - if (!(p = getdata(mp, num, 0))) - goto next; - /*FALLTHROUGH*/ - case 'E': - if (!ep->value.sub) - goto next; - if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) - { - c = mp->fbsz; - if (c >= sizeof(mp->nbuf)) - c = sizeof(mp->nbuf) - 1; - p = (char*)memcpy(mp->nbuf, p, c); - p[c] = 0; - ccmapstr(mp->x2n, p, c); - if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) - { - if (c != REG_NOMATCH) - regmessage(mp, ep->value.sub, c); - goto next; - } - } - p = ep->value.sub->re_sub->re_buf; - q = T(ep->desc); - t = *q ? q : p; - if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') - *b++ = ' '; - b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b')); - if (ep->mime) - mp->mime = ep->mime; - goto checknest; - - case 's': - if (!(p = getdata(mp, num, ep->mask))) - goto next; - goto checkstr; - case 'm': - if (!(p = getdata(mp, num, 0))) - goto next; - /*FALLTHROUGH*/ - case 'M': - case 'S': - checkstr: - for (;;) - { - if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) - break; - if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) - break; - if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) - goto next; - p = (char*)memcpy(mp->nbuf, p, ep->mask); - p[ep->mask] = 0; - ccmapstr(mp->x2n, p, ep->mask); - } - q = T(ep->desc); - if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') - *b++ = ' '; - for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); - *t = 0; - b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p); - *t = c; - if (ep->mime) - mp->mime = ep->mime; - goto checknest; - - } - if (mask = ep->mask) - num &= mask; - switch (ep->op) - { - - case '=': - case '@': - if (num == ep->value.num) - break; - if (ep->cont != '#') - goto next; - if (!mask) - mask = ~mask; - if (ep->type == 'h') - { - if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) - { - if (!(mp->swap & (mp->swap + 1))) - mp->swap = 7; - goto swapped; - } - } - else if (ep->type == 'l') - { - for (c = 1; c < 4; c++) - if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) - { - if (!(mp->swap & (mp->swap + 1))) - mp->swap = 7; - goto swapped; - } - } - else if (ep->type == 'q') - { - for (c = 1; c < 8; c++) - if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) - goto swapped; - } - goto next; - - case '!': - if (num != ep->value.num) - break; - goto next; - - case '^': - if (num ^ ep->value.num) - break; - goto next; - - case '>': - if (num > ep->value.num) - break; - goto next; - - case '<': - if (num < ep->value.num) - break; - goto next; - - case 'l': - if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) - { - if (!ep->value.loop->count) - { - ep->value.loop->count = num; - ep->value.loop->offset = off; - off = ep->value.loop->start; - } - else if (!--ep->value.loop->count) - { - off = ep->value.loop->offset; - goto next; - } - else - off += ep->value.loop->size; - mp->ret[++call] = ep; - ep = ep->value.loop->lab; - goto fun; - } - goto next; - - case 'm': - c = mp->swap; - t = ckmagic(mp, file, b + (b > buf), st, num); - mp->swap = c; - if (!t) - goto next; - if (b > buf) - *b = ' '; - b += strlen(b); - break; - - case 'r': -#if _UWIN - { - char* e; - Sfio_t* rp; - Sfio_t* gp; - - if (!(t = strrchr(file, '.'))) - goto next; - sfprintf(mp->tmp, "/reg/classes_root/%s", t); - if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) - goto next; - *ep->desc = 0; - *ep->mime = 0; - gp = 0; - while (t = sfgetr(rp, '\n', 1)) - { - if (strneq(t, "Content Type=", 13)) - { - ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); - strcpy(ep->mime, t + 13); - if (gp) - break; - } - else - { - sfprintf(mp->tmp, "/reg/classes_root/%s", t); - if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) - { - ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); - strcpy(ep->desc, t); - if (*ep->mime) - break; - } - } - } - sfclose(rp); - if (!gp) - goto next; - if (!*ep->mime) - { - t = T(ep->desc); - if (!strncasecmp(t, "microsoft", 9)) - t += 9; - while (isspace(*t)) - t++; - e = "application/x-ms-"; - ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); - e = strcopy(ep->mime, e); - while ((c = *t++) && c != '.' && c != ' ') - *e++ = isupper(c) ? tolower(c) : c; - *e = 0; - } - while (t = sfgetr(gp, '\n', 1)) - if (*t && !streq(t, "\"\"")) - { - ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); - strcpy(ep->desc, t); - break; - } - sfclose(gp); - if (!*ep->desc) - goto next; - if (!t) - for (t = T(ep->desc); *t; t++) - if (*t == '.') - *t = ' '; - if (!mp->keep[level]) - mp->keep[level] = 2; - mp->mime = ep->mime; - break; - } -#else - if (ep->cont == '#' && !mp->keep[level]) - mp->keep[level] = 1; - goto next; -#endif - - case 'v': - if (!(p = getdata(mp, num, 4))) - goto next; - c = 0; - do - { - num++; - c = (c<<7) | (*p & 0x7f); - } while (*p++ & 0x80); - if (!(p = getdata(mp, num, c))) - goto next; - if (mp->keep[level]++ && b > buf && *(b - 1) != ' ') - { - *b++ = ','; - *b++ = ' '; - } - b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); - goto checknest; - - } - swapped: - q = T(ep->desc); - if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') - *b++ = ' '; - if (ep->type == 'd' || ep->type == 'D') - b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num)); - else if (ep->type == 'v') - b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num)); - else - b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num); - if (ep->mime && *ep->mime) - mp->mime = ep->mime; - checknest: - if (ep->nest == '}') - { - if (!mp->keep[level]) - { - b = mp->msg[level]; - mp->mime = mp->cap[level]; - } - else if (level > 0) - mp->keep[level - 1] = mp->keep[level]; - if (--level < 0) - { - level = 0; - mp->keep[0] = 0; - } - } - continue; - next: - if (ep->cont == '&') - mp->keep[level] = 0; - goto checknest; - } - if (mp->keep[level] && b > buf) - { - *b = 0; - return buf; - } - return 0; -} - -/* - * check english language stats - */ - -static int -ckenglish(register Magic_t* mp, int pun, int badpun) -{ - register char* s; - register int vowl = 0; - register int freq = 0; - register int rare = 0; - - if (5 * badpun > pun) - return 0; - if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) - return 0; - if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) - return 0; - for (s = "aeiou"; *s; s++) - vowl += mp->count[toupper(*s)] + mp->count[*s]; - for (s = "etaion"; *s; s++) - freq += mp->count[toupper(*s)] + mp->count[*s]; - for (s = "vjkqxz"; *s; s++) - rare += mp->count[toupper(*s)] + mp->count[*s]; - return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare; -} - -/* - * check programming language stats - */ - -static char* -cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st) -{ - register int c; - register unsigned char* b; - register unsigned char* e; - register int q; - register char* s; - char* t; - char* base; - char* suff; - char* t1; - char* t2; - char* t3; - int n; - int badpun; - int code; - int pun; - Cctype_t flags; - Info_t* ip; - - b = (unsigned char*)mp->fbuf; - e = b + mp->fbsz; - memzero(mp->count, sizeof(mp->count)); - memzero(mp->multi, sizeof(mp->multi)); - memzero(mp->identifier, sizeof(mp->identifier)); - - /* - * check character coding - */ - - flags = 0; - while (b < e) - flags |= mp->cctype[*b++]; - b = (unsigned char*)mp->fbuf; - code = 0; - q = CC_ASCII; - n = CC_MASK; - for (c = 0; c < CC_MAPS; c++) - { - flags ^= CC_text; - if ((flags & CC_MASK) < n) - { - n = flags & CC_MASK; - q = c; - } - flags >>= CC_BIT; - } - flags = n; - if (!(flags & (CC_binary|CC_notext))) - { - if (q != CC_NATIVE) - { - code = q; - ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); - } - if (b[0] == '#' && b[1] == '!') - { - for (b += 2; b < e && isspace(*b); b++); - for (s = (char*)b; b < e && isprint(*b); b++); - c = *b; - *b = 0; - if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK)) - { - if (t = strrchr(s, '/')) - s = t + 1; - for (t = s; *t; t++) - if (isspace(*t)) - { - *t = 0; - break; - } - sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh"); - mp->mime = mp->mbuf; - if (match(s, "*sh")) - { - t1 = T("command"); - if (streq(s, "sh")) - *s = 0; - else - { - *b++ = ' '; - *b = 0; - } - } - else - { - t1 = T("interpreter"); - *b++ = ' '; - *b = 0; - } - sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1); - s = mp->sbuf; - goto qualify; - } - *b = c; - b = (unsigned char*)mp->fbuf; - } - badpun = 0; - pun = 0; - q = 0; - s = 0; - t = 0; - while (b < e) - { - c = *b++; - mp->count[c]++; - if (c == q && (q != '*' || *b == '/' && b++)) - { - mp->multi[q]++; - q = 0; - } - else if (c == '\\') - { - s = 0; - b++; - } - else if (!q) - { - if (isalpha(c) || c == '_') - { - if (!s) - s = (char*)b - 1; - } - else if (!isdigit(c)) - { - if (s) - { - if (s > mp->fbuf) - switch (*(s - 1)) - { - case ':': - if (*b == ':') - mp->multi[':']++; - break; - case '.': - if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n')) - mp->multi['.']++; - break; - case '\n': - case '\\': - if (*b == '{') - t = (char*)b + 1; - break; - case '{': - if (s == t && *b == '}') - mp->multi['X']++; - break; - } - if (!mp->idtab) - { - if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash)) - for (q = 0; q < elementsof(dict); q++) - dtinsert(mp->idtab, &dict[q]); - else if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); - q = 0; - } - if (mp->idtab) - { - *(b - 1) = 0; - if (ip = (Info_t*)dtmatch(mp->idtab, s)) - mp->identifier[ip->value]++; - *(b - 1) = c; - } - s = 0; - } - switch (c) - { - case '\t': - if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n') - mp->multi['\t']++; - break; - case '"': - case '\'': - q = c; - break; - case '/': - if (*b == '*') - q = *b++; - else if (*b == '/') - q = '\n'; - break; - case '$': - if (*b == '(' && *(b + 1) != ' ') - mp->multi['$']++; - break; - case '{': - case '}': - case '[': - case ']': - case '(': - mp->multi[c]++; - break; - case ')': - mp->multi[c]++; - goto punctuation; - case ':': - if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2))) - mp->multi[':']++; - goto punctuation; - case '.': - case ',': - case '%': - case ';': - case '?': - punctuation: - pun++; - if (*b != ' ' && *b != '\n') - badpun++; - break; - } - } - } - } - } - else - while (b < e) - mp->count[*b++]++; - base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file; - suff = (t1 = strrchr(base, '.')) ? t1 + 1 : ""; - if (!flags) - { - if (match(suff, "*sh|bat|cmd")) - goto id_sh; - if (match(base, "*@(mkfile)")) - goto id_mk; - if (match(base, "*@(makefile|.mk)")) - goto id_make; - if (match(base, "*@(mamfile|.mam)")) - goto id_mam; - if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy")) - goto id_c; - if (match(suff, "f")) - goto id_fortran; - if (match(suff, "htm+(l)")) - goto id_html; - if (match(suff, "cpy")) - goto id_copybook; - if (match(suff, "cob|cbl|cb2")) - goto id_cobol; - if (match(suff, "pl[1i]")) - goto id_pl1; - if (match(suff, "tex")) - goto id_tex; - if (match(suff, "asm|s")) - goto id_asm; - if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.'))) - { - id_sh: - s = T("command script"); - mp->mime = "application/sh"; - goto qualify; - } - if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *")) - { - s = T("mail message"); - mp->mime = "message/rfc822"; - goto qualify; - } - if (match(base, "*@(mkfile)")) - { - id_mk: - s = "mkfile"; - mp->mime = "application/mk"; - goto qualify; - } - if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0)) - { - id_make: - s = "makefile"; - mp->mime = "application/make"; - goto qualify; - } - if (mp->multi['.'] >= 3) - { - s = T("nroff input"); - mp->mime = "application/x-troff"; - goto qualify; - } - if (mp->multi['X'] >= 3) - { - s = T("TeX input"); - mp->mime = "application/x-tex"; - goto qualify; - } - if (mp->fbsz < SF_BUFSIZE && - (mp->multi['('] == mp->multi[')'] && - mp->multi['{'] == mp->multi['}'] && - mp->multi['['] == mp->multi[']']) || - mp->fbsz >= SF_BUFSIZE && - (mp->multi['('] >= mp->multi[')'] && - mp->multi['{'] >= mp->multi['}'] && - mp->multi['['] >= mp->multi[']'])) - { - c = mp->identifier[ID_INCL1]; - if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c || - mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 || - mp->count['='] >= 20 && mp->count[';'] >= 20) - { - id_c: - t1 = ""; - t2 = "c "; - t3 = T("program"); - switch (*suff) - { - case 'c': - case 'C': - mp->mime = "application/x-cc"; - break; - case 'l': - case 'L': - t1 = "lex "; - mp->mime = "application/x-lex"; - break; - default: - t3 = T("header"); - if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5) - { - mp->mime = "application/x-cc"; - break; - } - /*FALLTHROUGH*/ - case 'y': - case 'Y': - t1 = "yacc "; - mp->mime = "application/x-yacc"; - break; - } - if (mp->identifier[ID_CPLUSPLUS] >= 3) - { - t2 = "c++ "; - mp->mime = "application/x-c++"; - } - sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3); - s = mp->sbuf; - goto qualify; - } - } - if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 && - (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] || - mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2])) - { - id_mam: - s = T("mam program"); - mp->mime = "application/x-mam"; - goto qualify; - } - if (mp->identifier[ID_FORTRAN] >= 8) - { - id_fortran: - s = T("fortran program"); - mp->mime = "application/x-fortran"; - goto qualify; - } - if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2) - { - id_html: - s = T("html input"); - mp->mime = "text/html"; - goto qualify; - } - if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) - { - id_copybook: - s = T("cobol copybook"); - mp->mime = "application/x-cobol"; - goto qualify; - } - if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) - { - id_cobol: - s = T("cobol program"); - mp->mime = "application/x-cobol"; - goto qualify; - } - if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) - { - id_pl1: - s = T("pl1 program"); - mp->mime = "application/x-pl1"; - goto qualify; - } - if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{']) - { - id_tex: - s = T("TeX input"); - mp->mime = "text/tex"; - goto qualify; - } - if (mp->identifier[ID_ASM] >= 4) - { - id_asm: - s = T("as program"); - mp->mime = "application/x-as"; - goto qualify; - } - if (ckenglish(mp, pun, badpun)) - { - s = T("english text"); - mp->mime = "text/plain"; - goto qualify; - } - } - else if (streq(base, "core")) - { - mp->mime = "x-system/core"; - return T("core dump"); - } - if (flags & (CC_binary|CC_notext)) - { - b = (unsigned char*)mp->fbuf; - e = b + mp->fbsz; - n = 0; - for (;;) - { - c = *b++; - q = 0; - while (c & 0x80) - { - c <<= 1; - q++; - } - switch (q) - { - case 4: - if (b < e && (*b++ & 0xc0) != 0x80) - break; - /* FALLTHROUGH */ - case 3: - if (b < e && (*b++ & 0xc0) != 0x80) - break; - /* FALLTHROUGH */ - case 2: - if (b < e && (*b++ & 0xc0) != 0x80) - break; - n = 1; - /* FALLTHROUGH */ - case 0: - if (b >= e) - { - if (n) - { - flags &= ~(CC_binary|CC_notext); - flags |= CC_utf_8; - } - break; - } - continue; - } - break; - } - } - if (flags & (CC_binary|CC_notext)) - { - unsigned long d = 0; - - if ((q = mp->fbsz / UCHAR_MAX) >= 2) - { - /* - * compression/encryption via standard deviation - */ - - - for (c = 0; c < UCHAR_MAX; c++) - { - pun = mp->count[c] - q; - d += pun * pun; - } - d /= mp->fbsz; - } - if (d <= 0) - s = T("binary"); - else if (d < 4) - s = T("encrypted"); - else if (d < 16) - s = T("packed"); - else if (d < 64) - s = T("compressed"); - else if (d < 256) - s = T("delta"); - else - s = T("data"); - mp->mime = "application/octet-stream"; - return s; - } - mp->mime = "text/plain"; - if (flags & CC_utf_8) - s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text"); - else if (flags & CC_latin) - s = (flags & CC_control) ? T("latin text with control characters") : T("latin text"); - else - s = (flags & CC_control) ? T("text with control characters") : T("text"); - qualify: - if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) - { - t = "dos "; - mp->mime = "text/dos"; - } - else - t = ""; - if (code) - { - if (code == CC_ASCII) - sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s); - else - { - sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s); - mp->mime = "text/ebcdic"; - } - s = buf; - } - else if (*t) - { - sfsprintf(buf, PATH_MAX, "%s%s", t, s); - s = buf; - } - return s; -} - -/* - * return the basic magic string for file,st in buf,size - */ - -static char* -type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size) -{ - register char* s; - register char* t; - - mp->mime = 0; - if (!S_ISREG(st->st_mode)) - { - if (S_ISDIR(st->st_mode)) - { - mp->mime = "x-system/dir"; - return T("directory"); - } - if (S_ISLNK(st->st_mode)) - { - mp->mime = "x-system/lnk"; - s = buf; - s += sfsprintf(s, PATH_MAX, T("symbolic link to ")); - if (pathgetlink(file, s, size - (s - buf)) < 0) - return T("cannot read symbolic link text"); - return buf; - } - if (S_ISBLK(st->st_mode)) - { - mp->mime = "x-system/blk"; - sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st)); - return buf; - } - if (S_ISCHR(st->st_mode)) - { - mp->mime = "x-system/chr"; - sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st)); - return buf; - } - if (S_ISFIFO(st->st_mode)) - { - mp->mime = "x-system/fifo"; - return "fifo"; - } -#ifdef S_ISSOCK - if (S_ISSOCK(st->st_mode)) - { - mp->mime = "x-system/sock"; - return "socket"; - } -#endif - } - if (!(mp->fbmx = st->st_size)) - s = T("empty"); - else if (!mp->fp) - s = T("cannot read"); - else - { - mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1); - if (mp->fbsz < 0) - s = fmterror(errno); - else if (mp->fbsz == 0) - s = T("empty"); - else - { - mp->fbuf[mp->fbsz] = 0; - mp->xoff = 0; - mp->xbsz = 0; - if (!(s = ckmagic(mp, file, buf, st, 0))) - s = cklang(mp, file, buf, st); - } - } - if (!mp->mime) - mp->mime = "application/unknown"; - else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) - { - register char* b; - register char* be; - register char* m; - register char* me; - - b = mp->mime; - me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; - while (m < me && b < t) - *m++ = *b++; - b = t = s; - for (;;) - { - if (!(be = strchr(t, ' '))) - { - be = b + strlen(b); - break; - } - if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) - break; - b = t; - t = be + 1; - } - while (m < me && b < be) - if ((*m++ = *b++) == ' ') - *(m - 1) = '-'; - *m = 0; - } - return s; -} - -/* - * low level for magicload() - */ - -static int -load(register Magic_t* mp, char* file, register Sfio_t* fp) -{ - register Entry_t* ep; - register char* p; - register char* p2; - char* p3; - char* next; - int n; - int lge; - int lev; - int ent; - int old; - int cont; - Info_t* ip; - Entry_t* ret; - Entry_t* first; - Entry_t* last = 0; - Entry_t* fun['z' - 'a' + 1]; - - memzero(fun, sizeof(fun)); - cont = '$'; - ent = 0; - lev = 0; - old = 0; - ret = 0; - error_info.file = file; - error_info.line = 0; - first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); - while (p = sfgetr(fp, '\n', 1)) - { - error_info.line++; - for (; isspace(*p); p++); - - /* - * nesting - */ - - switch (*p) - { - case 0: - case '#': - cont = '#'; - continue; - case '{': - if (++lev < MAXNEST) - ep->nest = *p; - else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST); - continue; - case '}': - if (!last || lev <= 0) - { - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p); - } - else if (lev-- == ent) - { - ent = 0; - ep->cont = ':'; - ep->offset = ret->offset; - ep->nest = ' '; - ep->type = ' '; - ep->op = ' '; - ep->desc = "[RETURN]"; - last = ep; - ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); - ret = 0; - } - else - last->nest = *p; - continue; - default: - if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') - { - n = *p++; - if (n >= 'a' && n <= 'z') - n -= 'a'; - else - { - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); - n = 0; - } - if (ret && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); - if (*p == '{') - { - ent = ++lev; - ret = ep; - ep->desc = "[FUNCTION]"; - } - else - { - if (*(p + 1) != ')' && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a'); - ep->desc = "[CALL]"; - } - ep->cont = cont; - ep->offset = n; - ep->nest = ' '; - ep->type = ' '; - ep->op = ' '; - last = ep; - ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); - if (ret) - fun[n] = last->value.lab = ep; - else if (!(last->value.lab = fun[n]) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); - continue; - } - if (!ep->nest) - ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' '; - break; - } - - /* - * continuation - */ - - cont = '$'; - switch (*p) - { - case '>': - old = 1; - if (*(p + 1) == *p) - { - /* - * old style nesting push - */ - - p++; - old = 2; - if (!lev && last) - { - lev = 1; - last->nest = '{'; - if (last->cont == '>') - last->cont = '&'; - ep->nest = '1'; - } - } - /*FALLTHROUGH*/ - case '+': - case '&': - case '|': - ep->cont = *p++; - break; - default: - if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p); - /*FALLTHROUGH*/ - case '*': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - ep->cont = (lev > 0) ? '&' : '#'; - break; - } - switch (old) - { - case 1: - old = 0; - if (lev) - { - /* - * old style nesting pop - */ - - lev = 0; - if (last) - last->nest = '}'; - ep->nest = ' '; - if (ep->cont == '&') - ep->cont = '#'; - } - break; - case 2: - old = 1; - break; - } - if (isdigit(*p)) - { - /* - * absolute offset - */ - - ep->offset = strton(p, &next, NiL, 0); - p2 = next; - } - else - { - for (p2 = p; *p2 && !isspace(*p2); p2++); - if (!*p2) - { - if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); - continue; - } - - /* - * offset expression - */ - - *p2++ = 0; - ep->expr = vmstrdup(mp->vm, p); - if (isalpha(*p)) - ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0; - else if (*p == '(' && ep->cont == '>') - { - /* - * convert old style indirection to @ - */ - - p = ep->expr + 1; - for (;;) - { - switch (*p++) - { - case 0: - case '@': - case '(': - break; - case ')': - break; - default: - continue; - } - break; - } - if (*--p == ')') - { - *p = 0; - *ep->expr = '@'; - } - } - } - for (; isspace(*p2); p2++); - for (p = p2; *p2 && !isspace(*p2); p2++); - if (!*p2) - { - if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); - continue; - } - *p2++ = 0; - - /* - * type - */ - - if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e') - { - ep->swap = ~(*p == 'l' ? 7 : 0); - p += 2; - } - if (*p == 's') - { - if (*(p + 1) == 'h') - ep->type = 'h'; - else - ep->type = 's'; - } - else if (*p == 'a') - ep->type = 's'; - else - ep->type = *p; - if (p = strchr(p, '&')) - { - /* - * old style mask - */ - - ep->mask = strton(++p, NiL, NiL, 0); - } - for (; isspace(*p2); p2++); - if (ep->mask) - *--p2 = '='; - - /* - * comparison operation - */ - - p = p2; - if (p2 = strchr(p, '\t')) - *p2++ = 0; - else - { - int qe = 0; - int qn = 0; - - /* - * assume balanced {}[]()\\""'' field - */ - - for (p2 = p;;) - { - switch (n = *p2++) - { - case 0: - break; - case '{': - if (!qe) - qe = '}'; - if (qe == '}') - qn++; - continue; - case '(': - if (!qe) - qe = ')'; - if (qe == ')') - qn++; - continue; - case '[': - if (!qe) - qe = ']'; - if (qe == ']') - qn++; - continue; - case '}': - case ')': - case ']': - if (qe == n && qn > 0) - qn--; - continue; - case '"': - case '\'': - if (!qe) - qe = n; - else if (qe == n) - qe = 0; - continue; - case '\\': - if (*p2) - p2++; - continue; - default: - if (!qe && isspace(n)) - break; - continue; - } - if (n) - *(p2 - 1) = 0; - else - p2--; - break; - } - } - lge = 0; - if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') - ep->op = '='; - else - { - if (*p == '&') - { - ep->mask = strton(++p, &next, NiL, 0); - p = next; - } - switch (*p) - { - case '=': - case '>': - case '<': - case '*': - ep->op = *p++; - if (*p == '=') - { - p++; - switch (ep->op) - { - case '>': - lge = -1; - break; - case '<': - lge = 1; - break; - } - } - break; - case '!': - case '@': - ep->op = *p++; - if (*p == '=') - p++; - break; - case 'x': - p++; - ep->op = '*'; - break; - default: - ep->op = '='; - if (ep->mask) - ep->value.num = ep->mask; - break; - } - } - if (ep->op != '*' && !ep->value.num) - { - if (ep->type == 'e') - { - if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0)) - { - ep->value.sub->re_disc = &mp->redisc; - if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE))) - { - p += ep->value.sub->re_npat; - if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0))) - p += ep->value.sub->re_npat; - } - if (n) - { - regmessage(mp, ep->value.sub, n); - ep->value.sub = 0; - } - else if (*p && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p); - } - } - else if (ep->type == 'm') - { - ep->mask = stresc(p) + 1; - ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0); - memcpy(ep->value.str, p, ep->mask); - if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)")) - ep->value.str[ep->mask - 1] = '*'; - } - else if (ep->type == 's') - { - ep->mask = stresc(p); - ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0); - memcpy(ep->value.str, p, ep->mask); - } - else if (*p == '\'') - { - stresc(p); - ep->value.num = *(unsigned char*)(p + 1) + lge; - } - else if (strmatch(p, "+([a-z])\\(*\\)")) - { - char* t; - - t = p; - ep->type = 'V'; - ep->op = *p; - while (*p && *p++ != '('); - switch (ep->op) - { - case 'l': - n = *p++; - if (n < 'a' || n > 'z') - { - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); - } - else if (!fun[n -= 'a']) - { - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); - } - else - { - ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); - ep->value.loop->lab = fun[n]; - while (*p && *p++ != ','); - ep->value.loop->start = strton(p, &t, NiL, 0); - while (*t && *t++ != ','); - ep->value.loop->size = strton(t, &t, NiL, 0); - } - break; - case 'm': - case 'r': - ep->desc = vmnewof(mp->vm, 0, char, 32, 0); - ep->mime = vmnewof(mp->vm, 0, char, 32, 0); - break; - case 'v': - break; - default: - if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t); - break; - } - } - else - { - ep->value.num = strton(p, NiL, NiL, 0) + lge; - if (ep->op == '@') - ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num)); - } - } - - /* - * file description - */ - - if (p2) - { - for (; isspace(*p2); p2++); - if (p = strchr(p2, '\t')) - { - /* - * check for message catalog index - */ - - *p++ = 0; - if (isalpha(*p2)) - { - for (p3 = p2; isalnum(*p3); p3++); - if (*p3++ == ':') - { - for (; isdigit(*p3); p3++); - if (!*p3) - { - for (p2 = p; isspace(*p2); p2++); - if (p = strchr(p2, '\t')) - *p++ = 0; - } - } - } - } - stresc(p2); - ep->desc = vmstrdup(mp->vm, p2); - if (p) - { - for (; isspace(*p); p++); - if (*p) - ep->mime = vmstrdup(mp->vm, p); - } - } - else - ep->desc = ""; - - /* - * get next entry - */ - - last = ep; - ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); - } - if (last) - { - last->next = 0; - if (mp->magiclast) - mp->magiclast->next = first; - else - mp->magic = first; - mp->magiclast = last; - } - vmfree(mp->vm, ep); - if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) - { - if (lev < 0) - (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); - else if (lev > 0) - (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); - if (ret) - (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); - } - error_info.file = 0; - error_info.line = 0; - return 0; -} - -/* - * load a magic file into mp - */ - -int -magicload(register Magic_t* mp, const char* file, unsigned long flags) -{ - register char* s; - register char* e; - register char* t; - int n; - int found; - int list; - Sfio_t* fp; - - mp->flags = mp->disc->flags | flags; - found = 0; - if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) - { - if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) - s = MAGIC_FILE; - } - for (;;) - { - if (!list) - e = 0; - else if (e = strchr(s, ':')) - { - /* - * ok, so ~ won't work for the last list element - * we do it for MAGIC_FILES_ENV anyway - */ - - if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) - { - sfputr(mp->tmp, t, -1); - s += n - 1; - } - sfwrite(mp->tmp, s, e - s); - if (!(s = sfstruse(mp->tmp))) - goto nospace; - } - if (!*s || streq(s, "-")) - s = MAGIC_FILE; - if (!(fp = sfopen(NiL, s, "r"))) - { - if (list) - { - if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/')) - { - strcpy(mp->fbuf, s); - sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); - if (!(s = sfstruse(mp->tmp))) - goto nospace; - if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ))) - goto next; - } - if (!(fp = sfopen(NiL, t, "r"))) - goto next; - } - else - { - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); - return -1; - } - } - found = 1; - n = load(mp, s, fp); - sfclose(fp); - if (n && !list) - return -1; - next: - if (!e) - break; - s = e + 1; - } - if (!found) - { - if (mp->flags & MAGIC_VERBOSE) - { - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); - } - return -1; - } - return 0; - nospace: - if (mp->disc->errorf) - (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); - return -1; -} - -/* - * open a magic session - */ - -Magic_t* -magicopen(Magicdisc_t* disc) -{ - register Magic_t* mp; - register int i; - register int n; - register int f; - register int c; - register Vmalloc_t* vm; - unsigned char* map[CC_MAPS + 1]; - - if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) - return 0; - if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) - { - vmclose(vm); - return 0; - } - mp->id = lib; - mp->disc = disc; - mp->vm = vm; - mp->flags = disc->flags; - mp->redisc.re_version = REG_VERSION; - mp->redisc.re_flags = REG_NOFREE; - mp->redisc.re_errorf = (regerror_t)disc->errorf; - mp->redisc.re_resizef = (regresize_t)vmgetmem; - mp->redisc.re_resizehandle = (void*)mp->vm; - mp->dtdisc.key = offsetof(Info_t, name); - mp->dtdisc.link = offsetof(Info_t, link); - if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash))) - goto bad; - for (n = 0; n < elementsof(info); n++) - dtinsert(mp->infotab, &info[n]); - for (i = 0; i < CC_MAPS; i++) - map[i] = ccmap(i, CC_ASCII); - mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); - for (n = 0; n <= UCHAR_MAX; n++) - { - f = 0; - i = CC_MAPS; - while (--i >= 0) - { - c = ccmapchr(map[i], n); - f = (f << CC_BIT) | CCTYPE(c); - } - mp->cctype[n] = f; - } - return mp; - bad: - magicclose(mp); - return 0; -} - -/* - * close a magicopen() session - */ - -int -magicclose(register Magic_t* mp) -{ - if (!mp) - return -1; - if (mp->tmp) - sfstrclose(mp->tmp); - if (mp->vm) - vmclose(mp->vm); - return 0; -} - -/* - * return the magic string for file with optional stat info st - */ - -char* -magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) -{ - off_t off; - char* s; - - mp->flags = mp->disc->flags; - mp->mime = 0; - if (!st) - s = T("cannot stat"); - else - { - if (mp->fp = fp) - off = sfseek(mp->fp, (off_t)0, SEEK_CUR); - s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf)); - if (mp->fp) - sfseek(mp->fp, off, SEEK_SET); - if (!(mp->flags & MAGIC_MIME)) - { - if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) - sfprintf(mp->tmp, "%s ", T("short")); - sfprintf(mp->tmp, "%s", s); - if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) - sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable")); - if (st->st_mode & S_ISUID) - sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); - if (st->st_mode & S_ISGID) - sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); - if (st->st_mode & S_ISVTX) - sfprintf(mp->tmp, ", sticky"); - if (!(s = sfstruse(mp->tmp))) - s = T("out of space"); - } - } - if (mp->flags & MAGIC_MIME) - s = mp->mime; - if (!s) - s = T("error"); - return s; -} - -/* - * list the magic table in mp on sp - */ - -int -magiclist(register Magic_t* mp, register Sfio_t* sp) -{ - register Entry_t* ep = mp->magic; - register Entry_t* rp = 0; - - mp->flags = mp->disc->flags; - sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); - while (ep) - { - sfprintf(sp, "%c %c\t", ep->cont, ep->nest); - if (ep->expr) - sfprintf(sp, "%s", ep->expr); - else - sfprintf(sp, "%ld", ep->offset); - sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask); - switch (ep->type) - { - case 'm': - case 's': - sfputr(sp, fmtesc(ep->value.str), -1); - break; - case 'V': - switch (ep->op) - { - case 'l': - sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); - break; - case 'v': - sfprintf(sp, "vcodex()"); - break; - default: - sfprintf(sp, "%p", ep->value.str); - break; - } - break; - default: - sfprintf(sp, "%lo", ep->value.num); - break; - } - sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); - if (ep->cont == '$' && !ep->value.lab->mask) - { - rp = ep; - ep = ep->value.lab; - } - else - { - if (ep->cont == ':') - { - ep = rp; - ep->value.lab->mask = 1; - } - ep = ep->next; - } - } - return 0; -} |