diff options
Diffstat (limited to 'usr/src/lib/libast/common/misc/magic.c')
-rw-r--r-- | usr/src/lib/libast/common/misc/magic.c | 2419 |
1 files changed, 2419 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/misc/magic.c b/usr/src/lib/libast/common/misc/magic.c new file mode 100644 index 0000000000..91a9b05e00 --- /dev/null +++ b/usr/src/lib/libast/common/misc/magic.c @@ -0,0 +1,2419 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1985-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* Phong Vo <kpv@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * Glenn Fowler + * AT&T Research + * + * library interface to file + * + * the sum of the hacks {s5,v10,planix} is _____ than the parts + */ + +static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2007-01-08 $\0\n"; + +static const char lib[] = "libast:magic"; + +#include <ast.h> +#include <ctype.h> +#include <ccode.h> +#include <dt.h> +#include <modex.h> +#include <error.h> +#include <regex.h> +#include <swap.h> + +#define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) + +#define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) + +#define MAXNEST 10 /* { ... 
} nesting limit */ +#define MINITEM 4 /* magic buffer rounding */ + +typedef struct /* identifier dictionary entry */ +{ + const char name[16]; /* identifier name */ + int value; /* identifier value */ + Dtlink_t link; /* dictionary link */ +} Info_t; + +typedef struct Edit /* edit substitution */ +{ + struct Edit* next; /* next in list */ + regex_t* from; /* from pattern */ +} Edit_t; + +struct Entry; + +typedef struct /* loop info */ +{ + struct Entry* lab; /* call this function */ + int start; /* start here */ + int size; /* increment by this amount */ + int count; /* dynamic loop count */ + int offset; /* dynamic offset */ +} Loop_t; + +typedef struct Entry /* magic file entry */ +{ + struct Entry* next; /* next in list */ + char* expr; /* offset expression */ + union + { + unsigned long num; + char* str; + struct Entry* lab; + regex_t* sub; + Loop_t* loop; + } value; /* comparison value */ + char* desc; /* file description */ + char* mime; /* file mime type */ + unsigned long offset; /* offset in bytes */ + unsigned long mask; /* mask before compare */ + char cont; /* continuation operation */ + char type; /* datum type */ + char op; /* comparison operation */ + char nest; /* { or } nesting operation */ + char swap; /* forced swap order */ +} Entry_t; + +#define CC_BIT 5 + +#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) +typedef unsigned short Cctype_t; +#else +typedef unsigned long Cctype_t; +#endif + +#define CC_text 0x01 +#define CC_control 0x02 +#define CC_latin 0x04 +#define CC_binary 0x08 +#define CC_utf_8 0x10 + +#define CC_notext CC_text /* CC_text is flipped before checking */ + +#define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) + +#define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) + +#define ID_NONE 0 +#define ID_ASM 1 +#define ID_C 2 +#define ID_COBOL 3 +#define ID_COPYBOOK 4 +#define ID_CPLUSPLUS 5 +#define ID_FORTRAN 6 +#define ID_HTML 7 +#define ID_INCL1 8 
+#define ID_INCL2 9 +#define ID_INCL3 10 +#define ID_MAM1 11 +#define ID_MAM2 12 +#define ID_MAM3 13 +#define ID_NOTEXT 14 +#define ID_PL1 15 +#define ID_YACC 16 + +#define ID_MAX ID_YACC + +#define INFO_atime 1 +#define INFO_blocks 2 +#define INFO_ctime 3 +#define INFO_fstype 4 +#define INFO_gid 5 +#define INFO_mode 6 +#define INFO_mtime 7 +#define INFO_name 8 +#define INFO_nlink 9 +#define INFO_size 10 +#define INFO_uid 11 + +#define _MAGIC_PRIVATE_ \ + Magicdisc_t* disc; /* discipline */ \ + Vmalloc_t* vm; /* vmalloc region */ \ + Entry_t* magic; /* parsed magic table */ \ + Entry_t* magiclast; /* last entry in magic */ \ + char* mime; /* MIME type */ \ + unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \ + char fbuf[SF_BUFSIZE + 1]; /* file data */ \ + char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ + char nbuf[256]; /* !CC_NATIVE data */ \ + char mbuf[64]; /* mime string */ \ + char sbuf[64]; /* type suffix string */ \ + char tbuf[2 * PATH_MAX]; /* type string */ \ + Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ + unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ + unsigned int multi[UCHAR_MAX + 1]; /* multi char count */ \ + int keep[MAXNEST]; /* ckmagic nest stack */ \ + char* cap[MAXNEST]; /* ckmagic mime stack */ \ + char* msg[MAXNEST]; /* ckmagic text stack */ \ + Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ + int fbsz; /* fbuf size */ \ + int fbmx; /* fbuf max size */ \ + int xbsz; /* xbuf size */ \ + int swap; /* swap() operation */ \ + unsigned long flags; /* disc+open flags */ \ + long xoff; /* xbuf offset */ \ + int identifier[ID_MAX + 1]; /* Info_t identifier */ \ + Sfio_t* fp; /* fbuf fp */ \ + Sfio_t* tmp; /* tmp string */ \ + regdisc_t redisc; /* regex discipline */ \ + Dtdisc_t dtdisc; /* dict discipline */ \ + Dt_t* idtab; /* identifier dict */ \ + Dt_t* infotab; /* info keyword dict */ + +#include <magic.h> + +static Info_t dict[] = /* keyword dictionary */ +{ + { "COMMON", ID_FORTRAN }, + { "COMPUTE", ID_COBOL 
}, + { "COMP", ID_COPYBOOK }, + { "COMPUTATIONAL",ID_COPYBOOK }, + { "DCL", ID_PL1 }, + { "DEFINED", ID_PL1 }, + { "DIMENSION", ID_FORTRAN }, + { "DIVISION", ID_COBOL }, + { "FILLER", ID_COPYBOOK }, + { "FIXED", ID_PL1 }, + { "FUNCTION", ID_FORTRAN }, + { "HTML", ID_HTML }, + { "INTEGER", ID_FORTRAN }, + { "MAIN", ID_PL1 }, + { "OPTIONS", ID_PL1 }, + { "PERFORM", ID_COBOL }, + { "PIC", ID_COPYBOOK }, + { "REAL", ID_FORTRAN }, + { "REDEFINES", ID_COPYBOOK }, + { "S9", ID_COPYBOOK }, + { "SECTION", ID_COBOL }, + { "SELECT", ID_COBOL }, + { "SUBROUTINE", ID_FORTRAN }, + { "TEXT", ID_ASM }, + { "VALUE", ID_COPYBOOK }, + { "attr", ID_MAM3 }, + { "binary", ID_YACC }, + { "block", ID_FORTRAN }, + { "bss", ID_ASM }, + { "byte", ID_ASM }, + { "char", ID_C }, + { "class", ID_CPLUSPLUS }, + { "clr", ID_NOTEXT }, + { "comm", ID_ASM }, + { "common", ID_FORTRAN }, + { "data", ID_ASM }, + { "dimension", ID_FORTRAN }, + { "done", ID_MAM2 }, + { "double", ID_C }, + { "even", ID_ASM }, + { "exec", ID_MAM3 }, + { "extern", ID_C }, + { "float", ID_C }, + { "function", ID_FORTRAN }, + { "globl", ID_ASM }, + { "h", ID_INCL3 }, + { "html", ID_HTML }, + { "include", ID_INCL1 }, + { "int", ID_C }, + { "integer", ID_FORTRAN }, + { "jmp", ID_NOTEXT }, + { "left", ID_YACC }, + { "libc", ID_INCL2 }, + { "long", ID_C }, + { "make", ID_MAM1 }, + { "mov", ID_NOTEXT }, + { "private", ID_CPLUSPLUS }, + { "public", ID_CPLUSPLUS }, + { "real", ID_FORTRAN }, + { "register", ID_C }, + { "right", ID_YACC }, + { "sfio", ID_INCL2 }, + { "static", ID_C }, + { "stdio", ID_INCL2 }, + { "struct", ID_C }, + { "subroutine", ID_FORTRAN }, + { "sys", ID_NOTEXT }, + { "term", ID_YACC }, + { "text", ID_ASM }, + { "tst", ID_NOTEXT }, + { "type", ID_YACC }, + { "typedef", ID_C }, + { "u", ID_INCL2 }, + { "union", ID_YACC }, + { "void", ID_C }, +}; + +static Info_t info[] = +{ + { "atime", INFO_atime }, + { "blocks", INFO_blocks }, + { "ctime", INFO_ctime }, + { "fstype", INFO_fstype }, + { "gid", INFO_gid }, + { 
"mode", INFO_mode }, + { "mtime", INFO_mtime }, + { "name", INFO_name }, + { "nlink", INFO_nlink }, + { "size", INFO_size }, + { "uid", INFO_uid }, +}; + +/* + * return pointer to data at offset off and size siz + * + * a range that ends within the first mp->fbsz bytes is served from + * mp->fbuf; any other range is served from mp->xbuf, which is + * refilled from mp->fp (seek + read of up to sizeof(xbuf)-1 bytes, + * then 0 terminated) whenever [off,off+siz) is not inside the + * current window [mp->xoff,mp->xoff+mp->xbsz) + * + * returns 0 if off is negative, if off+siz exceeds mp->fbmx, if the + * seek or read fails, or if the refilled window still does not + * cover the range; a failed read also resets xoff and xbsz to 0 + */ + +static char* +getdata(register Magic_t* mp, register long off, register int siz) +{ + register long n; + + if (off < 0) + return 0; + if (off + siz <= mp->fbsz) + return mp->fbuf + off; + if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) + { + if (off + siz > mp->fbmx) + return 0; + /* window start: off rounded down to a multiple of SF_BUFSIZE/2 */ + n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); + if (sfseek(mp->fp, n, SEEK_SET) != n) + return 0; + if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) + { + mp->xoff = 0; + mp->xbsz = 0; + return 0; + } + mp->xbuf[mp->xbsz] = 0; + mp->xoff = n; + if (off + siz > mp->xoff + mp->xbsz) + return 0; + } + return mp->xbuf + off - mp->xoff; +} + +/* + * @... evaluator for strexpr() + * + * handles @N or @(expr) followed by an optional width suffix: + * b/B 1 byte, h/H 2 bytes, q/Q 8 bytes, anything else 4 bytes; + * the value is fetched from the file data at that offset via + * getdata() and, for multi-byte widths, swapget() with mp->swap + * + * handle is the Magic_t* passed through strexpr() + */ + +static long +indirect(const char* cs, char** e, void* handle) +{ + register char* s = (char*)cs; + register Magic_t* mp = (Magic_t*)handle; + register long n = 0; + register char* p; + + if (s) + { + if (*s == '@') + { + n = *++s == '(' ? 
strexpr(s, e, indirect, mp) : strtol(s, e, 0); + switch (*(s = *e)) + { + case 'b': + case 'B': + s++; + if (p = getdata(mp, n, 1)) + n = *(unsigned char*)p; + else + s = (char*)cs; + break; + case 'h': + case 'H': + s++; + if (p = getdata(mp, n, 2)) + n = swapget(mp->swap, p, 2); + else + s = (char*)cs; + break; + case 'q': + case 'Q': + s++; + if (p = getdata(mp, n, 8)) + n = swapget(mp->swap, p, 8); + else + s = (char*)cs; + break; + default: + if (isalnum(*s)) + s++; + if (p = getdata(mp, n, 4)) + n = swapget(mp->swap, p, 4); + else + s = (char*)cs; + break; + } + } + *e = s; + } + else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); + return n; +} + +/* + * emit regex error message + */ + +static void +regmessage(Magic_t* mp, regex_t* re, int code) +{ + char buf[128]; + + if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + { + regerror(code, re, buf, sizeof(buf)); + (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); + } +} + +/* + * decompose vcodex(3) method composition + */ + +static char* +vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) +{ + unsigned char* map; + int c; + int n; + int i; + + map = CCMAP(CC_ASCII, CC_NATIVE); + i = 1; + for (;;) + { + if (i) + i = 0; + else + *b++ = '^'; + while (b < e && m < x && (c = *m++)) + { + if (map) + c = map[c]; + *b++ = c; + } + if (b >= e) + break; + n = 0; + while (m < x) + { + n = (n<<7) | (*m & 0x7f); + if (!(*m++ & 0x80)) + break; + } + if (n >= (x - m)) + break; + m += n; + } + return b; +} + +/* + * check for magic table match in buf + */ + +static char* +ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off) +{ + register Entry_t* ep; + register char* p; + register char* b; + register int level = 0; + int call = -1; + int c; + char* q; + char* t; + char* base = 0; + unsigned long num; + unsigned long mask; + regmatch_t matches[10]; + + mp->swap = 0; + b = mp->msg[0] = 
buf; + mp->mime = mp->cap[0] = 0; + mp->keep[0] = 0; + for (ep = mp->magic; ep; ep = ep->next) + { + fun: + if (ep->nest == '{') + { + if (++level >= MAXNEST) + { + call = -1; + level = 0; + mp->keep[0] = 0; + b = mp->msg[0]; + mp->mime = mp->cap[0]; + continue; + } + mp->keep[level] = mp->keep[level - 1] != 0; + mp->msg[level] = b; + mp->cap[level] = mp->mime; + } + switch (ep->cont) + { + case '#': + if (mp->keep[level] && b > buf) + { + *b = 0; + return buf; + } + mp->swap = 0; + b = mp->msg[0] = buf; + mp->mime = mp->cap[0] = 0; + if (ep->type == ' ') + continue; + break; + case '$': + if (mp->keep[level] && call < (MAXNEST - 1)) + { + mp->ret[++call] = ep; + ep = ep->value.lab; + goto fun; + } + continue; + case ':': + ep = mp->ret[call--]; + if (ep->op == 'l') + goto fun; + continue; + case '|': + if (mp->keep[level] > 1) + goto checknest; + /*FALLTHROUGH*/ + default: + if (!mp->keep[level]) + { + b = mp->msg[level]; + mp->mime = mp->cap[level]; + goto checknest; + } + break; + } + /* + * no expression: plain offset; otherwise ep->offset selects + * either an offset expression (0) or an INFO_* stat keyword + */ + if (!ep->expr) + num = ep->offset + off; + else + switch (ep->offset) + { + case 0: + num = strexpr(ep->expr, NiL, indirect, mp) + off; + break; + case INFO_atime: + num = st->st_atime; + ep->type = 'D'; + break; + case INFO_blocks: + num = iblocks(st); + ep->type = 'N'; + break; + case INFO_ctime: + num = st->st_ctime; + ep->type = 'D'; + break; + case INFO_fstype: + p = fmtfs(st); + ep->type = toupper(ep->type); + break; + case INFO_gid: + if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') + { + p = fmtgid(st->st_gid); + ep->type = toupper(ep->type); + } + else + { + num = st->st_gid; + ep->type = 'N'; + } + break; + case INFO_mode: + if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') + { + p = fmtmode(st->st_mode, 0); + ep->type = toupper(ep->type); + } + else + { + num = modex(st->st_mode); + ep->type = 'N'; + } + break; + case INFO_mtime: + /* data modification time -- st_ctime belongs to INFO_ctime above */ + num = st->st_mtime; + ep->type = 'D'; + break; + case INFO_name: + if (!base) + { + if (base = strrchr(file, '/')) + 
base++; + else + base = (char*)file; + } + p = base; + ep->type = toupper(ep->type); + break; + case INFO_nlink: + num = st->st_nlink; + ep->type = 'N'; + break; + case INFO_size: + num = st->st_size; + ep->type = 'N'; + break; + case INFO_uid: + if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') + { + p = fmtuid(st->st_uid); + ep->type = toupper(ep->type); + } + else + { + num = st->st_uid; + ep->type = 'N'; + } + break; + } + switch (ep->type) + { + + case 'b': + if (!(p = getdata(mp, num, 1))) + goto next; + num = *(unsigned char*)p; + break; + + case 'h': + if (!(p = getdata(mp, num, 2))) + goto next; + num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); + break; + + case 'd': + case 'l': + case 'v': + if (!(p = getdata(mp, num, 4))) + goto next; + num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); + break; + + case 'q': + if (!(p = getdata(mp, num, 8))) + goto next; + num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8); + break; + + case 'e': + if (!(p = getdata(mp, num, 0))) + goto next; + /*FALLTHROUGH*/ + case 'E': + if (!ep->value.sub) + goto next; + if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) + { + c = mp->fbsz; + if (c >= sizeof(mp->nbuf)) + c = sizeof(mp->nbuf) - 1; + p = (char*)memcpy(mp->nbuf, p, c); + p[c] = 0; + ccmapstr(mp->x2n, p, c); + if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) + { + if (c != REG_NOMATCH) + regmessage(mp, ep->value.sub, c); + goto next; + } + } + p = ep->value.sub->re_sub->re_buf; + q = T(ep->desc); + t = *q ? q : p; + if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') + *b++ = ' '; + b += sfsprintf(b, PATH_MAX - (b - buf), *q ? 
q : "%s", p + (*p == '\b')); + if (ep->mime) + mp->mime = ep->mime; + goto checknest; + + case 's': + if (!(p = getdata(mp, num, ep->mask))) + goto next; + goto checkstr; + case 'm': + if (!(p = getdata(mp, num, 0))) + goto next; + /*FALLTHROUGH*/ + case 'M': + case 'S': + checkstr: + for (;;) + { + if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) + break; + if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) + break; + if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) + goto next; + p = (char*)memcpy(mp->nbuf, p, ep->mask); + p[ep->mask] = 0; + ccmapstr(mp->x2n, p, ep->mask); + } + q = T(ep->desc); + if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') + *b++ = ' '; + for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); + *t = 0; + b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p); + *t = c; + if (ep->mime) + mp->mime = ep->mime; + goto checknest; + + } + if (mask = ep->mask) + num &= mask; + switch (ep->op) + { + + case '=': + case '@': + if (num == ep->value.num) + break; + if (ep->cont != '#') + goto next; + if (!mask) + mask = ~mask; + if (ep->type == 'h') + { + if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) + { + if (!(mp->swap & (mp->swap + 1))) + mp->swap = 7; + goto swapped; + } + } + else if (ep->type == 'l') + { + for (c = 1; c < 4; c++) + if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) + { + if (!(mp->swap & (mp->swap + 1))) + mp->swap = 7; + goto swapped; + } + } + else if (ep->type == 'q') + { + for (c = 1; c < 8; c++) + if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) + goto swapped; + } + goto next; + + case '!': + if (num != ep->value.num) + break; + goto next; + + case '^': + if (num ^ ep->value.num) + break; + goto next; + + case '>': + if (num > ep->value.num) + break; + goto next; + + case '<': + if (num < ep->value.num) + break; + goto next; + 
+ case 'l': + if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) + { + if (!ep->value.loop->count) + { + ep->value.loop->count = num; + ep->value.loop->offset = off; + off = ep->value.loop->start; + } + else if (!--ep->value.loop->count) + { + off = ep->value.loop->offset; + goto next; + } + else + off += ep->value.loop->size; + mp->ret[++call] = ep; + ep = ep->value.loop->lab; + goto fun; + } + goto next; + + case 'm': + c = mp->swap; + t = ckmagic(mp, file, b + (b > buf), st, num); + mp->swap = c; + if (!t) + goto next; + if (b > buf) + *b = ' '; + b += strlen(b); + break; + + case 'r': +#if _UWIN + { + char* e; + Sfio_t* rp; + Sfio_t* gp; + + if (!(t = strrchr(file, '.'))) + goto next; + sfprintf(mp->tmp, "/reg/classes_root/%s", t); + if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) + goto next; + *ep->desc = 0; + *ep->mime = 0; + gp = 0; + while (t = sfgetr(rp, '\n', 1)) + { + if (strneq(t, "Content Type=", 13)) + { + ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); + strcpy(ep->mime, t + 13); + if (gp) + break; + } + else + { + sfprintf(mp->tmp, "/reg/classes_root/%s", t); + if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) + { + ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); + strcpy(ep->desc, t); + if (*ep->mime) + break; + } + } + } + sfclose(rp); + if (!gp) + goto next; + if (!*ep->mime) + { + t = T(ep->desc); + if (!strncasecmp(t, "microsoft", 9)) + t += 9; + while (isspace(*t)) + t++; + e = "application/x-ms-"; + ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); + e = strcopy(ep->mime, e); + while ((c = *t++) && c != '.' && c != ' ') + *e++ = isupper(c) ? 
tolower(c) : c; + *e = 0; + } + while (t = sfgetr(gp, '\n', 1)) + if (*t && !streq(t, "\"\"")) + { + ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); + strcpy(ep->desc, t); + break; + } + sfclose(gp); + if (!*ep->desc) + goto next; + if (!t) + for (t = T(ep->desc); *t; t++) + if (*t == '.') + *t = ' '; + if (!mp->keep[level]) + mp->keep[level] = 2; + mp->mime = ep->mime; + break; + } +#else + if (ep->cont == '#' && !mp->keep[level]) + mp->keep[level] = 1; + goto next; +#endif + + case 'v': + if (!(p = getdata(mp, num, 4))) + goto next; + c = 0; + do + { + num++; + c = (c<<7) | (*p & 0x7f); + } while (*p++ & 0x80); + if (!(p = getdata(mp, num, c))) + goto next; + if (mp->keep[level]++ && b > buf && *(b - 1) != ' ') + { + *b++ = ','; + *b++ = ' '; + } + b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); + goto checknest; + + } + swapped: + q = T(ep->desc); + if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') + *b++ = ' '; + if (ep->type == 'd' || ep->type == 'D') + b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num)); + else if (ep->type == 'v') + b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num)); + else + b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num); + if (ep->mime && *ep->mime) + mp->mime = ep->mime; + checknest: + if (ep->nest == '}') + { + if (!mp->keep[level]) + { + b = mp->msg[level]; + mp->mime = mp->cap[level]; + } + else if (level > 0) + mp->keep[level - 1] = mp->keep[level]; + if (--level < 0) + { + level = 0; + mp->keep[0] = 0; + } + } + continue; + next: + if (ep->cont == '&') + mp->keep[level] = 0; + goto checknest; + } + if (mp->keep[level] && b > buf) + { + *b = 0; + return buf; + } + return 0; +} + +/* + * check english language stats + */ + +static int +ckenglish(register Magic_t* mp, int pun, int badpun) +{ + register char* s; + register int vowl = 0; + register int freq = 0; + register int 
rare = 0; + + if (5 * badpun > pun) + return 0; + if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) + return 0; + if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) + return 0; + for (s = "aeiou"; *s; s++) + vowl += mp->count[toupper(*s)] + mp->count[*s]; + for (s = "etaion"; *s; s++) + freq += mp->count[toupper(*s)] + mp->count[*s]; + for (s = "vjkqxz"; *s; s++) + rare += mp->count[toupper(*s)] + mp->count[*s]; + return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare; +} + +/* + * check programming language stats + */ + +static char* +cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st) +{ + register int c; + register unsigned char* b; + register unsigned char* e; + register int q; + register char* s; + char* t; + char* base; + char* suff; + char* t1; + char* t2; + char* t3; + int n; + int badpun; + int code; + int pun; + Cctype_t flags; + Info_t* ip; + + b = (unsigned char*)mp->fbuf; + e = b + mp->fbsz; + memzero(mp->count, sizeof(mp->count)); + memzero(mp->multi, sizeof(mp->multi)); + memzero(mp->identifier, sizeof(mp->identifier)); + + /* + * check character coding + */ + + flags = 0; + while (b < e) + flags |= mp->cctype[*b++]; + b = (unsigned char*)mp->fbuf; + code = 0; + q = CC_ASCII; + n = CC_MASK; + for (c = 0; c < CC_MAPS; c++) + { + flags ^= CC_text; + if ((flags & CC_MASK) < n) + { + n = flags & CC_MASK; + q = c; + } + flags >>= CC_BIT; + } + flags = n; + if (!(flags & (CC_binary|CC_notext))) + { + if (q != CC_NATIVE) + { + code = q; + ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); + } + if (b[0] == '#' && b[1] == '!') + { + for (b += 2; b < e && isspace(*b); b++); + for (s = (char*)b; b < e && isprint(*b); b++); + c = *b; + *b = 0; + if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK)) + { + if (t = strrchr(s, '/')) + s = t + 1; + for (t = s; *t; t++) + if (isspace(*t)) + { + *t = 0; + break; + } + sfsprintf(mp->mbuf, sizeof(mp->mbuf), 
"application/x-%s", *s ? s : "sh"); + mp->mime = mp->mbuf; + if (match(s, "*sh")) + { + t1 = T("command"); + if (streq(s, "sh")) + *s = 0; + else + { + *b++ = ' '; + *b = 0; + } + } + else + { + t1 = T("interpreter"); + *b++ = ' '; + *b = 0; + } + sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1); + s = mp->sbuf; + goto qualify; + } + *b = c; + b = (unsigned char*)mp->fbuf; + } + badpun = 0; + pun = 0; + q = 0; + s = 0; + t = 0; + while (b < e) + { + c = *b++; + mp->count[c]++; + if (c == q && (q != '*' || *b == '/' && b++)) + { + mp->multi[q]++; + q = 0; + } + else if (c == '\\') + { + s = 0; + b++; + } + else if (!q) + { + if (isalpha(c) || c == '_') + { + if (!s) + s = (char*)b - 1; + } + else if (!isdigit(c)) + { + if (s) + { + if (s > mp->fbuf) + switch (*(s - 1)) + { + case ':': + if (*b == ':') + mp->multi[':']++; + break; + case '.': + if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n')) + mp->multi['.']++; + break; + case '\n': + case '\\': + if (*b == '{') + t = (char*)b + 1; + break; + case '{': + if (s == t && *b == '}') + mp->multi['X']++; + break; + } + if (!mp->idtab) + { + if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash)) + for (q = 0; q < elementsof(dict); q++) + dtinsert(mp->idtab, &dict[q]); + else if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); + q = 0; + } + if (mp->idtab) + { + *(b - 1) = 0; + if (ip = (Info_t*)dtmatch(mp->idtab, s)) + mp->identifier[ip->value]++; + *(b - 1) = c; + } + s = 0; + } + switch (c) + { + case '\t': + if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n') + mp->multi['\t']++; + break; + case '"': + case '\'': + q = c; + break; + case '/': + if (*b == '*') + q = *b++; + else if (*b == '/') + q = '\n'; + break; + case '$': + if (*b == '(' && *(b + 1) != ' ') + mp->multi['$']++; + break; + case '{': + case '}': + case '[': + case ']': + case '(': + mp->multi[c]++; + break; + case ')': + mp->multi[c]++; + goto punctuation; + case ':': + if (*b == ':' && 
isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2))) + mp->multi[':']++; + goto punctuation; + case '.': + case ',': + case '%': + case ';': + case '?': + punctuation: + pun++; + if (*b != ' ' && *b != '\n') + badpun++; + break; + } + } + } + } + } + else + while (b < e) + mp->count[*b++]++; + base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file; + suff = (t1 = strrchr(base, '.')) ? t1 + 1 : ""; + if (!flags) + { + if (match(suff, "*sh|bat|cmd")) + goto id_sh; + if (match(base, "*@(mkfile)")) + goto id_mk; + if (match(base, "*@(makefile|.mk)")) + goto id_make; + if (match(base, "*@(mamfile|.mam)")) + goto id_mam; + if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy")) + goto id_c; + if (match(suff, "f")) + goto id_fortran; + if (match(suff, "htm+(l)")) + goto id_html; + if (match(suff, "cpy")) + goto id_copybook; + if (match(suff, "cob|cbl|cb2")) + goto id_cobol; + if (match(suff, "pl[1i]")) + goto id_pl1; + if (match(suff, "tex")) + goto id_tex; + if (match(suff, "asm|s")) + goto id_asm; + if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.'))) + { + id_sh: + s = T("command script"); + mp->mime = "application/sh"; + goto qualify; + } + if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *")) + { + s = T("mail message"); + mp->mime = "message/rfc822"; + goto qualify; + } + if (match(base, "*@(mkfile)")) + { + id_mk: + s = "mkfile"; + mp->mime = "application/mk"; + goto qualify; + } + if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0)) + { + id_make: + s = "makefile"; + mp->mime = "application/make"; + goto qualify; + } + if (mp->multi['.'] >= 3) + { + s = T("nroff input"); + mp->mime = "application/x-troff"; + goto qualify; + } + if (mp->multi['X'] >= 3) + { + s = T("TeX input"); + mp->mime = "application/x-tex"; + goto qualify; + } + if (mp->fbsz < SF_BUFSIZE && + (mp->multi['('] == mp->multi[')'] && + mp->multi['{'] == mp->multi['}'] 
&& + mp->multi['['] == mp->multi[']']) || + mp->fbsz >= SF_BUFSIZE && + (mp->multi['('] >= mp->multi[')'] && + mp->multi['{'] >= mp->multi['}'] && + mp->multi['['] >= mp->multi[']'])) + { + c = mp->identifier[ID_INCL1]; + if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c || + mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 || + mp->count['='] >= 20 && mp->count[';'] >= 20) + { + id_c: + t1 = ""; + t2 = "c "; + t3 = T("program"); + switch (*suff) + { + case 'c': + case 'C': + mp->mime = "application/x-cc"; + break; + case 'l': + case 'L': + t1 = "lex "; + mp->mime = "application/x-lex"; + break; + default: + t3 = T("header"); + if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5) + { + mp->mime = "application/x-cc"; + break; + } + /*FALLTHROUGH*/ + case 'y': + case 'Y': + t1 = "yacc "; + mp->mime = "application/x-yacc"; + break; + } + if (mp->identifier[ID_CPLUSPLUS] >= 3) + { + t2 = "c++ "; + mp->mime = "application/x-c++"; + } + sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3); + s = mp->sbuf; + goto qualify; + } + } + if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 && + (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] || + mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2])) + { + id_mam: + s = T("mam program"); + mp->mime = "application/x-mam"; + goto qualify; + } + if (mp->identifier[ID_FORTRAN] >= 8) + { + id_fortran: + s = T("fortran program"); + mp->mime = "application/x-fortran"; + goto qualify; + } + if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2) + { + id_html: + s = T("html input"); + mp->mime = "text/html"; + goto qualify; + } + if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) + { + id_copybook: + s = T("cobol copybook"); + mp->mime = "application/x-cobol"; + goto qualify; + } + 
if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) + { + id_cobol: + s = T("cobol program"); + mp->mime = "application/x-cobol"; + goto qualify; + } + if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) + { + id_pl1: + s = T("pl1 program"); + mp->mime = "application/x-pl1"; + goto qualify; + } + if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{']) + { + id_tex: + s = T("TeX input"); + mp->mime = "text/tex"; + goto qualify; + } + if (mp->identifier[ID_ASM] >= 4) + { + id_asm: + s = T("as program"); + mp->mime = "application/x-as"; + goto qualify; + } + if (ckenglish(mp, pun, badpun)) + { + s = T("english text"); + mp->mime = "text/plain"; + goto qualify; + } + } + else if (streq(base, "core")) + { + mp->mime = "x-system/core"; + return T("core dump"); + } + if (flags & (CC_binary|CC_notext)) + { + b = (unsigned char*)mp->fbuf; + e = b + mp->fbsz; + n = 0; + for (;;) + { + c = *b++; + q = 0; + while (c & 0x80) + { + c <<= 1; + q++; + } + switch (q) + { + case 4: + if (b < e && (*b++ & 0xc0) != 0x80) + break; + case 3: + if (b < e && (*b++ & 0xc0) != 0x80) + break; + case 2: + if (b < e && (*b++ & 0xc0) != 0x80) + break; + n = 1; + case 0: + if (b >= e) + { + if (n) + { + flags &= ~(CC_binary|CC_notext); + flags |= CC_utf_8; + } + break; + } + continue; + } + break; + } + } + if (flags & (CC_binary|CC_notext)) + { + unsigned long d = 0; + + if ((q = mp->fbsz / UCHAR_MAX) >= 2) + { + /* + * compression/encryption via standard deviation + */ + + + for (c = 0; c < UCHAR_MAX; c++) + { + pun = mp->count[c] - q; + d += pun * pun; + } + d /= mp->fbsz; + } + if (d <= 0) + s = T("binary"); + else if (d < 4) + s = T("encrypted"); + else if (d < 16) + s = T("packed"); + else if (d < 64) + s = T("compressed"); + else if (d < 256) + s = T("delta"); + else + s = T("data"); + mp->mime = 
"application/octet-stream"; + return s; + } + mp->mime = "text/plain"; + if (flags & CC_utf_8) + s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text"); + else if (flags & CC_latin) + s = (flags & CC_control) ? T("latin text with control characters") : T("latin text"); + else + s = (flags & CC_control) ? T("text with control characters") : T("text"); + qualify: + if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) + { + t = "dos "; + mp->mime = "text/dos"; + } + else + t = ""; + if (code) + { + if (code == CC_ASCII) + sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s); + else + { + sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s); + mp->mime = "text/ebcdic"; + } + s = buf; + } + else if (*t) + { + sfsprintf(buf, PATH_MAX, "%s%s", t, s); + s = buf; + } + return s; +} + +/* + * return the basic magic string for file,st in buf,size + */ + +static char* +type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size) +{ + register char* s; + register char* t; + + mp->mime = 0; + if (!S_ISREG(st->st_mode)) + { + if (S_ISDIR(st->st_mode)) + { + mp->mime = "x-system/dir"; + return T("directory"); + } + if (S_ISLNK(st->st_mode)) + { + mp->mime = "x-system/lnk"; + s = buf; + s += sfsprintf(s, PATH_MAX, T("symbolic link to ")); + if (pathgetlink(file, s, size - (s - buf)) < 0) + return T("cannot read symbolic link text"); + return buf; + } + if (S_ISBLK(st->st_mode)) + { + mp->mime = "x-system/blk"; + sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st)); + return buf; + } + if (S_ISCHR(st->st_mode)) + { + mp->mime = "x-system/chr"; + sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st)); + return buf; + } + if (S_ISFIFO(st->st_mode)) + { + mp->mime = "x-system/fifo"; + return "fifo"; + } +#ifdef S_ISSOCK + if (S_ISSOCK(st->st_mode)) + { + mp->mime = "x-system/sock"; + return "socket"; + } +#endif + } + if (!(mp->fbmx = st->st_size)) + s = 
T("empty"); + else if (!mp->fp) + s = T("cannot read"); + else + { + mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1); + if (mp->fbsz < 0) + s = fmterror(errno); + else if (mp->fbsz == 0) + s = T("empty"); + else + { + mp->fbuf[mp->fbsz] = 0; + mp->xoff = 0; + mp->xbsz = 0; + if (!(s = ckmagic(mp, file, buf, st, 0))) + s = cklang(mp, file, buf, st); + } + } + if (!mp->mime) + mp->mime = "application/unknown"; + else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) + { + register char* b; + register char* be; + register char* m; + register char* me; + + b = mp->mime; + me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; + while (m < me && b < t) + *m++ = *b++; + b = t = s; + for (;;) + { + if (!(be = strchr(t, ' '))) + { + be = b + strlen(b); + break; + } + if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) + break; + b = t; + t = be + 1; + } + while (m < me && b < be) + if ((*m++ = *b++) == ' ') + *(m - 1) = '-'; + *m = 0; + } + return s; +} + +/* + * low level for magicload() + */ + +static int +load(register Magic_t* mp, char* file, register Sfio_t* fp) +{ + register Entry_t* ep; + register char* p; + register char* p2; + char* p3; + char* next; + int n; + int lge; + int lev; + int ent; + int old; + int cont; + Info_t* ip; + Entry_t* ret; + Entry_t* first; + Entry_t* last = 0; + Entry_t* fun['z' - 'a' + 1]; + + memzero(fun, sizeof(fun)); + cont = '$'; + ent = 0; + lev = 0; + old = 0; + ret = 0; + error_info.file = file; + error_info.line = 0; + first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); + while (p = sfgetr(fp, '\n', 1)) + { + error_info.line++; + for (; isspace(*p); p++); + + /* + * nesting + */ + + switch (*p) + { + case 0: + case '#': + cont = '#'; + continue; + case '{': + if (++lev < MAXNEST) + ep->nest = *p; + else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... 
} operator nesting too deep -- %d max", MAXNEST); + continue; + case '}': + if (!last || lev <= 0) + { + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p); + } + else if (lev-- == ent) + { + ent = 0; + ep->cont = ':'; + ep->offset = ret->offset; + ep->nest = ' '; + ep->type = ' '; + ep->op = ' '; + ep->desc = "[RETURN]"; + last = ep; + ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); + ret = 0; + } + else + last->nest = *p; + continue; + default: + if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') + { + n = *p++; + if (n >= 'a' && n <= 'z') + n -= 'a'; + else + { + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); + n = 0; + } + if (ret && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); + if (*p == '{') + { + ent = ++lev; + ret = ep; + ep->desc = "[FUNCTION]"; + } + else + { + if (*(p + 1) != ')' && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a'); + ep->desc = "[CALL]"; + } + ep->cont = cont; + ep->offset = n; + ep->nest = ' '; + ep->type = ' '; + ep->op = ' '; + last = ep; + ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); + if (ret) + fun[n] = last->value.lab = ep; + else if (!(last->value.lab = fun[n]) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); + continue; + } + if (!ep->nest) + ep->nest = (lev > 0 && lev != ent) ? 
('0' + lev - !!ent) : ' '; + break; + } + + /* + * continuation + */ + + cont = '$'; + switch (*p) + { + case '>': + old = 1; + if (*(p + 1) == *p) + { + /* + * old style nesting push + */ + + p++; + old = 2; + if (!lev && last) + { + lev = 1; + last->nest = '{'; + if (last->cont == '>') + last->cont = '&'; + ep->nest = '1'; + } + } + /*FALLTHROUGH*/ + case '+': + case '&': + case '|': + ep->cont = *p++; + break; + default: + if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p); + /*FALLTHROUGH*/ + case '*': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + ep->cont = (lev > 0) ? '&' : '#'; + break; + } + switch (old) + { + case 1: + old = 0; + if (lev) + { + /* + * old style nesting pop + */ + + lev = 0; + if (last) + last->nest = '}'; + ep->nest = ' '; + if (ep->cont == '&') + ep->cont = '#'; + } + break; + case 2: + old = 1; + break; + } + if (isdigit(*p)) + { + /* + * absolute offset + */ + + ep->offset = strton(p, &next, NiL, 0); + p2 = next; + } + else + { + for (p2 = p; *p2 && !isspace(*p2); p2++); + if (!*p2) + { + if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); + continue; + } + + /* + * offset expression + */ + + *p2++ = 0; + ep->expr = vmstrdup(mp->vm, p); + if (isalpha(*p)) + ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? 
ip->value : 0; + else if (*p == '(' && ep->cont == '>') + { + /* + * convert old style indirection to @ + */ + + p = ep->expr + 1; + for (;;) + { + switch (*p++) + { + case 0: + case '@': + case '(': + break; + case ')': + break; + default: + continue; + } + break; + } + if (*--p == ')') + { + *p = 0; + *ep->expr = '@'; + } + } + } + for (; isspace(*p2); p2++); + for (p = p2; *p2 && !isspace(*p2); p2++); + if (!*p2) + { + if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); + continue; + } + *p2++ = 0; + + /* + * type + */ + + if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e') + { + ep->swap = ~(*p == 'l' ? 7 : 0); + p += 2; + } + if (*p == 's') + { + if (*(p + 1) == 'h') + ep->type = 'h'; + else + ep->type = 's'; + } + else if (*p == 'a') + ep->type = 's'; + else + ep->type = *p; + if (p = strchr(p, '&')) + { + /* + * old style mask + */ + + ep->mask = strton(++p, NiL, NiL, 0); + } + for (; isspace(*p2); p2++); + if (ep->mask) + *--p2 = '='; + + /* + * comparison operation + */ + + p = p2; + if (p2 = strchr(p, '\t')) + *p2++ = 0; + else + { + int qe = 0; + int qn = 0; + + /* + * assume balanced {}[]()\\""'' field + */ + + for (p2 = p;;) + { + switch (n = *p2++) + { + case 0: + break; + case '{': + if (!qe) + qe = '}'; + if (qe == '}') + qn++; + continue; + case '(': + if (!qe) + qe = ')'; + if (qe == ')') + qn++; + continue; + case '[': + if (!qe) + qe = ']'; + if (qe == ']') + qn++; + continue; + case '}': + case ')': + case ']': + if (qe == n && qn > 0) + qn--; + continue; + case '"': + case '\'': + if (!qe) + qe = n; + else if (qe == n) + qe = 0; + continue; + case '\\': + if (*p2) + p2++; + continue; + default: + if (!qe && isspace(n)) + break; + continue; + } + if (n) + *(p2 - 1) = 0; + else + p2--; + break; + } + } + lge = 0; + if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') + ep->op = '='; + else + { + if (*p == '&') + { + ep->mask = strton(++p, &next, NiL, 0); + p = next; + } + 
switch (*p) + { + case '=': + case '>': + case '<': + case '*': + ep->op = *p++; + if (*p == '=') + { + p++; + switch (ep->op) + { + case '>': + lge = -1; + break; + case '<': + lge = 1; + break; + } + } + break; + case '!': + case '@': + ep->op = *p++; + if (*p == '=') + p++; + break; + case 'x': + p++; + ep->op = '*'; + break; + default: + ep->op = '='; + if (ep->mask) + ep->value.num = ep->mask; + break; + } + } + if (ep->op != '*' && !ep->value.num) + { + if (ep->type == 'e') + { + if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0)) + { + ep->value.sub->re_disc = &mp->redisc; + if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE))) + { + p += ep->value.sub->re_npat; + if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0))) + p += ep->value.sub->re_npat; + } + if (n) + { + regmessage(mp, ep->value.sub, n); + ep->value.sub = 0; + } + else if (*p && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p); + } + } + else if (ep->type == 'm') + { + ep->mask = stresc(p) + 1; + ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0); + memcpy(ep->value.str, p, ep->mask); + if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)")) + ep->value.str[ep->mask - 1] = '*'; + } + else if (ep->type == 's') + { + ep->mask = stresc(p); + ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0); + memcpy(ep->value.str, p, ep->mask); + } + else if (*p == '\'') + { + stresc(p); + ep->value.num = *(unsigned char*)(p + 1) + lge; + } + else if (strmatch(p, "+([a-z])\\(*\\)")) + { + char* t; + + t = p; + ep->type = 'V'; + ep->op = *p; + while (*p && *p++ != '('); + switch (ep->op) + { + case 'l': + n = *p++; + if (n < 'a' || n > 'z') + { + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); + } + else if (!fun[n -= 'a']) + { + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); + } + else + { + 
ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); + ep->value.loop->lab = fun[n]; + while (*p && *p++ != ','); + ep->value.loop->start = strton(p, &t, NiL, 0); + while (*t && *t++ != ','); + ep->value.loop->size = strton(t, &t, NiL, 0); + } + break; + case 'm': + case 'r': + ep->desc = vmnewof(mp->vm, 0, char, 32, 0); + ep->mime = vmnewof(mp->vm, 0, char, 32, 0); + break; + case 'v': + break; + default: + if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t); + break; + } + } + else + { + ep->value.num = strton(p, NiL, NiL, 0) + lge; + if (ep->op == '@') + ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num)); + } + } + + /* + * file description + */ + + if (p2) + { + for (; isspace(*p2); p2++); + if (p = strchr(p2, '\t')) + { + /* + * check for message catalog index + */ + + *p++ = 0; + if (isalpha(*p2)) + { + for (p3 = p2; isalnum(*p3); p3++); + if (*p3++ == ':') + { + for (; isdigit(*p3); p3++); + if (!*p3) + { + for (p2 = p; isspace(*p2); p2++); + if (p = strchr(p2, '\t')) + *p++ = 0; + } + } + } + } + stresc(p2); + ep->desc = vmstrdup(mp->vm, p2); + if (p) + { + for (; isspace(*p); p++); + if (*p) + ep->mime = vmstrdup(mp->vm, p); + } + } + else + ep->desc = ""; + + /* + * get next entry + */ + + last = ep; + ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); + } + if (last) + { + last->next = 0; + if (mp->magiclast) + mp->magiclast->next = first; + else + mp->magic = first; + mp->magiclast = last; + } + vmfree(mp->vm, ep); + if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) + { + if (lev < 0) + (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); + else if (lev > 0) + (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); + if (ret) + (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); + } + error_info.file = 0; + error_info.line = 0; + return 0; +} + +/* + * load a magic file into mp + */ + +int 
+magicload(register Magic_t* mp, const char* file, unsigned long flags) +{ + register char* s; + register char* e; + register char* t; + int n; + int found; + int list; + Sfio_t* fp; + + mp->flags = mp->disc->flags | flags; + found = 0; + if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) + { + if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) + s = MAGIC_FILE; + } + for (;;) + { + if (!list) + e = 0; + else if (e = strchr(s, ':')) + { + /* + * ok, so ~ won't work for the last list element + * we do it for MAGIC_FILES_ENV anyway + */ + + if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) + { + sfputr(mp->tmp, t, -1); + s += n - 1; + } + sfwrite(mp->tmp, s, e - s); + if (!(s = sfstruse(mp->tmp))) + goto nospace; + } + if (!*s || streq(s, "-")) + s = MAGIC_FILE; + if (!(fp = sfopen(NiL, s, "r"))) + { + if (list) + { + if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/')) + { + strcpy(mp->fbuf, s); + sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); + if (!(s = sfstruse(mp->tmp))) + goto nospace; + if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ))) + goto next; + } + if (!(fp = sfopen(NiL, t, "r"))) + goto next; + } + else + { + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); + return -1; + } + } + found = 1; + n = load(mp, s, fp); + sfclose(fp); + if (n && !list) + return -1; + next: + if (!e) + break; + s = e + 1; + } + if (!found) + { + if (mp->flags & MAGIC_VERBOSE) + { + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); + } + return -1; + } + return 0; + nospace: + if (mp->disc->errorf) + (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); + return -1; +} + +/* + * open a magic session + */ + +Magic_t* +magicopen(Magicdisc_t* disc) +{ + register Magic_t* mp; + register int i; + register int n; + register int f; + register int c; + register Vmalloc_t* vm; + 
unsigned char* map[CC_MAPS + 1]; + + if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) + return 0; + if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) + { + vmclose(vm); + return 0; + } + mp->id = lib; + mp->disc = disc; + mp->vm = vm; + mp->flags = disc->flags; + mp->redisc.re_version = REG_VERSION; + mp->redisc.re_flags = REG_NOFREE; + mp->redisc.re_errorf = (regerror_t)disc->errorf; + mp->redisc.re_resizef = (regresize_t)vmgetmem; + mp->redisc.re_resizehandle = (void*)mp->vm; + mp->dtdisc.key = offsetof(Info_t, name); + mp->dtdisc.link = offsetof(Info_t, link); + if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash))) + goto bad; + for (n = 0; n < elementsof(info); n++) + dtinsert(mp->infotab, &info[n]); + for (i = 0; i < CC_MAPS; i++) + map[i] = ccmap(i, CC_ASCII); + mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); + for (n = 0; n <= UCHAR_MAX; n++) + { + f = 0; + i = CC_MAPS; + while (--i >= 0) + { + c = ccmapchr(map[i], n); + f = (f << CC_BIT) | CCTYPE(c); + } + mp->cctype[n] = f; + } + return mp; + bad: + magicclose(mp); + return 0; +} + +/* + * close a magicopen() session + */ + +int +magicclose(register Magic_t* mp) +{ + if (!mp) + return -1; + if (mp->tmp) + sfstrclose(mp->tmp); + if (mp->vm) + vmclose(mp->vm); + return 0; +} + +/* + * return the magic string for file with optional stat info st + */ + +char* +magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) +{ + off_t off; + char* s; + + mp->flags = mp->disc->flags; + mp->mime = 0; + if (!st) + s = T("cannot stat"); + else + { + if (mp->fp = fp) + off = sfseek(mp->fp, (off_t)0, SEEK_CUR); + s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf)); + if (mp->fp) + sfseek(mp->fp, off, SEEK_SET); + if (!(mp->flags & MAGIC_MIME)) + { + if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) + sfprintf(mp->tmp, "%s ", T("short")); + sfprintf(mp->tmp, "%s", s); + if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) + sfprintf(mp->tmp, ", %s", 
S_ISDIR(st->st_mode) ? T("searchable") : T("executable")); + if (st->st_mode & S_ISUID) + sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); + if (st->st_mode & S_ISGID) + sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); + if (st->st_mode & S_ISVTX) + sfprintf(mp->tmp, ", sticky"); + if (!(s = sfstruse(mp->tmp))) + s = T("out of space"); + } + } + if (mp->flags & MAGIC_MIME) + s = mp->mime; + if (!s) + s = T("error"); + return s; +} + +/* + * list the magic table in mp on sp + */ + +int +magiclist(register Magic_t* mp, register Sfio_t* sp) +{ + register Entry_t* ep = mp->magic; + register Entry_t* rp = 0; + + mp->flags = mp->disc->flags; + sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); + while (ep) + { + sfprintf(sp, "%c %c\t", ep->cont, ep->nest); + if (ep->expr) + sfprintf(sp, "%s", ep->expr); + else + sfprintf(sp, "%ld", ep->offset); + sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask); + switch (ep->type) + { + case 'm': + case 's': + sfputr(sp, fmtesc(ep->value.str), -1); + break; + case 'V': + switch (ep->op) + { + case 'l': + sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); + break; + case 'v': + sfprintf(sp, "vcodex()"); + break; + default: + sfprintf(sp, "%p", ep->value.str); + break; + } + break; + default: + sfprintf(sp, "%lo", ep->value.num); + break; + } + sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); + if (ep->cont == '$' && !ep->value.lab->mask) + { + rp = ep; + ep = ep->value.lab; + } + else + { + if (ep->cont == ':') + { + ep = rp; + ep->value.lab->mask = 1; + } + ep = ep->next; + } + } + return 0; +} |