1 files changed, 493 insertions, 0 deletions
diff --git a/dwarfdump/uri.c b/dwarfdump/uri.c
new file mode 100644
index 0000000..1353ab8
--- /dev/null
+++ b/dwarfdump/uri.c
@@ -0,0 +1,493 @@
+/* 
+    Copyright 2011 David Anderson. All rights reserved.
+
+    This program is free software; you can redistribute it and/or modify it
+    under the terms of version 2 of the GNU General Public License as
+    published by the Free Software Foundation.
+
+    This program is distributed in the hope that it would be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+    Further, this software is distributed without any warranty that it is
+    free of the rightful claim of any third person regarding infringement
+    or the like.  Any license provided herein, whether implied or
+    otherwise, applies only to this software file.  Patent licenses, if
+    any, provided herein do not apply to combinations of this program with
+    other software, or any other product whatsoever.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write the Free Software Foundation, Inc., 51
+    Franklin Street - Fifth Floor, Boston MA 02110-1301, USA.
+
+    Contact information:  Silicon Graphics, Inc., 1500 Crittenden Lane,
+    Mountain View, CA 94043, or:
+
+    http://www.sgi.com
+
+    For further information regarding this notice, see:
+
+    http://oss.sgi.com/projects/GenInfo/NoticeExplan
+
+*/
+
+/*  The address of the Free Software Foundation is
+    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, 
+    Boston, MA 02110-1301, USA.  
+    SGI has moved from the Crittenden Lane address.
+*/
+
+#include "globals.h"
+#include "esb.h"
+#include "uri.h"
+#include <stdio.h>
+#include <ctype.h>
+
+/* dwarfdump_ctype table. See uritablebuild.c */
+static char dwarfdump_ctype_table[256] = { 
+0, /* NUL 0x00 */
+0, /* control 0x01 */
+0, /* control 0x02 */
+0, /* control 0x03 */
+0, /* control 0x04 */
+0, /* control 0x05 */
+0, /* control 0x06 */
+0, /* control 0x07 */
+0, /* control 0x08 */
+0, /* whitespace 0x09 */
+0, /* whitespace 0x0a */
+0, /* whitespace 0x0b */
+0, /* whitespace 0x0c */
+0, /* whitespace 0x0d */
+0, /* control 0x0e */
+0, /* control 0x0f */
+0, /* control 0x10 */
+0, /* control 0x11 */
+0, /* control 0x12 */
+0, /* control 0x13 */
+0, /* control 0x14 */
+0, /* control 0x15 */
+0, /* control 0x16 */
+0, /* control 0x17 */
+0, /* control 0x18 */
+0, /* control 0x19 */
+0, /* control 0x1a */
+0, /* control 0x1b */
+0, /* control 0x1c */
+0, /* control 0x1d */
+0, /* control 0x1e */
+0, /* control 0x1f */
+1, /* ' ' 0x20 */
+1, /* '!' 0x21 */
+0, /* '"' 0x22 */
+1, /* '#' 0x23 */
+1, /* '$' 0x24 */
+0, /* '%' 0x25 */
+1, /* '&' 0x26 */
+0, /* ''' 0x27 */
+1, /* '(' 0x28 */
+1, /* ')' 0x29 */
+1, /* '*' 0x2a */
+1, /* '+' 0x2b */
+1, /* ',' 0x2c */
+1, /* '-' 0x2d */
+1, /* '.' 0x2e */
+1, /* '/' 0x2f */
+1, /* '0' 0x30 */
+1, /* '1' 0x31 */
+1, /* '2' 0x32 */
+1, /* '3' 0x33 */
+1, /* '4' 0x34 */
+1, /* '5' 0x35 */
+1, /* '6' 0x36 */
+1, /* '7' 0x37 */
+1, /* '8' 0x38 */
+1, /* '9' 0x39 */
+1, /* ':' 0x3a */
+0, /* ';' 0x3b */
+1, /* '<' 0x3c */
+1, /* '=' 0x3d */
+1, /* '>' 0x3e */
+1, /* '?' 0x3f */
+1, /* '@' 0x40 */
+1, /* 'A' 0x41 */
+1, /* 'B' 0x42 */
+1, /* 'C' 0x43 */
+1, /* 'D' 0x44 */
+1, /* 'E' 0x45 */
+1, /* 'F' 0x46 */
+1, /* 'G' 0x47 */
+1, /* 'H' 0x48 */
+1, /* 'I' 0x49 */
+1, /* 'J' 0x4a */
+1, /* 'K' 0x4b */
+1, /* 'L' 0x4c */
+1, /* 'M' 0x4d */
+1, /* 'N' 0x4e */
+1, /* 'O' 0x4f */
+1, /* 'P' 0x50 */
+1, /* 'Q' 0x51 */
+1, /* 'R' 0x52 */
+1, /* 'S' 0x53 */
+1, /* 'T' 0x54 */
+1, /* 'U' 0x55 */
+1, /* 'V' 0x56 */
+1, /* 'W' 0x57 */
+1, /* 'X' 0x58 */
+1, /* 'Y' 0x59 */
+1, /* 'Z' 0x5a */
+1, /* '[' 0x5b */
+1, /* '\' 0x5c */
+1, /* ']' 0x5d */
+1, /* '^' 0x5e */
+1, /* '_' 0x5f */
+0, /* '`' 0x60 */
+1, /* 'a' 0x61 */
+1, /* 'b' 0x62 */
+1, /* 'c' 0x63 */
+1, /* 'd' 0x64 */
+1, /* 'e' 0x65 */
+1, /* 'f' 0x66 */
+1, /* 'g' 0x67 */
+1, /* 'h' 0x68 */
+1, /* 'i' 0x69 */
+1, /* 'j' 0x6a */
+1, /* 'k' 0x6b */
+1, /* 'l' 0x6c */
+1, /* 'm' 0x6d */
+1, /* 'n' 0x6e */
+1, /* 'o' 0x6f */
+1, /* 'p' 0x70 */
+1, /* 'q' 0x71 */
+1, /* 'r' 0x72 */
+1, /* 's' 0x73 */
+1, /* 't' 0x74 */
+1, /* 'u' 0x75 */
+1, /* 'v' 0x76 */
+1, /* 'w' 0x77 */
+1, /* 'x' 0x78 */
+1, /* 'y' 0x79 */
+1, /* 'z' 0x7a */
+1, /* '{' 0x7b */
+1, /* '|' 0x7c */
+1, /* '}' 0x7d */
+1, /* '~' 0x7e */
+0, /* DEL 0x7f */
+1, /* 0x80 */
+1, /* 0x81 */
+1, /* 0x82 */
+1, /* 0x83 */
+1, /* 0x84 */
+1, /* 0x85 */
+1, /* 0x86 */
+1, /* 0x87 */
+1, /* 0x88 */
+1, /* 0x89 */
+1, /* 0x8a */
+1, /* 0x8b */
+1, /* 0x8c */
+1, /* 0x8d */
+1, /* 0x8e */
+1, /* 0x8f */
+1, /* 0x90 */
+1, /* 0x91 */
+1, /* 0x92 */
+1, /* 0x93 */
+1, /* 0x94 */
+1, /* 0x95 */
+1, /* 0x96 */
+1, /* 0x97 */
+1, /* 0x98 */
+1, /* 0x99 */
+1, /* 0x9a */
+1, /* 0x9b */
+1, /* 0x9c */
+1, /* 0x9d */
+1, /* 0x9e */
+1, /* 0x9f */
+0, /* other: 0xa0 */
+1, /* 0xa1 */
+1, /* 0xa2 */
+1, /* 0xa3 */
+1, /* 0xa4 */
+1, /* 0xa5 */
+1, /* 0xa6 */
+1, /* 0xa7 */
+1, /* 0xa8 */
+1, /* 0xa9 */
+1, /* 0xaa */
+1, /* 0xab */
+1, /* 0xac */
+1, /* 0xad */
+1, /* 0xae */
+1, /* 0xaf */
+1, /* 0xb0 */
+1, /* 0xb1 */
+1, /* 0xb2 */
+1, /* 0xb3 */
+1, /* 0xb4 */
+1, /* 0xb5 */
+1, /* 0xb6 */
+1, /* 0xb7 */
+1, /* 0xb8 */
+1, /* 0xb9 */
+1, /* 0xba */
+1, /* 0xbb */
+1, /* 0xbc */
+1, /* 0xbd */
+1, /* 0xbe */
+1, /* 0xbf */
+1, /* 0xc0 */
+1, /* 0xc1 */
+1, /* 0xc2 */
+1, /* 0xc3 */
+1, /* 0xc4 */
+1, /* 0xc5 */
+1, /* 0xc6 */
+1, /* 0xc7 */
+1, /* 0xc8 */
+1, /* 0xc9 */
+1, /* 0xca */
+1, /* 0xcb */
+1, /* 0xcc */
+1, /* 0xcd */
+1, /* 0xce */
+1, /* 0xcf */
+1, /* 0xd0 */
+1, /* 0xd1 */
+1, /* 0xd2 */
+1, /* 0xd3 */
+1, /* 0xd4 */
+1, /* 0xd5 */
+1, /* 0xd6 */
+1, /* 0xd7 */
+1, /* 0xd8 */
+1, /* 0xd9 */
+1, /* 0xda */
+1, /* 0xdb */
+1, /* 0xdc */
+1, /* 0xdd */
+1, /* 0xde */
+1, /* 0xdf */
+1, /* 0xe0 */
+1, /* 0xe1 */
+1, /* 0xe2 */
+1, /* 0xe3 */
+1, /* 0xe4 */
+1, /* 0xe5 */
+1, /* 0xe6 */
+1, /* 0xe7 */
+1, /* 0xe8 */
+1, /* 0xe9 */
+1, /* 0xea */
+1, /* 0xeb */
+1, /* 0xec */
+1, /* 0xed */
+1, /* 0xee */
+1, /* 0xef */
+1, /* 0xf0 */
+1, /* 0xf1 */
+1, /* 0xf2 */
+1, /* 0xf3 */
+1, /* 0xf4 */
+1, /* 0xf5 */
+1, /* 0xf6 */
+1, /* 0xf7 */
+1, /* 0xf8 */
+1, /* 0xf9 */
+1, /* 0xfa */
+1, /* 0xfb */
+1, /* 0xfc */
+1, /* 0xfd */
+1, /* 0xfe */
+0, /* other: 0xff */
+};
+static char *
+xchar(int c, char *buf, int size)
+{
+    snprintf(buf, size,"%%%02x",c);
+    return buf;
+}
+
+/* Translate dangerous and some other characters to safe 
+   %xx form.
+*/
+void
+translate_to_uri(const char * filename, struct esb_s *out)
+{
+    char buf[8];
+    const char *cp = 0;
+    for(cp = filename  ; *cp; ++cp) {
+        char v[2];
+        int c = 0xff & (unsigned char)*cp;
+        if(dwarfdump_ctype_table[c]) {
+            v[0] = c;
+            v[1] = 0;
+            esb_append(out,v);
+        } else {
+            char *b = xchar(c,buf,sizeof(buf));
+            esb_append(out,b);
+        }
+    }
+}
+
+/* This is not very efficient, but it is seldom called. */
+static char
+hexdig(char c)
+{
+    char ochar = 0;
+    if(c >= 0 && c <= '9') {
+        ochar = (c - '0');
+        return ochar;
+    }
+    if(c >= 'a' && c <= 'f') {
+        ochar = (c - 'a')+10;
+        return ochar;
+    }
+    if(c >= 'A' && c <= 'F') {
+        ochar = (c - 'A')+10;
+        return ochar;
+    }
+    // We have an input botch here.
+    fprintf(stderr,"Translating from uri: "
+        "A supposed hexadecimal input character is "
+        "not 0-9 or a-f or A-F, it is (shown as hex here): %x\n",c);
+    return ochar;
+}
+
+static char tohex(char c1, char c2)
+{
+    char out = (hexdig(c1) << 4) | hexdig(c2);
+    return out;
+}
+static int
+hexpairtochar(const char *cp, char*myochar)
+{
+    char ochar = 0;
+    int olen = 0;
+    char c = cp[0];
+    if(c) {
+        char c2 = cp[1];
+        if(c2) {
+            ochar = tohex(c,c2);
+            olen = 2;
+        } else {
+            fprintf(stderr,"Translating from uri: "
+                "A supposed hexadecimal input character pair "
+                "runs off the end of the input after 1 hex digit.\n");
+            /* botched input. */
+            ochar = c;
+            olen = 1;
+        }
+    } else {
+        /* botched input. */
+        fprintf(stderr,"Translating from uri: "
+            "A supposed hexadecimal input character pair "
+            "runs off the end of the input.\n");
+        ochar = '%';
+        olen = 0;
+    }
+    *myochar = ochar;
+    return olen;
+}
+
+void
+translate_from_uri(const char * input, struct esb_s* out)
+{
+    const char *cp = input;
+    char tempstr[2];
+    for(; *cp; ++cp) {
+        char c = *cp;
+        if(c == '%') {
+            int increment = 0;
+            char c2 = cp[1];
+            // hexpairtochar deals with c2 being NUL.
+            if ( c2  == '%') {
+                tempstr[0] = c;
+                tempstr[1] = 0;
+                esb_append(out,tempstr);
+                ++cp;
+                continue;
+            }
+           
+            increment = hexpairtochar(cp+1,&c);
+            tempstr[0] = c;
+            tempstr[1] = 0;
+            esb_append(out,tempstr);
+            cp +=increment;
+            continue;
+        }
+        tempstr[0] = c;
+        tempstr[1] = 0;
+        esb_append(out,tempstr);
+    }
+}
+
+
+
+
+#ifdef TEST
+
+unsigned errcnt = 0;
+
+static void
+mytestfrom(const char * in,const char *expected,int testnum)
+{
+    struct esb_s out;
+    esb_constructor(&out);
+    translate_from_uri(in, &out);
+    if(strcmp(expected, esb_get_string(&out))) {
+        printf(" Fail test %d expected \"%s\" got \"%s\"\n",
+            testnum,expected,esb_get_string(&out));
+        ++errcnt;
+    }
+    esb_destructor(&out);
+}
+
+
+static void
+mytest(char *in,char *expected,int testnum)
+{
+    struct esb_s out;
+    esb_constructor(&out);
+    translate_to_uri(in, &out);
+    if(strcmp(expected, esb_get_string(&out))) {
+        printf(" Fail test %d expected %s got %s\n",testnum,expected,esb_get_string(&out));
+        ++errcnt;
+    }
+    esb_destructor(&out);
+}
+
+
+int 
+main()
+{
+    /* We no longer translate space to %20, that
+    turns out not to help all that much. */
+    mytest("aaa","aaa",1);
+    mytest(" bc"," bc",2);
+    mytest(";bc","%3bbc",3);
+    mytest(" bc\n"," bc%0a",4);
+    mytest(";bc\n","%3bbc%0a",5);
+    mytest(" bc\r"," bc%0d",6);
+    mytest(";bc\r","%3bbc%0d",7);
+    mytest(" \x01"," %01",8);
+    mytest(";\x01","%3b%01",9);
+    mytestfrom("abc","abc",10);
+    mytestfrom("a%20bc","a bc",11);
+    mytestfrom("a%%20bc","a%20bc",12);
+    mytestfrom("a%%%20bc","a% bc",13);
+    mytestfrom("a%%%%20bc","a%%20bc",14);
+    mytestfrom("a%20","a ",15);
+    /* The following is mistaken input. */
+    mytestfrom("a%2","a2",16);
+    mytestfrom("a%","a%",17);
+    mytest("%bc","%25bc",18);
+
+    if(errcnt) {
+        printf("uri errcount ",errcnt);
+    }
+    return errcnt? 1:0;
+}
+#endif
+