diff options
Diffstat (limited to 'dwarfdump2/uri.cc')
-rw-r--r-- | dwarfdump2/uri.cc | 477 |
1 files changed, 477 insertions, 0 deletions
diff --git a/dwarfdump2/uri.cc b/dwarfdump2/uri.cc new file mode 100644 index 0000000..545bad7 --- /dev/null +++ b/dwarfdump2/uri.cc @@ -0,0 +1,477 @@ +/* + Copyright 2011 David Anderson. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it would be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + Further, this software is distributed without any warranty that it is + free of the rightful claim of any third person regarding infringement + or the like. Any license provided herein, whether implied or + otherwise, applies only to this software file. Patent licenses, if + any, provided herein do not apply to combinations of this program with + other software, or any other product whatsoever. + + You should have received a copy of the GNU General Public License along + with this program; if not, write the Free Software Foundation, Inc., 51 + Franklin Street - Fifth Floor, Boston MA 02110-1301, USA. + + Contact information: Silicon Graphics, Inc., 1500 Crittenden Lane, + Mountain View, CA 94043, or: + + http://www.sgi.com + + For further information regarding this notice, see: + + http://oss.sgi.com/projects/GenInfo/NoticeExplan + +*/ + +/* The address of the Free Software Foundation is + Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + Boston, MA 02110-1301, USA. + SGI has moved from the Crittenden Lane address. +*/ + +#include "globals.h" +#include <iostream> //* for error and debug output. +#include <string> +#include "uri.h" +#include <stdio.h> +#include <ctype.h> +using std::string; + + +/* dwarfdump_ctype table. See dwarfdump/uritablebuild.c */ +static char dwarfdump_ctype_table[256] = { +0, /* NUL 0x00 */ +0, /* control 0x01 */ +0, /* control 0x02 */ +0, /* control 0x03 */ +0, /* control 0x04 */ +0, /* control 0x05 */ +0, /* control 0x06 */ +0, /* control 0x07 */ +0, /* control 0x08 */ +0, /* whitespace 0x09 */ +0, /* whitespace 0x0a */ +0, /* whitespace 0x0b */ +0, /* whitespace 0x0c */ +0, /* whitespace 0x0d */ +0, /* control 0x0e */ +0, /* control 0x0f */ +0, /* control 0x10 */ +0, /* control 0x11 */ +0, /* control 0x12 */ +0, /* control 0x13 */ +0, /* control 0x14 */ +0, /* control 0x15 */ +0, /* control 0x16 */ +0, /* control 0x17 */ +0, /* control 0x18 */ +0, /* control 0x19 */ +0, /* control 0x1a */ +0, /* control 0x1b */ +0, /* control 0x1c */ +0, /* control 0x1d */ +0, /* control 0x1e */ +0, /* control 0x1f */ +1, /* ' ' 0x20 */ +1, /* '!' 0x21 */ +0, /* '"' 0x22 */ +1, /* '#' 0x23 */ +1, /* '$' 0x24 */ +0, /* '%' 0x25 */ +1, /* '&' 0x26 */ +0, /* ''' 0x27 */ +1, /* '(' 0x28 */ +1, /* ')' 0x29 */ +1, /* '*' 0x2a */ +1, /* '+' 0x2b */ +1, /* ',' 0x2c */ +1, /* '-' 0x2d */ +1, /* '.' 0x2e */ +1, /* '/' 0x2f */ +1, /* '0' 0x30 */ +1, /* '1' 0x31 */ +1, /* '2' 0x32 */ +1, /* '3' 0x33 */ +1, /* '4' 0x34 */ +1, /* '5' 0x35 */ +1, /* '6' 0x36 */ +1, /* '7' 0x37 */ +1, /* '8' 0x38 */ +1, /* '9' 0x39 */ +1, /* ':' 0x3a */ +0, /* ';' 0x3b */ +1, /* '<' 0x3c */ +1, /* '=' 0x3d */ +1, /* '>' 0x3e */ +1, /* '?' 0x3f */ +1, /* '@' 0x40 */ +1, /* 'A' 0x41 */ +1, /* 'B' 0x42 */ +1, /* 'C' 0x43 */ +1, /* 'D' 0x44 */ +1, /* 'E' 0x45 */ +1, /* 'F' 0x46 */ +1, /* 'G' 0x47 */ +1, /* 'H' 0x48 */ +1, /* 'I' 0x49 */ +1, /* 'J' 0x4a */ +1, /* 'K' 0x4b */ +1, /* 'L' 0x4c */ +1, /* 'M' 0x4d */ +1, /* 'N' 0x4e */ +1, /* 'O' 0x4f */ +1, /* 'P' 0x50 */ +1, /* 'Q' 0x51 */ +1, /* 'R' 0x52 */ +1, /* 'S' 0x53 */ +1, /* 'T' 0x54 */ +1, /* 'U' 0x55 */ +1, /* 'V' 0x56 */ +1, /* 'W' 0x57 */ +1, /* 'X' 0x58 */ +1, /* 'Y' 0x59 */ +1, /* 'Z' 0x5a */ +1, /* '[' 0x5b */ +1, /* '\' 0x5c */ +1, /* ']' 0x5d */ +1, /* '^' 0x5e */ +1, /* '_' 0x5f */ +0, /* '`' 0x60 */ +1, /* 'a' 0x61 */ +1, /* 'b' 0x62 */ +1, /* 'c' 0x63 */ +1, /* 'd' 0x64 */ +1, /* 'e' 0x65 */ +1, /* 'f' 0x66 */ +1, /* 'g' 0x67 */ +1, /* 'h' 0x68 */ +1, /* 'i' 0x69 */ +1, /* 'j' 0x6a */ +1, /* 'k' 0x6b */ +1, /* 'l' 0x6c */ +1, /* 'm' 0x6d */ +1, /* 'n' 0x6e */ +1, /* 'o' 0x6f */ +1, /* 'p' 0x70 */ +1, /* 'q' 0x71 */ +1, /* 'r' 0x72 */ +1, /* 's' 0x73 */ +1, /* 't' 0x74 */ +1, /* 'u' 0x75 */ +1, /* 'v' 0x76 */ +1, /* 'w' 0x77 */ +1, /* 'x' 0x78 */ +1, /* 'y' 0x79 */ +1, /* 'z' 0x7a */ +1, /* '{' 0x7b */ +1, /* '|' 0x7c */ +1, /* '}' 0x7d */ +1, /* '~' 0x7e */ +0, /* DEL 0x7f */ +1, /* 0x80 */ +1, /* 0x81 */ +1, /* 0x82 */ +1, /* 0x83 */ +1, /* 0x84 */ +1, /* 0x85 */ +1, /* 0x86 */ +1, /* 0x87 */ +1, /* 0x88 */ +1, /* 0x89 */ +1, /* 0x8a */ +1, /* 0x8b */ +1, /* 0x8c */ +1, /* 0x8d */ +1, /* 0x8e */ +1, /* 0x8f */ +1, /* 0x90 */ +1, /* 0x91 */ +1, /* 0x92 */ +1, /* 0x93 */ +1, /* 0x94 */ +1, /* 0x95 */ +1, /* 0x96 */ +1, /* 0x97 */ +1, /* 0x98 */ +1, /* 0x99 */ +1, /* 0x9a */ +1, /* 0x9b */ +1, /* 0x9c */ +1, /* 0x9d */ +1, /* 0x9e */ +1, /* 0x9f */ +0, /* other: 0xa0 */ +1, /* 0xa1 */ +1, /* 0xa2 */ +1, /* 0xa3 */ +1, /* 0xa4 */ +1, /* 0xa5 */ +1, /* 0xa6 */ +1, /* 0xa7 */ +1, /* 0xa8 */ +1, /* 0xa9 */ +1, /* 0xaa */ +1, /* 0xab */ +1, /* 0xac */ +1, /* 0xad */ +1, /* 0xae */ +1, /* 0xaf */ +1, /* 0xb0 */ +1, /* 0xb1 */ +1, /* 0xb2 */ +1, /* 0xb3 */ +1, /* 0xb4 */ +1, /* 0xb5 */ +1, /* 0xb6 */ +1, /* 0xb7 */ +1, /* 0xb8 */ +1, /* 0xb9 */ +1, /* 0xba */ +1, /* 0xbb */ +1, /* 0xbc */ +1, /* 0xbd */ +1, /* 0xbe */ +1, /* 0xbf */ +1, /* 0xc0 */ +1, /* 0xc1 */ +1, /* 0xc2 */ +1, /* 0xc3 */ +1, /* 0xc4 */ +1, /* 0xc5 */ +1, /* 0xc6 */ +1, /* 0xc7 */ +1, /* 0xc8 */ +1, /* 0xc9 */ +1, /* 0xca */ +1, /* 0xcb */ +1, /* 0xcc */ +1, /* 0xcd */ +1, /* 0xce */ +1, /* 0xcf */ +1, /* 0xd0 */ +1, /* 0xd1 */ +1, /* 0xd2 */ +1, /* 0xd3 */ +1, /* 0xd4 */ +1, /* 0xd5 */ +1, /* 0xd6 */ +1, /* 0xd7 */ +1, /* 0xd8 */ +1, /* 0xd9 */ +1, /* 0xda */ +1, /* 0xdb */ +1, /* 0xdc */ +1, /* 0xdd */ +1, /* 0xde */ +1, /* 0xdf */ +1, /* 0xe0 */ +1, /* 0xe1 */ +1, /* 0xe2 */ +1, /* 0xe3 */ +1, /* 0xe4 */ +1, /* 0xe5 */ +1, /* 0xe6 */ +1, /* 0xe7 */ +1, /* 0xe8 */ +1, /* 0xe9 */ +1, /* 0xea */ +1, /* 0xeb */ +1, /* 0xec */ +1, /* 0xed */ +1, /* 0xee */ +1, /* 0xef */ +1, /* 0xf0 */ +1, /* 0xf1 */ +1, /* 0xf2 */ +1, /* 0xf3 */ +1, /* 0xf4 */ +1, /* 0xf5 */ +1, /* 0xf6 */ +1, /* 0xf7 */ +1, /* 0xf8 */ +1, /* 0xf9 */ +1, /* 0xfa */ +1, /* 0xfb */ +1, /* 0xfc */ +1, /* 0xfd */ +1, /* 0xfe */ +0, /* other: 0xff */ +}; +static char * +xchar(int c, char *buf, int size) +{ + snprintf(buf, size,"%%%02x",c); + return buf; +} + +/* Translate dangerous and some other characters to safe + %xx form. +*/ +void +translate_to_uri(const string &s, string &out) +{ + const char *cp = s.c_str(); + for( ; *cp; ++cp) { + int c = 0xff & (unsigned char)*cp; + if(dwarfdump_ctype_table[c]) { + out.push_back((char)c); + } else { + char buf[8]; + string b = xchar(c,buf,sizeof(buf)); + out.append(b); + } + } +} + +/* This is not very efficient, but it is seldom called. */ +static char +hexdig(char c) +{ + char ochar = 0; + if(c >= 0 && c <= '9') { + ochar = (c - '0'); + return ochar; + } + if(c >= 'a' && c <= 'f') { + ochar = (c - 'a')+10; + return ochar; + } + if(c >= 'A' && c <= 'F') { + ochar = (c - 'A')+10; + return ochar; + } + // We have an input botch here. + fprintf(stderr,"Translating from uri: " + "A supposed hexadecimal input character is " + "not 0-9 or a-f or A-F, it is (shown as hex here): %x\n",c); + return ochar; +} + +static char tohex(char c1, char c2) +{ + char out = (hexdig(c1) << 4) | hexdig(c2); + return out; +} + +static int +hexpairtochar(const char *cp, char *myochar) +{ + char ochar = 0; + int olen = 0; + char c = cp[0]; + if(c) { + char c2 = cp[1]; + if(c2) { + ochar = tohex(c,c2); + olen = 2; + } else { + std::cerr << "Translating from uri: " + "A supposed hexadecimal input character pair " + "runs off the end of the input after 1 hex digit."<< + std::endl; + /* botched input. */ + ochar = c; + olen = 1; + } + } else { + /* botched input. */ + std::cerr << "Translating from uri: " + "A supposed hexadecimal input character pair " + "runs off the end of the input." << std::endl; + ochar = '%'; + olen = 0; + } + *myochar = ochar; + return olen; +} +void +translate_from_uri(const std::string & input, string &out) +{ + const char *cp = input.c_str(); + for(; *cp; ++cp) { + char c = *cp; + if(c == '%') { + char c2 = cp[1]; + // hexpairtochar deals with c2 being NUL. + if ( c2 == '%') { + out.push_back(c); + ++cp; + continue; + } + int increment = hexpairtochar(cp+1,&c); + out.push_back(c); + cp += increment; + continue; + } + out.push_back(c); + } +} + +#ifdef TEST + +unsigned errcnt = 0; +static void +mytestfrom(const std::string & in,const std::string & expected,int testnum) +{ + string out; + translate_from_uri(in, out); + if(expected != out) { + printf(" Fail test %d expected \"%s\" got \"%s\"\n", + testnum,expected.c_str(),out.c_str()); + ++errcnt; + } +} + +static void +mytest(const std::string & in,const std::string & expected,int testnum) +{ + string out; + translate_to_uri(in.c_str(), out); + if(expected != out) { + printf(" Fail test %d expected \"%s\" got \"%s\"\n", + testnum,expected.c_str(),out.c_str()); + ++errcnt; + } +} + + +int +main() +{ + /* We no longer translate space to %20, that + turns out not to help all that much. */ + mytest("aaa","aaa",1); + mytest(" bc"," bc",2); + mytest(";bc","%3bbc",3); + mytest(" bc\n"," bc%0a",4); + mytest(";bc\n","%3bbc%0a",5); + mytest(" bc\r"," bc%0d",6); + mytest(";bc\r","%3bbc%0d",7); + mytest(" \x01"," %01",8); + mytest(";\x01","%3b%01",9); + mytestfrom("abc","abc",10); + mytestfrom("a%20bc","a bc",11); + mytestfrom("a%%20bc","a%20bc",12); + mytestfrom("a%%%20bc","a% bc",13); + mytestfrom("a%%%%20bc","a%%20bc",14); + mytestfrom("a%20","a ",15); + /* The following is mistaken input. */ + mytestfrom("a%2","a2",16); + mytestfrom("a%","a%",17); + mytest("%bc","%25bc",18); + + if(errcnt) { + printf("uri errcount ",errcnt); + } + return errcnt? 1:0; +} +#endif + |