summaryrefslogtreecommitdiff
path: root/dwarfdump2/uri.cc
diff options
context:
space:
mode:
Diffstat (limited to 'dwarfdump2/uri.cc')
-rw-r--r--dwarfdump2/uri.cc477
1 files changed, 477 insertions, 0 deletions
diff --git a/dwarfdump2/uri.cc b/dwarfdump2/uri.cc
new file mode 100644
index 0000000..545bad7
--- /dev/null
+++ b/dwarfdump2/uri.cc
@@ -0,0 +1,477 @@
+/*
+ Copyright 2011 David Anderson. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of version 2 of the GNU General Public License as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it would be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ Further, this software is distributed without any warranty that it is
+ free of the rightful claim of any third person regarding infringement
+ or the like. Any license provided herein, whether implied or
+ otherwise, applies only to this software file. Patent licenses, if
+ any, provided herein do not apply to combinations of this program with
+ other software, or any other product whatsoever.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write the Free Software Foundation, Inc., 51
+ Franklin Street - Fifth Floor, Boston MA 02110-1301, USA.
+
+ Contact information: Silicon Graphics, Inc., 1500 Crittenden Lane,
+ Mountain View, CA 94043, or:
+
+ http://www.sgi.com
+
+ For further information regarding this notice, see:
+
+ http://oss.sgi.com/projects/GenInfo/NoticeExplan
+
+*/
+
+/* The address of the Free Software Foundation is
+ Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+ SGI has moved from the Crittenden Lane address.
+*/
+
+#include "globals.h"
+#include <iostream> //* for error and debug output.
+#include <string>
+#include "uri.h"
+#include <stdio.h>
+#include <ctype.h>
+using std::string;
+
+
+/*  dwarfdump_ctype table.  See dwarfdump/uritablebuild.c
+    (presumably the program that generated these values;
+    confirm before editing the table by hand).
+
+    Indexed by an unsigned byte value, 0 through 255.
+    translate_to_uri() copies a byte through unchanged when
+    its entry is nonzero and emits it in %xx form when
+    its entry is zero.  Nothing writes to the table,
+    so it is const.
+
+    The zero (escaped) entries are: 0x00-0x1f, '"' 0x22,
+    '%' 0x25, ''' 0x27, ';' 0x3b, '`' 0x60, DEL 0x7f,
+    0xa0 and 0xff.  Every other byte, space included,
+    is nonzero. */
+static const char dwarfdump_ctype_table[256] = {
+    /* 0x00-0x0f: NUL, control and whitespace, all escaped */
+    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* 0x10-0x1f: control, all escaped */
+    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* 0x20-0x2f: space and punctuation; 0x22 0x25 0x27 escaped */
+    1,1,0,1, 1,0,1,0, 1,1,1,1, 1,1,1,1,
+    /* 0x30-0x3f: digits and punctuation; ';' 0x3b escaped */
+    1,1,1,1, 1,1,1,1, 1,1,1,0, 1,1,1,1,
+    /* 0x40-0x4f: '@' and 'A' through 'O' */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0x50-0x5f: 'P' through 'Z' and punctuation */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0x60-0x6f: '`' 0x60 escaped, then 'a' through 'o' */
+    0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0x70-0x7f: 'p' through 'z', punctuation; DEL 0x7f escaped */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0,
+    /* 0x80-0x8f */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0x90-0x9f */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0xa0-0xaf: 0xa0 escaped */
+    0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0xb0-0xbf */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0xc0-0xcf */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0xd0-0xdf */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0xe0-0xef */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 0xf0-0xff: 0xff escaped */
+    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0,
+};
+/*  Format the value c into buf as a '%' followed by at
+    least two lowercase hex digits, for example "%3b".
+    At most size bytes are written, terminating NUL
+    included.  Returns buf so the call can be used
+    directly as an expression. */
+static char *
+xchar(int c, char *buf, int size)
+{
+    /*  "%%" produces the literal percent sign,
+        "%02x" the zero-padded hex value. */
+    (void)snprintf(buf, size, "%%%02x", c);
+    return buf;
+}
+
+/*  Translate dangerous and some other characters to safe
+    %xx form.
+
+    Each byte of s, up to but not including the first NUL,
+    is looked up in dwarfdump_ctype_table.  A byte with a
+    nonzero entry is appended to out unchanged; any other
+    byte is appended as a %xx escape built by xchar().
+    out is appended to, it is not cleared first. */
+void
+translate_to_uri(const string &s, string &out)
+{
+    const char *text = s.c_str();
+    while (*text) {
+        // Force the value into 0-255 so that a negative
+        // char cannot index before the start of the table.
+        const int byte = 0xff & (unsigned char)*text;
+        ++text;
+        if (!dwarfdump_ctype_table[byte]) {
+            char escaped[8];
+            out.append(xchar(byte, escaped, sizeof(escaped)));
+            continue;
+        }
+        out.push_back((char)byte);
+    }
+}
+
+/*  Convert one hexadecimal digit character (0-9, a-f or
+    A-F) to its numeric value, 0 through 15.
+    Any other character is an input botch: a diagnostic
+    is written to stderr and 0 is returned.
+    This is not very efficient, but it is seldom called. */
+static char
+hexdig(char c)
+{
+    // The lower bound has to be the character '0', not the
+    // number 0.  With a bound of 0 every byte below '0'
+    // (space, '%', '/' and so on) is taken for a digit
+    // and produces a negative value.
+    if(c >= '0' && c <= '9') {
+        return (c - '0');
+    }
+    if(c >= 'a' && c <= 'f') {
+        return (c - 'a')+10;
+    }
+    if(c >= 'A' && c <= 'F') {
+        return (c - 'A')+10;
+    }
+    // We have an input botch here.
+    // Print the byte as an unsigned value so that a char
+    // with the high bit set is not sign-extended into
+    // something like ffffffe9.
+    fprintf(stderr,"Translating from uri: "
+        "A supposed hexadecimal input character is "
+        "not 0-9 or a-f or A-F, it is (shown as hex here): %x\n",
+        (unsigned)(unsigned char)c);
+    return 0;
+}
+
+/*  Combine two hex digit characters into one byte:
+    c1 supplies the high four bits, c2 the low four.
+    Each digit is converted by hexdig(). */
+static char tohex(char c1, char c2)
+{
+    const char high = hexdig(c1);
+    const char low = hexdig(c2);
+    return (high << 4) | low;
+}
+
+/*  Decode the pair of hex digits expected at cp (the text
+    just after a '%') into one character, stored through
+    myochar.  Returns how many input characters were used:
+      2  both characters present; *myochar is tohex()
+         of the pair.
+      1  the input ends after one character; that
+         character is stored as-is and a message is
+         written to std::cerr.
+      0  the input ends immediately; '%' is stored and
+         a message is written to std::cerr. */
+static int
+hexpairtochar(const char *cp, char *myochar)
+{
+    const char first = cp[0];
+    if(!first) {
+        /* botched input. */
+        std::cerr << "Translating from uri: "
+            "A supposed hexadecimal input character pair "
+            "runs off the end of the input." << std::endl;
+        *myochar = '%';
+        return 0;
+    }
+
+    const char second = cp[1];
+    if(!second) {
+        /* botched input. */
+        std::cerr << "Translating from uri: "
+            "A supposed hexadecimal input character pair "
+            "runs off the end of the input after 1 hex digit."<<
+            std::endl;
+        *myochar = first;
+        return 1;
+    }
+
+    *myochar = tohex(first,second);
+    return 2;
+}
+/*  Reverse of translate_to_uri(): append to out the text
+    of input with its escapes decoded.
+      "%%"  becomes a single '%'.
+      "%xx" becomes the character hexpairtochar() decodes
+            from the two characters after the '%'.
+    Every other character is copied through unchanged.
+    Scanning stops at the first NUL in input.  out is
+    appended to, it is not cleared first. */
+void
+translate_from_uri(const std::string & input, string &out)
+{
+    const char *cur = input.c_str();
+    while (*cur) {
+        char ch = *cur;
+        if(ch != '%') {
+            out.push_back(ch);
+            ++cur;
+            continue;
+        }
+        if(cur[1] == '%') {
+            // A doubled percent sign stands for one literal '%'.
+            out.push_back(ch);
+            cur += 2;
+            continue;
+        }
+        // hexpairtochar deals with the input ending early
+        // and reports how many characters it used.
+        const int used = hexpairtochar(cur+1,&ch);
+        out.push_back(ch);
+        // Step over the '%' itself plus whatever was used.
+        cur += used + 1;
+    }
+}
+
+#ifdef TEST
+
+/*  Number of self-test failures seen so far. */
+unsigned errcnt = 0;
+
+/*  Run translate_from_uri() on in and compare the result
+    with expected.  On a mismatch, report test number
+    testnum on stdout and increment errcnt. */
+static void
+mytestfrom(const std::string & in,const std::string & expected,int testnum)
+{
+    string decoded;
+    translate_from_uri(in, decoded);
+    if(decoded == expected) {
+        return;
+    }
+    printf(" Fail test %d expected \"%s\" got \"%s\"\n",
+        testnum,expected.c_str(),decoded.c_str());
+    ++errcnt;
+}
+
+/*  Run translate_to_uri() on in and compare the result
+    with expected.  On a mismatch, report test number
+    testnum on stdout and increment errcnt. */
+static void
+mytest(const std::string & in,const std::string & expected,int testnum)
+{
+    string encoded;
+    translate_to_uri(in.c_str(), encoded);
+    if(encoded == expected) {
+        return;
+    }
+    printf(" Fail test %d expected \"%s\" got \"%s\"\n",
+        testnum,expected.c_str(),encoded.c_str());
+    ++errcnt;
+}
+
+
+/*  Self-test driver, compiled only when TEST is defined.
+    Tests 1-9 and 18 check translate_to_uri() through
+    mytest(); tests 10-17 check translate_from_uri()
+    through mytestfrom().
+    Returns 1 if any test failed, otherwise 0. */
+int
+main()
+{
+    /*  We no longer translate space to %20, that
+        turns out not to help all that much. */
+    mytest("aaa","aaa",1);
+    mytest(" bc"," bc",2);
+    mytest(";bc","%3bbc",3);
+    mytest(" bc\n"," bc%0a",4);
+    mytest(";bc\n","%3bbc%0a",5);
+    mytest(" bc\r"," bc%0d",6);
+    mytest(";bc\r","%3bbc%0d",7);
+    mytest(" \x01"," %01",8);
+    mytest(";\x01","%3b%01",9);
+    mytestfrom("abc","abc",10);
+    mytestfrom("a%20bc","a bc",11);
+    mytestfrom("a%%20bc","a%20bc",12);
+    mytestfrom("a%%%20bc","a% bc",13);
+    mytestfrom("a%%%%20bc","a%%20bc",14);
+    mytestfrom("a%20","a ",15);
+    /* The following is mistaken input. */
+    mytestfrom("a%2","a2",16);
+    mytestfrom("a%","a%",17);
+    mytest("%bc","%25bc",18);
+
+    if(errcnt) {
+        // The %u conversion is what makes the count show
+        // up in the output; errcnt is unsigned.
+        printf("uri errcount %u\n",errcnt);
+    }
+    return errcnt? 1:0;
+}
+#endif
+