diff options
author | joerg <joerg@pkgsrc.org> | 2008-04-24 07:55:00 +0000 |
---|---|---|
committer | joerg <joerg@pkgsrc.org> | 2008-04-24 07:55:00 +0000 |
commit | bda08cd4028e9af99ce4849b9e90b1c31a7746fa (patch) | |
tree | 050ca03120365a9d7c33147ae52ca484180708fd /net/libfetch/files | |
parent | f8f9e4deb621a162171b0ea11fe9d79f1d33495b (diff) | |
download | pkgsrc-bda08cd4028e9af99ce4849b9e90b1c31a7746fa.tar.gz |
libfetch-2.4.10:
Start URL quoting cleanup. All URLs are now quoted correctly on parsing
and when appending URLs. URLs that have no scheme and start with a slash
are treated as file:// URLs.
Diffstat (limited to 'net/libfetch/files')
-rw-r--r-- | net/libfetch/files/common.c | 50 | ||||
-rw-r--r-- | net/libfetch/files/common.h | 5 | ||||
-rw-r--r-- | net/libfetch/files/fetch.c | 154 | ||||
-rw-r--r-- | net/libfetch/files/file.c | 14 | ||||
-rw-r--r-- | net/libfetch/files/ftp.c | 4 | ||||
-rw-r--r-- | net/libfetch/files/http.c | 4 |
6 files changed, 160 insertions, 71 deletions
diff --git a/net/libfetch/files/common.c b/net/libfetch/files/common.c index 15166d39602..1403d6ddff6 100644 --- a/net/libfetch/files/common.c +++ b/net/libfetch/files/common.c @@ -1,4 +1,4 @@ -/* $NetBSD: common.c,v 1.9 2008/04/21 21:15:53 joerg Exp $ */ +/* $NetBSD: common.c,v 1.10 2008/04/24 07:55:00 joerg Exp $ */ /*- * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org> @@ -620,18 +620,33 @@ fetch_close(conn_t *conn) /*** Directory-related utility functions *************************************/ int -fetch_add_entry(struct url_list *ue, struct url *base, const char *name) +fetch_add_entry(struct url_list *ue, struct url *base, const char *name, + int pre_quoted) { struct url *tmp; char *tmp_name; - size_t base_doc_len, name_len; + size_t base_doc_len, name_len, i; + unsigned char c; if (strchr(name, '/') != NULL || strcmp(name, "..") == 0 || strcmp(name, ".") == 0) return 0; - base_doc_len = strlen(base->doc); + if (strcmp(base->doc, "/") == 0) + base_doc_len = 0; + else + base_doc_len = strlen(base->doc); + + name_len = 1; + for (i = 0; name[i] != '\0'; ++i) { + if ((!pre_quoted && name[i] == '%') || + !fetch_urlpath_safe(name[i])) + name_len += 3; + else + ++name_len; + } + name_len = strlen(name); tmp_name = malloc( base_doc_len + name_len + 2); if (tmp_name == NULL) { @@ -658,11 +673,30 @@ fetch_add_entry(struct url_list *ue, struct url *base, const char *name) strcpy(tmp->pwd, base->pwd); strcpy(tmp->host, base->host); tmp->port = base->port; - memcpy(tmp_name, base->doc, base_doc_len); - tmp_name[base_doc_len] = '/'; - memcpy(tmp_name + base_doc_len + 1, name, name_len); - tmp_name[base_doc_len + name_len + 1] = '\0'; tmp->doc = tmp_name; + memcpy(tmp->doc, base->doc, base_doc_len); + tmp->doc[base_doc_len] = '/'; + + for (i = base_doc_len + 1; *name != '\0'; ++name) { + if ((!pre_quoted && *name == '%') || + !fetch_urlpath_safe(*name)) { + tmp->doc[i++] = '%'; + c = (unsigned char)*name / 16; + 
if (c < 160) + tmp->doc[i++] = '0' + c; + else + tmp->doc[i++] = 'a' - 10 + c; + c = (unsigned char)*name % 16; + if (c < 16) + tmp->doc[i++] = '0' + c; + else + tmp->doc[i++] = 'a' - 10 + c; + } else { + tmp->doc[i++] = *name; + } + } + tmp->doc[i] = '\0'; + tmp->offset = 0; tmp->length = 0; diff --git a/net/libfetch/files/common.h b/net/libfetch/files/common.h index f19230674a5..49890ed29a6 100644 --- a/net/libfetch/files/common.h +++ b/net/libfetch/files/common.h @@ -1,4 +1,4 @@ -/* $NetBSD: common.h,v 1.6 2008/04/19 14:49:23 joerg Exp $ */ +/* $NetBSD: common.h,v 1.7 2008/04/24 07:55:00 joerg Exp $ */ /*- * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav * All rights reserved. @@ -93,9 +93,10 @@ ssize_t fetch_write(conn_t *, const char *, size_t); ssize_t fetch_writev(conn_t *, struct iovec *, int); int fetch_putln(conn_t *, const char *, size_t); int fetch_close(conn_t *); -int fetch_add_entry(struct url_list *, struct url *, const char *); +int fetch_add_entry(struct url_list *, struct url *, const char *, int); int fetch_netrc_auth(struct url *url); int fetch_no_proxy_match(const char *); +int fetch_urlpath_safe(char); #define ftp_seterr(n) fetch_seterr(ftp_errlist, n) #define http_seterr(n) fetch_seterr(http_errlist, n) diff --git a/net/libfetch/files/fetch.c b/net/libfetch/files/fetch.c index 4e89429dac1..a9e0fddea8e 100644 --- a/net/libfetch/files/fetch.c +++ b/net/libfetch/files/fetch.c @@ -1,4 +1,4 @@ -/* $NetBSD: fetch.c,v 1.6 2008/04/21 17:15:31 joerg Exp $ */ +/* $NetBSD: fetch.c,v 1.7 2008/04/24 07:55:00 joerg Exp $ */ /*- * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav * All rights reserved. 
@@ -296,6 +296,40 @@ fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, return (u); } +int +fetch_urlpath_safe(char x) +{ + switch (x) { + case 'a'...'z': + case 'A'...'Z': + case '0'...'9': + case '$': + case '-': + case '_': + case '.': + case '+': + case '!': + case '*': + case '\'': + case '(': + case ')': + case ',': + /* The following are allowed in segment and path components: */ + case '?': + case ':': + case '@': + case '&': + case '=': + case '/': + case ';': + /* If something is already quoted... */ + case '%': + return 1; + default: + return 0; + } +} + /* * Copy an existing URL. */ @@ -329,10 +363,9 @@ fetchCopyURL(const struct url *src) struct url * fetchParseURL(const char *URL) { - char *doc; const char *p, *q; struct url *u; - int i; + size_t i, count; /* allocate struct url */ if ((u = calloc(1, sizeof(*u))) == NULL) { @@ -340,43 +373,76 @@ fetchParseURL(const char *URL) return (NULL); } - /* scheme name */ - if ((p = strstr(URL, ":/"))) { - snprintf(u->scheme, URL_SCHEMELEN+1, - "%.*s", (int)(p - URL), URL); - URL = ++p; - /* - * Only one slash: no host, leave slash as part of document - * Two slashes: host follows, strip slashes - */ - if (URL[1] == '/') - URL = (p += 2); - } else { + if (*URL == '/') { + strcpy(u->scheme, SCHEME_FILE); p = URL; + goto quote_doc; + } + if (strncmp(URL, "file:", 5) == 0) { + strcpy(u->scheme, SCHEME_FILE); + URL += 5; + if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') { + url_seterr(URL_MALFORMED); + goto ouch; + } + p = URL + 2; + goto quote_doc; } - if (!*URL || *URL == '/' || *URL == '.' 
|| - (u->scheme[0] == '\0' && - strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) - goto nohost; + if (strncmp(URL, "http:", 5) == 0 || + strncmp(URL, "https:", 6) == 0) { + if (URL[4] == ':') { + strcpy(u->scheme, SCHEME_HTTP); + URL += 5; + } else { + strcpy(u->scheme, SCHEME_HTTPS); + URL += 6; + } + if (URL[0] != '/' || URL[1] != '/') { + url_seterr(URL_MALFORMED); + goto ouch; + } + URL += 2; + p = URL; + goto find_hostname; + } + if (strncmp(URL, "ftp:", 4) == 0) { + strcpy(u->scheme, SCHEME_FTP); + URL += 4; + if (URL[0] != '/' || URL[1] != '/') { + url_seterr(URL_MALFORMED); + goto ouch; + } + URL += 2; + p = URL; + goto find_user; + } + + url_seterr(URL_BAD_SCHEME); + goto ouch; + +find_user: p = strpbrk(URL, "/@"); - if (p && *p == '@') { + if (p != NULL && *p == '@') { /* username */ - for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) + for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) { if (i < URL_USERLEN) u->user[i++] = *q; + } /* password */ - if (*q == ':') + if (*q == ':') { for (q++, i = 0; (*q != ':') && (*q != '@'); q++) if (i < URL_PWDLEN) u->pwd[i++] = *q; + } p++; } else { p = URL; } +find_hostname: /* hostname */ #ifdef INET6 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && @@ -404,36 +470,34 @@ fetchParseURL(const char *URL) p = q; } -nohost: /* document */ if (!*p) p = "/"; - if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || - strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { - const char hexnums[] = "0123456789abcdef"; - - /* percent-escape whitespace. */ - if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { - fetch_syserr(); - goto ouch; - } - u->doc = doc; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) { - *doc++ = *p++; - } else { - *doc++ = '%'; - *doc++ = hexnums[((unsigned int)*p) >> 4]; - *doc++ = hexnums[((unsigned int)*p) & 0xf]; - p++; - } - } - *doc = '\0'; - } else if ((u->doc = strdup(p)) == NULL) { +quote_doc: + count = 1; + for (i = 0; p[i] != '\0'; ++i) + count += fetch_urlpath_safe(p[i]) ? 
1 : 3; + if ((u->doc = malloc(count)) == NULL) { fetch_syserr(); goto ouch; } + for (i = 0; *p != '\0'; ++p) { + if (fetch_urlpath_safe(*p)) + u->doc[i++] = *p; + else { + u->doc[i++] = '%'; + if ((unsigned char)*p < 160) + u->doc[i++] = '0' + ((unsigned char)*p) / 16; + else + u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16; + if ((unsigned char)*p % 16 < 16) + u->doc[i++] = '0' + ((unsigned char)*p) % 16; + else + u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16; + } + } + u->doc[i] = '\0'; return (u); diff --git a/net/libfetch/files/file.c b/net/libfetch/files/file.c index 69381dd6ee2..2d0d6f6617d 100644 --- a/net/libfetch/files/file.c +++ b/net/libfetch/files/file.c @@ -1,4 +1,4 @@ -/* $NetBSD: file.c,v 1.7 2008/04/21 13:09:57 joerg Exp $ */ +/* $NetBSD: file.c,v 1.8 2008/04/24 07:55:00 joerg Exp $ */ /*- * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav * All rights reserved. @@ -191,27 +191,17 @@ int fetchListFile(struct url_list *ue, struct url *u, const char *pattern, const char *flags) { struct dirent *de; - char fn[PATH_MAX], *p; DIR *dir; - int l; if ((dir = opendir(u->doc)) == NULL) { fetch_syserr(); return -1; } - strncpy(fn, u->doc, sizeof(fn) - 2); - fn[sizeof(fn) - 2] = 0; - strcat(fn, "/"); - p = strchr(fn, 0); - l = sizeof(fn) - strlen(fn) - 1; - while ((de = readdir(dir)) != NULL) { if (pattern && fnmatch(pattern, de->d_name, 0) != 0) continue; - strncpy(p, de->d_name, l - 1); - p[l - 1] = 0; - fetch_add_entry(ue, u, de->d_name); + fetch_add_entry(ue, u, de->d_name, 0); } return 0; diff --git a/net/libfetch/files/ftp.c b/net/libfetch/files/ftp.c index 988a7011aee..1d44107e8d6 100644 --- a/net/libfetch/files/ftp.c +++ b/net/libfetch/files/ftp.c @@ -1,4 +1,4 @@ -/* $NetBSD: ftp.c,v 1.19 2008/04/21 13:09:57 joerg Exp $ */ +/* $NetBSD: ftp.c,v 1.20 2008/04/24 07:55:00 joerg Exp $ */ /*- * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org> @@ -1219,7 +1219,7 @@ fetchListFTP(struct 
url_list *ue, struct url *url, const char *pattern, const ch else eos = eol; *eos = '\0'; - fetch_add_entry(ue, url, buf); + fetch_add_entry(ue, url, buf, 0); cur_off -= eol - buf + 1; memmove(buf, eol + 1, cur_off); } diff --git a/net/libfetch/files/http.c b/net/libfetch/files/http.c index d6931184419..1948e6f6f3f 100644 --- a/net/libfetch/files/http.c +++ b/net/libfetch/files/http.c @@ -1,4 +1,4 @@ -/* $NetBSD: http.c,v 1.17 2008/04/21 22:39:15 joerg Exp $ */ +/* $NetBSD: http.c,v 1.18 2008/04/24 07:55:00 joerg Exp $ */ /*- * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org> @@ -1308,7 +1308,7 @@ parse_index(struct index_parser *parser, const char *buf, size_t len) return 0; *end_attr = '\0'; parser->state = ST_TAGA; - fetch_add_entry(parser->ue, parser->url, buf); + fetch_add_entry(parser->ue, parser->url, buf, 1); return end_attr + 1 - buf; } abort(); |