diff options
author | joerg <joerg@pkgsrc.org> | 2008-04-18 21:13:10 +0000 |
---|---|---|
committer | joerg <joerg@pkgsrc.org> | 2008-04-18 21:13:10 +0000 |
commit | b05cf0700bc8caa5ece25da9183213f379004215 (patch) | |
tree | ad69bf99d907426076dcfc2a294841ee67da1704 /net | |
parent | 774228c6e772813f0d9e9e13338a1db88e4edb0d (diff) | |
download | pkgsrc-b05cf0700bc8caa5ece25da9183213f379004215.tar.gz |
libfetch-2.5:
Add basic index parsing support for HTTP based on the ftpio.c code in
pkg_install. Permission to use the 3-clause BSD license from Thomas
Klausner in private mail.
Diffstat (limited to 'net')
-rw-r--r-- | net/libfetch/Makefile | 4 | ||||
-rw-r--r-- | net/libfetch/files/http.c | 199 |
2 files changed, 196 insertions, 7 deletions
diff --git a/net/libfetch/Makefile b/net/libfetch/Makefile index bc3a319ba30..d690dd9acd4 100644 --- a/net/libfetch/Makefile +++ b/net/libfetch/Makefile @@ -1,7 +1,7 @@ -# $NetBSD: Makefile,v 1.7 2008/04/16 01:01:50 joerg Exp $ +# $NetBSD: Makefile,v 1.8 2008/04/18 21:13:10 joerg Exp $ # -DISTNAME= libfetch-2.4 +DISTNAME= libfetch-2.5 CATEGORIES= net MASTER_SITES= # empty DISTFILES= # empty diff --git a/net/libfetch/files/http.c b/net/libfetch/files/http.c index 86f4f2f6609..ba548f745ef 100644 --- a/net/libfetch/files/http.c +++ b/net/libfetch/files/http.c @@ -1,6 +1,8 @@ -/* $NetBSD: http.c,v 1.13 2008/04/16 15:10:18 joerg Exp $ */ +/* $NetBSD: http.c,v 1.14 2008/04/18 21:13:10 joerg Exp $ */ /*- * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org> + * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1150,14 +1152,202 @@ fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) return (0); } +enum http_states { + ST_NONE, + ST_LT, + ST_LTA, + ST_TAGA, + ST_H, + ST_R, + ST_E, + ST_F, + ST_HREF, + ST_HREFQ, + ST_TAG, + ST_TAGAX, + ST_TAGAQ +}; + +struct index_parser { + enum http_states state; + struct url_ent *ue; + int list_size, list_len; +}; + +static size_t +parse_index(struct index_parser *parser, const char *buf, size_t len) +{ + char *end_attr, p = *buf; + + switch (parser->state) { + case ST_NONE: + /* Plain text, not in markup */ + if (p == '<') + parser->state = ST_LT; + return 1; + case ST_LT: + /* In tag -- "<" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == 'a' || p == 'A') + parser->state = ST_LTA; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAG; + return 1; + case ST_LTA: + /* In tag -- "<a" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAG; + return 1; + case ST_TAG: + /* In tag, but not "<a" -- disregard */ + if (p == '>') + parser->state = ST_NONE; + return 1; + case ST_TAGA: + /* In a-tag -- "<a " already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'h' || p == 'H') + parser->state = ST_H; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAGAX; + return 1; + case ST_TAGAX: + /* In unknown keyword in a-tag */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + return 1; + case ST_TAGAQ: + /* In a-tag, unknown argument for keys. */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGA; + return 1; + case ST_H: + /* In a-tag -- "<a h" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'r' || p == 'R') + parser->state = ST_R; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAGAX; + return 1; + case ST_R: + /* In a-tag -- "<a hr" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'e' || p == 'E') + parser->state = ST_E; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAGAX; + return 1; + case ST_E: + /* In a-tag -- "<a hre" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == 'f' || p == 'F') + parser->state = ST_F; + else if (isspace((unsigned char)p)) + parser->state = ST_TAGA; + else + parser->state = ST_TAGAX; + return 1; + case ST_F: + /* In a-tag -- "<a href" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_TAGAQ; + else if (p == '=') + parser->state = ST_HREF; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAGAX; + return 1; + case ST_HREF: + /* In a-tag -- "<a href=" already found */ + if (p == '>') + parser->state = ST_NONE; + else if (p == '"') + parser->state = ST_HREFQ; + else if (!isspace((unsigned char)p)) + parser->state = ST_TAGA; + return 1; + case ST_HREFQ: + /* In href of the a-tag */ + end_attr = memchr(buf, '"', len); + if (end_attr == NULL) + return 0; + *end_attr = '\0'; + parser->state = ST_TAGA; + fetch_add_entry(&parser->ue, &parser->list_size, &parser->list_len, buf, NULL); + return end_attr + 1 - buf; + } + abort(); +} + /* * List a directory */ struct url_ent * fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags) { - fprintf(stderr, "fetchFilteredListHTTP(): not implemented\n"); - return (NULL); + fetchIO *f; + char buf[2 * PATH_MAX]; + size_t buf_len, processed, sum_processed; + ssize_t read_len; + struct index_parser state; + + state.state = ST_NONE; + state.ue = NULL; + state.list_size = state.list_len = 0; + + f = fetchGetHTTP(url, flags); + if (f == NULL) + return NULL; + + buf_len = 0; + + while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) { + buf_len += read_len; + sum_processed = 0; + do { + processed = parse_index(&state, buf + sum_processed, buf_len); + buf_len -= processed; + sum_processed += processed; + } while (processed != 0 && buf_len > 0); + memmove(buf, buf + sum_processed, buf_len); + } + + fetchIO_close(f); + if (read_len < 0) { + free(state.ue); + state.ue = NULL; + } + return state.ue; } /* @@ -1166,6 +1356,5 @@ fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags) struct url_ent * fetchListHTTP(struct url *url, const char *flags) { - fprintf(stderr, "fetchListHTTP(): not implemented\n"); - return (NULL); + return fetchFilteredList(url, "*", flags); } |