summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorjoerg <joerg@pkgsrc.org>2008-04-18 21:13:10 +0000
committerjoerg <joerg@pkgsrc.org>2008-04-18 21:13:10 +0000
commitb05cf0700bc8caa5ece25da9183213f379004215 (patch)
treead69bf99d907426076dcfc2a294841ee67da1704 /net
parent774228c6e772813f0d9e9e13338a1db88e4edb0d (diff)
downloadpkgsrc-b05cf0700bc8caa5ece25da9183213f379004215.tar.gz
libfetch-2.5:
Add basic index parsing support for HTTP based on the ftpio.c code in pkg_install. Permission to use the 3-clause BSD license from Thomas Klausner in private mail.
Diffstat (limited to 'net')
-rw-r--r--net/libfetch/Makefile4
-rw-r--r--net/libfetch/files/http.c199
2 files changed, 196 insertions, 7 deletions
diff --git a/net/libfetch/Makefile b/net/libfetch/Makefile
index bc3a319ba30..d690dd9acd4 100644
--- a/net/libfetch/Makefile
+++ b/net/libfetch/Makefile
@@ -1,7 +1,7 @@
-# $NetBSD: Makefile,v 1.7 2008/04/16 01:01:50 joerg Exp $
+# $NetBSD: Makefile,v 1.8 2008/04/18 21:13:10 joerg Exp $
#
-DISTNAME= libfetch-2.4
+DISTNAME= libfetch-2.5
CATEGORIES= net
MASTER_SITES= # empty
DISTFILES= # empty
diff --git a/net/libfetch/files/http.c b/net/libfetch/files/http.c
index 86f4f2f6609..ba548f745ef 100644
--- a/net/libfetch/files/http.c
+++ b/net/libfetch/files/http.c
@@ -1,6 +1,8 @@
-/* $NetBSD: http.c,v 1.13 2008/04/16 15:10:18 joerg Exp $ */
+/* $NetBSD: http.c,v 1.14 2008/04/18 21:13:10 joerg Exp $ */
/*-
* Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
+ * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
+ * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1150,14 +1152,202 @@ fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
return (0);
}
+enum http_states {
+ ST_NONE,
+ ST_LT,
+ ST_LTA,
+ ST_TAGA,
+ ST_H,
+ ST_R,
+ ST_E,
+ ST_F,
+ ST_HREF,
+ ST_HREFQ,
+ ST_TAG,
+ ST_TAGAX,
+ ST_TAGAQ
+};
+
+struct index_parser {
+ enum http_states state;
+ struct url_ent *ue;
+ int list_size, list_len;
+};
+
+static size_t
+parse_index(struct index_parser *parser, const char *buf, size_t len)
+{
+ char *end_attr, p = *buf;
+
+ switch (parser->state) {
+ case ST_NONE:
+ /* Plain text, not in markup */
+ if (p == '<')
+ parser->state = ST_LT;
+ return 1;
+ case ST_LT:
+ /* In tag -- "<" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == 'a' || p == 'A')
+ parser->state = ST_LTA;
+ else if (!isspace((unsigned char)p))
+ parser->state = ST_TAG;
+ return 1;
+ case ST_LTA:
+ /* In tag -- "<a" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (isspace((unsigned char)p))
+ parser->state = ST_TAGA;
+ else
+ parser->state = ST_TAG;
+ return 1;
+ case ST_TAG:
+ /* In tag, but not "<a" -- disregard */
+ if (p == '>')
+ parser->state = ST_NONE;
+ return 1;
+ case ST_TAGA:
+ /* In a-tag -- "<a " already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (p == 'h' || p == 'H')
+ parser->state = ST_H;
+ else if (!isspace((unsigned char)p))
+ parser->state = ST_TAGAX;
+ return 1;
+ case ST_TAGAX:
+ /* In unknown keyword in a-tag */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (isspace((unsigned char)p))
+ parser->state = ST_TAGA;
+ return 1;
+ case ST_TAGAQ:
+ /* In a-tag, unknown argument for keys. */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGA;
+ return 1;
+ case ST_H:
+ /* In a-tag -- "<a h" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (p == 'r' || p == 'R')
+ parser->state = ST_R;
+ else if (isspace((unsigned char)p))
+ parser->state = ST_TAGA;
+ else
+ parser->state = ST_TAGAX;
+ return 1;
+ case ST_R:
+ /* In a-tag -- "<a hr" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (p == 'e' || p == 'E')
+ parser->state = ST_E;
+ else if (isspace((unsigned char)p))
+ parser->state = ST_TAGA;
+ else
+ parser->state = ST_TAGAX;
+ return 1;
+ case ST_E:
+ /* In a-tag -- "<a hre" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (p == 'f' || p == 'F')
+ parser->state = ST_F;
+ else if (isspace((unsigned char)p))
+ parser->state = ST_TAGA;
+ else
+ parser->state = ST_TAGAX;
+ return 1;
+ case ST_F:
+ /* In a-tag -- "<a href" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_TAGAQ;
+ else if (p == '=')
+ parser->state = ST_HREF;
+ else if (!isspace((unsigned char)p))
+ parser->state = ST_TAGAX;
+ return 1;
+ case ST_HREF:
+ /* In a-tag -- "<a href=" already found */
+ if (p == '>')
+ parser->state = ST_NONE;
+ else if (p == '"')
+ parser->state = ST_HREFQ;
+ else if (!isspace((unsigned char)p))
+ parser->state = ST_TAGA;
+ return 1;
+ case ST_HREFQ:
+ /* In href of the a-tag */
+ end_attr = memchr(buf, '"', len);
+ if (end_attr == NULL)
+ return 0;
+ *end_attr = '\0';
+ parser->state = ST_TAGA;
+ fetch_add_entry(&parser->ue, &parser->list_size, &parser->list_len, buf, NULL);
+ return end_attr + 1 - buf;
+ }
+ abort();
+}
+
/*
* List a directory
*/
struct url_ent *
fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags)
{
- fprintf(stderr, "fetchFilteredListHTTP(): not implemented\n");
- return (NULL);
+ fetchIO *f;
+ char buf[2 * PATH_MAX];
+ size_t buf_len, processed, sum_processed;
+ ssize_t read_len;
+ struct index_parser state;
+
+ state.state = ST_NONE;
+ state.ue = NULL;
+ state.list_size = state.list_len = 0;
+
+ f = fetchGetHTTP(url, flags);
+ if (f == NULL)
+ return NULL;
+
+ buf_len = 0;
+
+ while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
+ buf_len += read_len;
+ sum_processed = 0;
+ do {
+ processed = parse_index(&state, buf + sum_processed, buf_len);
+ buf_len -= processed;
+ sum_processed += processed;
+ } while (processed != 0 && buf_len > 0);
+ memmove(buf, buf + sum_processed, buf_len);
+ }
+
+ fetchIO_close(f);
+ if (read_len < 0) {
+ free(state.ue);
+ state.ue = NULL;
+ }
+ return state.ue;
}
/*
@@ -1166,6 +1356,5 @@ fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags)
struct url_ent *
fetchListHTTP(struct url *url, const char *flags)
{
- fprintf(stderr, "fetchListHTTP(): not implemented\n");
- return (NULL);
+ return fetchFilteredList(url, "*", flags);
}