summaryrefslogtreecommitdiff
path: root/src/libpcp_http
diff options
context:
space:
mode:
Diffstat (limited to 'src/libpcp_http')
-rw-r--r--src/libpcp_http/GNUmakefile27
-rw-r--r--src/libpcp_http/src/GNUmakefile32
-rw-r--r--src/libpcp_http/src/README78
-rw-r--r--src/libpcp_http/src/http_error_codes.c39
-rw-r--r--src/libpcp_http/src/http_error_codes.h43
-rw-r--r--src/libpcp_http/src/http_fetcher.c886
-rw-r--r--src/libpcp_http/src/http_fetcher.h168
7 files changed, 1273 insertions, 0 deletions
diff --git a/src/libpcp_http/GNUmakefile b/src/libpcp_http/GNUmakefile
new file mode 100644
index 0000000..552118e
--- /dev/null
+++ b/src/libpcp_http/GNUmakefile
@@ -0,0 +1,27 @@
+#
+# Copyright (c) 2000,2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This library is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/src/include/builddefs
+
+SUBDIRS = src
+
+default install : $(SUBDIRS)
+ $(SUBDIRS_MAKERULE)
+
+include $(BUILDRULES)
+
+default_pcp : default
+
+install_pcp : install
diff --git a/src/libpcp_http/src/GNUmakefile b/src/libpcp_http/src/GNUmakefile
new file mode 100644
index 0000000..e09dd62
--- /dev/null
+++ b/src/libpcp_http/src/GNUmakefile
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2000,2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This library is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+
+TOPDIR = ../../..
+include $(TOPDIR)/src/include/builddefs
+
+STATICLIBTARGET = libpcp_http.a
+LCFLAGS = -DVERSION=\"1.1.0\"
+CFILES = http_error_codes.c http_fetcher.c
+HFILES = http_error_codes.h http_fetcher.h
+LSRCFILES = README
+
+base default : $(STATICLIBTARGET)
+
+include $(BUILDRULES)
+
+install : default
+
+default_pcp : default
+
+install_pcp : install
diff --git a/src/libpcp_http/src/README b/src/libpcp_http/src/README
new file mode 100644
index 0000000..fa4cd6c
--- /dev/null
+++ b/src/libpcp_http/src/README
@@ -0,0 +1,78 @@
+HTTP Fetcher
+Lyle Hanson (lhanson@users.sourceforge.net) (C) 2001, 2003, 2004
+http://http-fetcher.sourceforge.net
+===============================================================================
+
+
+ABOUT
+=====
+HTTP Fetcher is a small library that downloads files via HTTP. I developed
+it for use within another project, and because I didn't find any pre-existing
+software that did exactly what I wanted without hassle. Hopefully you'll find
+it useful and avoid writing similar code yourself.
+
+It supports the GET method. Anything further would involve more than
+fetching, now wouldn't it? If you need more than GET, there are other
+libraries out there that would be better suited to your needs (try http-tiny,
+find it at freshmeat.net).
+
+HTTP Fetcher is meant to be small, fast, and flexible at what it does.
+It's very robust, in my opinion. It's easy to use; using one function,
+it can download any kind of file via HTTP. It also offers further
+sophistication, allowing you control over what (if any) User-Agent or
+Referrer you wish to show to the web server. Which is neat stuff, depending
+on your use (testing and stealth/deception are two that come to mind).
+
+
+DEPENDENCIES
+============
+HTTP Fetcher should run on most unices that support BSD-type sockets and have
+a network connection. Developed and tested on x86 machines running recent
+versions of RedHat.
+
+
+INSTALLATION
+============
+Read the INSTALL file for info on building and installing the library
+
+
+USAGE
+=====
+Read the manpages in the docs directory, see the 'testfetch' example included,
+check out 'fetch' (http://fetch.sourceforge.net), a complete HTTP file download utility.
+
+
+LICENCE
+=======
+HTTP Fetcher is licenced under the GNU Lesser General Public License (LGPL)
+version 2 or newer. See the LICENSE file for full info. Feel free to contact
+me if you have any questions/concerns over licensing issues.
+
+
+FEEDBACK
+========
+If you use this library for anything, drop me a line (lhanson@users.sourceforge.net).
+I'd love to know that formally releasing this stuff was worth it. I'm open
+to any input (bad or good).
+
+
+TODO:
+====
+Further development is focused on the 2.x series of http-fetcher. Only bugfixes
+and minor improvements will be added to 1.x releases.
+
+
+BUGS:
+=====
+This version introduces transparent redirects, but the 1.x series still does
+not allow the client program access to the HTTP return code. So while you can
+now follow redirects, your program won't know anything about it. You can,
+however, turn off or configure the number of redirects to follow.
+
+A better interface is planned for 2.x.x.
+
+
+CREDITS:
+========
+Steve Augart (steve@augart.com) has been a huge contributor to the project.
+Thanks, Steve!
diff --git a/src/libpcp_http/src/http_error_codes.c b/src/libpcp_http/src/http_error_codes.c
new file mode 100644
index 0000000..3b1fb2c
--- /dev/null
+++ b/src/libpcp_http/src/http_error_codes.c
@@ -0,0 +1,39 @@
+/* http_error_codes.c - Error code declarations
+
+ HTTP Fetcher
+ Copyright (C) 2001, 2003, 2004 Lyle Hanson (lhanson@users.sourceforge.net)
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ See LICENSE file for details
+ */
+
+
+ /* Note that '%d' cannot be escaped at this time */
+const char *http_errlist[] =
+ {
+ "Success", /* HF_SUCCESS */
+ "Internal Error. What the hell?!", /* HF_METAERROR */
+ "Got NULL url", /* HF_NULLURL */
+ "Timed out, no metadata for %d seconds", /* HF_HEADTIMEOUT */
+ "Timed out, no data for %d seconds", /* HF_DATATIMEOUT */
+ "Couldn't find return code in HTTP response", /* HF_FRETURNCODE */
+ "Couldn't convert return code in HTTP response",/* HF_CRETURNCODE */
+ "Request returned a status code of %d", /* HF_STATUSCODE */
+ "Couldn't convert Content-Length to integer", /* HF_CONTENTLEN */
+ "Network error (description unavailable)", /* HF_HERROR */
+ "Status code of %d but no Location: field", /* HF_CANTREDIRECT */
+ "Followed the maximum number of redirects (%d)" /* HF_MAXREDIRECTS */
+ };
+
+ /* Used to copy in messages from http_errlist[] and replace %d's with
+ * the value of errorInt. Then we can pass the pointer to THIS */
+char convertedError[128];
diff --git a/src/libpcp_http/src/http_error_codes.h b/src/libpcp_http/src/http_error_codes.h
new file mode 100644
index 0000000..f1d9c51
--- /dev/null
+++ b/src/libpcp_http/src/http_error_codes.h
@@ -0,0 +1,43 @@
+/* http_error_codes.h - Error code definitions
+
+ HTTP Fetcher
+ Copyright (C) 2001, 2003, 2004 Lyle Hanson (lhanson@users.sourceforge.net)
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ See LICENSE file for details
+
+ */
+
+#ifndef HTTP_ERROR_CODES_H
+#define HTTP_ERROR_CODES_H
+
+/* Error sources */
+#define FETCHER_ERROR 0
+#define ERRNO 1
+#define H_ERRNO 2
+
+/* HTTP Fetcher error codes */
+#define HF_SUCCESS 0
+#define HF_METAERROR 1
+#define HF_NULLURL 2
+#define HF_HEADTIMEOUT 3
+#define HF_DATATIMEOUT 4
+#define HF_FRETURNCODE 5
+#define HF_CRETURNCODE 6
+#define HF_STATUSCODE 7
+#define HF_CONTENTLEN 8
+#define HF_HERROR 9
+#define HF_CANTREDIRECT 10
+#define HF_MAXREDIRECTS 11
+#define HF_CONNECTTIMEOUT 12
+
+#endif
diff --git a/src/libpcp_http/src/http_fetcher.c b/src/libpcp_http/src/http_fetcher.c
new file mode 100644
index 0000000..4d01919
--- /dev/null
+++ b/src/libpcp_http/src/http_fetcher.c
@@ -0,0 +1,886 @@
+/* http_fetcher.c - HTTP handling functions
+
+ HTTP Fetcher
+ Copyright (c) 2014 Red Hat.
+ Copyright (C) 2001, 2003, 2004 Lyle Hanson (lhanson@users.sourceforge.net)
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ See LICENSE file for details
+
+ */
+
+#include <ctype.h>
+#include "pmapi.h"
+#include "impl.h"
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include "http_fetcher.h"
+
+/* Globals */
+int timeout = DEFAULT_READ_TIMEOUT;
+char *userAgent = NULL;
+char *referer = NULL;
+int hideUserAgent = 0;
+int hideReferer = 1;
+static int followRedirects = DEFAULT_REDIRECTS; /* # of redirects to follow */
+extern const char *http_errlist[]; /* Array of HTTP Fetcher error messages */
+extern char convertedError[128]; /* Buffer to used when errors contain %d */
+static int errorSource = 0;
+static int http_errno = 0;
+static int errorInt = 0; /* When the error message has a %d in it,
+ * this variable is inserted */
+
+
+ /*
+ * Actually downloads the page, registering a hit (donation)
+ * If the fileBuf passed in is NULL, the url is downloaded and then
+ * freed; otherwise the necessary space is allocated for fileBuf.
+ * Returns size of download on success, -1 on error is set,
+ */
+int http_fetch(const char *url_tmp, char **fileBuf)
+ {
+ fd_set rfds;
+ struct timeval tv;
+ char headerBuf[HEADER_BUF_SIZE];
+ char *tmp, *url, *pageBuf, *requestBuf = NULL, *host, *charIndex;
+ int sock, bytesRead = 0, contentLength = -1, bufsize = REQUEST_BUF_SIZE;
+ int i,
+ ret = -1,
+ tempSize,
+ selectRet,
+ found = 0, /* For redirects */
+ redirectsFollowed = 0;
+
+
+ if(url_tmp == NULL)
+ {
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_NULLURL;
+ return -1;
+ }
+
+ /* Copy the url passed in into a buffer we can work with, change, etc. */
+ url = malloc(strlen(url_tmp)+1);
+ if(url == NULL)
+ {
+ errorSource = ERRNO;
+ return -1;
+ }
+ strncpy(url, url_tmp, strlen(url_tmp) + 1);
+
+ /* This loop allows us to follow redirects if need be. An afterthought,
+ * added to provide this basic functionality. Will hopefully be designed
+ * better in 2.x.x ;) */
+/* while(!found &&
+ (followRedirects < 0 || redirectsFollowed < followRedirects) )
+ */ do
+ {
+ /* Seek to the file path portion of the url */
+ charIndex = strstr(url, "://");
+ if(charIndex != NULL)
+ {
+ /* url contains a protocol field */
+ charIndex += strlen("://");
+ host = charIndex;
+ charIndex = strchr(charIndex, '/');
+ }
+ else
+ {
+ host = (char *)url;
+ charIndex = strchr(url, '/');
+ }
+
+ /* Compose a request string */
+ requestBuf = malloc(bufsize);
+ if(requestBuf == NULL)
+ {
+ free(url);
+ errorSource = ERRNO;
+ return -1;
+ }
+ requestBuf[0] = 0;
+
+ if(charIndex == NULL)
+ {
+ /* The url has no '/' in it, assume the user is making a root-level
+ * request */
+ tempSize = strlen("GET /") + strlen(HTTP_VERSION) + 2;
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize) ||
+ snprintf(requestBuf, bufsize, "GET / %s\r\n", HTTP_VERSION) < 0)
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ }
+ else
+ {
+ tempSize = strlen("GET ") + strlen(charIndex) +
+ strlen(HTTP_VERSION) + 4;
+ /* + 4 is for ' ', '\r', '\n', and NULL */
+
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize) ||
+ snprintf(requestBuf, bufsize, "GET %s %s\r\n",
+ charIndex, HTTP_VERSION) < 0)
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ }
+
+ /* Null out the end of the hostname if need be */
+ if(charIndex != NULL)
+ *charIndex = 0;
+
+ /* Use Host: even though 1.0 doesn't specify it. Some servers
+ * won't play nice if we don't send Host, and it shouldn't
+ * hurt anything */
+ ret = bufsize - strlen(requestBuf); /* Space left in buffer */
+ tempSize = (int)strlen("Host: ") + (int)strlen(host) + 3;
+ /* +3 for "\r\n\0" */
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize + 128))
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ strcat(requestBuf, "Host: ");
+ strcat(requestBuf, host);
+ strcat(requestBuf, "\r\n");
+
+ if(!hideReferer && referer != NULL) /* NO default referer */
+ {
+ tempSize = (int)strlen("Referer: ") + (int)strlen(referer) + 3;
+ /* + 3 is for '\r', '\n', and NULL */
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize))
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ strcat(requestBuf, "Referer: ");
+ strcat(requestBuf, referer);
+ strcat(requestBuf, "\r\n");
+ }
+
+ if(!hideUserAgent && userAgent == NULL)
+ {
+ tempSize = (int)strlen("User-Agent: ") +
+ (int)strlen(DEFAULT_USER_AGENT) + (int)strlen(VERSION) + 4;
+ /* + 4 is for '\', '\r', '\n', and NULL */
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize))
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ strcat(requestBuf, "User-Agent: ");
+ strcat(requestBuf, DEFAULT_USER_AGENT);
+ strcat(requestBuf, "/");
+ strcat(requestBuf, VERSION);
+ strcat(requestBuf, "\r\n");
+ }
+ else if(!hideUserAgent)
+ {
+ tempSize = (int)strlen("User-Agent: ") + (int)strlen(userAgent) + 3;
+ /* + 3 is for '\r', '\n', and NULL */
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize))
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ strcat(requestBuf, "User-Agent: ");
+ strcat(requestBuf, userAgent);
+ strcat(requestBuf, "\r\n");
+ }
+
+ tempSize = (int)strlen("Connection: Close\r\n\r\n");
+ if(_checkBufSize(&requestBuf, &bufsize, tempSize))
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ strcat(requestBuf, "Connection: Close\r\n\r\n");
+
+ /* Now free any excess memory allocated to the buffer */
+ tmp = realloc(requestBuf, strlen(requestBuf) + 1);
+ if(tmp == NULL)
+ {
+ free(url);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ requestBuf = tmp;
+
+ sock = makeSocket(host); /* errorSource set within makeSocket */
+ if(sock == -1) { free(url); free(requestBuf); return -1;}
+
+ free(url);
+ url = NULL;
+
+ if(write(sock, requestBuf, strlen(requestBuf)) == -1)
+ {
+ close(sock);
+ free(requestBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+
+ free(requestBuf);
+ requestBuf = NULL;
+
+ /* Grab enough of the response to get the metadata */
+ ret = _http_read_header(sock, headerBuf); /* errorSource set within */
+ if(ret < 0) { close(sock); return -1; }
+
+ /* Get the return code */
+ charIndex = strstr(headerBuf, "HTTP/");
+ if(charIndex == NULL)
+ {
+ close(sock);
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_FRETURNCODE;
+ return -1;
+ }
+ while(*charIndex != ' ')
+ charIndex++;
+ charIndex++;
+
+ ret = sscanf(charIndex, "%d", &i);
+ if(ret != 1)
+ {
+ close(sock);
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_CRETURNCODE;
+ return -1;
+ }
+ if(i<200 || i>307)
+ {
+ close(sock);
+ errorInt = i; /* Status code, to be inserted in error string */
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_STATUSCODE;
+ return -1;
+ }
+
+ /* If a redirect, repeat operation until final URL is found or we
+ * redirect followRedirects times. Note the case sensitive "Location",
+ * should probably be made more robust in the future (without relying
+ * on the non-standard strcasecmp()).
+ * This bit mostly by Dean Wilder, tweaked by me */
+ if(i >= 300)
+ {
+ redirectsFollowed++;
+
+ /* Pick up redirect URL, allocate new url, and repeat process */
+ charIndex = strstr(headerBuf, "Location:");
+ if(!charIndex)
+ {
+ close(sock);
+ errorInt = i; /* Status code, to be inserted in error string */
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_CANTREDIRECT;
+ return -1;
+ }
+ charIndex += strlen("Location:");
+ /* Skip any whitespace... */
+ while(*charIndex != '\0' && isspace((int)*charIndex))
+ charIndex++;
+ if(*charIndex == '\0')
+ {
+ close(sock);
+ errorInt = i; /* Status code, to be inserted in error string */
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_CANTREDIRECT;
+ return -1;
+ }
+
+ i = strcspn(charIndex, " \r\n");
+ if(i > 0)
+ {
+ url = (char *)malloc(i + 1);
+ strncpy(url, charIndex, i);
+ url[i] = '\0';
+ }
+ else
+ /* Found 'Location:' but contains no URL! We'll handle it as
+ * 'found', hopefully the resulting document will give the user
+ * a hint as to what happened. */
+ found = 1;
+ }
+ else
+ found = 1;
+ } while(!found &&
+ (followRedirects < 0 || redirectsFollowed <= followRedirects) );
+
+ if(url) /* Redirection code may malloc this, then exceed followRedirects */
+ {
+ free(url);
+ url = NULL;
+ }
+
+ if(redirectsFollowed >= followRedirects && !found)
+ {
+ close(sock);
+ errorInt = followRedirects; /* To be inserted in error string */
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_MAXREDIRECTS;
+ return -1;
+ }
+
+ /*
+ * Parse out about how big the data segment is.
+ * Note that under current HTTP standards (1.1 and prior), the
+ * Content-Length field is not guaranteed to be accurate or even present.
+ * I just use it here so I can allocate a ballpark amount of memory.
+ *
+ * Note that some servers use different capitalization
+ */
+ charIndex = strstr(headerBuf, "Content-Length:");
+ if(charIndex == NULL)
+ charIndex = strstr(headerBuf, "Content-length:");
+
+ if(charIndex != NULL)
+ {
+ ret = sscanf(charIndex + strlen("content-length: "), "%d",
+ &contentLength);
+ if(ret < 1)
+ {
+ close(sock);
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_CONTENTLEN;
+ return -1;
+ }
+ }
+
+ /* Allocate enough memory to hold the page */
+ if(contentLength == -1)
+ contentLength = DEFAULT_PAGE_BUF_SIZE;
+
+ pageBuf = (char *)malloc(contentLength);
+ if(pageBuf == NULL)
+ {
+ close(sock);
+ errorSource = ERRNO;
+ return -1;
+ }
+
+ /* Begin reading the body of the file */
+ while(ret > 0)
+ {
+ FD_ZERO(&rfds);
+ FD_SET(sock, &rfds);
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+
+ if(timeout >= 0)
+ selectRet = select(sock+1, &rfds, NULL, NULL, &tv);
+ else /* No timeout, can block indefinately */
+ selectRet = select(sock+1, &rfds, NULL, NULL, NULL);
+
+ if(selectRet == 0)
+ {
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_DATATIMEOUT;
+ errorInt = timeout;
+ close(sock);
+ free(pageBuf);
+ return -1;
+ }
+ else if(selectRet == -1)
+ {
+ setoserror(neterror());
+ close(sock);
+ free(pageBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+
+ ret = recv(sock, pageBuf + bytesRead, contentLength, 0);
+ if(ret == -1)
+ {
+ setoserror(neterror());
+ close(sock);
+ free(pageBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+
+ bytesRead += ret;
+
+ if(ret > 0)
+ {
+ /* To be tolerant of inaccurate Content-Length fields, we'll
+ * allocate another read-sized chunk to make sure we have
+ * enough room.
+ */
+ tmp = (char *)realloc(pageBuf, bytesRead + contentLength);
+ if(tmp == NULL)
+ {
+ close(sock);
+ free(pageBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ pageBuf = tmp;
+ }
+ }
+
+ /*
+ * The download buffer is too large. Trim off the safety padding.
+ * Note that we add one NULL byte to the end of the data, as it may not
+ * already be NULL terminated and we can't be sure what type of data it
+ * is or what the caller will do with it.
+ */
+ tmp = (char *)realloc(pageBuf, bytesRead + 1);
+ /* tmp shouldn't be null, since we're _shrinking_ the buffer,
+ * and if it DID fail, we could go on with the too-large buffer,
+ * but something would DEFINATELY be wrong, so we'll just give
+ * an error message */
+ if(tmp == NULL)
+ {
+ close(sock);
+ free(pageBuf);
+ errorSource = ERRNO;
+ return -1;
+ }
+ pageBuf = tmp;
+ pageBuf[bytesRead] = '\0'; /* NULL terminate the data */
+
+ if(fileBuf == NULL) /* They just wanted us to "hit" the url */
+ free(pageBuf);
+ else
+ *fileBuf = pageBuf;
+
+ close(sock);
+ return bytesRead;
+ }
+
+
+
+ /*
+ * Changes the User Agent. Returns 0 on success, -1 on error.
+ */
+int http_setUserAgent(const char *newAgent)
+ {
+ static int freeOldAgent = 0; /* Indicates previous malloc's */
+ char *tmp;
+
+ if(newAgent == NULL)
+ {
+ if(freeOldAgent) free(userAgent);
+ userAgent = NULL;
+ hideUserAgent = 1;
+ }
+ else
+ {
+ tmp = (char *)malloc(strlen(newAgent)+1);
+ if(tmp == NULL) { errorSource = ERRNO; return -1; }
+ if(freeOldAgent) free(userAgent);
+ userAgent = tmp;
+ strcpy(userAgent, newAgent);
+ freeOldAgent = 1;
+ hideUserAgent = 0;
+ }
+
+ return 0;
+ }
+
+
+
+ /*
+ * Changes the Referer. Returns 0 on success, -1 on error
+ */
+int http_setReferer(const char *newReferer)
+ {
+ static int freeOldReferer = 0; /* Indicated previous malloc's */
+ char *tmp;
+
+ if(newReferer == NULL)
+ {
+ if(freeOldReferer) free(referer);
+ referer = NULL;
+ hideReferer = 1;
+ }
+ else
+ {
+ tmp = (char *)malloc(strlen(newReferer)+1);
+ if(tmp == NULL) { errorSource = ERRNO; return -1; }
+ if(freeOldReferer) free(referer);
+ referer = tmp;
+ strcpy(referer, newReferer);
+ freeOldReferer = 1;
+ hideReferer = 0;
+ }
+
+ return 0;
+ }
+
+
+
+ /*
+ * Changes the amount of time that HTTP Fetcher will wait for data
+ * before timing out on reads
+ */
+void http_setTimeout(int seconds) { timeout = seconds; }
+
+
+
+ /*
+ * Changes the number of HTTP redirects HTTP Fetcher will automatically
+ * follow. If a request returns a status code of 3XX and contains
+ * a "Location:" field, the library will transparently follow up to
+ * the specified number of redirects. With this implementation
+ * (which is just a stopgap, really) the caller won't be aware of any
+ * redirection and will assume the returned document came from the original
+ * URL.
+ * To disable redirects, pass a 0. To follow unlimited redirects (probably
+ * unwise), pass a negative value. The default is to follow 3 redirects.
+ */
+void http_setRedirects(int redirects) { followRedirects = redirects; }
+
+
+
+ /*
+ * Puts the filename portion of the url into 'filename'.
+ * Returns:
+ * 0 on success
+ * 1 when url contains no end filename (i.e., 'www.foo.com/'),
+ * and **filename should not be assumed to be valid
+ * -1 on error
+ */
+int http_parseFilename(const char *url, char **filename)
+ {
+ char *ptr;
+
+ if(url == NULL)
+ {
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_NULLURL;
+ return -1;
+ }
+
+ ptr = (char *)rindex(url, '/');
+ if(ptr == NULL)
+ /* Root level request, apparently */
+ return 1;
+
+ ptr++;
+ if(*ptr == '\0') return 1;
+
+ *filename = (char *)malloc(strlen(ptr)+1);
+ if(*filename == NULL) { errorSource = ERRNO; return -1; }
+ strcpy(*filename, ptr);
+
+ return 0;
+ }
+
+
+
	 /* Depending on the source of error, prints either the system/network
	  * error description or an HTTP Fetcher error message -- in all cases
	  * to stderr, prefixed with the caller-supplied string */
+void http_perror(const char *string)
+ {
+ if(errorSource == ERRNO)
+ fprintf(stderr, "%s: %s\n", string, osstrerror());
+ else if(errorSource == H_ERRNO)
+ fprintf(stderr, "%s: %s\n", string, hoststrerror());
+ else if(errorSource == FETCHER_ERROR)
+ {
+ const char *stringIndex;
+
+ if(strstr(http_errlist[http_errno], "%d") == NULL)
+ {
+ fputs(string, stderr);
+ fputs(": ", stderr);
+ fputs(http_errlist[http_errno], stderr);
+ fputs("\n", stderr);
+ }
+ else
+ {
+ /* The error string has a %d in it, we need to insert errorInt */
+ stringIndex = http_errlist[http_errno];
+ while(*stringIndex != '%') /* Print up to the %d */
+ {
+ fputc(*stringIndex, stderr);
+ stringIndex++;
+ }
+ fprintf(stderr, "%d", errorInt); /* Print the number */
+ stringIndex += 2; /* Skip past the %d */
+ while(*stringIndex != 0) /* Print up to the end NULL */
+ {
+ fputc(*stringIndex, stderr);
+ stringIndex++;
+ }
+ fputs("\n", stderr);
+ }
+ }
+ }
+
+
+ /*
+ * Returns true/false (1/0) if a timeout occurred on last request.
+ */
+int http_getTimeoutError()
+ {
+ if(errorSource == FETCHER_ERROR)
+ return (http_errno == HF_DATATIMEOUT || http_errno == HF_HEADTIMEOUT || http_errno == HF_CONNECTTIMEOUT);
+ return 0;
+ }
+
+
+ /*
+ * Returns a pointer to the current error description message. The
+ * message pointed to is only good until the next call to http_strerror(),
+ * so if you need to hold on to the message for a while you should make
+ * a copy of it
+ */
+const char *http_strerror()
+ {
+ if(errorSource == ERRNO)
+ return osstrerror();
+ else if(errorSource == H_ERRNO)
+ return hoststrerror();
+ else if(errorSource == FETCHER_ERROR)
+ {
+ if(strstr(http_errlist[http_errno], "%d") == NULL)
+ return http_errlist[http_errno];
+ else
+ {
+ /* The error string has a %d in it, we need to insert errorInt.
+ * convertedError[128] has been declared for that purpose */
+ char *stringIndex, *originalError;
+
+ originalError = (char *)http_errlist[http_errno];
+ convertedError[0] = 0; /* Start off with NULL */
+ stringIndex = strstr(originalError, "%d");
+ strncat(convertedError, originalError, /* Copy up to %d */
+ abs(stringIndex - originalError));
+ sprintf(&convertedError[strlen(convertedError)],"%d",errorInt);
+ stringIndex += 2; /* Skip past the %d */
+ strcat(convertedError, stringIndex);
+
+ return convertedError;
+ }
+ }
+
+ return http_errlist[HF_METAERROR]; /* Should NEVER happen */
+ }
+
+
+ /*
+ * Reads the metadata of an HTTP response.
+ * Perhaps a little inefficient, as it reads 1 byte at a time, but
+ * I don't think it's that much of a loss (most headers aren't HUGE).
+ * Returns:
+ * # of bytes read on success, or
+ * -1 on error
+ */
+int _http_read_header(int sock, char *headerPtr)
+ {
+ fd_set rfds;
+ struct timeval tv;
+ int bytesRead = 0, newlines = 0, ret, selectRet;
+
+ while(newlines != 2 && bytesRead != HEADER_BUF_SIZE)
+ {
+ FD_ZERO(&rfds);
+ FD_SET(sock, &rfds);
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+
+ if(timeout >= 0)
+ selectRet = select(sock+1, &rfds, NULL, NULL, &tv);
+ else /* No timeout, can block indefinately */
+ selectRet = select(sock+1, &rfds, NULL, NULL, NULL);
+
+ if(selectRet == 0)
+ {
+ errorSource = FETCHER_ERROR;
+ http_errno = HF_HEADTIMEOUT;
+ errorInt = timeout;
+ return -1;
+ }
+ else if(selectRet == -1)
+ {
+ setoserror(neterror());
+ errorSource = ERRNO;
+ return -1;
+ }
+
+ ret = recv(sock, headerPtr, 1, 0);
+ if(ret == -1)
+ {
+ setoserror(neterror());
+ errorSource = ERRNO;
+ return -1;
+ }
+ bytesRead++;
+
+ if(*headerPtr == '\r') /* Ignore CR */
+ {
+ /* Basically do nothing special, just don't set newlines
+ * to 0 */
+ headerPtr++;
+ continue;
+ }
+ else if(*headerPtr == '\n') /* LF is the separator */
+ newlines++;
+ else
+ newlines = 0;
+
+ headerPtr++;
+ }
+
+ headerPtr -= 3; /* Snip the trailing LF's */
+ *headerPtr = '\0';
+ return bytesRead;
+ }
+
+
+
+ /*
+ * Opens a TCP socket and returns the descriptor
+ * Returns:
+ * socket descriptor, or
+ * -1 on error
+ */
+int makeSocket(const char *host)
+ {
+ int sock; /* Socket descriptor */
+ int ret;
+ int port;
+ char *p;
+ __pmFdSet wfds;
+ struct timeval tv;
+ struct timeval *ptv;
+ __pmSockAddr *myaddr;
+ __pmHostEnt *servInfo;
+ void *enumIx;
+ int flags = 0;
+
+ /* Check for port number specified in URL */
+ p = strchr(host, ':');
+ if(p)
+ {
+ port = atoi(p + 1);
+ *p = '\0';
+ }
+ else
+ port = PORT_NUMBER;
+
+ servInfo = __pmGetAddrInfo(host);
+ if(servInfo == NULL) { errorSource = H_ERRNO; return -1; }
+
+ sock = -1;
+ enumIx = NULL;
+ for (myaddr = __pmHostEntGetSockAddr(servInfo, &enumIx);
+ myaddr != NULL;
+ myaddr = __pmHostEntGetSockAddr(servInfo, &enumIx)) {
+ /* Create a socket */
+ if (__pmSockAddrIsInet(myaddr))
+ sock = __pmCreateSocket();
+ else if (__pmSockAddrIsIPv6(myaddr))
+ sock = __pmCreateIPv6Socket();
+ else
+ continue;
+ if (sock < 0) {
+ __pmSockAddrFree(myaddr);
+ continue; /* Try the next address */
+ }
+
+ /* Attempt to connect */
+ flags = __pmConnectTo(sock, myaddr, port);
+ __pmSockAddrFree(myaddr);
+
+ if (flags < 0) {
+ /*
+ * Mark failure in case we fall out the end of the loop
+ * and try next address. sock has been closed in __pmConnectTo().
+ */
+ sock = -1;
+ continue;
+ }
+
+ /* FNDELAY and we're in progress - wait on select */
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+ ptv = (tv.tv_sec || tv.tv_usec) ? &tv : NULL;
+ __pmFD_ZERO(&wfds);
+ __pmFD_SET(sock, &wfds);
+ ret = __pmSelectWrite(sock+1, &wfds, ptv);
+
+ /* Was the connection successful? */
+ if (ret < 0) {
+ if (oserror() == EINTR)
+ return _makeSocketErr(sock, FETCHER_ERROR, HF_CONNECTTIMEOUT);
+ return _makeSocketErr(sock, ERRNO, 0);
+ }
+ ret = __pmConnectCheckError(sock);
+ if (ret == 0)
+ break;
+
+ /* Unsuccessful connection. */
+ __pmCloseSocket(sock);
+ sock = -1;
+ } /* loop over addresses */
+
+ __pmHostEntFree(servInfo);
+
+ if(sock == -1) { errorSource = ERRNO; return -1; }
+
+ sock = __pmConnectRestoreFlags(sock, flags);
+ if(sock < 0) { errorSource = ERRNO; return -1; }
+
+ return sock;
+ }
+
+int _makeSocketErr(int sock, int this_errorSource, int this_http_errno){
+ errorSource = this_errorSource;
+ http_errno = this_http_errno;
+ close(sock);
+ return -1;
+ }
+
+ /*
+ * Determines if the given NULL-terminated buffer is large enough to
+ * concatenate the given number of characters. If not, it attempts to
+ * grow the buffer to fit.
+ * Returns:
+ * 0 on success, or
+ * -1 on error (original buffer is unchanged).
+ */
+int _checkBufSize(char **buf, int *bufsize, int more)
+ {
+ char *tmp;
+ int roomLeft = *bufsize - (strlen(*buf) + 1);
+ if(roomLeft > more)
+ return 0;
+ tmp = realloc(*buf, *bufsize + more + 1);
+ if(tmp == NULL)
+ return -1;
+ *buf = tmp;
+ *bufsize += more + 1;
+ return 0;
+ }
diff --git a/src/libpcp_http/src/http_fetcher.h b/src/libpcp_http/src/http_fetcher.h
new file mode 100644
index 0000000..62a39dd
--- /dev/null
+++ b/src/libpcp_http/src/http_fetcher.h
@@ -0,0 +1,168 @@
+/* http_fetcher.h - HTTP handling functions
+
+ HTTP Fetcher
+ Copyright (C) 2001, 2003, 2004 Lyle Hanson (lhanson@users.sourceforge.net)
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ See LICENSE file for details
+
+ */
+
+#ifndef HTTP_FETCHER_H
+#define HTTP_FETCHER_H
+
+#include "http_error_codes.h"
+
+#define PORT_NUMBER 80
+#define HTTP_VERSION "HTTP/1.0"
+#define DEFAULT_USER_AGENT "HTTP Fetcher"
+#define DEFAULT_READ_TIMEOUT 30 /* Seconds to wait before giving up
+ * when no data is arriving */
+
+#define REQUEST_BUF_SIZE 1024
+#define HEADER_BUF_SIZE 1024
+#define DEFAULT_PAGE_BUF_SIZE 1024 * 200 /* 200K should hold most things */
+#define DEFAULT_REDIRECTS 3 /* Number of HTTP redirects to follow */
+
+
+
+/******************************************************************************/
+/**************** Function declarations and descriptions **********************/
+/******************************************************************************/
+
+/*
+ * [!!! NOTE !!!] All HTTP Fetcher functions return -1 on error. You can
+ * then either call http_perror to print the error message or call
+ * http_strerror to get a pointer to it
+ */
+
+
+ /*
+ * Download the page, registering a hit. If you pass it a NULL for fileBuf,
+ * 'url' will be requested but will not remain in memory (useful for
+ * simply registering a hit). Otherwise necessary space will be allocated
+ * and will be pointed to by fileBuf. Note that a NULL byte is added to
+ * the data, so the actual buffer will be the file size + 1.
+ * Returns:
+ * # of bytes downloaded, or
+ * -1 on error
+ */
+int http_fetch(const char *url, char **fileBuf);
+
+ /*
+ * Changes the User Agent (shown to the web server with each request)
+ * Send it NULL to avoid telling the server a User Agent
+ * By default, the User Agent is sent (The default one unless changed)
+ * Returns:
+ * 0 on success, or
+ * -1 on error (previous value for agent remains unchanged)
+ */
+int http_setUserAgent(const char *newAgent);
+
+ /*
+ * Changes the Referer (shown to the web server with each request)
+ * Send it NULL to avoid thelling the server a Referer
+ * By default, no Referer is sent
+ * Returns:
+ * 0 on success, or
+ * -1 on error
+ */
+int http_setReferer(const char *newReferer);
+
+ /*
+ * Changes the maximum amount of time that HTTP Fetcher will wait on
+ * data. If this many seconds elapses without more data from the
+ * server, http_fetch will return with an error.
+ * If you pass a value less than 0, reads will not time out, potentially
+ * waiting forever (or until data shows up, whichever comes first)
+ */
+void http_setTimeout(int seconds);
+
+ /*
+ * Changes the number of HTTP redirects HTTP Fetcher will automatically
+ * follow. If a request returns a status code of 3XX and contains
+ * a "Location:" field, the library will transparently follow up to
+ * the specified number of redirects. With this implementation
+ * (which is just a stopgap, really) the caller won't be aware of any
+ * redirection and will assume the returned document came from the original
+ * URL.
+ * To disable redirects, pass a 0. To follow unlimited redirects (probably
+ * unwise), pass a negative value. The default is to follow 3 redirects.
+ */
+void http_setRedirects(int redirects);
+
+ /*
+ * Takes a url and puts the filename portion of it into 'filename'.
+ * Returns:
+ * 0 on success, or
+ * 1 when url contains no end filename (i.e., "www.foo.com/")
+ * and **filename should not be assumed to point to anything), or
+ * -1 on error
+ */
+int http_parseFilename(const char *url, char **filename);
+
+ /*
+ * Works like perror. If an HTTP Fetcher function ever returns an
+ * error (-1), this will print a descriptive message to standard output
+ */
+void http_perror(const char *string);
+
+ /*
+ * Returns true or false (1/0) if last request timed out.
+ */
+int http_getTimeoutError();
+
+ /*
+ * Returns a pointer to the current error description message. The
+ * message pointed to is only good until the next call to http_strerror(),
+ * so if you need to hold on to the message for a while you should make
+ * a copy of it.
+ */
+const char *http_strerror();
+
+
+
+/******************************************************************************/
+/**** The following functions are used INTERNALLY by http_fetcher *************/
+/******************************************************************************/
+
+ /*
+ * Reads the metadata of an HTTP response. On success returns the number
+ * Returns:
+ * # of bytes read on success, or
+ * -1 on error
+ */
+int _http_read_header(int sock, char *headerPtr);
+
+ /*
+ * Opens a TCP socket and returns the descriptor
+ * Returns:
+ * socket descriptor, or
+ * -1 on error
+ */
+int makeSocket(const char *host);
+
+ /* Handles error conditions with creating sockets
+ */
+int _makeSocketErr(int sock, int this_errorSource, int this_http_errno);
+
+ /*
+ * Determines if the given NULL-terminated buffer is large enough to
+ * concatenate the given number of characters. If not, it attempts to
+ * grow the buffer to fit.
+ * Returns:
+ * 0 on success, or
+ * -1 on error (original buffer is unchanged).
+ */
+int _checkBufSize(char **buf, int *bufsize, int more);
+
+#endif