summaryrefslogtreecommitdiff
path: root/src/libpcp_http/src/http_fetcher.h
blob: 62a39ddcc1e9a2953b8f4ab93c10cb6592df534a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/* http_fetcher.h - HTTP handling functions

	HTTP Fetcher
	Copyright (C) 2001, 2003, 2004 Lyle Hanson (lhanson@users.sourceforge.net)

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Library General Public
	License as published by the Free Software Foundation; either
	version 2 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Library General Public License for more details.

	See LICENSE file for details
									
 */

#ifndef HTTP_FETCHER_H
#define HTTP_FETCHER_H

#include "http_error_codes.h"

/* TCP port 80, the standard HTTP port */
#define PORT_NUMBER 			80
/* HTTP protocol version string */
#define HTTP_VERSION 			"HTTP/1.0"
/* User Agent string used unless changed (see http_setUserAgent) */
#define DEFAULT_USER_AGENT		"HTTP Fetcher"
/* Seconds to wait before giving up when no data is arriving */
#define DEFAULT_READ_TIMEOUT	30

#define REQUEST_BUF_SIZE 		1024
#define HEADER_BUF_SIZE 		1024
/*
 * 200K should hold most things.  The replacement list is parenthesized so
 *	the macro expands as a single value inside larger expressions
 *	(e.g. 'x / DEFAULT_PAGE_BUF_SIZE' or 'x % DEFAULT_PAGE_BUF_SIZE').
 */
#define DEFAULT_PAGE_BUF_SIZE 	(1024 * 200)
/* Number of HTTP redirects to follow */
#define DEFAULT_REDIRECTS       3



/******************************************************************************/
/**************** Function declarations and descriptions **********************/
/******************************************************************************/

/* 
 * [!!! NOTE !!!]  All HTTP Fetcher functions return -1 on error.  You can
 *	then either call http_perror to print the error message or call
 *	http_strerror to get a pointer to it
 */


	/*
	 * Downloads the page at 'url', registering a hit.
	 *	If you pass NULL for fileBuf, 'url' will be requested but the data
	 *	will not remain in memory (useful for simply registering a hit).
	 *	Otherwise the necessary space will be allocated and *fileBuf will be
	 *	set to point to it.  Note that a terminating NUL byte is added to
	 *	the data, so the actual buffer will be the file size + 1 bytes.
	 *	NOTE(review): the buffer is allocated by the library; presumably the
	 *	caller is responsible for freeing it -- confirm against the
	 *	implementation.
	 * Returns:
	 *	# of bytes downloaded, or
	 *	-1 on error
	 */
int http_fetch(const char *url, char **fileBuf);

	/*
	 * Changes the User Agent (shown to the web server with each request).
	 *	Pass NULL to avoid telling the server a User Agent at all.
	 *	By default a User Agent is sent (the default one, unless it has
	 *	been changed through this function).
	 * Returns:
	 *	0 on success, or
	 *	-1 on error (previous value for agent remains unchanged)
	 */
int http_setUserAgent(const char *newAgent);

	/*
	 * Changes the Referer (shown to the web server with each request).
	 *	Pass NULL to avoid telling the server a Referer.
	 *	By default, no Referer is sent.
	 * Returns:
	 *	0 on success, or
	 *	-1 on error
	 */
int http_setReferer(const char *newReferer);

	/*
	 * Changes the maximum amount of time that HTTP Fetcher will wait on
	 *	data.  If this many seconds elapse without more data arriving from
	 *	the server, http_fetch will return with an error.
	 * If you pass a value less than 0, reads will not time out, potentially
	 *	waiting forever (or until data shows up, whichever comes first).
	 * NOTE(review): the initial timeout is presumably DEFAULT_READ_TIMEOUT
	 *	-- confirm against the implementation.
	 */
void http_setTimeout(int seconds);

	/*
	 * Changes the number of HTTP redirects HTTP Fetcher will automatically
	 *	follow.  If a request returns a status code of 3XX and contains
	 *	a "Location:" field, the library will transparently follow up to
	 *	the specified number of redirects.  With this implementation
	 *	(which is just a stopgap, really) the caller won't be aware of any
	 *	redirection and will assume the returned document came from the
	 *	original URL.
	 * To disable redirects, pass 0.  To follow unlimited redirects (probably
	 *	unwise), pass a negative value.  The default is to follow 3
	 *	redirects.
	 */
void http_setRedirects(int redirects);

	/*
	 * Takes a url and puts the filename portion of it into 'filename'.
	 * Returns:
	 *	0 on success, or
	 *	1 when url contains no end filename (e.g., "www.foo.com/"), in
	 *		which case *filename should not be assumed to point to
	 *		anything, or
	 *	-1 on error
	 */
int http_parseFilename(const char *url, char **filename);

	/*
	 * Works like perror.  If an HTTP Fetcher function ever returns an
	 *	error (-1), this will print a descriptive message to standard output.
	 *	NOTE(review): perror(3) itself writes to standard error; confirm
	 *	which stream the implementation actually uses.
	 */
void http_perror(const char *string);

	/*
	 * Reports whether the last request timed out.
	 *	Declared with (void) so that this is a true prototype and calls
	 *	passing arguments are rejected by the compiler.
	 * Returns:
	 *	1 (true) if the last request timed out, or
	 *	0 (false) otherwise
	 */
int http_getTimeoutError(void);

	/*
	 * Returns a pointer to the current error description message.  The
	 *	message pointed to is only good until the next call to http_strerror(),
	 *	so if you need to hold on to the message for a while you should make
	 *	a copy of it.
	 *	Declared with (void) so that this is a true prototype and calls
	 *	passing arguments are rejected by the compiler.
	 */
const char *http_strerror(void);



/******************************************************************************/
/**** The following functions are used INTERNALLY by http_fetcher *************/
/******************************************************************************/

	/*
	 * Reads the metadata (header) of an HTTP response from 'sock' into
	 *	the buffer pointed to by 'headerPtr'.
	 *	NOTE(review): the required size of 'headerPtr' is not stated here;
	 *	presumably HEADER_BUF_SIZE bytes -- confirm against the
	 *	implementation.
	 * Returns:
	 *	# of bytes read on success, or
	 *	-1 on error
	 */
int _http_read_header(int sock, char *headerPtr);

	/*
	 * Opens a TCP socket to 'host' and returns the descriptor.
	 * Returns:
	 *	socket descriptor, or
	 *	-1 on error
	 */
int makeSocket(const char *host);

	/*
	 * Handles error conditions with creating sockets.
	 *	NOTE(review): the parameters and return value are not documented
	 *	in this header; presumably 'sock' is the socket that failed and
	 *	the other two arguments record the error source and error code
	 *	-- confirm against the implementation.
	 */
int _makeSocketErr(int sock, int this_errorSource, int this_http_errno);

	/*
	 * Determines if the given NUL-terminated buffer is large enough to
	 *	have 'more' additional characters concatenated onto it.  If not,
	 *	it attempts to grow the buffer to fit.
	 * Returns:
	 *	0 on success, or
	 *	-1 on error (original buffer is unchanged).
	 */
int _checkBufSize(char **buf, int *bufsize, int more);

#endif