path: root/src/network_writev.c
author     Arno Töll <arno@debian.org>   2012-11-21 23:03:34 +0100
committer  Arno Töll <arno@debian.org>   2012-11-21 23:03:34 +0100
commit     eb45c46b906e492f063f1469486190e93ff340ff (patch)
tree       85d615969fa7bf8056a05b59006f77bc63e85892 /src/network_writev.c
parent     6426b37107707a1d95ffd03f68620cbda8bdb942 (diff)
download   lighttpd-eb45c46b906e492f063f1469486190e93ff340ff.tar.gz

Imported Upstream version 1.4.10 (upstream/1.4.10)
Diffstat (limited to 'src/network_writev.c')
-rw-r--r--  src/network_writev.c  343
1 file changed, 343 insertions, 0 deletions
diff --git a/src/network_writev.c b/src/network_writev.c
new file mode 100644
index 0000000..578048e
--- /dev/null
+++ b/src/network_writev.c
@@ -0,0 +1,343 @@
+#include "network_backends.h"
+
+#ifdef USE_WRITEV
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <netdb.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "network.h"
+#include "fdevent.h"
+#include "log.h"
+#include "stat_cache.h"
+
+#ifndef UIO_MAXIOV
+# if defined(__FreeBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+/* FreeBSD 4.7 defines it in sys/uio.h only if _KERNEL is specified */
+# define UIO_MAXIOV 1024
+# elif defined(__sgi)
+/* IRIX 6.5 has sysconf(_SC_IOV_MAX) which might return 512 or bigger */
+# define UIO_MAXIOV 512
+# elif defined(__sun)
+/* Solaris (and SunOS?) defines IOV_MAX instead */
+# ifndef IOV_MAX
+# define UIO_MAXIOV 16
+# else
+# define UIO_MAXIOV IOV_MAX
+# endif
+# elif defined(IOV_MAX)
+# define UIO_MAXIOV IOV_MAX
+# else
+# error neither UIO_MAXIOV nor IOV_MAX is defined
+# endif
+#endif
+
+#if 0
+#define LOCAL_BUFFERING 1
+#endif
+
+int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq) {
+ chunk *c;
+ size_t chunks_written = 0;
+
+ for(c = cq->first; c; c = c->next) {
+ int chunk_finished = 0;
+
+ switch(c->type) {
+ case MEM_CHUNK: {
+ char * offset;
+ size_t toSend;
+ ssize_t r;
+
+ size_t num_chunks, i;
+ struct iovec chunks[UIO_MAXIOV];
+ chunk *tc;
+ size_t num_bytes = 0;
+
+ /* we can't send more than SSIZE_MAX bytes in one writev() call */
+
+ /* build writev list
+ *
+ * 1. limit: num_chunks < UIO_MAXIOV
+ * 2. limit: num_bytes < SSIZE_MAX
+ */
+ for(num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < UIO_MAXIOV; num_chunks++, tc = tc->next);
+
+ for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) {
+ if (tc->mem->used == 0) {
+ chunks[i].iov_base = tc->mem->ptr;
+ chunks[i].iov_len = 0;
+ } else {
+ offset = tc->mem->ptr + tc->offset;
+ toSend = tc->mem->used - 1 - tc->offset;
+
+ chunks[i].iov_base = offset;
+
+ /* protect the return value of writev() */
+ if (toSend > SSIZE_MAX ||
+ num_bytes + toSend > SSIZE_MAX) {
+ chunks[i].iov_len = SSIZE_MAX - num_bytes;
+
+ num_chunks = i + 1;
+ break;
+ } else {
+ chunks[i].iov_len = toSend;
+ }
+
+ num_bytes += toSend;
+ }
+ }
+
+ if ((r = writev(fd, chunks, num_chunks)) < 0) {
+ switch (errno) {
+ case EAGAIN:
+ case EINTR:
+ r = 0;
+ break;
+ case EPIPE:
+ case ECONNRESET:
+ return -2;
+ default:
+ log_error_write(srv, __FILE__, __LINE__, "ssd",
+ "writev failed:", strerror(errno), fd);
+
+ return -1;
+ }
+ }
+
+ cq->bytes_out += r;
+
+ /* check which chunks have been written */
+
+ for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) {
+ if (r >= (ssize_t)chunks[i].iov_len) {
+ /* written */
+ r -= chunks[i].iov_len;
+ tc->offset += chunks[i].iov_len;
+
+ if (chunk_finished) {
+ /* an earlier chunk in this batch already finished; advance past this one here */
+ chunks_written++;
+ c = c->next;
+ } else {
+ /* chunks_written++ and c = c->next for this chunk are done by the outer loop */
+ chunk_finished++;
+ }
+ } else {
+ /* partially written */
+
+ tc->offset += r;
+ chunk_finished = 0;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ case FILE_CHUNK: {
+ ssize_t r;
+ off_t abs_offset;
+ off_t toSend;
+ stat_cache_entry *sce = NULL;
+
+#define KByte * 1024
+#define MByte * 1024 KByte
+#define GByte * 1024 MByte
+ const off_t we_want_to_mmap = 512 KByte;
+ char *start = NULL;
+
+ if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) {
+ log_error_write(srv, __FILE__, __LINE__, "sb",
+ strerror(errno), c->file.name);
+ return -1;
+ }
+
+ abs_offset = c->file.start + c->offset;
+
+ if (abs_offset > sce->st.st_size) {
+ log_error_write(srv, __FILE__, __LINE__, "sb",
+ "file was shrunk:", c->file.name);
+
+ return -1;
+ }
+
+ /* mmap the buffer
+ * - first mmap
+ * - new mmap as we are at the end of the last one */
+ if (c->file.mmap.start == MAP_FAILED ||
+ abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) {
+
+ /* Optimizations for the future:
+ *
+ * adaptive mem-mapping
+ * the problem:
+ * we mmap() the whole file. If someone serves a lot of large files on a
+ * 32-bit machine, the virtual address space will run out and we will get a
+ * failing mmap() call.
+ * solution:
+ * only mmap 16M in one chunk and move the window as soon as we have finished
+ * the first 8M
+ *
+ * read-ahead buffering
+ * the problem:
+ * sending out several large files in parallel thrashes the kernel's
+ * read-ahead, leading to long wait-for-seek times.
+ * solutions: (increasing complexity)
+ * 1. use madvise
+ * 2. use an internal read-ahead buffer in the chunk-structure
+ * 3. use non-blocking IO for file-transfers
+ * */
+
+ /* all mmap()ed areas are 512 KiB except the last one, which might be smaller */
+ off_t we_want_to_send;
+ size_t to_mmap;
+
+ /* this is a remap, move the mmap-offset */
+ if (c->file.mmap.start != MAP_FAILED) {
+ munmap(c->file.mmap.start, c->file.mmap.length);
+ c->file.mmap.offset += we_want_to_mmap;
+ } else {
+ /* if the requested range starts after the first mmap() window, skip ahead to the window that contains it */
+ c->file.mmap.offset = 0;
+
+ while (c->file.mmap.offset + we_want_to_mmap < c->file.start) {
+ c->file.mmap.offset += we_want_to_mmap;
+ }
+ }
+
+ /* file.length and c->offset are relative values; assume for now the send is not limited by the mmap window (corrected below if it is) */
+ we_want_to_send = c->file.length - c->offset;
+ to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset;
+
+ /* we have more to send than we can mmap() at once */
+ if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) {
+ we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset;
+ to_mmap = we_want_to_mmap;
+ }
+
+ if (-1 == c->file.fd) { /* open the file if not already open */
+ if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) {
+ log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno));
+
+ return -1;
+ }
+#ifdef FD_CLOEXEC
+ fcntl(c->file.fd, F_SETFD, FD_CLOEXEC);
+#endif
+ }
+
+ if (MAP_FAILED == (c->file.mmap.start = mmap(0, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) {
+ /* close it here, otherwise we'd have to set FD_CLOEXEC */
+
+ log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:",
+ strerror(errno), c->file.name, c->file.fd);
+
+ return -1;
+ }
+
+ c->file.mmap.length = to_mmap;
+#ifdef LOCAL_BUFFERING
+ buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length);
+#else
+#ifdef HAVE_POSIX_MADVISE
+ /* don't advise files < 64Kb */
+ if (c->file.mmap.length > (64 KByte)) {
+ /* darwin 7 returns EINVAL all the time and I don't know how to
+ * detect this at runtime.
+ *
+ * ignore the return value for now */
+ posix_madvise(c->file.mmap.start, c->file.mmap.length, POSIX_MADV_WILLNEED);
+ }
+#endif
+#endif
+
+ /* chunk_reset() or chunk_free() will cleanup for us */
+ }
+
+ /* to_send = abs_mmap_end - abs_offset */
+ toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset);
+
+ if (toSend < 0) {
+ log_error_write(srv, __FILE__, __LINE__, "soooo",
+ "toSend is negative:",
+ toSend,
+ c->file.mmap.length,
+ abs_offset,
+ c->file.mmap.offset);
+ /* this must not happen: bail out rather than pass a negative count to write() */
+ return -1;
+ }
+
+#ifdef LOCAL_BUFFERING
+ start = c->mem->ptr;
+#else
+ start = c->file.mmap.start;
+#endif
+
+ if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) {
+ switch (errno) {
+ case EAGAIN:
+ case EINTR:
+ r = 0;
+ break;
+ case EPIPE:
+ case ECONNRESET:
+ return -2;
+ default:
+ log_error_write(srv, __FILE__, __LINE__, "ssd",
+ "write failed:", strerror(errno), fd);
+
+ return -1;
+ }
+ }
+
+ c->offset += r;
+ cq->bytes_out += r;
+
+ if (c->offset == c->file.length) {
+ chunk_finished = 1;
+
+ /* we don't need the mapping anymore */
+ if (c->file.mmap.start != MAP_FAILED) {
+ munmap(c->file.mmap.start, c->file.mmap.length);
+ c->file.mmap.start = MAP_FAILED;
+ }
+ }
+
+ break;
+ }
+ default:
+
+ log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known");
+
+ return -1;
+ }
+
+ if (!chunk_finished) {
+ /* not finished yet */
+
+ break;
+ }
+
+ chunks_written++;
+ }
+
+ return chunks_written;
+}
+
+#endif
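
The MEM_CHUNK branch above boils down to a standard scatter-gather write: collect the pending buffers into a struct iovec array, cap the entry count, call writev() once, then walk the array again to see which buffers were written completely and which only partially. The standalone sketch below shows that pattern in isolation; it is not lighttpd code, the write_buffers() helper and the sample buffers are made up for illustration, and stdout stands in for the client socket.

#include <sys/uio.h>
#include <limits.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef IOV_MAX
#define IOV_MAX 1024  /* conservative fallback, mirroring the UIO_MAXIOV dance in the file above */
#endif

/* hypothetical helper (not part of lighttpd): send an array of buffers with a
 * single writev() call and record how much of each buffer is still unsent */
static ssize_t write_buffers(int fd, char **bufs, size_t *lens, size_t n) {
	struct iovec iov[IOV_MAX];
	size_t i, cnt = (n < IOV_MAX) ? n : IOV_MAX;  /* limit: iovec count */

	for (i = 0; i < cnt; i++) {
		iov[i].iov_base = bufs[i];
		iov[i].iov_len  = lens[i];
	}

	ssize_t r = writev(fd, iov, cnt);
	if (r < 0) {
		if (errno == EAGAIN || errno == EINTR) return 0;  /* retry later */
		return -1;
	}

	/* walk the buffers again to see how far the single syscall got */
	ssize_t left = r;
	for (i = 0; i < cnt; i++) {
		if (left >= (ssize_t)lens[i]) {
			left   -= (ssize_t)lens[i];
			lens[i] = 0;                /* fully written */
		} else {
			bufs[i] += left;            /* partially written: advance the offset */
			lens[i] -= (size_t)left;
			break;
		}
	}
	return r;
}

int main(void) {
	char a[] = "HTTP/1.0 200 OK\r\n";
	char b[] = "Content-Length: 2\r\n\r\n";
	char c[] = "ok";
	char  *bufs[] = { a, b, c };
	size_t lens[] = { strlen(a), strlen(b), strlen(c) };

	/* stdout stands in for the socket, just to keep the sketch runnable */
	ssize_t r = write_buffers(STDOUT_FILENO, bufs, lens, 3);
	fprintf(stderr, "writev sent %zd bytes\n", r);
	return 0;
}

The same short-write accounting is what the backend does when it updates tc->offset and decides whether a chunk is finished.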
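The FILE_CHUNK branch works on a sliding 512 KiB mmap() window with an optional posix_madvise() read-ahead hint. The sketch below is a minimal, self-contained take on the same idea, not lighttpd's code: it assumes posix_madvise() is available, streams a file to stdout, and leaves out the partial-write and stat-cache bookkeeping the real backend needs.

#define _FILE_OFFSET_BITS 64  /* so off_t copes with files > 2 GiB on 32-bit */

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* stream a file to stdout through a sliding 512 KiB mmap() window;
 * roughly the shape of the FILE_CHUNK branch, with the bookkeeping trimmed */
int main(int argc, char **argv) {
	if (argc != 2) { fprintf(stderr, "usage: %s <file>\n", argv[0]); return 1; }

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	struct stat st;
	if (fstat(fd, &st) < 0) { perror("fstat"); return 1; }

	const off_t window = 512 * 1024;  /* keep this a multiple of the page size */
	for (off_t off = 0; off < st.st_size; off += window) {
		size_t len = (size_t)(st.st_size - off < window ? st.st_size - off : window);

		char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
		if (MAP_FAILED == p) { perror("mmap"); return 1; }

		/* hint the kernel to read the window ahead; the return value is
		 * ignored for the same reason the backend above ignores it */
		posix_madvise(p, len, POSIX_MADV_WILLNEED);

		/* a real server would loop here and cope with short or interrupted writes */
		if (write(STDOUT_FILENO, p, len) < 0) { perror("write"); return 1; }

		munmap(p, len);
	}
	close(fd);
	return 0;
}

Keeping the window a multiple of the page size matters because mmap() requires a page-aligned offset; the backend gets this for free by always stepping the mmap offset in 512 KiB increments.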