summaryrefslogtreecommitdiff
path: root/archivers/libarchive/files/libarchive/archive_read_open_filename.c
diff options
context:
space:
mode:
Diffstat (limited to 'archivers/libarchive/files/libarchive/archive_read_open_filename.c')
-rw-r--r--archivers/libarchive/files/libarchive/archive_read_open_filename.c508
1 files changed, 406 insertions, 102 deletions
diff --git a/archivers/libarchive/files/libarchive/archive_read_open_filename.c b/archivers/libarchive/files/libarchive/archive_read_open_filename.c
index 74f3e60c43e..5611aa85aa4 100644
--- a/archivers/libarchive/files/libarchive/archive_read_open_filename.c
+++ b/archivers/libarchive/files/libarchive/archive_read_open_filename.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003-2007 Tim Kientzle
+ * Copyright (c) 2003-2010 Tim Kientzle
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,6 +26,9 @@
#include "archive_platform.h"
__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $");
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
@@ -47,29 +50,47 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <sys/disk.h>
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+#include <sys/disklabel.h>
+#include <sys/dkio.h>
+#elif defined(__DragonFly__)
+#include <sys/diskslice.h>
+#endif
#include "archive.h"
+#include "archive_private.h"
+#include "archive_string.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
+#ifndef O_CLOEXEC
+#define O_CLOEXEC 0
+#endif
+/* Per-input state handed to the file_* callbacks as client_data. */
struct read_file_data {
int fd;
size_t block_size;
void *buffer;
mode_t st_mode; /* Mode bits for opened file. */
- char can_skip; /* This file supports skipping. */
- char filename[1]; /* Must be last! */
+ char use_lseek; /* Nonzero: file_skip() delegates to file_skip_lseek(). */
+ enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type; /* Selects stdin, filename.m or filename.w. */
+ union {
+ char m[1];/* MBS filename. */
+ wchar_t w[1];/* WCS filename. */
+ } filename; /* Must be last! */ /* Allocations add the name length on top of sizeof(struct). */
};
+static int file_open(struct archive *, void *);
static int file_close(struct archive *, void *);
+static int file_close2(struct archive *, void *);
+static int file_switch(struct archive *, void *, void *);
static ssize_t file_read(struct archive *, void *, const void **buff);
-#if ARCHIVE_API_VERSION < 2
-static ssize_t file_skip(struct archive *, void *, size_t request);
-#else
-static off_t file_skip(struct archive *, void *, off_t request);
-#endif
+static int64_t file_seek(struct archive *, void *, int64_t request, int);
+static int64_t file_skip(struct archive *, void *, int64_t request);
+static int64_t file_skip_lseek(struct archive *, void *, int64_t request);
int
archive_read_open_file(struct archive *a, const char *filename,
@@ -82,14 +103,139 @@ int
archive_read_open_filename(struct archive *a, const char *filename,
size_t block_size)
{
- struct stat st;
+ /* Wrap the single name in a NULL-terminated list and delegate. */
+ const char *filenames[2];
+ filenames[0] = filename;
+ filenames[1] = NULL;
+ return archive_read_open_filenames(a, filenames, block_size);
+}
+
+/*
+ * Register one read_file_data record per entry of the NULL-terminated
+ * 'filenames' list, install the file_* callbacks and open the archive.
+ *
+ * A NULL list, or a NULL/empty first entry, selects stdin.  The list is
+ * also considered finished at the first empty name after the first entry.
+ *
+ * Returns the result of archive_read_open1(), or ARCHIVE_FATAL if a
+ * record cannot be allocated or registered.
+ */
+int
+archive_read_open_filenames(struct archive *a, const char **filenames,
+    size_t block_size)
+{
+	struct read_file_data *mine;
+	const char *filename = NULL;
+	if (filenames)
+		filename = *(filenames++);
+
+	archive_clear_error(a);
+	do
+	{
+		/* Normalize NULL to "" so the name can be copied and tested. */
+		if (filename == NULL)
+			filename = "";
+		/* The struct already holds one char, which covers the NUL. */
+		mine = (struct read_file_data *)calloc(1,
+		    sizeof(*mine) + strlen(filename));
+		if (mine == NULL)
+			goto no_memory;
+		strcpy(mine->filename.m, filename);
+		mine->block_size = block_size;
+		mine->fd = -1;
+		mine->buffer = NULL;
+		mine->st_mode = mine->use_lseek = 0;
+		if (filename[0] == '\0')
+			mine->filename_type = FNT_STDIN;
+		else
+			mine->filename_type = FNT_MBS;
+		if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) {
+			/* The record was never handed over, so release it here. */
+			free(mine);
+			return (ARCHIVE_FATAL);
+		}
+		if (filenames == NULL)
+			break;
+		filename = *(filenames++);
+	} while (filename != NULL && filename[0] != '\0');
+	archive_read_set_open_callback(a, file_open);
+	archive_read_set_read_callback(a, file_read);
+	archive_read_set_skip_callback(a, file_skip);
+	archive_read_set_close_callback(a, file_close);
+	archive_read_set_switch_callback(a, file_switch);
+	archive_read_set_seek_callback(a, file_seek);
+
+	return (archive_read_open1(a));
+no_memory:
+	archive_set_error(a, ENOMEM, "No memory");
+	return (ARCHIVE_FATAL);
+}
+
+/*
+ * Wide-character variant of archive_read_open_filename().
+ *
+ * A NULL or empty 'wfilename' selects stdin.  On native Windows the wide
+ * name is stored as-is; elsewhere it is converted to a multi-byte name
+ * first, because open() has no wchar_t interface there.
+ *
+ * Returns the result of archive_read_open1(), or ARCHIVE_FATAL if the
+ * name cannot be converted or the record cannot be allocated/registered.
+ */
+int
+archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
+    size_t block_size)
+{
+	struct read_file_data *mine;
+	size_t wlen, extra;
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	struct archive_string fn;
+#endif
+
+	/* Measure the name only when there is one; NULL means stdin. */
+	wlen = (wfilename == NULL) ? 0 : wcslen(wfilename);
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	extra = wlen * sizeof(wchar_t);
+#else
+	/*
+	 * POSIX system does not support a wchar_t interface for
+	 * open() system call, so we have to translate a wchar_t
+	 * filename to multi-byte one and use it.  Convert before
+	 * allocating so the record is sized for the converted name.
+	 */
+	archive_string_init(&fn);
+	if (wlen > 0 &&
+	    archive_string_append_from_wcs(&fn, wfilename, wlen) != 0) {
+		if (errno == ENOMEM)
+			archive_set_error(a, errno,
+			    "Can't allocate memory");
+		else
+			archive_set_error(a, EINVAL,
+			    "Failed to convert a wide-character"
+			    " filename to a multi-byte filename");
+		archive_string_free(&fn);
+		return (ARCHIVE_FATAL);
+	}
+	extra = (wlen > 0) ? strlen(fn.s) : 0;
+#endif
+
+	/* The struct already holds room for the terminator. */
+	mine = (struct read_file_data *)calloc(1, sizeof(*mine) + extra);
+	if (!mine)
+	{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+		archive_string_free(&fn);
+#endif
+		archive_set_error(a, ENOMEM, "No memory");
+		return (ARCHIVE_FATAL);
+	}
+	mine->fd = -1;
+	mine->block_size = block_size;
+
+	if (wlen == 0) {
+		mine->filename_type = FNT_STDIN;
+	} else {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+		mine->filename_type = FNT_WCS;
+		wcscpy(mine->filename.w, wfilename);
+#else
+		mine->filename_type = FNT_MBS;
+		strcpy(mine->filename.m, fn.s);
+#endif
+	}
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	archive_string_free(&fn);
+#endif
+	if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) {
+		/* The record was never handed over, so release it here. */
+		free(mine);
+		return (ARCHIVE_FATAL);
+	}
+	archive_read_set_open_callback(a, file_open);
+	archive_read_set_read_callback(a, file_read);
+	archive_read_set_skip_callback(a, file_skip);
+	archive_read_set_close_callback(a, file_close);
+	archive_read_set_switch_callback(a, file_switch);
+	archive_read_set_seek_callback(a, file_seek);
+
+	return (archive_read_open1(a));
+}
+
+/*
+ * Open callback: open the input described by 'client_data' (stdin, a
+ * multi-byte name, or on native Windows a wide name), decide whether it
+ * is disk-like, and allocate the read buffer.
+ *
+ * Returns ARCHIVE_OK on success.  On failure an error is recorded on 'a',
+ * any descriptor opened here is closed again, and ARCHIVE_FATAL is
+ * returned; the record itself is left for file_close() to free.
+ */
+static int
+file_open(struct archive *a, void *client_data)
+{
+	struct stat st;
+	struct read_file_data *mine = (struct read_file_data *)client_data;
+	void *buffer;
+	const char *filename = NULL;
+	const wchar_t *wfilename = NULL;
 	int fd;
+	int is_disk_like = 0;
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+	off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+	struct disklabel dl;
+#elif defined(__DragonFly__)
+	struct partinfo pi;
+#endif
 	archive_clear_error(a);
-	if (filename == NULL || filename[0] == '\0') {
-		/* We used to invoke archive_read_open_fd(a,0,block_size)
+	if (mine->filename_type == FNT_STDIN) {
+		/* We used to delegate stdin support by
+		 * directly calling archive_read_open_fd(a,0,block_size)
 		 * here, but that doesn't (and shouldn't) handle the
 		 * end-of-file flush when reading stdout from a pipe.
 		 * Basically, read_open_fd() is intended for folks who
@@ -97,60 +243,135 @@ archive_read_open_filename(struct archive *a, const char *filename,
 		 * API is intended to be a little smarter for folks who
 		 * want easy handling of the common case.
 		 */
-		filename = ""; /* Normalize NULL to "" */
 		fd = 0;
 #if defined(__CYGWIN__) || defined(_WIN32)
 		setmode(0, O_BINARY);
 #endif
-	} else {
-		fd = open(filename, O_RDONLY | O_BINARY);
+		filename = "";
+	} else if (mine->filename_type == FNT_MBS) {
+		filename = mine->filename.m;
+		fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC);
+		__archive_ensure_cloexec_flag(fd);
 		if (fd < 0) {
 			archive_set_error(a, errno,
 			    "Failed to open '%s'", filename);
 			return (ARCHIVE_FATAL);
 		}
+	} else {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+		wfilename = mine->filename.w;
+		fd = _wopen(wfilename, O_RDONLY | O_BINARY);
+		if (fd < 0 && errno == ENOENT) {
+			wchar_t *fullpath;
+			fullpath = __la_win_permissive_name_w(wfilename);
+			if (fullpath != NULL) {
+				fd = _wopen(fullpath, O_RDONLY | O_BINARY);
+				free(fullpath);
+			}
+		}
+		if (fd < 0) {
+			archive_set_error(a, errno,
+			    "Failed to open '%S'", wfilename);
+			return (ARCHIVE_FATAL);
+		}
+#else
+		archive_set_error(a, ARCHIVE_ERRNO_MISC,
+		    "Unexpedted operation in archive_read_open_filename");
+		return (ARCHIVE_FATAL);
+#endif
 	}
 	if (fstat(fd, &st) != 0) {
-		archive_set_error(a, errno, "Can't stat '%s'", filename);
+		if (mine->filename_type == FNT_WCS)
+			archive_set_error(a, errno, "Can't stat '%S'",
+			    wfilename);
+		else
+			archive_set_error(a, errno, "Can't stat '%s'",
+			    filename);
+		/*
+		 * mine->fd has not been updated yet, so no later close
+		 * callback can release this descriptor.  Never close stdin.
+		 */
+		if (mine->filename_type != FNT_STDIN)
+			close(fd);
 		return (ARCHIVE_FATAL);
 	}
-	mine = (struct read_file_data *)calloc(1,
-	    sizeof(*mine) + strlen(filename));
-	b = malloc(block_size);
-	if (mine == NULL || b == NULL) {
+	/*
+	 * Determine whether the input looks like a disk device or a
+	 * tape device.  The results are used below to select an I/O
+	 * strategy:
+	 *  = "disk-like" devices support arbitrary lseek() and will
+	 *    support I/O requests of any size.  So we get easy skipping
+	 *    and can cheat on block sizes to get better performance.
+	 *  = "tape-like" devices require strict blocking and use
+	 *    specialized ioctls for seeking.
+	 *  = "socket-like" devices cannot seek at all but can improve
+	 *    performance by using nonblocking I/O to read "whatever is
+	 *    available right now".
+	 *
+	 * Right now, we only specially recognize disk-like devices,
+	 * but it should be straightforward to add probes and strategy
+	 * here for tape-like and socket-like devices.
+	 */
+	if (S_ISREG(st.st_mode)) {
+		/* Safety:  Tell the extractor not to overwrite the input. */
+		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
+		/* Regular files act like disks. */
+		is_disk_like = 1;
+	}
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+	/* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
+	else if (S_ISCHR(st.st_mode) &&
+	    ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
+	    mediasize > 0) {
+		is_disk_like = 1;
+	}
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+	/* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
+	else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
+	    ioctl(fd, DIOCGDINFO, &dl) == 0 &&
+	    dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
+		is_disk_like = 1;
+	}
+#elif defined(__DragonFly__)
+	/* DragonFly BSD:  if it supports DIOCGPART ioctl, it's disk-like. */
+	else if (S_ISCHR(st.st_mode) &&
+	    ioctl(fd, DIOCGPART, &pi) == 0 &&
+	    pi.media_size > 0) {
+		is_disk_like = 1;
+	}
+#elif defined(__linux__)
+	/* Linux:  All block devices are disk-like. */
+	else if (S_ISBLK(st.st_mode) &&
+	    lseek(fd, 0, SEEK_CUR) == 0 &&
+	    lseek(fd, 0, SEEK_SET) == 0 &&
+	    lseek(fd, 0, SEEK_END) > 0 &&
+	    lseek(fd, 0, SEEK_SET) == 0) {
+		is_disk_like = 1;
+	}
+#endif
+	/* TODO: Add an "is_tape_like" variable and appropriate tests. */
+
+	/* Disk-like devices prefer power-of-two block sizes.  */
+	/* Use provided block_size as a guide so users have some control. */
+	if (is_disk_like) {
+		size_t new_block_size = 64 * 1024;
+		while (new_block_size < mine->block_size
+		    && new_block_size < 64 * 1024 * 1024)
+			new_block_size *= 2;
+		mine->block_size = new_block_size;
+	}
+	buffer = malloc(mine->block_size);
+	if (buffer == NULL) {
 		archive_set_error(a, ENOMEM, "No memory");
-		free(mine);
-		free(b);
+		/*
+		 * Do not free 'mine' here: file_close() frees the record,
+		 * and the other failure paths above leave it allocated too.
+		 * The descriptor, however, is not stored anywhere yet.
+		 */
+		if (mine->filename_type != FNT_STDIN)
+			close(fd);
 		return (ARCHIVE_FATAL);
 	}
-	strcpy(mine->filename, filename);
-	mine->block_size = block_size;
-	mine->buffer = b;
+	mine->buffer = buffer;
 	mine->fd = fd;
 	/* Remember mode so close can decide whether to flush. */
 	mine->st_mode = st.st_mode;
-	/* If we're reading a file from disk, ensure that we don't
-	   overwrite it with an extracted file. */
-	if (S_ISREG(st.st_mode)) {
-		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
-		/*
-		 * Enabling skip here is a performance optimization
-		 * for anything that supports lseek().  On FreeBSD
-		 * (and probably many other systems), only regular
-		 * files and raw disk devices support lseek() (on
-		 * other input types, lseek() returns success but
-		 * doesn't actually change the file pointer, which
-		 * just completely screws up the position-tracking
-		 * logic).  In addition, I've yet to find a portable
-		 * way to determine if a device is a raw disk device.
-		 * So I don't see a way to do much better than to only
-		 * enable this optimization for regular files.
-		 */
-		mine->can_skip = 1;
-	}
-	return (archive_read_open2(a, mine,
-	    NULL, file_read, file_skip, file_close));
+
+	/* Disk-like inputs can use lseek(). */
+	if (is_disk_like)
+		mine->use_lseek = 1;
+
+	return (ARCHIVE_OK);
 }
static ssize_t
@@ -159,83 +380,148 @@ file_read(struct archive *a, void *client_data, const void **buff)
struct read_file_data *mine = (struct read_file_data *)client_data;
ssize_t bytes_read;
+ /* TODO: If a recent lseek() operation has left us
+ * mis-aligned, read and return a short block to try to get
+ * us back in alignment. */
+
+ /* TODO: Someday, try mmap() here; if that succeeds, give
+ * the entire file to libarchive as a single block. That
+ * could be a lot faster than block-by-block manual I/O. */
+
+ /* TODO: We might be able to improve performance on pipes and
+ * sockets by setting non-blocking I/O and just accepting
+ * whatever we get here instead of waiting for a full block
+ * worth of data. */
+
*buff = mine->buffer;
for (;;) {
bytes_read = read(mine->fd, mine->buffer, mine->block_size);
if (bytes_read < 0) {
if (errno == EINTR)
continue;
- else if (mine->filename[0] == '\0')
- archive_set_error(a, errno, "Error reading stdin");
+ else if (mine->filename_type == FNT_STDIN)
+ archive_set_error(a, errno,
+ "Error reading stdin");
+ else if (mine->filename_type == FNT_MBS)
+ archive_set_error(a, errno,
+ "Error reading '%s'", mine->filename.m);
else
- archive_set_error(a, errno, "Error reading '%s'",
- mine->filename);
+ archive_set_error(a, errno,
+ "Error reading '%S'", mine->filename.w);
}
return (bytes_read);
}
}
-#if ARCHIVE_API_VERSION < 2
-static ssize_t
-file_skip(struct archive *a, void *client_data, size_t request)
-#else
-static off_t
-file_skip(struct archive *a, void *client_data, off_t request)
-#endif
+/*
+ * Regular files and disk-like block devices can use simple lseek
+ * without needing to round the request to the block size.
+ *
+ * TODO: This can leave future reads mis-aligned. Since we know the
+ * offset here, we should store it and use it in file_read() above
+ * to determine whether we should perform a short read to get back
+ * into alignment. Long series of mis-aligned reads can negatively
+ * impact disk throughput. (Of course, the performance impact should
+ * be carefully tested; extra code complexity is only worthwhile if
+ * it does provide measurable improvement.)
+ *
+ * TODO: Be lazy about the actual seek. There are a few pathological
+ * cases where libarchive makes a bunch of seek requests in a row
+ * without any intervening reads. This isn't a huge performance
+ * problem, since the kernel handles seeks lazily already, but
+ * it would be very slightly faster if we simply remembered the
+ * seek request here and then actually performed the seek at the
+ * top of the read callback above.
+ */
+static int64_t
+file_skip_lseek(struct archive *a, void *client_data, int64_t request)
{
struct read_file_data *mine = (struct read_file_data *)client_data;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+ /* We use _lseeki64() on Windows. */
+ int64_t old_offset, new_offset;
+#else
off_t old_offset, new_offset;
+#endif
+
+ /*
+ * Returns the distance actually moved on success, 0 when the
+ * descriptor cannot seek (ESPIPE), and -1 with an error recorded
+ * on 'a' for any other lseek() failure.
+ */
+
+ /* We use off_t here because lseek() is declared that way. */
+
+ /* TODO: Deal with case where off_t isn't 64 bits.
+ * This shouldn't be a problem on Linux or other POSIX
+ * systems, since the configuration logic for libarchive
+ * tries to obtain a 64-bit off_t.
+ */
+ /* The second lseek() only runs if the first one succeeded. */
+ if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
+ (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
+ return (new_offset - old_offset);
- if (!mine->can_skip) /* We can't skip, so ... */
- return (0); /* ... skip zero bytes. */
+ /* If lseek() fails, don't bother trying again. */
+ mine->use_lseek = 0;
- /* Reduce request to the next smallest multiple of block_size */
- request = (request / mine->block_size) * mine->block_size;
- if (request == 0)
+ /* Let libarchive recover with read+discard */
+ if (errno == ESPIPE)
return (0);
- /*
- * Hurray for lazy evaluation: if the first lseek fails, the second
- * one will not be executed.
- */
- if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
- ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0))
- {
- /* If skip failed once, it will probably fail again. */
- mine->can_skip = 0;
-
- if (errno == ESPIPE)
- {
- /*
- * Failure to lseek() can be caused by the file
- * descriptor pointing to a pipe, socket or FIFO.
- * Return 0 here, so the compression layer will use
- * read()s instead to advance the file descriptor.
- * It's slower of course, but works as well.
- */
- return (0);
- }
- /*
- * There's been an error other than ESPIPE. This is most
- * likely caused by a programmer error (too large request)
- * or a corrupted archive file.
- */
- if (mine->filename[0] == '\0')
- /*
- * Should never get here, since lseek() on stdin ought
- * to return an ESPIPE error.
- */
- archive_set_error(a, errno, "Error seeking in stdin");
- else
- archive_set_error(a, errno, "Error seeking in '%s'",
- mine->filename);
- return (-1);
- }
- return (new_offset - old_offset);
+ /* If the input is corrupted or truncated, fail. */
+ if (mine->filename_type == FNT_STDIN)
+ archive_set_error(a, errno, "Error seeking in stdin");
+ else if (mine->filename_type == FNT_MBS)
+ archive_set_error(a, errno, "Error seeking in '%s'",
+ mine->filename.m);
+ else
+ archive_set_error(a, errno, "Error seeking in '%S'",
+ mine->filename.w);
+ return (-1);
+}
+
+
+/*
+ * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
+ * accelerate operation on tape drives.
+ */
+
+/*
+ * Skip callback.  Inputs flagged for lseek() are handed to
+ * file_skip_lseek(); for everything else report that nothing was
+ * skipped, so libarchive falls back to read+discard.
+ */
+static int64_t
+file_skip(struct archive *a, void *client_data, int64_t request)
+{
+	struct read_file_data *state = (struct read_file_data *)client_data;
+
+	/* No lseek() support: skip zero bytes. */
+	if (!state->use_lseek)
+		return (0);
+
+	return (file_skip_lseek(a, client_data, request));
+}
+
+/*
+ * TODO: Store the offset and use it in the read callback.
+ */
+/*
+ * Seek callback: reposition the descriptor with lseek() and return the
+ * resulting offset.  On failure, record an error naming the input and
+ * return ARCHIVE_FATAL.
+ */
+static int64_t
+file_seek(struct archive *a, void *client_data, int64_t request, int whence)
+{
+	struct read_file_data *state = (struct read_file_data *)client_data;
+	int64_t pos;
+
+	/* lseek() is declared with off_t; see the notes in */
+	/* file_skip_lseek() about off_t narrower than 64 bits. */
+	pos = lseek(state->fd, request, whence);
+	if (pos < 0) {
+		/* If the input is corrupted or truncated, fail. */
+		switch (state->filename_type) {
+		case FNT_STDIN:
+			archive_set_error(a, errno, "Error seeking in stdin");
+			break;
+		case FNT_MBS:
+			archive_set_error(a, errno, "Error seeking in '%s'",
+			    state->filename.m);
+			break;
+		default:
+			archive_set_error(a, errno, "Error seeking in '%S'",
+			    state->filename.w);
+			break;
+		}
+		return (ARCHIVE_FATAL);
+	}
+	return pos;
+}
static int
-file_close(struct archive *a, void *client_data)
+file_close2(struct archive *a, void *client_data)
{
struct read_file_data *mine = (struct read_file_data *)client_data;
@@ -246,7 +532,8 @@ file_close(struct archive *a, void *client_data)
/*
* Sometimes, we should flush the input before closing.
* Regular files: faster to just close without flush.
- * Devices: must not flush (user might need to
+ * Disk-like devices: Ditto.
+ * Tapes: must not flush (user might need to
* read the "next" item on a non-rewind device).
* Pipes and sockets: must flush (otherwise, the
* program feeding the pipe or socket may complain).
@@ -263,10 +550,27 @@ file_close(struct archive *a, void *client_data)
} while (bytesRead > 0);
}
/* If a named file was opened, then it needs to be closed. */
- if (mine->filename[0] != '\0')
+ if (mine->filename_type != FNT_STDIN)
close(mine->fd);
}
free(mine->buffer);
+ mine->buffer = NULL;
+ mine->fd = -1;
+ return (ARCHIVE_OK);
+}
+
+static int
+file_close(struct archive *a, void *client_data)
+{
+ struct read_file_data *mine = (struct read_file_data *)client_data;
+ /* file_close2() releases the buffer and descriptor; the record itself is freed below. */
+ file_close2(a, client_data);
free(mine);
return (ARCHIVE_OK);
}
+
+/*
+ * Switch callback: shut down the input behind client_data1 (keeping its
+ * record allocated) and open the input behind client_data2.  The result
+ * of that open is the result of the switch.
+ */
+static int
+file_switch(struct archive *a, void *client_data1, void *client_data2)
+{
+	int status;
+
+	(void)file_close2(a, client_data1);
+	status = file_open(a, client_data2);
+	return status;
+}