diff options
Diffstat (limited to 'archivers/libarchive/files/libarchive/archive_read_support_format_zip.c')
-rw-r--r-- | archivers/libarchive/files/libarchive/archive_read_support_format_zip.c | 1062 |
1 files changed, 973 insertions, 89 deletions
diff --git a/archivers/libarchive/files/libarchive/archive_read_support_format_zip.c b/archivers/libarchive/files/libarchive/archive_read_support_format_zip.c index 18f0d04e5c4..ab21e222f5a 100644 --- a/archivers/libarchive/files/libarchive/archive_read_support_format_zip.c +++ b/archivers/libarchive/files/libarchive/archive_read_support_format_zip.c @@ -52,6 +52,12 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #ifdef HAVE_ZLIB_H #include <zlib.h> #endif +#ifdef HAVE_BZLIB_H +#include <bzlib.h> +#endif +#ifdef HAVE_LZMA_H +#include <lzma.h> +#endif #include "archive.h" #include "archive_digest_private.h" @@ -63,6 +69,7 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #include "archive_private.h" #include "archive_rb.h" #include "archive_read_private.h" +#include "archive_ppmd8_private.h" #ifndef HAVE_ZLIB_H #include "archive_crc32.h" @@ -165,13 +172,30 @@ struct zip { char decompress_init; char end_of_entry; -#ifdef HAVE_ZLIB_H unsigned char *uncompressed_buffer; size_t uncompressed_buffer_size; + +#ifdef HAVE_ZLIB_H z_stream stream; char stream_valid; #endif +#if HAVE_LZMA_H && HAVE_LIBLZMA + lzma_stream zipx_lzma_stream; + char zipx_lzma_valid; +#endif + +#ifdef HAVE_BZLIB_H + bz_stream bzstream; + char bzstream_valid; +#endif + + IByteIn zipx_ppmd_stream; + ssize_t zipx_ppmd_read_compressed; + CPpmd8 ppmd8; + char ppmd8_valid; + char ppmd8_stream_failed; + struct archive_string_conv *sconv; struct archive_string_conv *sconv_default; struct archive_string_conv *sconv_utf8; @@ -222,6 +246,33 @@ struct zip { /* Many systems define min or MIN, but not all. */ #define zipmin(a,b) ((a) < (b) ? (a) : (b)) +/* This function is used by Ppmd8_DecodeSymbol during decompression of Ppmd8 + * streams inside ZIP files. It has 2 purposes: one is to fetch the next + * compressed byte from the stream, second one is to increase the counter how + * many compressed bytes were read. */ +static Byte +ppmd_read(void* p) { + /* Get the handle to current decompression context. */ + struct archive_read *a = ((IByteIn*)p)->a; + struct zip *zip = (struct zip*) a->format->data; + ssize_t bytes_avail = 0; + + /* Fetch next byte. */ + const uint8_t* data = __archive_read_ahead(a, 1, &bytes_avail); + if(bytes_avail < 1) { + zip->ppmd8_stream_failed = 1; + return 0; + } + + __archive_read_consume(a, 1); + + /* Increment the counter. */ + ++zip->zipx_ppmd_read_compressed; + + /* Return the next compressed byte. */ + return data[0]; +} + /* ------------------------------------------------------------------------ */ /* @@ -372,6 +423,8 @@ static const struct { {17, "reserved"}, /* Reserved by PKWARE */ {18, "ibm-terse-new"}, /* File is compressed using IBM TERSE (new) */ {19, "ibm-lz777"},/* IBM LZ77 z Architecture (PFS) */ + {95, "xz"}, /* XZ compressed data */ + {96, "jpeg"}, /* JPEG compressed data */ {97, "wav-pack"}, /* WavPack compressed data */ {98, "ppmd-1"}, /* PPMd version I, Rev 1 */ {99, "aes"} /* WinZip AES encryption */ @@ -419,27 +472,49 @@ zip_time(const char *p) * triplets. id and size are 2 bytes each. */ static int -process_extra(struct archive_read *a, const char *p, size_t extra_length, struct zip_entry* zip_entry) +process_extra(struct archive_read *a, struct archive_entry *entry, + const char *p, size_t extra_length, struct zip_entry* zip_entry) { unsigned offset = 0; + struct zip *zip = (struct zip *)(a->format->data); if (extra_length == 0) { return ARCHIVE_OK; } if (extra_length < 4) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Too-small extra data: Need at least 4 bytes, but only found %d bytes", (int)extra_length); - return ARCHIVE_FAILED; + size_t i = 0; + /* Some ZIP files may have trailing 0 bytes. Let's check they + * are all 0 and ignore them instead of returning an error. + * + * This is not techincally correct, but some ZIP files look + * like this and other tools support those files - so let's + * also support them. + */ + for (; i < extra_length; i++) { + if (p[i] != 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Too-small extra data: " + "Need at least 4 bytes, " + "but only found %d bytes", + (int)extra_length); + return ARCHIVE_FAILED; + } + } + + return ARCHIVE_OK; } + while (offset <= extra_length - 4) { unsigned short headerid = archive_le16dec(p + offset); unsigned short datasize = archive_le16dec(p + offset + 2); offset += 4; if (offset + datasize > extra_length) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Extra data overflow: Need %d bytes but only found %d bytes", + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Extra data overflow: " + "Need %d bytes but only found %d bytes", (int)datasize, (int)(extra_length - offset)); return ARCHIVE_FAILED; } @@ -454,9 +529,12 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct if (zip_entry->uncompressed_size == 0xffffffff) { uint64_t t = 0; if (datasize < 8 - || (t = archive_le64dec(p + offset)) > INT64_MAX) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Malformed 64-bit uncompressed size"); + || (t = archive_le64dec(p + offset)) > + INT64_MAX) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Malformed 64-bit " + "uncompressed size"); return ARCHIVE_FAILED; } zip_entry->uncompressed_size = t; @@ -466,9 +544,12 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct if (zip_entry->compressed_size == 0xffffffff) { uint64_t t = 0; if (datasize < 8 - || (t = archive_le64dec(p + offset)) > INT64_MAX) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Malformed 64-bit compressed size"); + || (t = archive_le64dec(p + offset)) > + INT64_MAX) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Malformed 64-bit " + "compressed size"); return ARCHIVE_FAILED; } zip_entry->compressed_size = t; @@ -478,9 +559,12 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct if (zip_entry->local_header_offset == 0xffffffff) { uint64_t t = 0; if (datasize < 8 - || (t = archive_le64dec(p + offset)) > INT64_MAX) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Malformed 64-bit local header offset"); + || (t = archive_le64dec(p + offset)) > + INT64_MAX) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Malformed 64-bit " + "local header offset"); return ARCHIVE_FAILED; } zip_entry->local_header_offset = t; @@ -513,7 +597,8 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct /* Extended time field "UT". */ int flags; if (datasize == 0) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Incomplete extended time field"); return ARCHIVE_FAILED; } @@ -595,7 +680,8 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct * if bitmap & 1, 2 byte "version made by" * if bitmap & 2, 2 byte "internal file attributes" * if bitmap & 4, 4 byte "external file attributes" - * if bitmap & 8, 2 byte comment length + n byte comment + * if bitmap & 8, 2 byte comment length + n byte + * comment */ int bitmap, bitmap_last; @@ -646,13 +732,18 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct = external_attributes >> 16; } else if (zip_entry->system == 0) { // Interpret MSDOS directory bit - if (0x10 == (external_attributes & 0x10)) { - zip_entry->mode = AE_IFDIR | 0775; + if (0x10 == (external_attributes & + 0x10)) { + zip_entry->mode = + AE_IFDIR | 0775; } else { - zip_entry->mode = AE_IFREG | 0664; + zip_entry->mode = + AE_IFREG | 0664; } - if (0x01 == (external_attributes & 0x01)) { - // Read-only bit; strip write permissions + if (0x01 == (external_attributes & + 0x01)) { + /* Read-only bit; + * strip write permissions */ zip_entry->mode &= 0555; } } else { @@ -679,6 +770,59 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct } break; } + case 0x7075: + { + /* Info-ZIP Unicode Path Extra Field. */ + if (datasize < 5 || entry == NULL) + break; + offset += 5; + datasize -= 5; + + /* The path name in this field is always encoded + * in UTF-8. */ + if (zip->sconv_utf8 == NULL) { + zip->sconv_utf8 = + archive_string_conversion_from_charset( + &a->archive, "UTF-8", 1); + /* If the converter from UTF-8 is not + * available, then the path name from the main + * field will more likely be correct. */ + if (zip->sconv_utf8 == NULL) + break; + } + + /* Make sure the CRC32 of the filename matches. */ + if (!zip->ignore_crc32) { + const char *cp = archive_entry_pathname(entry); + if (cp) { + unsigned long file_crc = + zip->crc32func(0, cp, strlen(cp)); + unsigned long utf_crc = + archive_le32dec(p + offset - 4); + if (file_crc != utf_crc) { +#ifdef DEBUG + fprintf(stderr, + "CRC filename mismatch; " + "CDE is %lx, but UTF8 " + "is outdated with %lx\n", + file_crc, utf_crc); +#endif + break; + } + } + } + + if (archive_entry_copy_pathname_l(entry, + p + offset, datasize, zip->sconv_utf8) != 0) { + /* Ignore the error, and fallback to the path + * name from the main field. */ +#ifdef DEBUG + fprintf(stderr, "Failed to read the ZIP " + "0x7075 extra field path.\n"); +#endif + } + break; + } case 0x7855: /* Info-ZIP Unix Extra Field (type 2) "Ux". */ #ifdef DEBUG @@ -713,7 +857,8 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct } if (datasize >= (2 + uidsize + 3)) { /* get a gid size. */ - gidsize = 0xff & (int)p[offset+2+uidsize]; + gidsize = 0xff & + (int)p[offset+2+uidsize]; if (gidsize == 2) zip_entry->gid = archive_le16dec( @@ -730,7 +875,8 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct case 0x9901: /* WinZip AES extra data field. */ if (datasize < 6) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, "Incomplete AES field"); return ARCHIVE_FAILED; } @@ -750,12 +896,6 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct } offset += datasize; } - if (offset != extra_length) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Malformed extra data: Consumed %d bytes of %d bytes", - (int)offset, (int)extra_length); - return ARCHIVE_FAILED; - } return ARCHIVE_OK; } @@ -875,7 +1015,8 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } - if (ARCHIVE_OK != process_extra(a, h, extra_length, zip_entry)) { + if (ARCHIVE_OK != process_extra(a, entry, h, extra_length, + zip_entry)) { return ARCHIVE_FATAL; } __archive_read_consume(a, extra_length); @@ -892,8 +1033,8 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, zip_entry->mode |= 0664; } - /* Windows archivers sometimes use backslash as the directory separator. - Normalize to slash. */ + /* Windows archivers sometimes use backslash as the directory + * separator. Normalize to slash. */ if (zip_entry->system == 0 && (wp = archive_entry_pathname_w(entry)) != NULL) { if (wcschr(wp, L'/') == NULL && wcschr(wp, L'\\') != NULL) { @@ -1202,7 +1343,8 @@ zip_read_data_none(struct archive_read *a, const void **_buff, zip->entry->crc32 = archive_le32dec(p + 4); compressed = archive_le64dec(p + 8); uncompressed = archive_le64dec(p + 16); - if (compressed > INT64_MAX || uncompressed > INT64_MAX) { + if (compressed > INT64_MAX || uncompressed > + INT64_MAX) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Overflow of 64-bit file sizes"); @@ -1296,6 +1438,728 @@ zip_read_data_none(struct archive_read *a, const void **_buff, return (ARCHIVE_OK); } +static int +consume_optional_marker(struct archive_read *a, struct zip *zip) +{ + if (zip->end_of_entry && (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { + const char *p; + + if (NULL == (p = __archive_read_ahead(a, 24, NULL))) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP end-of-file record"); + return (ARCHIVE_FATAL); + } + /* Consume the optional PK\007\010 marker. */ + if (p[0] == 'P' && p[1] == 'K' && + p[2] == '\007' && p[3] == '\010') { + p += 4; + zip->unconsumed = 4; + } + if (zip->entry->flags & LA_USED_ZIP64) { + uint64_t compressed, uncompressed; + zip->entry->crc32 = archive_le32dec(p); + compressed = archive_le64dec(p + 4); + uncompressed = archive_le64dec(p + 12); + if (compressed > INT64_MAX || + uncompressed > INT64_MAX) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Overflow of 64-bit file sizes"); + return ARCHIVE_FAILED; + } + zip->entry->compressed_size = compressed; + zip->entry->uncompressed_size = uncompressed; + zip->unconsumed += 20; + } else { + zip->entry->crc32 = archive_le32dec(p); + zip->entry->compressed_size = archive_le32dec(p + 4); + zip->entry->uncompressed_size = archive_le32dec(p + 8); + zip->unconsumed += 12; + } + } + + return (ARCHIVE_OK); +} + +#if HAVE_LZMA_H && HAVE_LIBLZMA +static int +zipx_xz_init(struct archive_read *a, struct zip *zip) +{ + lzma_ret r; + + if(zip->zipx_lzma_valid) { + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + } + + memset(&zip->zipx_lzma_stream, 0, sizeof(zip->zipx_lzma_stream)); + r = lzma_stream_decoder(&zip->zipx_lzma_stream, UINT64_MAX, 0); + if (r != LZMA_OK) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "xz initialization failed(%d)", + r); + + return (ARCHIVE_FAILED); + } + + zip->zipx_lzma_valid = 1; + + free(zip->uncompressed_buffer); + + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + if (zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for xz decompression"); + return (ARCHIVE_FATAL); + } + + zip->decompress_init = 1; + return (ARCHIVE_OK); +} + +static int +zipx_lzma_alone_init(struct archive_read *a, struct zip *zip) +{ + lzma_ret r; + const uint8_t* p; + +#pragma pack(push) +#pragma pack(1) + struct _alone_header { + uint8_t bytes[5]; + uint64_t uncompressed_size; + } alone_header; +#pragma pack(pop) + + if(zip->zipx_lzma_valid) { + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + } + + /* To unpack ZIPX's "LZMA" (id 14) stream we can use standard liblzma + * that is a part of XZ Utils. The stream format stored inside ZIPX + * file is a modified "lzma alone" file format, that was used by the + * `lzma` utility which was later deprecated in favour of `xz` utility. * Since those formats are nearly the same, we can use a standard + * "lzma alone" decoder from XZ Utils. */ + + memset(&zip->zipx_lzma_stream, 0, sizeof(zip->zipx_lzma_stream)); + r = lzma_alone_decoder(&zip->zipx_lzma_stream, UINT64_MAX); + if (r != LZMA_OK) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "lzma initialization failed(%d)", r); + + return (ARCHIVE_FAILED); + } + + /* Flag the cleanup function that we want our lzma-related structures + * to be freed later. */ + zip->zipx_lzma_valid = 1; + + /* The "lzma alone" file format and the stream format inside ZIPx are + * almost the same. Here's an example of a structure of "lzma alone" + * format: + * + * $ cat /bin/ls | lzma | xxd | head -n 1 + * 00000000: 5d00 0080 00ff ffff ffff ffff ff00 2814 + * + * 5 bytes 8 bytes n bytes + * <lzma_params><uncompressed_size><data...> + * + * lzma_params is a 5-byte blob that has to be decoded to extract + * parameters of this LZMA stream. The uncompressed_size field is an + * uint64_t value that contains information about the size of the + * uncompressed file, or UINT64_MAX if this value is unknown. + * The <data...> part is the actual lzma-compressed data stream. + * + * Now here's the structure of the stream inside the ZIPX file: + * + * $ cat stream_inside_zipx | xxd | head -n 1 + * 00000000: 0914 0500 5d00 8000 0000 2814 .... .... + * + * 2byte 2byte 5 bytes n bytes + * <magic1><magic2><lzma_params><data...> + * + * This means that the ZIPX file contains an additional magic1 and + * magic2 headers, the lzma_params field contains the same parameter + * set as in the "lzma alone" format, and the <data...> field is the + * same as in the "lzma alone" format as well. Note that also the zipx + * format is missing the uncompressed_size field. + * + * So, in order to use the "lzma alone" decoder for the zipx lzma + * stream, we simply need to shuffle around some fields, prepare a new + * lzma alone header, feed it into lzma alone decoder so it will + * initialize itself properly, and then we can start feeding normal + * zipx lzma stream into the decoder. + */ + + /* Read magic1,magic2,lzma_params from the ZIPX stream. */ + if((p = __archive_read_ahead(a, 9, NULL)) == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated lzma data"); + return (ARCHIVE_FATAL); + } + + if(p[2] != 0x05 || p[3] != 0x00) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid lzma data"); + return (ARCHIVE_FATAL); + } + + /* Prepare an lzma alone header: copy the lzma_params blob into + * a proper place into the lzma alone header. */ + memcpy(&alone_header.bytes[0], p + 4, 5); + + /* Initialize the 'uncompressed size' field to unknown; we'll manually + * monitor how many bytes there are still to be uncompressed. */ + alone_header.uncompressed_size = UINT64_MAX; + + if(!zip->uncompressed_buffer) { + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + + if (zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for lzma decompression"); + return (ARCHIVE_FATAL); + } + } + + zip->zipx_lzma_stream.next_in = (void*) &alone_header; + zip->zipx_lzma_stream.avail_in = sizeof(alone_header); + zip->zipx_lzma_stream.total_in = 0; + zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; + zip->zipx_lzma_stream.avail_out = zip->uncompressed_buffer_size; + zip->zipx_lzma_stream.total_out = 0; + + /* Feed only the header into the lzma alone decoder. This will + * effectively initialize the decoder, and will not produce any + * output bytes yet. */ + r = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); + if (r != LZMA_OK) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, + "lzma stream initialization error"); + return ARCHIVE_FATAL; + } + + /* We've already consumed some bytes, so take this into account. */ + __archive_read_consume(a, 9); + zip->entry_bytes_remaining -= 9; + zip->entry_compressed_bytes_read += 9; + + zip->decompress_init = 1; + return (ARCHIVE_OK); +} + +static int +zip_read_data_zipx_xz(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip* zip = (struct zip *)(a->format->data); + int ret; + lzma_ret lz_ret; + const void* compressed_buf; + ssize_t bytes_avail, in_bytes, to_consume = 0; + + (void) offset; /* UNUSED */ + + /* Initialize decompressor if not yet initialized. */ + if (!zip->decompress_init) { + ret = zipx_xz_init(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + } + + compressed_buf = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated xz file body"); + return (ARCHIVE_FATAL); + } + + in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + zip->zipx_lzma_stream.next_in = compressed_buf; + zip->zipx_lzma_stream.avail_in = in_bytes; + zip->zipx_lzma_stream.total_in = 0; + zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; + zip->zipx_lzma_stream.avail_out = zip->uncompressed_buffer_size; + zip->zipx_lzma_stream.total_out = 0; + + /* Perform the decompression. */ + lz_ret = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); + switch(lz_ret) { + case LZMA_DATA_ERROR: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz data error (error %d)", (int) lz_ret); + return (ARCHIVE_FATAL); + + case LZMA_NO_CHECK: + case LZMA_OK: + break; + + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz unknown error %d", (int) lz_ret); + return (ARCHIVE_FATAL); + + case LZMA_STREAM_END: + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + + if((int64_t) zip->zipx_lzma_stream.total_in != + zip->entry_bytes_remaining) + { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "xz premature end of stream"); + return (ARCHIVE_FATAL); + } + + zip->end_of_entry = 1; + break; + } + + to_consume = zip->zipx_lzma_stream.total_in; + + __archive_read_consume(a, to_consume); + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; + + *size = zip->zipx_lzma_stream.total_out; + *buff = zip->uncompressed_buffer; + + ret = consume_optional_marker(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + + return (ARCHIVE_OK); +} + +static int +zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip* zip = (struct zip *)(a->format->data); + int ret; + lzma_ret lz_ret; + const void* compressed_buf; + ssize_t bytes_avail, in_bytes, to_consume; + + (void) offset; /* UNUSED */ + + /* Initialize decompressor if not yet initialized. */ + if (!zip->decompress_init) { + ret = zipx_lzma_alone_init(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + } + + /* Fetch more compressed data. The same note as in deflate handler + * applies here as well: + * + * Note: '1' here is a performance optimization. Recall that the + * decompression layer returns a count of available bytes; asking for + * more than that forces the decompressor to combine reads by copying + * data. + */ + compressed_buf = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated lzma file body"); + return (ARCHIVE_FATAL); + } + + /* Set decompressor parameters. */ + in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + + zip->zipx_lzma_stream.next_in = compressed_buf; + zip->zipx_lzma_stream.avail_in = in_bytes; + zip->zipx_lzma_stream.total_in = 0; + zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; + zip->zipx_lzma_stream.avail_out = + /* These lzma_alone streams lack end of stream marker, so let's + * make sure the unpacker won't try to unpack more than it's + * supposed to. */ + zipmin((int64_t) zip->uncompressed_buffer_size, + zip->entry->uncompressed_size - + zip->entry_uncompressed_bytes_read); + zip->zipx_lzma_stream.total_out = 0; + + /* Perform the decompression. */ + lz_ret = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); + switch(lz_ret) { + case LZMA_DATA_ERROR: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lzma data error (error %d)", (int) lz_ret); + return (ARCHIVE_FATAL); + + case LZMA_OK: + break; + + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lzma unknown error %d", (int) lz_ret); + return (ARCHIVE_FATAL); + } + + to_consume = zip->zipx_lzma_stream.total_in; + + /* Update pointers. */ + __archive_read_consume(a, to_consume); + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; + + if(zip->entry_bytes_remaining == 0) { + zip->end_of_entry = 1; + } + + /* Return values. */ + *size = zip->zipx_lzma_stream.total_out; + *buff = zip->uncompressed_buffer; + + /* Behave the same way as during deflate decompression. */ + ret = consume_optional_marker(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + + /* Free lzma decoder handle because we'll no longer need it. */ + if(zip->end_of_entry) { + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + } + + /* If we're here, then we're good! */ + return (ARCHIVE_OK); +} +#endif /* HAVE_LZMA_H && HAVE_LIBLZMA */ + +static int +zipx_ppmd8_init(struct archive_read *a, struct zip *zip) +{ + const void* p; + uint32_t val; + uint32_t order; + uint32_t mem; + uint32_t restore_method; + + /* Remove previous decompression context if it exists. */ + if(zip->ppmd8_valid) { + __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); + zip->ppmd8_valid = 0; + } + + /* Create a new decompression context. */ + __archive_ppmd8_functions.Ppmd8_Construct(&zip->ppmd8); + zip->ppmd8_stream_failed = 0; + + /* Setup function pointers required by Ppmd8 decompressor. The + * 'ppmd_read' function will feed new bytes to the decompressor, + * and will increment the 'zip->zipx_ppmd_read_compressed' counter. */ + zip->ppmd8.Stream.In = &zip->zipx_ppmd_stream; + zip->zipx_ppmd_stream.a = a; + zip->zipx_ppmd_stream.Read = &ppmd_read; + + /* Reset number of read bytes to 0. */ + zip->zipx_ppmd_read_compressed = 0; + + /* Read Ppmd8 header (2 bytes). */ + p = __archive_read_ahead(a, 2, NULL); + if(!p) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated file data in PPMd8 stream"); + return (ARCHIVE_FATAL); + } + __archive_read_consume(a, 2); + + /* Decode the stream's compression parameters. */ + val = archive_le16dec(p); + order = (val & 15) + 1; + mem = ((val >> 4) & 0xff) + 1; + restore_method = (val >> 12); + + if(order < 2 || restore_method > 2) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid parameter set in PPMd8 stream (order=%d, " + "restore=%d)", order, restore_method); + return (ARCHIVE_FAILED); + } + + /* Allocate the memory needed to properly decompress the file. */ + if(!__archive_ppmd8_functions.Ppmd8_Alloc(&zip->ppmd8, mem << 20)) { + archive_set_error(&a->archive, ENOMEM, + "Unable to allocate memory for PPMd8 stream: %d bytes", + mem << 20); + return (ARCHIVE_FATAL); + } + + /* Signal the cleanup function to release Ppmd8 context in the + * cleanup phase. */ + zip->ppmd8_valid = 1; + + /* Perform further Ppmd8 initialization. */ + if(!__archive_ppmd8_functions.Ppmd8_RangeDec_Init(&zip->ppmd8)) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, + "PPMd8 stream range decoder initialization error"); + return (ARCHIVE_FATAL); + } + + __archive_ppmd8_functions.Ppmd8_Init(&zip->ppmd8, order, + restore_method); + + /* Allocate the buffer that will hold uncompressed data. */ + free(zip->uncompressed_buffer); + + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + + if(zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for PPMd8 decompression"); + return ARCHIVE_FATAL; + } + + /* Ppmd8 initialization is done. */ + zip->decompress_init = 1; + + /* We've already read 2 bytes in the output stream. Additionally, + * Ppmd8 initialization code could read some data as well. So we + * are advancing the stream by 2 bytes plus whatever number of + * bytes Ppmd8 init function used. */ + zip->entry_compressed_bytes_read += 2 + zip->zipx_ppmd_read_compressed; + + return ARCHIVE_OK; +} + +static int +zip_read_data_zipx_ppmd(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip* zip = (struct zip *)(a->format->data); + int ret; + size_t consumed_bytes = 0; + ssize_t bytes_avail = 0; + + (void) offset; /* UNUSED */ + + /* If we're here for the first time, initialize Ppmd8 decompression + * context first. */ + if(!zip->decompress_init) { + ret = zipx_ppmd8_init(a, zip); + if(ret != ARCHIVE_OK) + return ret; + } + + /* Fetch for more data. We're reading 1 byte here, but libarchive + * should prefetch more bytes. */ + (void) __archive_read_ahead(a, 1, &bytes_avail); + if(bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated PPMd8 file body"); + return (ARCHIVE_FATAL); + } + + /* This counter will be updated inside ppmd_read(), which at one + * point will be called by Ppmd8_DecodeSymbol. */ + zip->zipx_ppmd_read_compressed = 0; + + /* Decompression loop. */ + do { + int sym = __archive_ppmd8_functions.Ppmd8_DecodeSymbol( + &zip->ppmd8); + if(sym < 0) { + zip->end_of_entry = 1; + break; + } + + /* This field is set by ppmd_read() when there was no more data + * to be read. */ + if(zip->ppmd8_stream_failed) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated PPMd8 file body"); + return (ARCHIVE_FATAL); + } + + zip->uncompressed_buffer[consumed_bytes] = (uint8_t) sym; + ++consumed_bytes; + } while(consumed_bytes < zip->uncompressed_buffer_size); + + /* Update pointers for libarchive. */ + *buff = zip->uncompressed_buffer; + *size = consumed_bytes; + + /* Update pointers so we can continue decompression in another call. */ + zip->entry_bytes_remaining -= zip->zipx_ppmd_read_compressed; + zip->entry_compressed_bytes_read += zip->zipx_ppmd_read_compressed; + zip->entry_uncompressed_bytes_read += consumed_bytes; + + /* If we're at the end of stream, deinitialize Ppmd8 context. */ + if(zip->end_of_entry) { + __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); + zip->ppmd8_valid = 0; + } + + /* Seek for optional marker, same way as in each zip entry. */ + ret = consume_optional_marker(a, zip); + if (ret != ARCHIVE_OK) + return ret; + + return ARCHIVE_OK; +} + +#ifdef HAVE_BZLIB_H +static int +zipx_bzip2_init(struct archive_read *a, struct zip *zip) +{ + int r; + + /* Deallocate already existing BZ2 decompression context if it + * exists. */ + if(zip->bzstream_valid) { + BZ2_bzDecompressEnd(&zip->bzstream); + zip->bzstream_valid = 0; + } + + /* Allocate a new BZ2 decompression context. */ + memset(&zip->bzstream, 0, sizeof(bz_stream)); + r = BZ2_bzDecompressInit(&zip->bzstream, 0, 1); + if(r != BZ_OK) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "bzip2 initialization failed(%d)", + r); + + return ARCHIVE_FAILED; + } + + /* Mark the bzstream field to be released in cleanup phase. */ + zip->bzstream_valid = 1; + + /* (Re)allocate the buffer that will contain decompressed bytes. */ + free(zip->uncompressed_buffer); + + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + if (zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for bzip2 decompression"); + return ARCHIVE_FATAL; + } + + /* Initialization done. */ + zip->decompress_init = 1; + return ARCHIVE_OK; +} + +static int +zip_read_data_zipx_bzip2(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip *zip = (struct zip *)(a->format->data); + ssize_t bytes_avail = 0, in_bytes, to_consume; + const void *compressed_buff; + int r; + uint64_t total_out; + + (void) offset; /* UNUSED */ + + /* Initialize decompression context if we're here for the first time. */ + if(!zip->decompress_init) { + r = zipx_bzip2_init(a, zip); + if(r != ARCHIVE_OK) + return r; + } + + /* Fetch more compressed bytes. */ + compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); + if(bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated bzip2 file body"); + return (ARCHIVE_FATAL); + } + + in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + if(in_bytes < 1) { + /* libbz2 doesn't complain when caller feeds avail_in == 0. + * It will actually return success in this case, which is + * undesirable. This is why we need to make this check + * manually. */ + + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated bzip2 file body"); + return (ARCHIVE_FATAL); + } + + /* Setup buffer boundaries. */ + zip->bzstream.next_in = (char*)(uintptr_t) compressed_buff; + zip->bzstream.avail_in = in_bytes; + zip->bzstream.total_in_hi32 = 0; + zip->bzstream.total_in_lo32 = 0; + zip->bzstream.next_out = (char*) zip->uncompressed_buffer; + zip->bzstream.avail_out = zip->uncompressed_buffer_size; + zip->bzstream.total_out_hi32 = 0; + zip->bzstream.total_out_lo32 = 0; + + /* Perform the decompression. */ + r = BZ2_bzDecompress(&zip->bzstream); + switch(r) { + case BZ_STREAM_END: + /* If we're at the end of the stream, deinitialize the + * decompression context now. */ + switch(BZ2_bzDecompressEnd(&zip->bzstream)) { + case BZ_OK: + break; + default: + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to clean up bzip2 " + "decompressor"); + return ARCHIVE_FATAL; + } + + zip->end_of_entry = 1; + break; + case BZ_OK: + /* The decompressor has successfully decoded this + * chunk of data, but more data is still in queue. */ + break; + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "bzip2 decompression failed"); + return ARCHIVE_FATAL; + } + + /* Update the pointers so decompressor can continue decoding. */ + to_consume = zip->bzstream.total_in_lo32; + __archive_read_consume(a, to_consume); + + total_out = ((uint64_t) zip->bzstream.total_out_hi32 << 32) + + zip->bzstream.total_out_lo32; + + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += total_out; + + /* Give libarchive its due. */ + *size = total_out; + *buff = zip->uncompressed_buffer; + + /* Seek for optional marker, like in other entries. */ + r = consume_optional_marker(a, zip); + if(r != ARCHIVE_OK) + return r; + + return ARCHIVE_OK; +} + +#endif + #ifdef HAVE_ZLIB_H static int zip_deflate_init(struct archive_read *a, struct zip *zip) @@ -1371,8 +2235,10 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, if (zip->tctx_valid || zip->cctx_valid) { if (zip->decrypted_bytes_remaining < (size_t)bytes_avail) { size_t buff_remaining = - (zip->decrypted_buffer + zip->decrypted_buffer_size) - - (zip->decrypted_ptr + zip->decrypted_bytes_remaining); + (zip->decrypted_buffer + + zip->decrypted_buffer_size) + - (zip->decrypted_ptr + + zip->decrypted_bytes_remaining); if (buff_remaining > (size_t)bytes_avail) buff_remaining = (size_t)bytes_avail; @@ -1383,12 +2249,12 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, + buff_remaining) > zip->entry_bytes_remaining) { if (zip->entry_bytes_remaining < - (int64_t)zip->decrypted_bytes_remaining) + (int64_t)zip->decrypted_bytes_remaining) buff_remaining = 0; else buff_remaining = (size_t)zip->entry_bytes_remaining - - zip->decrypted_bytes_remaining; + - zip->decrypted_bytes_remaining; } } if (buff_remaining > 0) { @@ -1407,7 +2273,8 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, + zip->decrypted_bytes_remaining, &dsize); } - zip->decrypted_bytes_remaining += buff_remaining; + zip->decrypted_bytes_remaining += + buff_remaining; } } bytes_avail = zip->decrypted_bytes_remaining; @@ -1470,42 +2337,9 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, return (r); } - if (zip->end_of_entry && (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { - const char *p; - - if (NULL == (p = __archive_read_ahead(a, 24, NULL))) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Truncated ZIP end-of-file record"); - return (ARCHIVE_FATAL); - } - /* Consume the optional PK\007\010 marker. */ - if (p[0] == 'P' && p[1] == 'K' && - p[2] == '\007' && p[3] == '\010') { - p += 4; - zip->unconsumed = 4; - } - if (zip->entry->flags & LA_USED_ZIP64) { - uint64_t compressed, uncompressed; - zip->entry->crc32 = archive_le32dec(p); - compressed = archive_le64dec(p + 4); - uncompressed = archive_le64dec(p + 12); - if (compressed > INT64_MAX || uncompressed > INT64_MAX) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Overflow of 64-bit file sizes"); - return ARCHIVE_FAILED; - } - zip->entry->compressed_size = compressed; - zip->entry->uncompressed_size = uncompressed; - zip->unconsumed += 20; - } else { - zip->entry->crc32 = archive_le32dec(p); - zip->entry->compressed_size = archive_le32dec(p + 4); - zip->entry->uncompressed_size = archive_le32dec(p + 8); - zip->unconsumed += 12; - } - } + r = consume_optional_marker(a, zip); + if (r != ARCHIVE_OK) + return (r); return (ARCHIVE_OK); } @@ -1933,6 +2767,24 @@ archive_read_format_zip_read_data(struct archive_read *a, case 0: /* No compression. */ r = zip_read_data_none(a, buff, size, offset); break; +#ifdef HAVE_BZLIB_H + case 12: /* ZIPx bzip2 compression. */ + r = zip_read_data_zipx_bzip2(a, buff, size, offset); + break; +#endif +#if HAVE_LZMA_H && HAVE_LIBLZMA + case 14: /* ZIPx LZMA compression. */ + r = zip_read_data_zipx_lzma_alone(a, buff, size, offset); + break; + case 95: /* ZIPx XZ compression. */ + r = zip_read_data_zipx_xz(a, buff, size, offset); + break; +#endif + /* PPMd support is built-in, so we don't need any #if guards. */ + case 98: /* ZIPx PPMd compression. */ + r = zip_read_data_zipx_ppmd(a, buff, size, offset); + break; + #ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ r = zip_read_data_deflate(a, buff, size, offset); @@ -1941,8 +2793,8 @@ archive_read_format_zip_read_data(struct archive_read *a, default: /* Unsupported compression. */ /* Return a warning. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Unsupported ZIP compression method (%s)", - compression_name(zip->entry->compression)); + "Unsupported ZIP compression method (%d: %s)", + zip->entry->compression, compression_name(zip->entry->compression)); /* We can't decompress this entry, but we will * be able to skip() it and try the next entry. */ return (ARCHIVE_FAILED); @@ -2000,11 +2852,29 @@ archive_read_format_zip_cleanup(struct archive_read *a) struct zip_entry *zip_entry, *next_zip_entry; zip = (struct zip *)(a->format->data); + #ifdef HAVE_ZLIB_H if (zip->stream_valid) inflateEnd(&zip->stream); - free(zip->uncompressed_buffer); #endif + +#if HAVE_LZMA_H && HAVE_LIBLZMA + if (zip->zipx_lzma_valid) { + lzma_end(&zip->zipx_lzma_stream); + } +#endif + +#ifdef HAVE_BZLIB_H + if (zip->bzstream_valid) { + BZ2_bzDecompressEnd(&zip->bzstream); + } +#endif + + free(zip->uncompressed_buffer); + + if (zip->ppmd8_valid) + __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); + if (zip->zip_entries) { zip_entry = zip->zip_entries; while (zip_entry != NULL) { @@ -2628,7 +3498,8 @@ expose_parent_dirs(struct zip *zip, const char *name, size_t name_length) } static int -slurp_central_directory(struct archive_read *a, struct zip *zip) +slurp_central_directory(struct archive_read *a, struct archive_entry* entry, + struct zip *zip) { ssize_t i; unsigned found; @@ -2708,6 +3579,11 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) return ARCHIVE_FATAL; zip_entry = calloc(1, sizeof(struct zip_entry)); + if (zip_entry == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate zip entry"); + return ARCHIVE_FATAL; + } zip_entry->next = zip->zip_entries; zip_entry->flags |= LA_FROM_CENTRAL_DIRECTORY; zip->zip_entries = zip_entry; @@ -2733,8 +3609,10 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) filename_length = archive_le16dec(p + 28); extra_length = archive_le16dec(p + 30); comment_length = archive_le16dec(p + 32); - /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */ - /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */ + /* disk_start = archive_le16dec(p + 34); + * Better be zero. + * internal_attributes = archive_le16dec(p + 36); + * text bit */ external_attributes = archive_le32dec(p + 38); zip_entry->local_header_offset = archive_le32dec(p + 42) + correction; @@ -2770,7 +3648,8 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) "Truncated ZIP file header"); return ARCHIVE_FATAL; } - if (ARCHIVE_OK != process_extra(a, p + filename_length, extra_length, zip_entry)) { + if (ARCHIVE_OK != process_extra(a, entry, p + filename_length, + extra_length, zip_entry)) { return ARCHIVE_FATAL; } @@ -2792,7 +3671,8 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) * a directory. We should treat it as a non * resource fork file to expose it. */ if (name[filename_length-1] != '/' && - (r - name < 3 || r[0] != '.' || r[1] != '_')) { + (r - name < 3 || r[0] != '.' || + r[1] != '_')) { __archive_rb_tree_insert_node( &zip->tree, &zip_entry->node); /* Expose its parent directories. */ @@ -2869,8 +3749,10 @@ zip_read_mac_metadata(struct archive_read *a, struct archive_entry *entry, switch(rsrc->compression) { case 0: /* No compression. */ if (rsrc->uncompressed_size != rsrc->compressed_size) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Malformed OS X metadata entry: inconsistent size"); + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Malformed OS X metadata entry: " + "inconsistent size"); return (ARCHIVE_FATAL); } #ifdef HAVE_ZLIB_H @@ -3029,7 +3911,7 @@ archive_read_format_zip_seekable_read_header(struct archive_read *a, a->archive.archive_format_name = "ZIP"; if (zip->zip_entries == NULL) { - r = slurp_central_directory(a, zip); + r = slurp_central_directory(a, entry, zip); if (r != ARCHIVE_OK) return r; /* Get first entry whose local header offset is lower than @@ -3059,8 +3941,8 @@ archive_read_format_zip_seekable_read_header(struct archive_read *a, __archive_read_reset_passphrase(a); /* File entries are sorted by the header offset, we should mostly - * use __archive_read_consume to advance a read point to avoid redundant - * data reading. */ + * use __archive_read_consume to advance a read point to avoid + * redundant data reading. */ offset = archive_filter_bytes(&a->archive, 0); if (offset < zip->entry->local_header_offset) __archive_read_consume(a, @@ -3141,3 +4023,5 @@ archive_read_support_format_zip_seekable(struct archive *_a) free(zip); return (ARCHIVE_OK); } + +/*# vim:set noet:*/ |