diff options
Diffstat (limited to 'archivers/libarchive/files/doc/wiki')
15 files changed, 6297 insertions, 0 deletions
diff --git a/archivers/libarchive/files/doc/wiki/Makefile b/archivers/libarchive/files/doc/wiki/Makefile new file mode 100644 index 00000000000..e6d60387882 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/Makefile @@ -0,0 +1,46 @@ + +default: all + + +ManPageArchiveEntry3.wiki: ../mdoc2wiki.awk ../../libarchive/archive_entry.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/archive_entry.3 > ManPageArchiveEntry3.wiki + +ManPageArchiveRead3.wiki: ../mdoc2wiki.awk ../../libarchive/archive_read.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/archive_read.3 > ManPageArchiveRead3.wiki + +ManPageArchiveReadDisk3.wiki: ../mdoc2wiki.awk ../../libarchive/archive_read_disk.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/archive_read_disk.3 > ManPageArchiveReadDisk3.wiki + +ManPageArchiveUtil3.wiki: ../mdoc2wiki.awk ../../libarchive/archive_util.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/archive_util.3 > ManPageArchiveUtil3.wiki + +ManPageArchiveWrite3.wiki: ../mdoc2wiki.awk ../../libarchive/archive_write.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/archive_write.3 > ManPageArchiveWrite3.wiki + +ManPageArchiveWriteDisk3.wiki: ../mdoc2wiki.awk ../../libarchive/archive_write_disk.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/archive_write_disk.3 > ManPageArchiveWriteDisk3.wiki + +ManPageCpio5.wiki: ../mdoc2wiki.awk ../../libarchive/cpio.5 + awk -f ../mdoc2wiki.awk < ../../libarchive/cpio.5 > ManPageCpio5.wiki + +ManPageLibarchiveFormats5.wiki: ../mdoc2wiki.awk ../../libarchive/libarchive-formats.5 + awk -f ../mdoc2wiki.awk < ../../libarchive/libarchive-formats.5 > ManPageLibarchiveFormats5.wiki + +ManPageLibarchive3.wiki: ../mdoc2wiki.awk ../../libarchive/libarchive.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/libarchive.3 > ManPageLibarchive3.wiki + +ManPageLibarchiveInternals3.wiki: ../mdoc2wiki.awk ../../libarchive/libarchive_internals.3 + awk -f ../mdoc2wiki.awk < ../../libarchive/libarchive_internals.3 > ManPageLibarchiveInternals3.wiki + +ManPageMtree5.wiki: 
../mdoc2wiki.awk ../../libarchive/mtree.5 + awk -f ../mdoc2wiki.awk < ../../libarchive/mtree.5 > ManPageMtree5.wiki + +ManPageTar5.wiki: ../mdoc2wiki.awk ../../libarchive/tar.5 + awk -f ../mdoc2wiki.awk < ../../libarchive/tar.5 > ManPageTar5.wiki + +ManPageBsdtar1.wiki: ../mdoc2wiki.awk ../../tar/bsdtar.1 + awk -f ../mdoc2wiki.awk < ../../tar/bsdtar.1 > ManPageBsdtar1.wiki + +ManPageBsdcpio1.wiki: ../mdoc2wiki.awk ../../cpio/bsdcpio.1 + awk -f ../mdoc2wiki.awk < ../../cpio/bsdcpio.1 > ManPageBsdcpio1.wiki +all: ManPageArchiveEntry3.wiki ManPageArchiveRead3.wiki ManPageArchiveReadDisk3.wiki ManPageArchiveUtil3.wiki ManPageArchiveWrite3.wiki ManPageArchiveWriteDisk3.wiki ManPageCpio5.wiki ManPageLibarchiveFormats5.wiki ManPageLibarchive3.wiki ManPageLibarchiveInternals3.wiki ManPageMtree5.wiki ManPageTar5.wiki ManPageBsdtar1.wiki ManPageBsdcpio1.wiki diff --git a/archivers/libarchive/files/doc/wiki/ManPageArchiveEntry3.wiki b/archivers/libarchive/files/doc/wiki/ManPageArchiveEntry3.wiki new file mode 100644 index 00000000000..d4109a8b275 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageArchiveEntry3.wiki @@ -0,0 +1,504 @@ +#summary archive_entry 3 manual page +== NAME == +*archive_entry_acl_add_entry*, +*archive_entry_acl_add_entry_w*, +*archive_entry_acl_clear*, +*archive_entry_acl_count*, +*archive_entry_acl_next*, +*archive_entry_acl_next_w*, +*archive_entry_acl_reset*, +*archive_entry_acl_text_w*, +*archive_entry_atime*, +*archive_entry_atime_nsec*, +*archive_entry_clear*, +*archive_entry_clone*, +*archive_entry_copy_fflags_text*, +*archive_entry_copy_fflags_text_w*, +*archive_entry_copy_gname*, +*archive_entry_copy_gname_w*, +*archive_entry_copy_hardlink*, +*archive_entry_copy_hardlink_w*, +*archive_entry_copy_link*, +*archive_entry_copy_link_w*, +*archive_entry_copy_pathname_w*, +*archive_entry_copy_sourcepath*, +*archive_entry_copy_stat*, +*archive_entry_copy_symlink*, +*archive_entry_copy_symlink_w*, +*archive_entry_copy_uname*, 
+*archive_entry_copy_uname_w*, +*archive_entry_dev*, +*archive_entry_devmajor*, +*archive_entry_devminor*, +*archive_entry_filetype*, +*archive_entry_fflags*, +*archive_entry_fflags_text*, +*archive_entry_free*, +*archive_entry_gid*, +*archive_entry_gname*, +*archive_entry_hardlink*, +*archive_entry_ino*, +*archive_entry_mode*, +*archive_entry_mtime*, +*archive_entry_mtime_nsec*, +*archive_entry_nlink*, +*archive_entry_new*, +*archive_entry_pathname*, +*archive_entry_pathname_w*, +*archive_entry_rdev*, +*archive_entry_rdevmajor*, +*archive_entry_rdevminor*, +*archive_entry_set_atime*, +*archive_entry_set_ctime*, +*archive_entry_set_dev*, +*archive_entry_set_devmajor*, +*archive_entry_set_devminor*, +*archive_entry_set_filetype*, +*archive_entry_set_fflags*, +*archive_entry_set_gid*, +*archive_entry_set_gname*, +*archive_entry_set_hardlink*, +*archive_entry_set_link*, +*archive_entry_set_mode*, +*archive_entry_set_mtime*, +*archive_entry_set_pathname*, +*archive_entry_set_rdevmajor*, +*archive_entry_set_rdevminor*, +*archive_entry_set_size*, +*archive_entry_set_symlink*, +*archive_entry_set_uid*, +*archive_entry_set_uname*, +*archive_entry_size*, +*archive_entry_sourcepath*, +*archive_entry_stat*, +*archive_entry_symlink*, +*archive_entry_uid*, +*archive_entry_uname* +- functions for manipulating archive entry descriptions +== SYNOPSIS == +*#include <archive_entry.h>* +<br> +*void* +<br> +*archive_entry_acl_add_entry*(_struct archive_entry `*`_, _int type_, _int permset_, _int tag_, _int qual_, _const char `*`name_); +<br> +*void* +<br> +*archive_entry_acl_add_entry_w*(_struct archive_entry `*`_, _int type_, _int permset_, _int tag_, _int qual_, _const wchar_t `*`name_); +<br> +*void* +<br> +*archive_entry_acl_clear*(_struct archive_entry `*`_); +<br> +*int* +<br> +*archive_entry_acl_count*(_struct archive_entry `*`_, _int type_); +<br> +*int* +<br> +*archive_entry_acl_next*(_struct archive_entry `*`_, _int want_type_, _int `*`type_, _int `*`permset_, _int `*`tag_, 
_int `*`qual_, _const char `*``*`name_); +<br> +*int* +<br> +*archive_entry_acl_next_w*(_struct archive_entry `*`_, _int want_type_, _int `*`type_, _int `*`permset_, _int `*`tag_, _int `*`qual_, _const wchar_t `*``*`name_); +<br> +*int* +<br> +*archive_entry_acl_reset*(_struct archive_entry `*`_, _int want_type_); +<br> +*const wchar_t `*`* +<br> +*archive_entry_acl_text_w*(_struct archive_entry `*`_, _int flags_); +<br> +*time_t* +<br> +*archive_entry_atime*(_struct archive_entry `*`_); +<br> +*long* +<br> +*archive_entry_atime_nsec*(_struct archive_entry `*`_); +<br> +*struct archive_entry `*`* +<br> +*archive_entry_clear*(_struct archive_entry `*`_); +<br> +*struct archive_entry `*`* +<br> +*archive_entry_clone*(_struct archive_entry `*`_); +<br> +*const char `*`* +<br> +*archive_entry_copy_fflags_text*(_struct archive_entry `*`_, _const char `*`_); +<br> +*const wchar_t `*`* +<br> +*archive_entry_copy_fflags_text_w*(_struct archive_entry `*`_, _const wchar_t `*`_); +<br> +*void* +<br> +*archive_entry_copy_gname*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_copy_gname_w*(_struct archive_entry `*`_, _const wchar_t `*`_); +<br> +*void* +<br> +*archive_entry_copy_hardlink*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_copy_hardlink_w*(_struct archive_entry `*`_, _const wchar_t `*`_); +<br> +*void* +<br> +*archive_entry_copy_sourcepath*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_copy_pathname_w*(_struct archive_entry `*`_, _const wchar_t `*`_); +<br> +*void* +<br> +*archive_entry_copy_stat*(_struct archive_entry `*`_, _const struct stat `*`_); +<br> +*void* +<br> +*archive_entry_copy_symlink*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_copy_symlink_w*(_struct archive_entry `*`_, _const wchar_t `*`_); +<br> +*void* +<br> +*archive_entry_copy_uname*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void*
+<br> +*archive_entry_copy_uname_w*(_struct archive_entry `*`_, _const wchar_t `*`_); +<br> +*dev_t* +<br> +*archive_entry_dev*(_struct archive_entry `*`_); +<br> +*dev_t* +<br> +*archive_entry_devmajor*(_struct archive_entry `*`_); +<br> +*dev_t* +<br> +*archive_entry_devminor*(_struct archive_entry `*`_); +<br> +*mode_t* +<br> +*archive_entry_filetype*(_struct archive_entry `*`_); +<br> +*void* +<br> +*archive_entry_fflags*(_struct archive_entry `*`_, _unsigned long `*`set_, _unsigned long `*`clear_); +<br> +*const char `*`* +<br> +*archive_entry_fflags_text*(_struct archive_entry `*`_); +<br> +*void* +<br> +*archive_entry_free*(_struct archive_entry `*`_); +<br> +*const char `*`* +<br> +*archive_entry_gname*(_struct archive_entry `*`_); +<br> +*const char `*`* +<br> +*archive_entry_hardlink*(_struct archive_entry `*`_); +<br> +*ino_t* +<br> +*archive_entry_ino*(_struct archive_entry `*`_); +<br> +*mode_t* +<br> +*archive_entry_mode*(_struct archive_entry `*`_); +<br> +*time_t* +<br> +*archive_entry_mtime*(_struct archive_entry `*`_); +<br> +*long* +<br> +*archive_entry_mtime_nsec*(_struct archive_entry `*`_); +<br> +*unsigned int* +<br> +*archive_entry_nlink*(_struct archive_entry `*`_); +<br> +*struct archive_entry `*`* +<br> +*archive_entry_new*(_void_); +<br> +*const char `*`* +<br> +*archive_entry_pathname*(_struct archive_entry `*`_); +<br> +*const wchar_t `*`* +<br> +*archive_entry_pathname_w*(_struct archive_entry `*`_); +<br> +*dev_t* +<br> +*archive_entry_rdev*(_struct archive_entry `*`_); +<br> +*dev_t* +<br> +*archive_entry_rdevmajor*(_struct archive_entry `*`_); +<br> +*dev_t* +<br> +*archive_entry_rdevminor*(_struct archive_entry `*`_); +<br> +*void* +<br> +*archive_entry_set_dev*(_struct archive_entry `*`_, _dev_t_); +<br> +*void* +<br> +*archive_entry_set_devmajor*(_struct archive_entry `*`_, _dev_t_); +<br> +*void* +<br> +*archive_entry_set_devminor*(_struct archive_entry `*`_, _dev_t_); +<br> +*void* +<br> +*archive_entry_set_filetype*(_struct 
archive_entry `*`_, _unsigned int_); +<br> +*void* +<br> +*archive_entry_set_fflags*(_struct archive_entry `*`_, _unsigned long set_, _unsigned long clear_); +<br> +*void* +<br> +*archive_entry_set_gid*(_struct archive_entry `*`_, _gid_t_); +<br> +*void* +<br> +*archive_entry_set_gname*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_set_hardlink*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_set_ino*(_struct archive_entry `*`_, _unsigned long_); +<br> +*void* +<br> +*archive_entry_set_link*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_set_mode*(_struct archive_entry `*`_, _mode_t_); +<br> +*void* +<br> +*archive_entry_set_mtime*(_struct archive_entry `*`_, _time_t_, _long nanos_); +<br> +*void* +<br> +*archive_entry_set_nlink*(_struct archive_entry `*`_, _unsigned int_); +<br> +*void* +<br> +*archive_entry_set_pathname*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_set_rdev*(_struct archive_entry `*`_, _dev_t_); +<br> +*void* +<br> +*archive_entry_set_rdevmajor*(_struct archive_entry `*`_, _dev_t_); +<br> +*void* +<br> +*archive_entry_set_rdevminor*(_struct archive_entry `*`_, _dev_t_); +<br> +*void* +<br> +*archive_entry_set_size*(_struct archive_entry `*`_, _int64_t_); +<br> +*void* +<br> +*archive_entry_set_symlink*(_struct archive_entry `*`_, _const char `*`_); +<br> +*void* +<br> +*archive_entry_set_uid*(_struct archive_entry `*`_, _uid_t_); +<br> +*void* +<br> +*archive_entry_set_uname*(_struct archive_entry `*`_, _const char `*`_); +<br> +*int64_t* +<br> +*archive_entry_size*(_struct archive_entry `*`_); +<br> +*const char `*`* +<br> +*archive_entry_sourcepath*(_struct archive_entry `*`_); +<br> +*const struct stat `*`* +<br> +*archive_entry_stat*(_struct archive_entry `*`_); +<br> +*const char `*`* +<br> +*archive_entry_symlink*(_struct archive_entry `*`_); +<br> +*const char `*`* +<br> 
+*archive_entry_uname*(_struct archive_entry `*`_); +== DESCRIPTION == +These functions create and manipulate data objects that +represent entries within an archive. +You can think of a +*struct archive_entry* +as a heavy-duty version of +*struct stat*: +it includes everything from +*struct stat* +plus associated pathname, textual group and user names, etc. +These objects are used by +*libarchive*(3) +to represent the metadata associated with a particular +entry in an archive. +=== Create and Destroy=== +There are functions to allocate, destroy, clear, and copy +_archive_entry_ +objects: +<dl> +<dt>*archive_entry_clear*()</dt><dd> +Erases the object, resetting all internal fields to the +same state as a newly-created object. +This is provided to allow you to quickly recycle objects +without thrashing the heap. +</dd><dt>*archive_entry_clone*()</dt><dd> +A deep copy operation; all text fields are duplicated. +</dd><dt>*archive_entry_free*()</dt><dd> +Releases the +*struct archive_entry* +object. +</dd><dt>*archive_entry_new*()</dt><dd> +Allocate and return a blank +*struct archive_entry* +object. +</dd></dl> +=== Set and Get Functions=== +Most of the functions here set or read entries in an object. +Such functions have one of the following forms: +<dl> +<dt>*archive_entry_set_XXXX*()</dt><dd> +Stores the provided data in the object. +In particular, for strings, the pointer is stored, +not the referenced string. +</dd><dt>*archive_entry_copy_XXXX*()</dt><dd> +As above, except that the referenced data is copied +into the object. +</dd><dt>*archive_entry_XXXX*()</dt><dd> +Returns the specified data. +In the case of strings, a const-qualified pointer to +the string is returned. +</dd></dl> +String data can be set or accessed as wide character strings +or normal +_char_ +strings. +The functions that use wide character strings are suffixed with +*`_`w*.
+Note that these are different representations of the same data: +For example, if you store a narrow string and read the corresponding +wide string, the object will transparently convert formats +using the current locale. +Similarly, if you store a wide string and then store a +narrow string for the same data, the previously-set wide string will +be discarded in favor of the new data. + +There are a few set/get functions that merit additional description: +<dl> +<dt>*archive_entry_set_link*()</dt><dd> +This function sets the symlink field if it is already set. +Otherwise, it sets the hardlink field. +</dd></dl> +=== File Flags=== +File flags are transparently converted between a bitmap +representation and a textual format. +For example, if you set the bitmap and ask for text, the library +will build a canonical text format. +However, if you set a text format and request a text format, +you will get back the same text, even if it is ill-formed. +If you need to canonicalize a textual flags string, you should first set the +text form, then request the bitmap form, then use that to set the bitmap form. +Setting the bitmap format will clear the internal text representation +and force it to be reconstructed when you next request the text form. + +The bitmap format consists of two integers, one containing bits +that should be set, the other specifying bits that should be +cleared. +Bits not mentioned in either bitmap will be ignored. +Usually, the bitmap of bits to be cleared will be set to zero. +In unusual circumstances, you can force a fully-specified set +of file flags by setting the bitmap of flags to clear to the complement +of the bitmap of flags to set. +(This differs from +*fflagstostr*(3), +which only includes names for set bits.) +Converting a bitmap to a textual string is a platform-specific +operation; bits that are not meaningful on the current platform +will be ignored. + +The canonical text format is a comma-separated list of flag names. 
+The +*archive_entry_copy_fflags_text*() +and +*archive_entry_copy_fflags_text_w*() +functions parse the provided text and set the internal bitmap values. +This is a platform-specific operation; names that are not meaningful +on the current platform will be ignored. +The function returns a pointer to the start of the first name that was not +recognized, or NULL if every name was recognized. +Note that every name--including names that follow an unrecognized name--will +be evaluated, and the bitmaps will be set to reflect every name that is +recognized. +(In particular, this differs from +*strtofflags*(3), +which stops parsing at the first unrecognized name.) +=== ACL Handling=== +XXX This needs serious help. +XXX + +An +"Access Control List" +(ACL) is a list of permissions that grant access to particular users or +groups beyond what would normally be provided by standard POSIX mode bits. +The ACL handling here addresses some deficiencies in the POSIX.1e draft 17 ACL +specification. +In particular, POSIX.1e draft 17 specifies several different formats, but +none of those formats include both textual user/group names and numeric +UIDs/GIDs. + +XXX explain ACL stuff XXX +== SEE ALSO == +*archive*(3) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3.
+== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org>. diff --git a/archivers/libarchive/files/doc/wiki/ManPageArchiveRead3.wiki b/archivers/libarchive/files/doc/wiki/ManPageArchiveRead3.wiki new file mode 100644 index 00000000000..9d3f62cb9b6 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageArchiveRead3.wiki @@ -0,0 +1,694 @@ +#summary archive_read 3 manual page +== NAME == +*archive_read_new*, +*archive_read_set_filter_options*, +*archive_read_set_format_options*, +*archive_read_set_options*, +*archive_read_support_compression_all*, +*archive_read_support_compression_bzip2*, +*archive_read_support_compression_compress*, +*archive_read_support_compression_gzip*, +*archive_read_support_compression_lzma*, +*archive_read_support_compression_none*, +*archive_read_support_compression_xz*, +*archive_read_support_compression_program*, +*archive_read_support_compression_program_signature*, +*archive_read_support_format_all*, +*archive_read_support_format_ar*, +*archive_read_support_format_cpio*, +*archive_read_support_format_empty*, +*archive_read_support_format_iso9660*, +*archive_read_support_format_mtree*, +*archive_read_support_format_raw*, +*archive_read_support_format_tar*, +*archive_read_support_format_zip*, +*archive_read_open*, +*archive_read_open2*, +*archive_read_open_fd*, +*archive_read_open_FILE*, +*archive_read_open_filename*, +*archive_read_open_memory*, +*archive_read_next_header*, +*archive_read_next_header2*, +*archive_read_data*, +*archive_read_data_block*, +*archive_read_data_skip*, +*archive_read_data_into_buffer*, +*archive_read_data_into_fd*, +*archive_read_extract*, +*archive_read_extract2*, +*archive_read_extract_set_progress_callback*, +*archive_read_close*, +*archive_read_finish* +- functions for reading streaming archives +== SYNOPSIS == +*#include <archive.h>* +<br> +*struct archive `*`* +<br> +*archive_read_new*(_void_); +<br> +*int* +<br> +*archive_read_support_compression_all*(_struct
archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_bzip2*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_compress*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_gzip*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_lzma*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_none*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_xz*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_compression_program*(_struct archive `*`_, _const char `*`cmd_); +<br> +*int* +<br> +*archive_read_support_compression_program_signature*(_struct archive `*`_, _const char `*`cmd_, _const void `*`signature_, _size_t signature_length_); +<br> +*int* +<br> +*archive_read_support_format_all*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_ar*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_cpio*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_empty*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_iso9660*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_mtree*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_raw*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_tar*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_support_format_zip*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_set_filter_options*(_struct archive `*`_, _const char `*`_); +<br> +*int* +<br> +*archive_read_set_format_options*(_struct archive `*`_, _const char `*`_); +<br> +*int* +<br> +*archive_read_set_options*(_struct archive `*`_, _const char `*`_); +<br> +*int* +<br> +*archive_read_open*(_struct archive `*`_, _void `*`client_data_, _archive_open_callback `*`_, _archive_read_callback `*`_, _archive_close_callback `*`_); +<br> +*int* +<br> +*archive_read_open2*(_struct 
archive `*`_, _void `*`client_data_, _archive_open_callback `*`_, _archive_read_callback `*`_, _archive_skip_callback `*`_, _archive_close_callback `*`_); +<br> +*int* +<br> +*archive_read_open_FILE*(_struct archive `*`_, _FILE `*`file_); +<br> +*int* +<br> +*archive_read_open_fd*(_struct archive `*`_, _int fd_, _size_t block_size_); +<br> +*int* +<br> +*archive_read_open_filename*(_struct archive `*`_, _const char `*`filename_, _size_t block_size_); +<br> +*int* +<br> +*archive_read_open_memory*(_struct archive `*`_, _void `*`buff_, _size_t size_); +<br> +*int* +<br> +*archive_read_next_header*(_struct archive `*`_, _struct archive_entry `*``*`_); +<br> +*int* +<br> +*archive_read_next_header2*(_struct archive `*`_, _struct archive_entry `*`_); +<br> +*ssize_t* +<br> +*archive_read_data*(_struct archive `*`_, _void `*`buff_, _size_t len_); +<br> +*int* +<br> +*archive_read_data_block*(_struct archive `*`_, _const void `*``*`buff_, _size_t `*`len_, _off_t `*`offset_); +<br> +*int* +<br> +*archive_read_data_skip*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_data_into_buffer*(_struct archive `*`_, _void `*`_, _ssize_t len_); +<br> +*int* +<br> +*archive_read_data_into_fd*(_struct archive `*`_, _int fd_); +<br> +*int* +<br> +*archive_read_extract*(_struct archive `*`_, _struct archive_entry `*`_, _int flags_); +<br> +*int* +<br> +*archive_read_extract2*(_struct archive `*`src_, _struct archive_entry `*`_, _struct archive `*`dest_); +<br> +*void* +<br> +*archive_read_extract_set_progress_callback*(_struct archive `*`_, _void (`*`func)(void `*`)_, _void `*`user_data_); +<br> +*int* +<br> +*archive_read_close*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_finish*(_struct archive `*`_); +== DESCRIPTION == +These functions provide a complete API for reading streaming archives. 
+The general process is to first create the +*struct archive* +object, set options, initialize the reader, iterate over the archive +headers and associated data, then close the archive and release all +resources. +The following summary describes the functions in approximately the +order they would be used: +<dl> +<dt>*archive_read_new*()</dt><dd> +Allocates and initializes a +*struct archive* +object suitable for reading from an archive. +</dd><dt> +*archive_read_support_compression_bzip2*(), +*archive_read_support_compression_compress*(), +*archive_read_support_compression_gzip*(), +*archive_read_support_compression_lzma*(), +*archive_read_support_compression_none*(), +*archive_read_support_compression_xz*() +</dt> <dd> +Enables auto-detection code and decompression support for the +specified compression. +Returns +*ARCHIVE_OK* +if the compression is fully supported, or +*ARCHIVE_WARN* +if the compression is supported only through an external program. +Note that decompression using an external program is usually slower than +decompression through built-in libraries. +Note that +"none" +is always enabled by default. +</dd><dt>*archive_read_support_compression_all*()</dt><dd> +Enables all available decompression filters. +</dd><dt>*archive_read_support_compression_program*()</dt><dd> +Data is fed through the specified external program before being dearchived. +Note that this disables automatic detection of the compression format, +so it makes no sense to specify this in conjunction with any other +decompression option. +</dd><dt>*archive_read_support_compression_program_signature*()</dt><dd> +This feeds data through the specified external program +but only if the initial bytes of the data match the specified +signature value. 
+</dd><dt> +*archive_read_support_format_all*(), +*archive_read_support_format_ar*(), +*archive_read_support_format_cpio*(), +*archive_read_support_format_empty*(), +*archive_read_support_format_iso9660*(), +*archive_read_support_format_mtree*(), +*archive_read_support_format_tar*(), +*archive_read_support_format_zip*() +</dt> <dd> +Enables support---including auto-detection code---for the +specified archive format. +For example, +*archive_read_support_format_tar*() +enables support for a variety of standard tar formats, old-style tar, +ustar, pax interchange format, and many common variants. +For convenience, +*archive_read_support_format_all*() +enables support for all available formats. +Only empty archives are supported by default. +</dd><dt>*archive_read_support_format_raw*()</dt><dd> +The +"raw" +format handler allows libarchive to be used to read arbitrary data. +It treats any data stream as an archive with a single entry. +The pathname of this entry is +"data"; +all other entry fields are unset. +This is not enabled by +*archive_read_support_format_all*() +in order to avoid erroneous handling of damaged archives. +</dd><dt> +*archive_read_set_filter_options*(), +*archive_read_set_format_options*(), +*archive_read_set_options*() +</dt> <dd> +Specifies options that will be passed to currently-registered +filters (including decompression filters) and/or format readers. +The argument is a comma-separated list of individual options. +Individual options have one of the following forms: +<dl> +<dt>_option=value_</dt><dd> +The option/value pair will be provided to every module. +Modules that do not accept an option with this name will ignore it. +</dd><dt>_option_</dt><dd> +The option will be provided to every module with a value of +"1". +</dd><dt>_!option_</dt><dd> +The option will be provided to every module with a NULL value.
+</dd><dt>_module:option=value_, _module:option_, _module:!option_</dt><dd> +As above, but the corresponding option and value will be provided +only to modules whose name matches +_module_. +</dd></dl> +The return value will be +*ARCHIVE_OK* +if any module accepts the option, or +*ARCHIVE_WARN* +if no module accepted the option, or +*ARCHIVE_FATAL* +if there was a fatal error while attempting to process the option. + +The currently supported options are: +<dl> +<dt>Format iso9660</dt><dd> +<dl> +<dt>*joliet*</dt><dd> +Support Joliet extensions. +Defaults to enabled, use +*!joliet* +to disable. +</dd></dl> +</dd></dl> +</dd><dt>*archive_read_open*()</dt><dd> +The same as +*archive_read_open2*(), +except that the skip callback is assumed to be +NULL. +</dd><dt>*archive_read_open2*()</dt><dd> +Freeze the settings, open the archive, and prepare for reading entries. +This is the most generic version of this call, which accepts +four callback functions. +Most clients will want to use +*archive_read_open_filename*(), +*archive_read_open_FILE*(), +*archive_read_open_fd*(), +or +*archive_read_open_memory*() +instead. +The library invokes the client-provided functions to obtain +raw bytes from the archive. +</dd><dt>*archive_read_open_FILE*()</dt><dd> +Like +*archive_read_open*(), +except that it accepts a +*FILE `*`* +pointer. +This function should not be used with tape drives or other devices +that require strict I/O blocking. +</dd><dt>*archive_read_open_fd*()</dt><dd> +Like +*archive_read_open*(), +except that it accepts a file descriptor and block size rather than +a set of function pointers. +Note that the file descriptor will not be automatically closed at +end-of-archive. +This function is safe for use with tape drives or other blocked devices. +</dd><dt>*archive_read_open_file*()</dt><dd> +This is a deprecated synonym for +*archive_read_open_filename*(). 
+</dd><dt>*archive_read_open_filename*()</dt><dd> +Like +*archive_read_open*(), +except that it accepts a simple filename and a block size. +A NULL filename represents standard input. +This function is safe for use with tape drives or other blocked devices. +</dd><dt>*archive_read_open_memory*()</dt><dd> +Like +*archive_read_open*(), +except that it accepts a pointer and size of a block of +memory containing the archive data. +</dd><dt>*archive_read_next_header*()</dt><dd> +Read the header for the next entry and return a pointer to +a +*struct archive_entry*. +This is a convenience wrapper around +*archive_read_next_header2*() +that reuses an internal +*struct archive_entry* +object for each request. +</dd><dt>*archive_read_next_header2*()</dt><dd> +Read the header for the next entry and populate the provided +*struct archive_entry*. +</dd><dt>*archive_read_data*()</dt><dd> +Read data associated with the header just read. +Internally, this is a convenience function that calls +*archive_read_data_block*() +and fills any gaps with nulls so that callers see a single +continuous stream of data. +</dd><dt>*archive_read_data_block*()</dt><dd> +Return the next available block of data for this entry. +Unlike +*archive_read_data*(), +the +*archive_read_data_block*() +function avoids copying data and allows you to correctly handle +sparse files, as supported by some archive formats. +The library guarantees that offsets will increase and that blocks +will not overlap. +Note that the blocks returned from this function can be much larger +than the block size read from disk, due to compression +and internal buffer optimizations. +</dd><dt>*archive_read_data_skip*()</dt><dd> +A convenience function that repeatedly calls +*archive_read_data_block*() +to skip all of the data for this archive entry. +</dd><dt>*archive_read_data_into_buffer*()</dt><dd> +This function is deprecated and will be removed. +Use +*archive_read_data*() +instead.
+</dd><dt>*archive_read_data_into_fd*()</dt><dd> +A convenience function that repeatedly calls +*archive_read_data_block*() +to copy the entire entry to the provided file descriptor. +</dd><dt>*archive_read_extract*(), *archive_read_extract_set_skip_file*()</dt><dd> +A convenience function that wraps the corresponding +*archive_write_disk*(3) +interfaces. +The first call to +*archive_read_extract*() +creates a restore object using +*archive_write_disk_new*(3) +and +*archive_write_disk_set_standard_lookup*(3), +then transparently invokes +*archive_write_disk_set_options*(3), +*archive_write_header*(3), +*archive_write_data*(3), +and +*archive_write_finish_entry*(3) +to create the entry on disk and copy data into it. +The +_flags_ +argument is passed unmodified to +*archive_write_disk_set_options*(3). +</dd><dt>*archive_read_extract2*()</dt><dd> +This is another version of +*archive_read_extract*() +that allows you to provide your own restore object. +In particular, this allows you to override the standard lookup functions +using +*archive_write_disk_set_group_lookup*(3), +and +*archive_write_disk_set_user_lookup*(3). +Note that +*archive_read_extract2*() +does not accept a +_flags_ +argument; you should use +*archive_write_disk_set_options*() +to set the restore options yourself. +</dd><dt>*archive_read_extract_set_progress_callback*()</dt><dd> +Sets a pointer to a user-defined callback that can be used +for updating progress displays during extraction. +The progress function will be invoked during the extraction of large +regular files. +The progress function will be invoked with the pointer provided to this call. +Generally, the data pointed to should include a reference to the archive +object and the archive_entry object so that various statistics +can be retrieved for the progress display. +</dd><dt>*archive_read_close*()</dt><dd> +Complete the archive and invoke the close callback. 
+</dd><dt>*archive_read_finish*()</dt><dd> +Invokes +*archive_read_close*() +if it was not invoked manually, then releases all resources. +Note: In libarchive 1.x, this function was declared to return +*void*, +which made it impossible to detect certain errors when +*archive_read_close*() +was invoked implicitly from this function. +The declaration is corrected beginning with libarchive 2.0. +</dd></dl> + +Note that the library determines most of the relevant information about +the archive by inspection. +In particular, it automatically detects +*gzip*(1) +or +*bzip2*(1) +compression and transparently performs the appropriate decompression. +It also automatically detects the archive format. + +A complete description of the +*struct archive* +and +*struct archive_entry* +objects can be found in the overview manual page for +*libarchive*(3). +== CLIENT CALLBACKS == +The callback functions must match the following prototypes: +<ul> +<li> +*typedef ssize_t* +*archive_read_callback*(_struct archive `*`_, _void `*`client_data_, _const void `*``*`buffer_) +</li><li> +*typedef int* +*archive_skip_callback*(_struct archive `*`_, _void `*`client_data_, _size_t request_) +</li><li> +*typedef int* +*archive_open_callback*(_struct archive `*`_, _void `*`client_data_) +</li><li> +*typedef int* +*archive_close_callback*(_struct archive `*`_, _void `*`client_data_) +</li></ul> + +The open callback is invoked by +*archive_read_open*(). +It should return +*ARCHIVE_OK* +if the underlying file or data source is successfully +opened. +If the open fails, it should call +*archive_set_error*() +to register an error code and message and return +*ARCHIVE_FATAL*. + +The read callback is invoked whenever the library +requires raw bytes from the archive. +The read callback should read data into a buffer, +set the +{{{ +const void **buffer +}}} +argument to point to the available data, and +return a count of the number of bytes available.
+The library will invoke the read callback again +only after it has consumed this data. +The library imposes no constraints on the size +of the data blocks returned. +On end-of-file, the read callback should +return zero. +On error, the read callback should invoke +*archive_set_error*() +to register an error code and message and +return -1. + +The skip callback is invoked when the +library wants to ignore a block of data. +The return value is the number of bytes actually +skipped, which may differ from the request. +If the callback cannot skip data, it should return +zero. +If the skip callback is not provided (the +function pointer is +NULL), +the library will invoke the read function +instead and simply discard the result. +A skip callback can provide significant +performance gains when reading uncompressed +archives from slow disk drives or other media +that can skip quickly. + +The close callback is invoked by *archive_read_close*() when +the archive processing is complete. +The callback should return +*ARCHIVE_OK* +on success. +On failure, the callback should invoke +*archive_set_error*() +to register an error code and message and +return +*ARCHIVE_FATAL*. +== EXAMPLE == +The following illustrates basic usage of the library. +In this example, +the callback functions are simply wrappers around the standard +*open*(2), +*read*(2), +and +*close*(2) +system calls.
+{{{ +void +list_archive(const char *name) +{ + struct mydata *mydata; + struct archive *a; + struct archive_entry *entry; + mydata = malloc(sizeof(struct mydata)); + a = archive_read_new(); + mydata->name = name; + archive_read_support_compression_all(a); + archive_read_support_format_all(a); + archive_read_open(a, mydata, myopen, myread, myclose); + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + printf("%s\\n",archive_entry_pathname(entry)); + archive_read_data_skip(a); + } + archive_read_finish(a); + free(mydata); +} +ssize_t +myread(struct archive *a, void *client_data, const void **buff) +{ + struct mydata *mydata = client_data; + *buff = mydata->buff; + return (read(mydata->fd, mydata->buff, 10240)); +} +int +myopen(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + mydata->fd = open(mydata->name, O_RDONLY); + return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL); +} +int +myclose(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + if (mydata->fd > 0) + close(mydata->fd); + return (ARCHIVE_OK); +} +}}} +== RETURN VALUES == +Most functions return zero on success, non-zero on error. +The possible return codes include: +*ARCHIVE_OK* +(the operation succeeded), +*ARCHIVE_WARN* +(the operation succeeded but a non-critical error was encountered), +*ARCHIVE_EOF* +(end-of-archive was encountered), +*ARCHIVE_RETRY* +(the operation failed but can be retried), +and +*ARCHIVE_FATAL* +(there was a fatal error; the archive should be closed immediately). +Detailed error codes and textual descriptions are available from the +*archive_errno*() +and +*archive_error_string*() +functions. + +*archive_read_new*() +returns a pointer to a freshly allocated +*struct archive* +object. +It returns +NULL +on error. + +*archive_read_data*() +returns a count of bytes actually read or zero at the end of the entry. 
+On error, a value of +*ARCHIVE_FATAL*, +*ARCHIVE_WARN*, +or +*ARCHIVE_RETRY* +is returned and an error code and textual description can be retrieved from the +*archive_errno*() +and +*archive_error_string*() +functions. + +The library expects the client callbacks to behave similarly. +If there is an error, you can use +*archive_set_error*() +to set an appropriate error code and description, +then return one of the non-zero values above. +(Note that the value eventually returned to the client may +not be the same; many errors that are not critical at the level +of basic I/O can prevent the archive from being properly read, +thus most I/O errors eventually cause +*ARCHIVE_FATAL* +to be returned.) +== SEE ALSO == +*tar*(1), +*archive*(3), +*archive_util*(3), +*tar*(5) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org.> +== BUGS == +Many traditional archiver programs treat +empty files as valid empty archives. +For example, many implementations of +*tar*(1) +allow you to append entries to an empty file. +Of course, it is impossible to determine the format of an empty file +by inspecting the contents, so this library treats empty files as +having a special +"empty" +format. 
diff --git a/archivers/libarchive/files/doc/wiki/ManPageArchiveReadDisk3.wiki b/archivers/libarchive/files/doc/wiki/ManPageArchiveReadDisk3.wiki new file mode 100644 index 00000000000..4135470e704 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageArchiveReadDisk3.wiki @@ -0,0 +1,287 @@ +#summary archive_read_disk 3 manual page +== NAME == +*archive_read_disk_new*, +*archive_read_disk_set_symlink_logical*, +*archive_read_disk_set_symlink_physical*, +*archive_read_disk_set_symlink_hybrid*, +*archive_read_disk_entry_from_file*, +*archive_read_disk_gname*, +*archive_read_disk_uname*, +*archive_read_disk_set_uname_lookup*, +*archive_read_disk_set_gname_lookup*, +*archive_read_disk_set_standard_lookup*, +*archive_read_close*, +*archive_read_finish* +- functions for reading objects from disk +== SYNOPSIS == +*#include <archive.h>* +<br> +*struct archive `*`* +<br> +*archive_read_disk_new*(_void_); +<br> +*int* +<br> +*archive_read_disk_set_symlink_logical*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_disk_set_symlink_physical*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_disk_set_symlink_hybrid*(_struct archive `*`_); +<br> +*const char `*`* +<br> +*archive_read_disk_gname*(_struct archive `*`_, _gid_t_); +<br> +*const char `*`* +<br> +*archive_read_disk_uname*(_struct archive `*`_, _uid_t_); +<br> +*int* +<br> +*archive_read_disk_set_gname_lookup*(_struct archive `*`_, _void `*`_, _const char `*`(`*`lookup)(void `*`, gid_t)_, _void (`*`cleanup)(void `*`)_); +<br> +*int* +<br> +*archive_read_disk_set_uname_lookup*(_struct archive `*`_, _void `*`_, _const char `*`(`*`lookup)(void `*`, uid_t)_, _void (`*`cleanup)(void `*`)_); +<br> +*int* +<br> +*archive_read_disk_set_standard_lookup*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_disk_entry_from_file*(_struct archive `*`_, _struct archive_entry `*`_, _int fd_, _const struct stat `*`_); +<br> +*int* +<br> +*archive_read_close*(_struct archive `*`_); +<br> +*int* +<br> +*archive_read_finish*(_struct
archive `*`_); +== DESCRIPTION == +These functions provide an API for reading information about +objects on disk. +In particular, they provide an interface for populating +*struct archive_entry* +objects. +<dl> +<dt>*archive_read_disk_new*()</dt><dd> +Allocates and initializes a +*struct archive* +object suitable for reading object information from disk. +</dd><dt> +*archive_read_disk_set_symlink_logical*(), +*archive_read_disk_set_symlink_physical*(), +*archive_read_disk_set_symlink_hybrid*() +</dt> <dd> +This sets the mode used for handling symbolic links. +The +"logical" +mode follows all symbolic links. +The +"physical" +mode does not follow any symbolic links. +The +"hybrid" +mode currently behaves identically to the +"logical" +mode. +</dd><dt> +*archive_read_disk_gname*(), +*archive_read_disk_uname*() +</dt> <dd> +Returns a user or group name given a gid or uid value. +By default, these always return a NULL string. +</dd><dt> +*archive_read_disk_set_gname_lookup*(), +*archive_read_disk_set_uname_lookup*() +</dt> <dd> +These allow you to override the functions used for +user and group name lookups. +You may also provide a +*void `*`* +pointer to a private data structure and a cleanup function for +that data. +The cleanup function will be invoked when the +*struct archive* +object is destroyed or when new lookup functions are registered. +</dd><dt>*archive_read_disk_set_standard_lookup*()</dt><dd> +This convenience function installs a standard set of user +and group name lookup functions. +These functions use +*getpwuid*(3) +and +*getgrgid*(3) +to convert ids to names, defaulting to NULL if the names cannot +be looked up. +These functions also implement a simple memory cache to reduce +the number of calls to +*getpwuid*(3) +and +*getgrgid*(3). +</dd><dt>*archive_read_disk_entry_from_file*()</dt><dd> +Populates a +*struct archive_entry* +object with information about a particular file.
+The +*archive_entry* +object must have already been created with +*archive_entry_new*(3) +and at least one of the source path or path fields must already be set. +(If both are set, the source path will be used.) + +Information is read from disk using the path name from the +*struct archive_entry* +object. +If a file descriptor is provided, some information will be obtained using +that file descriptor, on platforms that support the appropriate +system calls. + +If a pointer to a +*struct stat* +is provided, information from that structure will be used instead +of reading from the disk where appropriate. +This can provide performance benefits in scenarios where +*struct stat* +information has already been read from the disk as a side effect +of some other operation. +(For example, directory traversal libraries often provide this information.) + +Where necessary, user and group ids are converted to user and group names +using the currently registered lookup functions above. +This affects the file ownership fields and ACL values in the +*struct archive_entry* +object. +</dd><dt>*archive_read_close*()</dt><dd> +This currently does nothing. +</dd><dt>*archive_read_finish*()</dt><dd> +Invokes +*archive_read_close*() +if it was not invoked manually, then releases all resources. +</dd></dl> +More information about the +*struct archive* +object and the overall design of the library can be found in the +*libarchive*(3) +overview. +== EXAMPLE == +The following illustrates basic usage of the library by +showing how to use it to copy an item on disk into an archive.
+{{{ +void +file_to_archive(struct archive *a, const char *name) +{ + char buff[8192]; + size_t bytes_read; + struct archive *ard; + struct archive_entry *entry; + int fd; + ard = archive_read_disk_new(); + archive_read_disk_set_standard_lookup(ard); + entry = archive_entry_new(); + fd = open(name, O_RDONLY); + if (fd < 0) + return; + archive_entry_copy_sourcepath(entry, name); + archive_read_disk_entry_from_file(ard, entry, fd, NULL); + archive_write_header(a, entry); + while ((bytes_read = read(fd, buff, sizeof(buff))) > 0) + archive_write_data(a, buff, bytes_read); + archive_write_finish_entry(a); + archive_read_finish(ard); + archive_entry_free(entry); +} +}}} +== RETURN VALUES == +Most functions return +*ARCHIVE_OK* +(zero) on success, or one of several negative +error codes for errors. +Specific error codes include: +*ARCHIVE_RETRY* +for operations that might succeed if retried, +*ARCHIVE_WARN* +for unusual conditions that do not prevent further operations, and +*ARCHIVE_FATAL* +for serious errors that make remaining operations impossible. +The +*archive_errno*(3) +and +*archive_error_string*(3) +functions can be used to retrieve an appropriate error code and a +textual error message. +(See +*archive_util*(3) +for details.) + +*archive_read_disk_new*() +returns a pointer to a newly-allocated +*struct archive* +object or NULL if the allocation failed for any reason. + +*archive_read_disk_gname*() +and +*archive_read_disk_uname*() +return +*const char `*`* +pointers to the textual name or NULL if the lookup failed for any reason. +The returned pointer points to internal storage that +may be reused on the next call to either of these functions; +callers should copy the string if they need to continue accessing it. + +== SEE ALSO == +*archive_read*(3), +*archive_write*(3), +*archive_write_disk*(3), +*tar*(1), +*libarchive*(3) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. 
+The +*archive_read_disk* +interface was added to +*libarchive* 2.6 +and first appeared in +FreeBSD 8.0. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@freebsd.org>. +== BUGS == +The +"standard" +user name and group name lookup functions are not the defaults because +*getgrgid*(3) +and +*getpwuid*(3) +are sometimes too large for particular applications. +The current design allows the application author to use a more +compact implementation when appropriate. + +The full list of metadata read from disk by +*archive_read_disk_entry_from_file*() +is necessarily system-dependent. + +The +*archive_read_disk_entry_from_file*() +function reads as much information as it can from disk. +Some method should be provided to limit this so that clients who +do not need ACLs, for instance, can avoid the extra work needed +to look up such information. + +This API should provide a set of methods for walking a directory tree. +That would make it a direct parallel of the +*archive_read*(3) +API. +When such methods are implemented, the +"hybrid" +symbolic link mode will make sense.
diff --git a/archivers/libarchive/files/doc/wiki/ManPageArchiveUtil3.wiki b/archivers/libarchive/files/doc/wiki/ManPageArchiveUtil3.wiki new file mode 100644 index 00000000000..e33b0076935 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageArchiveUtil3.wiki @@ -0,0 +1,146 @@ +#summary archive_util 3 manual page +== NAME == +*archive_clear_error*, +*archive_compression*, +*archive_compression_name*, +*archive_copy_error*, +*archive_errno*, +*archive_error_string*, +*archive_file_count*, +*archive_format*, +*archive_format_name*, +*archive_set_error* +- libarchive utility functions +== SYNOPSIS == +*#include <archive.h>* +<br> +*void* +<br> +*archive_clear_error*(_struct archive `*`_); +<br> +*int* +<br> +*archive_compression*(_struct archive `*`_); +<br> +*const char `*`* +<br> +*archive_compression_name*(_struct archive `*`_); +<br> +*void* +<br> +*archive_copy_error*(_struct archive `*`_, _struct archive `*`_); +<br> +*int* +<br> +*archive_errno*(_struct archive `*`_); +<br> +*const char `*`* +<br> +*archive_error_string*(_struct archive `*`_); +<br> +*int* +<br> +*archive_file_count*(_struct archive `*`_); +<br> +*int* +<br> +*archive_format*(_struct archive `*`_); +<br> +*const char `*`* +<br> +*archive_format_name*(_struct archive `*`_); +<br> +*void* +<br> +*archive_set_error*(_struct archive `*`_, _int error_code_, _const char `*`fmt_, _..._); +== DESCRIPTION == +These functions provide access to various information about the +*struct archive* +object used in the +*libarchive*(3) +library. +<dl> +<dt>*archive_clear_error*()</dt><dd> +Clears any error information left over from a previous call. +Not generally used in client code. +</dd><dt>*archive_compression*()</dt><dd> +Returns a numeric code indicating the current compression. +This value is set by +*archive_read_open*(). +</dd><dt>*archive_compression_name*()</dt><dd> +Returns a text description of the current compression suitable for display. 
+</dd><dt>*archive_copy_error*()</dt><dd> +Copies error information from one archive to another. +</dd><dt>*archive_errno*()</dt><dd> +Returns a numeric error code (see +*errno*(2)) +indicating the reason for the most recent error return. +</dd><dt>*archive_error_string*()</dt><dd> +Returns a textual error message suitable for display. +The error message here is usually more specific than that +obtained from passing the result of +*archive_errno*() +to +*strerror*(3). +</dd><dt>*archive_file_count*()</dt><dd> +Returns a count of the number of files processed by this archive object. +The count is incremented by calls to +*archive_write_header*() +or +*archive_read_next_header*(). +</dd><dt>*archive_format*()</dt><dd> +Returns a numeric code indicating the format of the current +archive entry. +This value is set by a successful call to +*archive_read_next_header*(). +Note that it is common for this value to change from +entry to entry. +For example, a tar archive might have several entries that +utilize GNU tar extensions and several entries that do not. +These entries will have different format codes. +</dd><dt>*archive_format_name*()</dt><dd> +A textual description of the format of the current entry. +</dd><dt>*archive_set_error*()</dt><dd> +Sets the numeric error code and error description that will be returned +by +*archive_errno*() +and +*archive_error_string*(). +This function should be used within I/O callbacks to set system-specific +error codes and error descriptions. +This function accepts a printf-like format string and arguments. +However, you should be careful to use only the following printf +format specifiers: +"%c", +"%d", +"%jd", +"%jo", +"%ju", +"%jx", +"%ld", +"%lo", +"%lu", +"%lx", +"%o", +"%u", +"%s", +"%x", +"%%". +Field-width specifiers and other printf features are +not uniformly supported and should not be used.
+</dd></dl> +== SEE ALSO == +*archive_read*(3), +*archive_write*(3), +*libarchive*(3), +*printf*(3) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org.> diff --git a/archivers/libarchive/files/doc/wiki/ManPageArchiveWrite3.wiki b/archivers/libarchive/files/doc/wiki/ManPageArchiveWrite3.wiki new file mode 100644 index 00000000000..30ccd8f36c4 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageArchiveWrite3.wiki @@ -0,0 +1,630 @@ +#summary archive_write 3 manual page +== NAME == +*archive_write_new*, +*archive_write_set_format_cpio*, +*archive_write_set_format_pax*, +*archive_write_set_format_pax_restricted*, +*archive_write_set_format_shar*, +*archive_write_set_format_shar_binary*, +*archive_write_set_format_ustar*, +*archive_write_get_bytes_per_block*, +*archive_write_set_bytes_per_block*, +*archive_write_set_bytes_in_last_block*, +*archive_write_set_compression_bzip2*, +*archive_write_set_compression_compress*, +*archive_write_set_compression_gzip*, +*archive_write_set_compression_none*, +*archive_write_set_compression_program*, +*archive_write_set_compressor_options*, +*archive_write_set_format_options*, +*archive_write_set_options*, +*archive_write_open*, +*archive_write_open_fd*, +*archive_write_open_FILE*, +*archive_write_open_filename*, +*archive_write_open_memory*, +*archive_write_header*, +*archive_write_data*, +*archive_write_finish_entry*, +*archive_write_close*, +*archive_write_finish* +- functions for creating archives +== SYNOPSIS == +*#include <archive.h>* +<br> +*struct archive `*`* +<br> +*archive_write_new*(_void_); +<br> +*int* +<br> +*archive_write_get_bytes_per_block*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_bytes_per_block*(_struct archive `*`_, _int bytes_per_block_); +<br> +*int* +<br> +*archive_write_set_bytes_in_last_block*(_struct archive `*`_, _int_); +<br> +*int* +<br> 
+*archive_write_set_compression_bzip2*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_compression_compress*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_compression_gzip*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_compression_none*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_compression_program*(_struct archive `*`_, _const char `*` cmd_); +<br> +*int* +<br> +*archive_write_set_format_cpio*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_format_pax*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_format_pax_restricted*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_format_shar*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_format_shar_binary*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_format_ustar*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_set_format_options*(_struct archive `*`_, _const char `*`_); +<br> +*int* +<br> +*archive_write_set_compressor_options*(_struct archive `*`_, _const char `*`_); +<br> +*int* +<br> +*archive_write_set_options*(_struct archive `*`_, _const char `*`_); +<br> +*int* +<br> +*archive_write_open*(_struct archive `*`_, _void `*`client_data_, _archive_open_callback `*`_, _archive_write_callback `*`_, _archive_close_callback `*`_); +<br> +*int* +<br> +*archive_write_open_fd*(_struct archive `*`_, _int fd_); +<br> +*int* +<br> +*archive_write_open_FILE*(_struct archive `*`_, _FILE `*`file_); +<br> +*int* +<br> +*archive_write_open_filename*(_struct archive `*`_, _const char `*`filename_); +<br> +*int* +<br> +*archive_write_open_memory*(_struct archive `*`_, _void `*`buffer_, _size_t bufferSize_, _size_t `*`outUsed_); +<br> +*int* +<br> +*archive_write_header*(_struct archive `*`_, _struct archive_entry `*`_); +<br> +*ssize_t* +<br> +*archive_write_data*(_struct archive `*`_, _const void `*`_, _size_t_); +<br> +*int* +<br> +*archive_write_finish_entry*(_struct archive `*`_); +<br> 
+*int* +<br> +*archive_write_close*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_finish*(_struct archive `*`_); +== DESCRIPTION == +These functions provide a complete API for creating streaming +archive files. +The general process is to first create the +*struct archive* +object, set any desired options, initialize the archive, append entries, then +close the archive and release all resources. +The following summary describes the functions in approximately +the order they are ordinarily used: +<dl> +<dt>*archive_write_new*()</dt><dd> +Allocates and initializes a +*struct archive* +object suitable for writing a tar archive. +</dd><dt>*archive_write_set_bytes_per_block*()</dt><dd> +Sets the block size used for writing the archive data. +Every call to the write callback function, except possibly the last one, will +use this value for the length. +The third parameter is a boolean that specifies whether or not the final block +written will be padded to the full block size. +If it is zero, the last block will not be padded. +If it is non-zero, padding will be added both before and after compression. +The default is to use a block size of 10240 bytes and to pad the last block. +Note that a block size of zero will suppress internal blocking +and cause writes to be sent directly to the write callback as they occur. +</dd><dt>*archive_write_get_bytes_per_block*()</dt><dd> +Retrieve the block size to be used for writing. +A value of -1 here indicates that the library should use default values. +A value of zero indicates that internal blocking is suppressed. +</dd><dt>*archive_write_set_bytes_in_last_block*()</dt><dd> +Sets the block size used for writing the last block. +If this value is zero, the last block will be padded to the same size +as the other blocks. +Otherwise, the final block will be padded to a multiple of this size. +In particular, setting it to 1 will cause the final block to not be padded. 
+For compressed output, any padding generated by this option +is applied only after the compression. +The uncompressed data is always unpadded. +The default is to pad the last block to the full block size (note that +*archive_write_open_filename*() +will set this based on the file type). +Unlike the other +"set" +functions, this function can be called after the archive is opened. +</dd><dt>*archive_write_get_bytes_in_last_block*()</dt><dd> +Retrieve the currently-set value for last block size. +A value of -1 here indicates that the library should use default values. +</dd><dt> +*archive_write_set_format_cpio*(), +*archive_write_set_format_pax*(), +*archive_write_set_format_pax_restricted*(), +*archive_write_set_format_shar*(), +*archive_write_set_format_shar_binary*(), +*archive_write_set_format_ustar*() +</dt> <dd> +Sets the format that will be used for the archive. +The library can write +POSIX octet-oriented cpio format archives, +POSIX-standard +"pax interchange" +format archives, +traditional +"shar" +archives, +enhanced +"binary" +shar archives that store a variety of file attributes and handle binary files, +and +POSIX-standard +"ustar" +archives. +The pax interchange format is a backwards-compatible tar format that +adds key/value attributes to each entry and supports arbitrary +filenames, linknames, uids, sizes, etc. +"Restricted pax interchange format" +is the library default; this is the same as pax format, but suppresses +the pax extended header for most normal files. +In most cases, this will result in ordinary ustar archives. +</dd><dt> +*archive_write_set_compression_bzip2*(), +*archive_write_set_compression_compress*(), +*archive_write_set_compression_gzip*(), +*archive_write_set_compression_none*() +</dt> <dd> +The resulting archive will be compressed as specified. +Note that the compressed output is always properly blocked. +</dd><dt>*archive_write_set_compression_program*()</dt><dd> +The archive will be fed into the specified compression program. 
+The output of that program is blocked and written to the client +write callbacks. +</dd><dt> +*archive_write_set_compressor_options*(), +*archive_write_set_format_options*(), +*archive_write_set_options*() +</dt> <dd> +Specifies options that will be passed to the currently-enabled +compressor and/or format writer. +The argument is a comma-separated list of individual options. +Individual options have one of the following forms: +<dl> +<dt>_option=value_</dt><dd> +The option/value pair will be provided to every module. +Modules that do not accept an option with this name will ignore it. +</dd><dt>_option_</dt><dd> +The option will be provided to every module with a value of +"1". +</dd><dt>_!option_</dt><dd> +The option will be provided to every module with a NULL value. +</dd><dt>_module:option=value_, _module:option_, _module:!option_</dt><dd> +As above, but the corresponding option and value will be provided +only to modules whose name matches +_module_. +</dd></dl> +The return value will be +*ARCHIVE_OK* +if any module accepts the option, or +*ARCHIVE_WARN* +if no module accepted the option, or +*ARCHIVE_FATAL* +if there was a fatal error while attempting to process the option. + +The currently supported options are: +<dl> +<dt>Compressor gzip</dt><dd> +<dl> +<dt>*compression-level*</dt><dd> +The value is interpreted as a decimal integer specifying the +gzip compression level. +</dd></dl> +</dd><dt>Compressor xz</dt><dd> +<dl> +<dt>*compression-level*</dt><dd> +The value is interpreted as a decimal integer specifying the +compression level. +</dd></dl> +</dd><dt>Format mtree</dt><dd> +<dl> +<dt>*cksum*, *device*, *flags*, *gid*, *gname*, *indent*, *link*, *md5*, *mode*, *nlink*, *rmd160*, *sha1*, *sha256*, *sha384*, *sha512*, *size*, *time*, *uid*, *uname*</dt><dd> +Enable a particular keyword in the mtree output. +Prefix with an exclamation mark to disable the corresponding keyword. 
+The default is equivalent to +"device, flags, gid, gname, link, mode, nlink, size, time, type, uid, uname". +</dd><dt>*all*</dt><dd> +Enables all of the above keywords. +</dd><dt>*use-set*</dt><dd> +Enables generation of +*/set* +lines that specify default values for the following files and/or directories. +</dd><dt>*indent*</dt><dd> +XXX needs explanation XXX +</dd></dl> +</dd></dl> +</dd><dt>*archive_write_open*()</dt><dd> +Freeze the settings, open the archive, and prepare for writing entries. +This is the most generic form of this function, which accepts +pointers to three callback functions which will be invoked by +the compression layer to write the constructed archive. +</dd><dt>*archive_write_open_fd*()</dt><dd> +A convenience form of +*archive_write_open*() +that accepts a file descriptor. +The +*archive_write_open_fd*() +function is safe for use with tape drives or other +block-oriented devices. +</dd><dt>*archive_write_open_FILE*()</dt><dd> +A convenience form of +*archive_write_open*() +that accepts a +*FILE `*`* +pointer. +Note that +*archive_write_open_FILE*() +is not safe for writing to tape drives or other devices +that require correct blocking. +</dd><dt>*archive_write_open_file*()</dt><dd> +A deprecated synonym for +*archive_write_open_filename*(). +</dd><dt>*archive_write_open_filename*()</dt><dd> +A convenience form of +*archive_write_open*() +that accepts a filename. +A NULL argument indicates that the output should be written to standard output; +an argument of +"-" +will open a file with that name. +If you have not invoked +*archive_write_set_bytes_in_last_block*(), +then +*archive_write_open_filename*() +will adjust the last-block padding depending on the file: +it will enable padding when writing to standard output or +to a character or block device node, it will disable padding otherwise. +You can override this by manually invoking +*archive_write_set_bytes_in_last_block*() +before calling +*archive_write_open*(). 
+The +*archive_write_open_filename*() +function is safe for use with tape drives or other +block-oriented devices. +</dd><dt>*archive_write_open_memory*()</dt><dd> +A convenience form of +*archive_write_open*() +that accepts a pointer to a block of memory that will receive +the archive. +The final +*size_t `*`* +argument points to a variable that will be updated +after each write to reflect how much of the buffer +is currently in use. +You should be careful to ensure that this variable +remains allocated until after the archive is +closed. +</dd><dt>*archive_write_header*()</dt><dd> +Build and write a header using the data in the provided +*struct archive_entry* +structure. +See +*archive_entry*(3) +for information on creating and populating +*struct archive_entry* +objects. +</dd><dt>*archive_write_data*()</dt><dd> +Write data corresponding to the header just written. +Returns number of bytes written or -1 on error. +</dd><dt>*archive_write_finish_entry*()</dt><dd> +Close out the entry just written. +In particular, this writes out the final padding required by some formats. +Ordinarily, clients never need to call this, as it +is called automatically by +*archive_write_header*() +and +*archive_write_close*() +as needed. +</dd><dt>*archive_write_close*()</dt><dd> +Complete the archive and invoke the close callback. +</dd><dt>*archive_write_finish*()</dt><dd> +Invokes +*archive_write_close*() +if it was not invoked manually, then releases all resources. +Note that this function was declared to return +*void* +in libarchive 1.x, which made it impossible to detect errors when +*archive_write_close*() +was invoked implicitly from this function. +This is corrected beginning with libarchive 2.0. +</dd></dl> +More information about the +*struct archive* +object and the overall design of the library can be found in the +*libarchive*(3) +overview.
+== IMPLEMENTATION == +Compression support is built in to libarchive, which uses zlib and bzlib +to handle gzip and bzip2 compression, respectively. +== CLIENT CALLBACKS == +To use this library, you will need to define and register +callback functions that will be invoked to write data to the +resulting archive. +These functions are registered by calling +*archive_write_open*(): +<ul> +<li> +*typedef int* +*archive_open_callback*(_struct archive `*`_, _void `*`client_data_) +</li></ul> + +The open callback is invoked by +*archive_write_open*(). +It should return +*ARCHIVE_OK* +if the underlying file or data source is successfully +opened. +If the open fails, it should call +*archive_set_error*() +to register an error code and message and return +*ARCHIVE_FATAL*. +<ul> +<li> +*typedef ssize_t* +*archive_write_callback*(_struct archive `*`_, _void `*`client_data_, _const void `*`buffer_, _size_t length_) +</li></ul> + +The write callback is invoked whenever the library +needs to write raw bytes to the archive. +For correct blocking, each call to the write callback function +should translate into a single +*write*(2) +system call. +This is especially critical when writing archives to tape drives. +On success, the write callback should return the +number of bytes actually written. +On error, the callback should invoke +*archive_set_error*() +to register an error code and message and return -1. +<ul> +<li> +*typedef int* +*archive_close_callback*(_struct archive `*`_, _void `*`client_data_) +</li></ul> + +The close callback is invoked by *archive_write_close*() when +the archive processing is complete. +The callback should return +*ARCHIVE_OK* +on success. +On failure, the callback should invoke +*archive_set_error*() +to register an error code and message and +return +*ARCHIVE_FATAL*. +== EXAMPLE == +The following sketch illustrates basic usage of the library. 
+In this example, +the callback functions are simply wrappers around the standard +*open*(2), +*write*(2), +and +*close*(2) +system calls. +{{{ +#ifdef __linux__ +#define _FILE_OFFSET_BITS 64 +#endif +#include <sys/stat.h> +#include <archive.h> +#include <archive_entry.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +struct mydata { + const char *name; + int fd; +}; +int +myopen(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + mydata->fd = open(mydata->name, O_WRONLY | O_CREAT, 0644); + if (mydata->fd >= 0) + return (ARCHIVE_OK); + else + return (ARCHIVE_FATAL); +} +ssize_t +mywrite(struct archive *a, void *client_data, const void *buff, size_t n) +{ + struct mydata *mydata = client_data; + return (write(mydata->fd, buff, n)); +} +int +myclose(struct archive *a, void *client_data) +{ + struct mydata *mydata = client_data; + if (mydata->fd >= 0) + close(mydata->fd); + return (0); +} +void +write_archive(const char *outname, const char **filename) +{ + struct mydata *mydata = malloc(sizeof(struct mydata)); + struct archive *a; + struct archive_entry *entry; + struct stat st; + char buff[8192]; + int len; + int fd; + a = archive_write_new(); + mydata->name = outname; + archive_write_set_compression_gzip(a); + archive_write_set_format_ustar(a); + archive_write_open(a, mydata, myopen, mywrite, myclose); + while (*filename) { + stat(*filename, &st); + entry = archive_entry_new(); + archive_entry_copy_stat(entry, &st); + archive_entry_set_pathname(entry, *filename); + archive_write_header(a, entry); + fd = open(*filename, O_RDONLY); + len = read(fd, buff, sizeof(buff)); + while ( len > 0 ) { + archive_write_data(a, buff, len); + len = read(fd, buff, sizeof(buff)); + } + archive_entry_free(entry); + filename++; + } + archive_write_finish(a); +} +int main(int argc, const char **argv) +{ + const char *outname; + argv++; + outname = *argv++; + write_archive(outname, argv); + return 0; +} +}}} +== RETURN VALUES == +Most functions 
return +*ARCHIVE_OK* +(zero) on success, or one of several non-zero +error codes for errors. +Specific error codes include: +*ARCHIVE_RETRY* +for operations that might succeed if retried, +*ARCHIVE_WARN* +for unusual conditions that do not prevent further operations, and +*ARCHIVE_FATAL* +for serious errors that make remaining operations impossible. +The +*archive_errno*() +and +*archive_error_string*() +functions can be used to retrieve an appropriate error code and a +textual error message. + +*archive_write_new*() +returns a pointer to a newly-allocated +*struct archive* +object. + +*archive_write_data*() +returns a count of the number of bytes actually written. +On error, -1 is returned and the +*archive_errno*() +and +*archive_error_string*() +functions will return appropriate values. +Note that if the client-provided write callback function +returns a negative value, that error will be propagated back to the caller +through whatever API function resulted in that call, which +may include +*archive_write_header*(), +*archive_write_data*(), +*archive_write_close*(), +or +*archive_write_finish*(). +The client callback can call +*archive_set_error*() +to provide values that can then be retrieved by +*archive_errno*() +and +*archive_error_string*(). +== SEE ALSO == +*tar*(1), +*libarchive*(3), +*tar*(5) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org>. +== BUGS == +There are many peculiar bugs in historic tar implementations that may cause +certain programs to reject archives written by this library. +For example, several historic implementations calculated header checksums +incorrectly and will thus reject valid archives; GNU tar does not fully support +pax interchange format; some old tar implementations required specific +field terminations. 
+ +The default pax interchange format eliminates most of the historic +tar limitations and provides a generic key/value attribute facility +for vendor-defined extensions. +One oversight in POSIX is the failure to provide a standard attribute +for large device numbers. +This library uses +"SCHILY.devminor" +and +"SCHILY.devmajor" +for device numbers that exceed the range supported by the backwards-compatible +ustar header. +These keys are compatible with Joerg Schilling's +*star* +archiver. +Other implementations may not recognize these keys and will thus be unable +to correctly restore device nodes with large device numbers from archives +created by this library. diff --git a/archivers/libarchive/files/doc/wiki/ManPageArchiveWriteDisk3.wiki b/archivers/libarchive/files/doc/wiki/ManPageArchiveWriteDisk3.wiki new file mode 100644 index 00000000000..f71f85fc0ca --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageArchiveWriteDisk3.wiki @@ -0,0 +1,358 @@ +#summary archive_write_disk 3 manual page +== NAME == +*archive_write_disk_new*, +*archive_write_disk_set_options*, +*archive_write_disk_set_skip_file*, +*archive_write_disk_set_group_lookup*, +*archive_write_disk_set_standard_lookup*, +*archive_write_disk_set_user_lookup*, +*archive_write_header*, +*archive_write_data*, +*archive_write_finish_entry*, +*archive_write_close*, +*archive_write_finish* +- functions for creating objects on disk +== SYNOPSIS == +*#include <archive.h>* +<br> +*struct archive `*`* +<br> +*archive_write_disk_new*(_void_); +<br> +*int* +<br> +*archive_write_disk_set_options*(_struct archive `*`_, _int flags_); +<br> +*int* +<br> +*archive_write_disk_set_skip_file*(_struct archive `*`_, _dev_t_, _ino_t_); +<br> +*int* +<br> +*archive_write_disk_set_group_lookup*(_struct archive `*`_, _void `*`_, _gid_t (`*`)(void `*`, const char `*`gname, gid_t gid)_, _void (`*`cleanup)(void `*`)_); +<br> +*int* +<br> +*archive_write_disk_set_standard_lookup*(_struct archive `*`_); +<br> +*int* +<br> 
+*archive_write_disk_set_user_lookup*(_struct archive `*`_, _void `*`_, _uid_t (`*`)(void `*`, const char `*`uname, uid_t uid)_, _void (`*`cleanup)(void `*`)_); +<br> +*int* +<br> +*archive_write_header*(_struct archive `*`_, _struct archive_entry `*`_); +<br> +*ssize_t* +<br> +*archive_write_data*(_struct archive `*`_, _const void `*`_, _size_t_); +<br> +*int* +<br> +*archive_write_finish_entry*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_close*(_struct archive `*`_); +<br> +*int* +<br> +*archive_write_finish*(_struct archive `*`_); +== DESCRIPTION == +These functions provide a complete API for creating objects on +disk from +*struct archive_entry* +descriptions. +They are most naturally used when extracting objects from an archive +using the +*archive_read*() +interface. +The general process is to read +*struct archive_entry* +objects from an archive, then write those objects to a +*struct archive* +object created using the +*archive_write_disk*() +family functions. +This interface is deliberately very similar to the +*archive_write*() +interface used to write objects to a streaming archive. +<dl> +<dt>*archive_write_disk_new*()</dt><dd> +Allocates and initializes a +*struct archive* +object suitable for writing objects to disk. +</dd><dt>*archive_write_disk_set_skip_file*()</dt><dd> +Records the device and inode numbers of a file that should not be +overwritten. +This is typically used to ensure that an extraction process does not +overwrite the archive from which objects are being read. +This capability is technically unnecessary but can be a significant +performance optimization in practice. +</dd><dt>*archive_write_disk_set_options*()</dt><dd> +The options field consists of a bitwise OR of one or more of the +following values: +<dl> +<dt>*ARCHIVE_EXTRACT_OWNER*</dt><dd> +The user and group IDs should be set on the restored file. +By default, the user and group IDs are not restored. 
+</dd><dt>*ARCHIVE_EXTRACT_PERM*</dt><dd> +Full permissions (including SGID, SUID, and sticky bits) should +be restored exactly as specified, without obeying the +current umask. +Note that SUID and SGID bits can only be restored if the +user and group ID of the object on disk are correct. +If +*ARCHIVE_EXTRACT_OWNER* +is not specified, then SUID and SGID bits will only be restored +if the default user and group IDs of newly-created objects on disk +happen to match those specified in the archive entry. +By default, only basic permissions are restored, and umask is obeyed. +</dd><dt>*ARCHIVE_EXTRACT_TIME*</dt><dd> +The timestamps (mtime, ctime, and atime) should be restored. +By default, they are ignored. +Note that restoring of atime is not currently supported. +</dd><dt>*ARCHIVE_EXTRACT_NO_OVERWRITE*</dt><dd> +Existing files on disk will not be overwritten. +By default, existing regular files are truncated and overwritten; +existing directories will have their permissions updated; +other pre-existing objects are unlinked and recreated from scratch. +</dd><dt>*ARCHIVE_EXTRACT_UNLINK*</dt><dd> +Existing files on disk will be unlinked before any attempt to +create them. +In some cases, this can prove to be a significant performance improvement. +By default, existing files are truncated and rewritten, but +the file is not recreated. +In particular, the default behavior does not break existing hard links. +</dd><dt>*ARCHIVE_EXTRACT_ACL*</dt><dd> +Attempt to restore ACLs. +By default, extended ACLs are ignored. +</dd><dt>*ARCHIVE_EXTRACT_FFLAGS*</dt><dd> +Attempt to restore extended file flags. +By default, file flags are ignored. +</dd><dt>*ARCHIVE_EXTRACT_XATTR*</dt><dd> +Attempt to restore POSIX.1e extended attributes. +By default, they are ignored. +</dd><dt>*ARCHIVE_EXTRACT_SECURE_SYMLINKS*</dt><dd> +Refuse to extract any object whose final location would be altered +by a symlink on disk. 
+This is intended to help guard against a variety of mischief +caused by archives that (deliberately or otherwise) extract +files outside of the current directory. +The default is not to perform this check. +If +*ARCHIVE_EXTRACT_UNLINK* +is specified together with this option, the library will +remove any intermediate symlinks it finds and return an +error only if such symlink could not be removed. +</dd><dt>*ARCHIVE_EXTRACT_SECURE_NODOTDOT*</dt><dd> +Refuse to extract a path that contains a +_.._ +element anywhere within it. +The default is to not refuse such paths. +Note that paths ending in +_.._ +always cause an error, regardless of this flag. +</dd><dt>*ARCHIVE_EXTRACT_SPARSE*</dt><dd> +Scan data for blocks of NUL bytes and try to recreate them with holes. +This results in sparse files, independent of whether the archive format +supports or uses them. +</dd></dl> +</dd><dt> +*archive_write_disk_set_group_lookup*(), +*archive_write_disk_set_user_lookup*() +</dt> <dd> +The +*struct archive_entry* +objects contain both names and ids that can be used to identify users +and groups. +These names and ids describe the ownership of the file itself and +also appear in ACL lists. +By default, the library uses the ids and ignores the names, but +this can be overridden by registering user and group lookup functions. +To register, you must provide a lookup function which +accepts both a name and id and returns a suitable id. +You may also provide a +*void `*`* +pointer to a private data structure and a cleanup function for +that data. +The cleanup function will be invoked when the +*struct archive* +object is destroyed. +</dd><dt>*archive_write_disk_set_standard_lookup*()</dt><dd> +This convenience function installs a standard set of user +and group lookup functions. +These functions use +*getpwnam*(3) +and +*getgrnam*(3) +to convert names to ids, defaulting to the ids if the names cannot +be looked up. 
+These functions also implement a simple memory cache to reduce +the number of calls to +*getpwnam*(3) +and +*getgrnam*(3). +</dd><dt>*archive_write_header*()</dt><dd> +Build and write a header using the data in the provided +*struct archive_entry* +structure. +See +*archive_entry*(3) +for information on creating and populating +*struct archive_entry* +objects. +</dd><dt>*archive_write_data*()</dt><dd> +Write data corresponding to the header just written. +Returns number of bytes written or -1 on error. +</dd><dt>*archive_write_finish_entry*()</dt><dd> +Close out the entry just written. +Ordinarily, clients never need to call this, as it +is called automatically by +*archive_write_header*() +and +*archive_write_close*() +as needed. +</dd><dt>*archive_write_close*()</dt><dd> +Set any attributes that could not be set during the initial restore. +For example, directory timestamps are not restored initially because +restoring a subsequent file would alter that timestamp. +Similarly, non-writable directories are initially created with +write permissions (so that their contents can be restored). +The +*archive_write_disk_new* +library maintains a list of all such deferred attributes and +sets them when this function is invoked. +</dd><dt>*archive_write_finish*()</dt><dd> +Invokes +*archive_write_close*() +if it was not invoked manually, then releases all resources. +</dd></dl> +More information about the +*struct archive* +object and the overall design of the library can be found in the +*libarchive*(3) +overview. +Many of these functions are also documented under +*archive_write*(3). +== RETURN VALUES == +Most functions return +*ARCHIVE_OK* +(zero) on success, or one of several non-zero +error codes for errors. +Specific error codes include: +*ARCHIVE_RETRY* +for operations that might succeed if retried, +*ARCHIVE_WARN* +for unusual conditions that do not prevent further operations, and +*ARCHIVE_FATAL* +for serious errors that make remaining operations impossible. 
+The +*archive_errno*() +and +*archive_error_string*() +functions can be used to retrieve an appropriate error code and a +textual error message. + +*archive_write_disk_new*() +returns a pointer to a newly-allocated +*struct archive* +object. + +*archive_write_data*() +returns a count of the number of bytes actually written. +On error, -1 is returned and the +*archive_errno*() +and +*archive_error_string*() +functions will return appropriate values. +== SEE ALSO == +*archive_read*(3), +*archive_write*(3), +*tar*(1), +*libarchive*(3) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. +The +*archive_write_disk* +interface was added to +*libarchive* 2.0 +and first appeared in +FreeBSD 6.3. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org>. +== BUGS == +Directories are actually extracted in two distinct phases. +Directories are created during +*archive_write_header*(), +but final permissions are not set until +*archive_write_close*(). +This separation is necessary to correctly handle borderline +cases such as a non-writable directory containing +files, but can cause unexpected results. +In particular, directory permissions are not fully +restored until the archive is closed. +If you use +*chdir*(2) +to change the current directory between calls to +*archive_read_extract*() +or before calling +*archive_read_close*(), +you may confuse the permission-setting logic with +the result that directory permissions are restored +incorrectly. + +The library attempts to create objects with filenames longer than +*PATH_MAX* +by creating prefixes of the full path and changing the current directory. +Currently, this logic is limited in scope; the fixup pass does +not work correctly for such objects and the symlink security check +option disables the support for very long pathnames. + +Restoring the path +_aa/../bb_ +does create each intermediate directory. 
+In particular, the directory +_aa_ +is created as well as the final object +_bb_. +In theory, this can be exploited to create an entire directory hierarchy +with a single request. +Of course, this does not work if the +*ARCHIVE_EXTRACT_SECURE_NODOTDOT* +option is specified. + +Implicit directories are always created obeying the current umask. +Explicit objects are created obeying the current umask unless +*ARCHIVE_EXTRACT_PERM* +is specified, in which case the current umask is ignored. + +SGID and SUID bits are restored only if the correct user and +group could be set. +If +*ARCHIVE_EXTRACT_OWNER* +is not specified, then no attempt is made to set the ownership. +In this case, SGID and SUID bits are restored only if the +user and group of the final object happen to match those specified +in the entry. + +The +"standard" +user-id and group-id lookup functions are not the defaults because +*getgrnam*(3) +and +*getpwnam*(3) +are sometimes too large for particular applications. +The current design allows the application author to use a more +compact implementation when appropriate. + +There should be a corresponding +*archive_read_disk* +interface that walks a directory hierarchy and returns archive +entry objects. diff --git a/archivers/libarchive/files/doc/wiki/ManPageBsdcpio1.wiki b/archivers/libarchive/files/doc/wiki/ManPageBsdcpio1.wiki new file mode 100644 index 00000000000..d3c24f5b66a --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageBsdcpio1.wiki @@ -0,0 +1,386 @@ +#summary BSDCPIO 1 manual page +== NAME == +*cpio* +- copy files to and from archives +== SYNOPSIS == +<br> +*cpio* +{*-i*} +`[`_options_`]` +`[`_pattern_ ...`]` +`[`_`<`_ archive`]` +<br> +*cpio* +{*-o*} +`[`_options_`]` +_`<`_ name-list +`[`_>_ archive`]` +<br> +*cpio* +{*-p*} +`[`_options_`]` +_dest-dir_ +_`<`_ name-list +== DESCRIPTION == +*cpio* +copies files between archives and directories. 
+This implementation can extract from tar, pax, cpio, zip, jar, ar, +and ISO 9660 cdrom images and can create tar, pax, cpio, ar, +and shar archives. + +The first option to +*cpio* +is a mode indicator from the following list: +<dl> +<dt>*-i*</dt><dd> +Input. +Read an archive from standard input (unless overridden) and extract the +contents to disk or (if the +*-t* +option is specified) +list the contents to standard output. +If one or more file patterns are specified, only files matching +one of the patterns will be extracted. +</dd><dt>*-o*</dt><dd> +Output. +Read a list of filenames from standard input and produce a new archive +on standard output (unless overridden) containing the specified items. +</dd><dt>*-p*</dt><dd> +Pass-through. +Read a list of filenames from standard input and copy the files to the +specified directory. +</dd></dl> + +== OPTIONS == +Unless specifically stated otherwise, options are applicable in +all operating modes. +<dl> +<dt>*-0*</dt><dd> +Read filenames separated by NUL characters instead of newlines. +This is necessary if any of the filenames being read might contain newlines. +</dd><dt>*-A*</dt><dd> +(o mode only) +Append to the specified archive. +(Not yet implemented.) +</dd><dt>*-a*</dt><dd> +(o and p modes) +Reset access times on files after they are read. +</dd><dt>*-B*</dt><dd> +(o mode only) +Block output to records of 5120 bytes. +</dd><dt>*-C* _size_</dt><dd> +(o mode only) +Block output to records of +_size_ +bytes. +</dd><dt>*-c*</dt><dd> +(o mode only) +Use the old POSIX portable character format. +Equivalent to +*--format* _odc_. +</dd><dt>*-d*</dt><dd> +(i and p modes) +Create directories as necessary. +</dd><dt>*-E* _file_</dt><dd> +(i mode only) +Read list of file name patterns from +_file_ +to list and extract. +</dd><dt>*-F* _file_</dt><dd> +Read archive from or write archive to +_file_. +</dd><dt>*-f* _pattern_</dt><dd> +(i mode only) +Ignore files that match +_pattern_. 
+</dd><dt>*--format* _format_</dt><dd> +(o mode only) +Produce the output archive in the specified format. +Supported formats include: + +<dl> +<dt>_cpio_</dt><dd> +Synonym for +_odc_. +</dd><dt>_newc_</dt><dd> +The SVR4 portable cpio format. +</dd><dt>_odc_</dt><dd> +The old POSIX.1 portable octet-oriented cpio format. +</dd><dt>_pax_</dt><dd> +The POSIX.1 pax format, an extension of the ustar format. +</dd><dt>_ustar_</dt><dd> +The POSIX.1 tar format. +</dd></dl> + +The default format is +_odc_. +See +*libarchive-formats*(5) +for more complete information about the +formats currently supported by the underlying +*libarchive*(3) +library. +</dd><dt>*-H* _format_</dt><dd> +Synonym for +*--format*. +</dd><dt>*-h*, *--help*</dt><dd> +Print usage information. +</dd><dt>*-I* _file_</dt><dd> +Read archive from +_file_. +</dd><dt>*-i*</dt><dd> +Input mode. +See above for description. +</dd><dt>*--insecure*</dt><dd> +(i and p mode only) +Disable security checks during extraction or copying. +This allows extraction via symbolic links and path names containing +`..` +in the name. +</dd><dt>*-J*</dt><dd> +(o mode only) +Compress the file with xz-compatible compression before writing it. +In input mode, this option is ignored; xz compression is recognized +automatically on input. +</dd><dt>*-j*</dt><dd> +Synonym for +*-y*. +</dd><dt>*-L*</dt><dd> +(o and p modes) +All symbolic links will be followed. +Normally, symbolic links are archived and copied as symbolic links. +With this option, the target of the link will be archived or copied instead. +</dd><dt>*-l*</dt><dd> +(p mode only) +Create links from the target directory to the original files, +instead of copying. +</dd><dt>*--lzma*</dt><dd> +(o mode only) +Compress the file with lzma-compatible compression before writing it. +In input mode, this option is ignored; lzma compression is recognized +automatically on input. 
+</dd><dt>*-m*</dt><dd> +(i and p modes) +Set file modification time on created files to match +those in the source. +</dd><dt>*-n*</dt><dd> +(i mode, only with +*-t*) +Display numeric uid and gid. +By default, +*cpio* +displays the user and group names when they are provided in the +archive, or looks up the user and group names in the system +password database. +</dd><dt>*--no-preserve-owner*</dt><dd> +(i mode only) +Do not attempt to restore file ownership. +This is the default when run by non-root users. +</dd><dt>*-O* _file_</dt><dd> +Write archive to +_file_. +</dd><dt>*-o*</dt><dd> +Output mode. +See above for description. +</dd><dt>*-p*</dt><dd> +Pass-through mode. +See above for description. +</dd><dt>*--preserve-owner*</dt><dd> +(i mode only) +Restore file ownership. +This is the default when run by the root user. +</dd><dt>*--quiet*</dt><dd> +Suppress unnecessary messages. +</dd><dt>*-R* `[`user`]``[`:`]``[`group`]`</dt><dd> +Set the owner and/or group on files in the output. +If group is specified with no user +(for example, +*-R* _:wheel_) +then the group will be set but not the user. +If the user is specified with a trailing colon and no group +(for example, +*-R* _root:_) +then the group will be set to the user's default group. +If the user is specified with no trailing colon, then +the user will be set but not the group. +In +*-i* +and +*-p* +modes, this option can only be used by the super-user. +(For compatibility, a period can be used in place of the colon.) +</dd><dt>*-r*</dt><dd> +(All modes.) +Rename files interactively. +For each file, a prompt is written to +_/dev/tty_ +containing the name of the file and a line is read from +_/dev/tty_. +If the line read is blank, the file is skipped. +If the line contains a single period, the file is processed normally. +Otherwise, the line is taken to be the new name of the file. +</dd><dt>*-t*</dt><dd> +(i mode only) +List the contents of the archive to stdout; +do not restore the contents to disk. 
+</dd><dt>*-u*</dt><dd> +(i and p modes) +Unconditionally overwrite existing files. +Ordinarily, an older file will not overwrite a newer file on disk. +</dd><dt>*-v*</dt><dd> +Print the name of each file to stderr as it is processed. +With +*-t*, +provide a detailed listing of each file. +</dd><dt>*--version*</dt><dd> +Print the program version information and exit. +</dd><dt>*-y*</dt><dd> +(o mode only) +Compress the archive with bzip2-compatible compression before writing it. +In input mode, this option is ignored; +bzip2 compression is recognized automatically on input. +</dd><dt>*-Z*</dt><dd> +(o mode only) +Compress the archive with compress-compatible compression before writing it. +In input mode, this option is ignored; +compression is recognized automatically on input. +</dd><dt>*-z*</dt><dd> +(o mode only) +Compress the archive with gzip-compatible compression before writing it. +In input mode, this option is ignored; +gzip compression is recognized automatically on input. +</dd></dl> +== ENVIRONMENT == +The following environment variables affect the execution of +*cpio*: +<dl> +<dt>*LANG*</dt><dd> +The locale to use. +See +*environ*(7) +for more information. +</dd><dt>*TZ*</dt><dd> +The timezone to use when displaying dates. +See +*environ*(7) +for more information. +</dd></dl> +== EXIT STATUS == +The *cpio* utility exits 0 on success, and >0 if an error occurs. +== EXAMPLES == +The +*cpio* +command is traditionally used to copy file hierarchies in conjunction +with the +*find*(1) +command. +The first example here simply copies all files from +_src_ +to +_dest_: +{{{ +find src | cpio -pmud dest +}}} + +By carefully selecting options to the +*find*(1) +command and combining it with other standard utilities, +it is possible to exercise very fine control over which files are copied. 
+This next example copies files from +_src_ +to +_dest_ +that are more than 2 days old and whose names match a particular pattern: +{{{ +find src -mtime +2 | grep foo[bar] | cpio -pdmu dest +}}} + +This example copies files from +_src_ +to +_dest_ +that are more than 2 days old and which contain the word +"foobar": +{{{ +find src -mtime +2 | xargs grep -l foobar | cpio -pdmu dest +}}} +== COMPATIBILITY == +The mode options i, o, and p and the options +a, B, c, d, f, l, m, r, t, u, and v comply with SUSv2. + +The old POSIX.1 standard specified that only +*-i*, +*-o*, +and +*-p* +were interpreted as command-line options. +Each took a single argument of a list of modifier +characters. +For example, the standard syntax allows +*-imu* +but does not support +*-miu* +or +*-i* *-m* *-u*, +since +_m_ +and +_u_ +are only modifiers to +*-i*, +they are not command-line options in their own right. +The syntax supported by this implementation is backwards-compatible +with the standard. +For best compatibility, scripts should limit themselves to the +standard syntax. +== SEE ALSO == +*bzip2*(1), +*tar*(1), +*gzip*(1), +*mt*(1), +*pax*(1), +*libarchive*(3), +*cpio*(5), +*libarchive-formats*(5), +*tar*(5) +== STANDARDS == +There is no current POSIX standard for the cpio command; it appeared +in +ISO/IEC 9945-1:1996 (``POSIX.1'') +but was dropped from +IEEE Std 1003.1-2001 (``POSIX.1''). + +The cpio, ustar, and pax interchange file formats are defined by +IEEE Std 1003.1-2001 (``POSIX.1'') +for the pax command. +== HISTORY == +The original +*cpio* +and +*find* +utilities were written by Dick Haight +while working in AT&T's Unix Support Group. +They first appeared in 1977 in PWB/UNIX 1.0, the +"Programmer's Work Bench" +system developed for use within AT&T. +They were first released outside of AT&T as part of System III Unix in 1981. +As a result, +*cpio* +actually predates +*tar*, +even though it was not well-known outside of AT&T until some time later. 
+ +This is a complete re-implementation based on the +*libarchive*(3) +library. +== BUGS == +The cpio archive format has several basic limitations: +It does not store user and group names, only numbers. +As a result, it cannot be reliably used to transfer +files between systems with dissimilar user and group numbering. +Older cpio formats limit the user and group numbers to +16 or 18 bits, which is insufficient for modern systems. +The cpio archive formats cannot support files over 4 gigabytes, +except for the +"odc" +variant, which can support files up to 8 gigabytes. diff --git a/archivers/libarchive/files/doc/wiki/ManPageBsdtar1.wiki b/archivers/libarchive/files/doc/wiki/ManPageBsdtar1.wiki new file mode 100644 index 00000000000..c1fedb14e1e --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageBsdtar1.wiki @@ -0,0 +1,941 @@ +#summary BSDTAR 1 manual page +== NAME == +*tar* +- manipulate tape archives +== SYNOPSIS == +<br> +*tar* +`[`_bundled-flags_ `<`args`>``]` +`[``<`_file_`>` | `<`_pattern_`>` ...`]` +<br> +*tar* +{*-c*} +`[`_options_`]` +`[`_files_ | _directories_`]` +<br> +*tar* +{*-r* | *-u*} +*-f* _archive-file_ +`[`_options_`]` +`[`_files_ | _directories_`]` +<br> +*tar* +{*-t* | *-x*} +`[`_options_`]` +`[`_patterns_`]` +== DESCRIPTION == +*tar* +creates and manipulates streaming archive files. +This implementation can extract from tar, pax, cpio, zip, jar, ar, +and ISO 9660 cdrom images and can create tar, pax, cpio, ar, +and shar archives. + +The first synopsis form shows a +"bundled" +option word. +This usage is provided for compatibility with historical implementations. +See COMPATIBILITY below for details. + +The other synopsis forms show the preferred usage. +The first option to +*tar* +is a mode indicator from the following list: +<dl> +<dt>*-c*</dt><dd> +Create a new archive containing the specified items. +</dd><dt>*-r*</dt><dd> +Like +*-c*, +but new entries are appended to the archive. 
+Note that this only works on uncompressed archives stored in regular files. +The +*-f* +option is required. +</dd><dt>*-t*</dt><dd> +List archive contents to stdout. +</dd><dt>*-u*</dt><dd> +Like +*-r*, +but new entries are added only if they have a modification date +newer than the corresponding entry in the archive. +Note that this only works on uncompressed archives stored in regular files. +The +*-f* +option is required. +</dd><dt>*-x*</dt><dd> +Extract to disk from the archive. +If a file with the same name appears more than once in the archive, +each copy will be extracted, with later copies overwriting (replacing) +earlier copies. +</dd></dl> + +In +*-c*, +*-r*, +or +*-u* +mode, each specified file or directory is added to the +archive in the order specified on the command line. +By default, the contents of each directory are also archived. + +In extract or list mode, the entire command line +is read and parsed before the archive is opened. +The pathnames or patterns on the command line indicate +which items in the archive should be processed. +Patterns are shell-style globbing patterns as +documented in +*tcsh*(1). +== OPTIONS == +Unless specifically stated otherwise, options are applicable in +all operating modes. +<dl> +<dt>*@*_archive_</dt><dd> +(c and r mode only) +The specified archive is opened and the entries +in it will be appended to the current archive. +As a simple example, +{{{ +tar -c -f - newfile @original.tar +}}} +writes a new archive to standard output containing a file +_newfile_ +and all of the entries from +_original.tar_. +In contrast, +{{{ +tar -c -f - newfile original.tar +}}} +creates a new archive with only two entries. +Similarly, +{{{ +tar -czf - --format pax @- +}}} +reads an archive from standard input (whose format will be determined +automatically) and converts it into a gzip-compressed +pax-format archive on stdout. +In this way, +*tar* +can be used to convert archives from one format to another. 
+</dd><dt>*-b* _blocksize_</dt><dd> +Specify the block size, in 512-byte records, for tape drive I/O. +As a rule, this argument is only needed when reading from or writing +to tape drives, and usually not even then as the default block size of +20 records (10240 bytes) is very common. +</dd><dt>*-C* _directory_</dt><dd> +In c and r mode, this changes the directory before adding +the following files. +In x mode, change directories after opening the archive +but before extracting entries from the archive. +</dd><dt>*--check-links*</dt><dd> +(c and r modes only) +Issue a warning message unless all links to each file are archived. +</dd><dt>*--chroot*</dt><dd> +(x mode only) +*chroot*() +to the current directory after processing any +*-C* +options and before extracting any files. +</dd><dt>*--exclude* _pattern_</dt><dd> +Do not process files or directories that match the +specified pattern. +Note that exclusions take precedence over patterns or filenames +specified on the command line. +</dd><dt>*--format* _format_</dt><dd> +(c, r, u mode only) +Use the specified format for the created archive. +Supported formats include +"cpio", +"pax", +"shar", +and +"ustar". +Other formats may also be supported; see +*libarchive-formats*(5) +for more information about currently-supported formats. +In r and u modes, when extending an existing archive, the format specified +here must be compatible with the format of the existing archive on disk. +</dd><dt>*-f* _file_</dt><dd> +Read the archive from or write the archive to the specified file. +The filename can be +_-_ +for standard input or standard output. +If not specified, the default tape device will be used. +(On +FreeBSD, +the default tape device is +_/dev/sa0_.) +</dd><dt>*-H*</dt><dd> +(c and r mode only) +Symbolic links named on the command line will be followed; the +target of the link will be archived, not the link itself. +</dd><dt>*-h*</dt><dd> +(c and r mode only) +Synonym for +*-L*. 
+</dd><dt>*-I*</dt><dd> +Synonym for +*-T*. +</dd><dt>*--include* _pattern_</dt><dd> +Process only files or directories that match the specified pattern. +Note that exclusions specified with +*--exclude* +take precedence over inclusions. +If no inclusions are explicitly specified, all entries are processed by +default. +The +*--include* +option is especially useful when filtering archives. +For example, the command +{{{ +tar -c -f new.tar --include='*foo*' @old.tgz +}}} +creates a new archive +_new.tar_ +containing only the entries from +_old.tgz_ +containing the string +'foo'. +</dd><dt>*-j*</dt><dd> +(c mode only) +Compress the resulting archive with +*bzip2*(1). +In extract or list modes, this option is ignored. +Note that, unlike other +*tar* +implementations, this implementation recognizes bzip2 compression +automatically when reading archives. +</dd><dt>*-k*</dt><dd> +(x mode only) +Do not overwrite existing files. +In particular, if a file appears more than once in an archive, +later copies will not overwrite earlier copies. +</dd><dt>*--keep-newer-files*</dt><dd> +(x mode only) +Do not overwrite existing files that are newer than the +versions appearing in the archive being extracted. +</dd><dt>*-L*</dt><dd> +(c and r mode only) +All symbolic links will be followed. +Normally, symbolic links are archived as such. +With this option, the target of the link will be archived instead. +</dd><dt>*-l*</dt><dd> +This is a synonym for the +*--check-links* +option. +</dd><dt>*-m*</dt><dd> +(x mode only) +Do not extract modification time. +By default, the modification time is set to the time stored in the archive. +</dd><dt>*-n*</dt><dd> +(c, r, u modes only) +Do not recursively archive the contents of directories. +</dd><dt>*--newer* _date_</dt><dd> +(c, r, u modes only) +Only include files and directories newer than the specified date. +This compares ctime entries.
+</dd><dt>*--newer-mtime* _date_</dt><dd> +(c, r, u modes only) +Like +*--newer*, +except it compares mtime entries instead of ctime entries. +</dd><dt>*--newer-than* _file_</dt><dd> +(c, r, u modes only) +Only include files and directories newer than the specified file. +This compares ctime entries. +</dd><dt>*--newer-mtime-than* _file_</dt><dd> +(c, r, u modes only) +Like +*--newer-than*, +except it compares mtime entries instead of ctime entries. +</dd><dt>*--nodump*</dt><dd> +(c and r modes only) +Honor the nodump file flag by skipping this file. +</dd><dt>*--null*</dt><dd> +(use with +*-I*, +*-T*, +or +*-X*) +Filenames or patterns are separated by null characters, +not by newlines. +This is often used to read filenames output by the +*-print0* +option to +*find*(1). +</dd><dt>*--numeric-owner*</dt><dd> +(x mode only) +Ignore symbolic user and group names when restoring archives to disk, +only numeric uid and gid values will be obeyed. +</dd><dt>*-O*</dt><dd> +(x, t modes only) +In extract (-x) mode, files will be written to standard out rather than +being extracted to disk. +In list (-t) mode, the file listing will be written to stderr rather than +the usual stdout. +</dd><dt>*-o*</dt><dd> +(x mode) +Use the user and group of the user running the program rather +than those specified in the archive. +Note that this has no significance unless +*-p* +is specified, and the program is being run by the root user. +In this case, the file modes and flags from +the archive will be restored, but ACLs or owner information in +the archive will be discarded. +</dd><dt>*-o*</dt><dd> +(c, r, u mode) +A synonym for +*--format* _ustar_ +</dd><dt>*--one-file-system*</dt><dd> +(c, r, and u modes) +Do not cross mount points. +</dd><dt>*--options* _options_</dt><dd> +Select optional behaviors for particular modules. +The argument is a text string containing comma-separated +keywords and values. 
+These are passed to the modules that handle particular +formats to control how those formats will behave. +Each option has one of the following forms: +<dl> +<dt>_key=value_</dt><dd> +The key will be set to the specified value in every module that supports it. +Modules that do not support this key will ignore it. +</dd><dt>_key_</dt><dd> +The key will be enabled in every module that supports it. +This is equivalent to +_key_*=1*. +</dd><dt>_!key_</dt><dd> +The key will be disabled in every module that supports it. +</dd><dt>_module:key=value_, _module:key_, _module:!key_</dt><dd> +As above, but the corresponding key and value will be provided +only to modules whose name matches +_module_. +</dd></dl> +The currently supported modules and keys are: +<dl> +<dt>*iso9660:joliet*</dt><dd> +Support Joliet extensions. +This is enabled by default, use +*!joliet* +or +*iso9660:!joliet* +to disable. +</dd><dt>*iso9660:rockridge*</dt><dd> +Support Rock Ridge extensions. +This is enabled by default, use +*!rockridge* +or +*iso9660:!rockridge* +to disable. +</dd><dt>*gzip:compression-level*</dt><dd> +A decimal integer from 0 to 9 specifying the gzip compression level. +</dd><dt>*xz:compression-level*</dt><dd> +A decimal integer from 0 to 9 specifying the xz compression level. +</dd><dt>*mtree:*_keyword_</dt><dd> +The mtree writer module allows you to specify which mtree keywords +will be included in the output. +Supported keywords include: +*cksum*, *device*, *flags*, *gid*, *gname*, *indent*, +*link*, *md5*, *mode*, *nlink*, *rmd160*, *sha1*, *sha256*, +*sha384*, *sha512*, *size*, *time*, *uid*, *uname*. +The default is equivalent to: +"device, flags, gid, gname, link, mode, nlink, size, time, type, uid, uname". +</dd><dt>*mtree:all*</dt><dd> +Enables all of the above keywords. +You can also use +*mtree:!all* +to disable all keywords. +</dd><dt>*mtree:use-set*</dt><dd> +Enable generation of +*/set* +lines in the output. 
+</dd><dt>*mtree:indent*</dt><dd> +Produce human-readable output by indenting options and splitting lines +to fit into 80 columns. +</dd><dt>*zip:compression*=_type_</dt><dd> +Use +_type_ +as compression method. +Supported values are store (uncompressed) and deflate (gzip algorithm). +</dd></dl> +If a provided option is not supported by any module, that +is a fatal error. +</dd><dt>*-P*</dt><dd> +Preserve pathnames. +By default, absolute pathnames (those that begin with a / +character) have the leading slash removed both when creating archives +and extracting from them. +Also, +*tar* +will refuse to extract archive entries whose pathnames contain +_.._ +or whose target directory would be altered by a symlink. +This option suppresses these behaviors. +</dd><dt>*-p*</dt><dd> +(x mode only) +Preserve file permissions. +Attempt to restore the full permissions, including owner, file modes, file +flags and ACLs, if available, for each item extracted from the archive. +By default, newly-created files are owned by the user running +*tar*, +the file mode is restored for newly-created regular files, and +all other types of entries receive default permissions. +If +*tar* +is being run by root, the default is to restore the owner unless the +*-o* +option is also specified. +</dd><dt>*-q* (*--fast-read*)</dt><dd> +(x and t mode only) +Extract or list only the first archive entry that matches each pattern +or filename operand. +Exit as soon as each specified pattern or filename has been matched. +By default, the archive is always read to the very end, since +there can be multiple entries with the same name and, by convention, +later entries overwrite earlier entries. +This option is provided as a performance optimization. +</dd><dt>*-S*</dt><dd> +(x mode only) +Extract files as sparse files. +For every block on disk, check first whether it contains only NUL bytes and, if so, seek +over it instead of writing it. +This works similarly to the conv=sparse option of dd.
+</dd><dt>*--strip-components* _count_</dt><dd> +(x mode only) +Remove the specified number of leading path elements. +Pathnames with fewer elements will be silently skipped. +Note that the pathname is edited after checking inclusion/exclusion patterns +but before security checks. +</dd><dt>*-s* _pattern_</dt><dd> +Modify file or archive member names according to +_pattern_. +The pattern has the format +_/old/new/_`[`gps`]` +where +_old_ +is a basic regular expression, +_new_ +is the replacement string of the matched part, +and the optional trailing letters modify +how the replacement is handled. +If +_old_ +is not matched, the pattern is skipped. +Within +_new_, +~ is substituted with the match, \1 to \9 with the content of +the corresponding captured group. +The optional trailing g specifies that matching should continue +after the matched part and stopped on the first unmatched pattern. +The optional trailing s specifies that the pattern applies to the value +of symbolic links. +The optional trailing p specifies that after a successful substitution +the original path name and the new path name should be printed to +standard error. +</dd><dt>*-T* _filename_</dt><dd> +In x or t mode, +*tar* +will read the list of names to be extracted from +_filename_. +In c mode, +*tar* +will read names to be archived from +_filename_. +The special name +"-C" +on a line by itself will cause the current directory to be changed to +the directory specified on the following line. +Names are terminated by newlines unless +*--null* +is specified. +Note that +*--null* +also disables the special handling of lines containing +"-C". +</dd><dt>*-U*</dt><dd> +(x mode only) +Unlink files before creating them. +Without this option, +*tar* +overwrites existing files, which preserves existing hardlinks. +With this option, existing hardlinks will be broken, as will any +symlink that would affect the location of an extracted file. 
+</dd><dt>*--use-compress-program* _program_</dt><dd> +Pipe the input (in x or t mode) or the output (in c mode) through +_program_ +instead of using the builtin compression support. +</dd><dt>*-v*</dt><dd> +Produce verbose output. +In create and extract modes, +*tar* +will list each file name as it is read from or written to +the archive. +In list mode, +*tar* +will produce output similar to that of +*ls*(1). +Additional +*-v* +options will provide additional detail. +</dd><dt>*--version*</dt><dd> +Print version of +*tar* +and +*libarchive*, +and exit. +</dd><dt>*-w*</dt><dd> +Ask for confirmation for every action. +</dd><dt>*-X* _filename_</dt><dd> +Read a list of exclusion patterns from the specified file. +See +*--exclude* +for more information about the handling of exclusions. +</dd><dt>*-y*</dt><dd> +(c mode only) +Compress the resulting archive with +*bzip2*(1). +In extract or list modes, this option is ignored. +Note that, unlike other +*tar* +implementations, this implementation recognizes bzip2 compression +automatically when reading archives. +</dd><dt>*-z*</dt><dd> +(c mode only) +Compress the resulting archive with +*gzip*(1). +In extract or list modes, this option is ignored. +Note that, unlike other +*tar* +implementations, this implementation recognizes gzip compression +automatically when reading archives. +</dd><dt>*-Z*</dt><dd> +(c mode only) +Compress the resulting archive with +*compress*(1). +In extract or list modes, this option is ignored. +Note that, unlike other +*tar* +implementations, this implementation recognizes compress compression +automatically when reading archives. +</dd></dl> +== ENVIRONMENT == +The following environment variables affect the execution of +*tar*: +<dl> +<dt>*LANG*</dt><dd> +The locale to use. +See +*environ*(7) +for more information. +</dd><dt>*TAPE*</dt><dd> +The default tape device. +The +*-f* +option overrides this. +</dd><dt>*TZ*</dt><dd> +The timezone to use when displaying dates. +See +*environ*(7) +for more information.
+</dd></dl> +== FILES == +<dl> +<dt>*/dev/sa0*</dt><dd> +The default tape device, if not overridden by the +*TAPE* +environment variable or the +*-f* +option. +</dd></dl> +== EXIT STATUS == +The *tar* utility exits 0 on success, and >0 if an error occurs. +== EXAMPLES == +The following creates a new archive +called +_file.tar.gz_ +that contains two files +_source.c_ +and +_source.h_: +{{{ +tar -czf file.tar.gz source.c source.h +}}} + +To view a detailed table of contents for this +archive: +{{{ +tar -tvf file.tar.gz +}}} + +To extract all entries from the archive on +the default tape drive: +{{{ +tar -x +}}} + +To examine the contents of an ISO 9660 cdrom image: +{{{ +tar -tf image.iso +}}} + +To move file hierarchies, invoke +*tar* +as +{{{ +tar -cf - -C srcdir . | tar -xpf - -C destdir +}}} +or more traditionally +{{{ +cd srcdir ; tar -cf - . | (cd destdir ; tar -xpf -) +}}} + +In create mode, the list of files and directories to be archived +can also include directory change instructions of the form +*-C*_foo/baz_ +and archive inclusions of the form +*@*_archive-file_. +For example, the command line +{{{ +tar -c -f new.tar foo1 @old.tgz -C/tmp foo2 +}}} +will create a new archive +_new.tar_. +*tar* +will read the file +_foo1_ +from the current directory and add it to the output archive. +It will then read each entry from +_old.tgz_ +and add those entries to the output archive. +Finally, it will switch to the +_/tmp_ +directory and add +_foo2_ +to the output archive.
+ +An input file in +*mtree*(5) +format can be used to create an output archive with arbitrary ownership, +permissions, or names that differ from existing data on disk: + +{{{ +$ cat input.mtree +}}} +{{{ +#mtree +}}} +{{{ +usr/bin uid=0 gid=0 mode=0755 type=dir +}}} +{{{ +usr/bin/ls uid=0 gid=0 mode=0755 type=file content=myls +}}} +{{{ +$ tar -cvf output.tar @input.mtree +}}} + +The +*--newer* +and +*--newer-mtime* +switches accept a variety of common date and time specifications, including +"12 Mar 2005 7:14:29pm", +"2005-03-12 19:14", +"5 minutes ago", +and +"19:14 PST May 1". + +The +*--options* +argument can be used to control various details of archive generation +or reading. +For example, you can generate mtree output which only contains +*type*, *time*, +and +*uid* +keywords: +{{{ +tar -cf file.tar --format=mtree --options='!all,type,time,uid' dir +}}} +or you can set the compression level used by gzip or xz compression: +{{{ +tar -czf file.tar --options='compression-level=9' . +}}} +For more details, see the explanation of the +*archive_read_set_options*() +and +*archive_write_set_options*() +API calls that are described in +*archive_read*(3) +and +*archive_write*(3). +== COMPATIBILITY == +The bundled-arguments format is supported for compatibility +with historic implementations. +It consists of an initial word (with no leading - character) in which +each character indicates an option. +Arguments follow as separate words. +The order of the arguments must match the order +of the corresponding characters in the bundled command word. +For example, +{{{ +tar tbf 32 file.tar +}}} +specifies three flags +*t*, +*b*, +and +*f*. +The +*b* +and +*f* +flags both require arguments, +so there must be two additional items +on the command line. +The +_32_ +is the argument to the +*b* +flag, and +_file.tar_ +is the argument to the +*f* +flag. + +The mode options c, r, t, u, and x and the options +b, f, l, m, o, v, and w comply with SUSv2.
+ +For maximum portability, scripts that invoke +*tar* +should use the bundled-argument format above, should limit +themselves to the +*c*, +*t*, +and +*x* +modes, and the +*b*, +*f*, +*m*, +*v*, +and +*w* +options. + +Additional long options are provided to improve compatibility with other +tar implementations. +== SECURITY == +Certain security issues are common to many archiving programs, including +*tar*. +In particular, carefully-crafted archives can request that +*tar* +extract files to locations outside of the target directory. +This can potentially be used to cause unwitting users to overwrite +files they did not intend to overwrite. +If the archive is being extracted by the superuser, any file +on the system can potentially be overwritten. +There are three ways this can happen. +Although +*tar* +has mechanisms to protect against each one, +savvy users should be aware of the implications: +<ul> +<li> +Archive entries can have absolute pathnames. +By default, +*tar* +removes the leading +_/_ +character from filenames before restoring them to guard against this problem. +</li><li> +Archive entries can have pathnames that include +_.._ +components. +By default, +*tar* +will not extract files containing +_.._ +components in their pathname. +</li><li> +Archive entries can exploit symbolic links to restore +files to other directories. +An archive can restore a symbolic link to another directory, +then use that link to restore a file into that directory. +To guard against this, +*tar* +checks each extracted path for symlinks. +If the final path element is a symlink, it will be removed +and replaced with the archive entry. +If +*-U* +is specified, any intermediate symlink will also be unconditionally removed. +If neither +*-U* +nor +*-P* +is specified, +*tar* +will refuse to extract the entry. +</li></ul> +To protect yourself, you should be wary of any archives that +come from untrusted sources. 
+You should examine the contents of an archive with +{{{ +tar -tf filename +}}} +before extraction. +You should use the +*-k* +option to ensure that +*tar* +will not overwrite any existing files or the +*-U* +option to remove any pre-existing files. +You should generally not extract archives while running with super-user +privileges. +Note that the +*-P* +option to +*tar* +disables the security checks above and allows you to extract +an archive while preserving any absolute pathnames, +_.._ +components, or symlinks to other directories. +== SEE ALSO == +*bzip2*(1), +*compress*(1), +*cpio*(1), +*gzip*(1), +*mt*(1), +*pax*(1), +*shar*(1), +*libarchive*(3), +*libarchive-formats*(5), +*tar*(5) +== STANDARDS == +There is no current POSIX standard for the tar command; it appeared +in +ISO/IEC 9945-1:1996 (``POSIX.1'') +but was dropped from +IEEE Std 1003.1-2001 (``POSIX.1''). +The options used by this implementation were developed by surveying a +number of existing tar implementations as well as the old POSIX specification +for tar and the current POSIX specification for pax. + +The ustar and pax interchange file formats are defined by +IEEE Std 1003.1-2001 (``POSIX.1'') +for the pax command. +== HISTORY == +A +*tar* +command appeared in Seventh Edition Unix, which was released in January, 1979. +There have been numerous other implementations, +many of which extended the file format. +John Gilmore's +*pdtar* +public-domain implementation (circa November, 1987) +was quite influential, and formed the basis of GNU tar. +GNU tar was included as the standard system tar +in +FreeBSD +beginning with +FreeBSD 1.0. + +This is a complete re-implementation based on the +*libarchive*(3) +library. +== BUGS == +This program follows +ISO/IEC 9945-1:1996 (``POSIX.1'') +for the definition of the +*-l* +option. +Note that GNU tar prior to version 1.15 treated +*-l* +as a synonym for the +*--one-file-system* +option. + +The +*-C* _dir_ +option may differ from historic implementations. 
+ +All archive output is written in correctly-sized blocks, even +if the output is being compressed. +Whether or not the last output block is padded to a full +block size varies depending on the format and the +output device. +For tar and cpio formats, the last block of output is padded +to a full block size if the output is being +written to standard output or to a character or block device such as +a tape drive. +If the output is being written to a regular file, the last block +will not be padded. +Many compressors, including +*gzip*(1) +and +*bzip2*(1), +complain about the null padding when decompressing an archive created by +*tar*, +although they still extract it correctly. + +The compression and decompression is implemented internally, so +there may be insignificant differences between the compressed output +generated by +{{{ +tar -czf - file +}}} +and that generated by +{{{ +tar -cf - file | gzip +}}} + +The default should be to read and write archives to the standard I/O paths, +but tradition (and POSIX) dictates otherwise. + +The +*r* +and +*u* +modes require that the archive be uncompressed +and located in a regular file on disk. +Other archives can be modified using +*c* +mode with the +_@archive-file_ +extension. + +To archive a file called +_@foo_ +or +_-foo_ +you must specify it as +_./@foo_ +or +_./-foo_, +respectively. + +In create mode, a leading +_./_ +is always removed. +A leading +_/_ +is stripped unless the +*-P* +option is specified. + +There needs to be better support for file selection on both create +and extract. + +There is not yet any support for multi-volume archives or for archiving +sparse files. + +Converting between dissimilar archive formats (such as tar and cpio) using the +*@*_-_ +convention can cause hard link information to be lost. +(This is a consequence of the incompatible ways that different archive +formats store hardlink information.) 
+ +There are alternative long options for many of the short options that +are deliberately not documented. diff --git a/archivers/libarchive/files/doc/wiki/ManPageCpio5.wiki b/archivers/libarchive/files/doc/wiki/ManPageCpio5.wiki new file mode 100644 index 00000000000..f39f64f4711 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageCpio5.wiki @@ -0,0 +1,297 @@ +#summary CPIO 5 manual page +== NAME == +*cpio* +- format of cpio archive files +== DESCRIPTION == +The +*cpio* +archive format collects any number of files, directories, and other +file system objects (symbolic links, device nodes, etc.) into a single +stream of bytes. +=== General Format=== +Each file system object in a +*cpio* +archive comprises a header record with basic numeric metadata +followed by the full pathname of the entry and the file data. +The header record stores a series of integer values that generally +follow the fields in +_struct stat_. +(See +*stat*(2) +for details.) +The variants differ primarily in how they store those integers +(binary, octal, or hexadecimal). +The header is followed by the pathname of the +entry (the length of the pathname is stored in the header) +and any file data. +The end of the archive is indicated by a special record with +the pathname +"TRAILER!!!". +=== PWB format=== +XXX Any documentation of the original PWB/UNIX 1.0 format? XXX +=== Old Binary Format=== +The old binary +*cpio* +format stores numbers as 2-byte and 4-byte binary values. +Each entry begins with a header in the following format: +{{{ +struct header_old_cpio { + unsigned short c_magic; + unsigned short c_dev; + unsigned short c_ino; + unsigned short c_mode; + unsigned short c_uid; + unsigned short c_gid; + unsigned short c_nlink; + unsigned short c_rdev; + unsigned short c_mtime[2]; + unsigned short c_namesize; + unsigned short c_filesize[2]; +}; +}}} + +The +_unsigned short_ +fields here are 16-bit integer values; the +_unsigned int_ +fields are 32-bit integer values.
+The fields are as follows +<dl> +<dt>_magic_</dt><dd> +The integer value octal 070707. +This value can be used to determine whether this archive is +written with little-endian or big-endian integers. +</dd><dt>_dev_, _ino_</dt><dd> +The device and inode numbers from the disk. +These are used by programs that read +*cpio* +archives to determine when two entries refer to the same file. +Programs that synthesize +*cpio* +archives should be careful to set these to distinct values for each entry. +</dd><dt>_mode_</dt><dd> +The mode specifies both the regular permissions and the file type. +It consists of several bit fields as follows: +<dl> +<dt>0170000</dt><dd> +This masks the file type bits. +</dd><dt>0140000</dt><dd> +File type value for sockets. +</dd><dt>0120000</dt><dd> +File type value for symbolic links. +For symbolic links, the link body is stored as file data. +</dd><dt>0100000</dt><dd> +File type value for regular files. +</dd><dt>0060000</dt><dd> +File type value for block special devices. +</dd><dt>0040000</dt><dd> +File type value for directories. +</dd><dt>0020000</dt><dd> +File type value for character special devices. +</dd><dt>0010000</dt><dd> +File type value for named pipes or FIFOs. +</dd><dt>0004000</dt><dd> +SUID bit. +</dd><dt>0002000</dt><dd> +SGID bit. +</dd><dt>0001000</dt><dd> +Sticky bit. +On some systems, this modifies the behavior of executables and/or directories. +</dd><dt>0000777</dt><dd> +The lower 9 bits specify read/write/execute permissions +for world, group, and user following standard POSIX conventions. +</dd></dl> +</dd><dt>_uid_, _gid_</dt><dd> +The numeric user id and group id of the owner. +</dd><dt>_nlink_</dt><dd> +The number of links to this file. +Directories always have a value of at least two here. +Note that hardlinked files include file data with every copy in the archive. +</dd><dt>_rdev_</dt><dd> +For block special and character special entries, +this field contains the associated device number. 
+For all other entry types, it should be set to zero by writers +and ignored by readers. +</dd><dt>_mtime_</dt><dd> +Modification time of the file, indicated as the number +of seconds since the start of the epoch, +00:00:00 UTC January 1, 1970. +The four-byte integer is stored with the most-significant 16 bits first +followed by the least-significant 16 bits. +Each of the two 16 bit values are stored in machine-native byte order. +</dd><dt>_namesize_</dt><dd> +The number of bytes in the pathname that follows the header. +This count includes the trailing NUL byte. +</dd><dt>_filesize_</dt><dd> +The size of the file. +Note that this archive format is limited to +four gigabyte file sizes. +See +_mtime_ +above for a description of the storage of four-byte integers. +</dd></dl> + +The pathname immediately follows the fixed header. +If the +*namesize* +is odd, an additional NUL byte is added after the pathname. +The file data is then appended, padded with NUL +bytes to an even length. + +Hardlinked files are not given special treatment; +the full file contents are included with each copy of the +file. +=== Portable ASCII Format=== +Version 2 of the Single UNIX Specification (``SUSv2'') +standardized an ASCII variant that is portable across all +platforms. +It is commonly known as the +"old character" +format or as the +"odc" +format. +It stores the same numeric fields as the old binary format, but +represents them as 6-character or 11-character octal values. +{{{ +struct cpio_odc_header { + char c_magic[6]; + char c_dev[6]; + char c_ino[6]; + char c_mode[6]; + char c_uid[6]; + char c_gid[6]; + char c_nlink[6]; + char c_rdev[6]; + char c_mtime[11]; + char c_namesize[6]; + char c_filesize[11]; +}; +}}} + +The fields are identical to those in the old binary format. +The name and file body follow the fixed header. +Unlike the old binary format, there is no additional padding +after the pathname or file contents. 
+If the files being archived are themselves entirely ASCII, then +the resulting archive will be entirely ASCII, except for the +NUL byte that terminates the name field. +=== New ASCII Format=== +The "new" ASCII format uses 8-byte hexadecimal fields for +all numbers and separates device numbers into separate fields +for major and minor numbers. +{{{ +struct cpio_newc_header { + char c_magic[6]; + char c_ino[8]; + char c_mode[8]; + char c_uid[8]; + char c_gid[8]; + char c_nlink[8]; + char c_mtime[8]; + char c_filesize[8]; + char c_devmajor[8]; + char c_devminor[8]; + char c_rdevmajor[8]; + char c_rdevminor[8]; + char c_namesize[8]; + char c_check[8]; +}; +}}} + +Except as specified below, the fields here match those specified +for the old binary format above. +<dl> +<dt>_magic_</dt><dd> +The string +"070701". +</dd><dt>_check_</dt><dd> +This field is always set to zero by writers and ignored by readers. +See the next section for more details. +</dd></dl> + +The pathname is followed by NUL bytes so that the total size +of the fixed header plus pathname is a multiple of four. +Likewise, the file data is padded to a multiple of four bytes. +Note that this format supports only 4 gigabyte files (unlike the +older ASCII format, which supports 8 gigabyte files). + +In this format, hardlinked files are handled by setting the +filesize to zero for each entry except the last one that +appears in the archive. +=== New CRC Format=== +The CRC format is identical to the new ASCII format described +in the previous section except that the magic field is set +to +"070702" +and the +_check_ +field is set to the sum of all bytes in the file data. +This sum is computed treating all bytes as unsigned values +and using unsigned arithmetic. +Only the least-significant 32 bits of the sum are stored. +=== HP variants=== +The +*cpio* +implementation distributed with HPUX used XXXX but stored +device numbers differently XXX. 
+=== Other Extensions and Variants=== +Sun Solaris uses additional file types to store extended file +data, including ACLs and extended attributes, as special +entries in cpio archives. + +XXX Others? XXX +== BUGS == +The +"CRC" +format is mis-named, as it uses a simple checksum and +not a cyclic redundancy check. + +The old binary format is limited to 16 bits for user id, +group id, device, and inode numbers. +It is limited to 4 gigabyte file sizes. + +The old ASCII format is limited to 18 bits for +the user id, group id, device, and inode numbers. +It is limited to 8 gigabyte file sizes. + +The new ASCII format is limited to 4 gigabyte file sizes. + +None of the cpio formats store user or group names, +which are essential when moving files between systems with +dissimilar user or group numbering. + +Especially when writing older cpio variants, it may be necessary +to map actual device/inode values to synthesized values that +fit the available fields. +With very large filesystems, this may be necessary even for +the newer formats. +== SEE ALSO == +*cpio*(1), +*tar*(5) +== STANDARDS == +The +*cpio* +utility is no longer a part of POSIX or the Single Unix Standard. +It last appeared in +Version 2 of the Single UNIX Specification (``SUSv2''). +It has been supplanted in subsequent standards by +*pax*(1). +The portable ASCII format is currently part of the specification for the +*pax*(1) +utility. +== HISTORY == +The original cpio utility was written by Dick Haight +while working in AT&T's Unix Support Group. +It appeared in 1977 as part of PWB/UNIX 1.0, the +"Programmer's Work Bench" +derived from +Version 6 AT&T UNIX +that was used internally at AT&T. +Both the old binary and old character formats were in use +by 1980, according to the System III source released +by SCO under their +"Ancient Unix" +license. +The character format was adopted as part of +IEEE Std 1003.1-1988 (``POSIX.1''). +XXX when did "newc" appear? Who invented it? When did HP come out with their variant?
When did Sun introduce ACLs and extended attributes? XXX diff --git a/archivers/libarchive/files/doc/wiki/ManPageLibarchive3.wiki b/archivers/libarchive/files/doc/wiki/ManPageLibarchive3.wiki new file mode 100644 index 00000000000..997212f1232 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageLibarchive3.wiki @@ -0,0 +1,302 @@ +#summary LIBARCHIVE 3 manual page +== NAME == +*libarchive* +- functions for reading and writing streaming archives +== LIBRARY == +Streaming Archive Library (libarchive, -larchive) +== OVERVIEW == +The +*libarchive* +library provides a flexible interface for reading and writing +streaming archive files such as tar and cpio. +The library is inherently stream-oriented; readers serially iterate through +the archive, writers serially add things to the archive. +In particular, note that there is no built-in support for +random access nor for in-place modification. + +When reading an archive, the library automatically detects the +format and the compression. +The library currently has read support for: +<ul> +<li> +old-style tar archives, +</li><li> +most variants of the POSIX +"ustar" +format, +</li><li> +the POSIX +"pax interchange" +format, +</li><li> +GNU-format tar archives, +</li><li> +most common cpio archive formats, +</li><li> +ISO9660 CD images (with or without RockRidge extensions), +</li><li> +Zip archives. +</li></ul> +The library automatically detects archives compressed with +*gzip*(1), +*bzip2*(1), +or +*compress*(1) +and decompresses them transparently. + +When writing an archive, you can specify the compression +to be used and the format to use. +The library can write +<ul> +<li> +POSIX-standard +"ustar" +archives, +</li><li> +POSIX +"pax interchange format" +archives, +</li><li> +POSIX octet-oriented cpio archives, +</li><li> +two different variants of shar archives.
+</li></ul> +Pax interchange format is an extension of the tar archive format that +eliminates essentially all of the limitations of historic tar formats +in a standard fashion that is supported +by POSIX-compliant +*pax*(1) +implementations on many systems as well as several newer implementations of +*tar*(1). +Note that the default write format will suppress the pax extended +attributes for most entries; explicitly requesting pax format will +enable those attributes for all entries. + +The read and write APIs are accessed through the +*archive_read_XXX*() +functions and the +*archive_write_XXX*() +functions, respectively, and either can be used independently +of the other. + +The rest of this manual page provides an overview of the library +operation. +More detailed information can be found in the individual manual +pages for each API or utility function. +== READING AN ARCHIVE == +To read an archive, you must first obtain an initialized +*struct archive* +object from +*archive_read_new*(). +You can then modify this object for the desired operations with the +various +*archive_read_set_XXX*() +and +*archive_read_support_XXX*() +functions. +In particular, you will need to invoke appropriate +*archive_read_support_XXX*() +functions to enable the corresponding compression and format +support. +Note that these latter functions perform two distinct operations: +they cause the corresponding support code to be linked into your +program, and they enable the corresponding auto-detect code. +Unless you have specific constraints, you will generally want +to invoke +*archive_read_support_compression_all*() +and +*archive_read_support_format_all*() +to enable auto-detect for all formats and compression types +currently supported by the library. + +Once you have prepared the +*struct archive* +object, you call +*archive_read_open*() +to actually open the archive and prepare it for reading. 
+There are several variants of this function; +the most basic expects you to provide pointers to several +functions that can provide blocks of bytes from the archive. +There are convenience forms that allow you to +specify a filename, file descriptor, +*FILE `*`* +object, or a block of memory from which to read the archive data. +Note that the core library makes no assumptions about the +size of the blocks read; +callback functions are free to read whatever block size is +most appropriate for the medium. + +Each archive entry consists of a header followed by a certain +amount of data. +You can obtain the next header with +*archive_read_next_header*(), +which returns a pointer to a +*struct archive_entry* +structure with information about the current archive element. +If the entry is a regular file, then the header will be followed +by the file data. +You can use +*archive_read_data*() +(which works much like the +*read*(2) +system call) +to read this data from the archive. +You may prefer to use the higher-level +*archive_read_data_skip*(), +which reads and discards the data for this entry, +*archive_read_data_to_buffer*(), +which reads the data into an in-memory buffer, +*archive_read_data_to_file*(), +which copies the data to the provided file descriptor, or +*archive_read_extract*(), +which recreates the specified entry on disk and copies data +from the archive. +In particular, note that +*archive_read_extract*() +uses the +*struct archive_entry* +structure that you provide it, which may differ from the +entry just read from the archive. +In particular, many applications will want to override the +pathname, file permissions, or ownership. + +Once you have finished reading data from the archive, you +should call +*archive_read_close*() +to close the archive, then call +*archive_read_finish*() +to release all resources, including all memory allocated by the library. + +The +*archive_read*(3) +manual page provides more detailed calling information for this API.
+== WRITING AN ARCHIVE == +You use a similar process to write an archive. +The +*archive_write_new*() +function creates an archive object useful for writing, +the various +*archive_write_set_XXX*() +functions are used to set parameters for writing the archive, and +*archive_write_open*() +completes the setup and opens the archive for writing. + +Individual archive entries are written in a three-step +process: +You first initialize a +*struct archive_entry* +structure with information about the new entry. +At a minimum, you should set the pathname of the +entry and provide a +_struct_ stat +with a valid +_st_mode_ +field, which specifies the type of object and +_st_size_ +field, which specifies the size of the data portion of the object. +The +*archive_write_header*() +function actually writes the header data to the archive. +You can then use +*archive_write_data*() +to write the actual data. + +After all entries have been written, use the +*archive_write_finish*() +function to release all resources. + +The +*archive_write*(3) +manual page provides more detailed calling information for this API. +== DESCRIPTION == +Detailed descriptions of each function are provided by the +corresponding manual pages. + +All of the functions utilize an opaque +*struct archive* +datatype that provides access to the archive contents. + +The +*struct archive_entry* +structure contains a complete description of a single archive +entry. +It uses an opaque interface that is fully documented in +*archive_entry*(3). + +Users familiar with historic formats should be aware that the newer +variants have eliminated most restrictions on the length of textual fields. +Clients should not assume that filenames, link names, user names, or +group names are limited in length. +In particular, pax interchange format can easily accommodate pathnames +in arbitrary character sets that exceed +_PATH_MAX_. +== RETURN VALUES == +Most functions return zero on success, non-zero on error. 
+The return value indicates the general severity of the error, ranging +from +*ARCHIVE_WARN*, +which indicates a minor problem that should probably be reported +to the user, to +*ARCHIVE_FATAL*, +which indicates a serious problem that will prevent any further +operations on this archive. +On error, the +*archive_errno*() +function can be used to retrieve a numeric error code (see +*errno*(2)). +The +*archive_error_string*() +function returns a textual error message suitable for display. + +*archive_read_new*() +and +*archive_write_new*() +return pointers to an allocated and initialized +*struct archive* +object. + +*archive_read_data*() +and +*archive_write_data*() +return a count of the number of bytes actually read or written. +A value of zero indicates the end of the data for this entry. +A negative value indicates an error, in which case the +*archive_errno*() +and +*archive_error_string*() +functions can be used to obtain more information. +== ENVIRONMENT == +There are character set conversions within the +*archive_entry*(3) +functions that are impacted by the currently-selected locale. +== SEE ALSO == +*tar*(1), +*archive_entry*(3), +*archive_read*(3), +*archive_util*(3), +*archive_write*(3), +*tar*(5) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org>. +== BUGS == +Some archive formats support information that is not supported by +*struct archive_entry*. +Such information cannot be fully archived or restored using this library. +This includes, for example, comments, character sets, +or the arbitrary key/value pairs that can appear in +pax interchange format archives. + +Conversely, of course, not all of the information that can be +stored in a +*struct archive_entry* +is supported by all formats. +For example, cpio formats do not support nanosecond timestamps; +old tar formats do not support large device numbers.
diff --git a/archivers/libarchive/files/doc/wiki/ManPageLibarchiveFormats5.wiki b/archivers/libarchive/files/doc/wiki/ManPageLibarchiveFormats5.wiki new file mode 100644 index 00000000000..0a8f362672f --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageLibarchiveFormats5.wiki @@ -0,0 +1,327 @@ +#summary libarchive-formats 5 manual page +== NAME == +*libarchive-formats* +- archive formats supported by the libarchive library +== DESCRIPTION == +The +*libarchive*(3) +library reads and writes a variety of streaming archive formats. +Generally speaking, all of these archive formats consist of a series of +"entries". +Each entry stores a single file system object, such as a file, directory, +or symbolic link. + +The following provides a brief description of each format supported +by libarchive, with some information about recognized extensions or +limitations of the current library support. +Note that just because a format is supported by libarchive does not +imply that a program that uses libarchive will support that format. +Applications that use libarchive specify which formats they wish +to support, though many programs do use libarchive convenience +functions to enable all supported formats. +=== Tar Formats=== +The +*libarchive*(3) +library can read most tar archives. +However, it only writes POSIX-standard +"ustar" +and +"pax interchange" +formats. + +All tar formats store each entry in one or more 512-byte records. +The first record is used for file metadata, including filename, +timestamp, and mode information, and the file data is stored in +subsequent records. +Later variants have extended this by either appropriating undefined +areas of the header record, extending the header to multiple records, +or by storing special entries that modify the interpretation of +subsequent entries. + +<dl> +<dt>*gnutar*</dt><dd> +The +*libarchive*(3) +library can read GNU-format tar archives. 
+It currently supports the most popular GNU extensions, including +modern long filename and linkname support, as well as atime and ctime data. +The libarchive library does not support multi-volume +archives, nor the old GNU long filename format. +It can read GNU sparse file entries, including the new POSIX-based +formats, but cannot write GNU sparse file entries. +</dd><dt>*pax*</dt><dd> +The +*libarchive*(3) +library can read and write POSIX-compliant pax interchange format +archives. +Pax interchange format archives are an extension of the older ustar +format that adds a separate entry with additional attributes stored +as key/value pairs immediately before each regular entry. +The presence of these additional entries is the only difference between +pax interchange format and the older ustar format. +The extended attributes are of unlimited length and are stored +as UTF-8 Unicode strings. +Keywords defined in the standard are in all lowercase; vendors are allowed +to define custom keys by preceding them with the vendor name in all uppercase. +When writing pax archives, libarchive uses many of the SCHILY keys +defined by Joerg Schilling's +"star" +archiver and a few LIBARCHIVE keys. +The libarchive library can read most of the SCHILY keys +and most of the GNU keys introduced by GNU tar. +It silently ignores any keywords that it does not understand. +</dd><dt>*restricted* pax</dt><dd> +The libarchive library can also write pax archives in which it +attempts to suppress the extended attributes entry whenever +possible. +The result will be identical to a ustar archive unless the +extended attributes entry is required to store a long file +name, long linkname, extended ACL, file flags, or if any of the standard +ustar data (user name, group name, UID, GID, etc) cannot be fully +represented in the ustar header. +In all cases, the result can be dearchived by any program that +can read POSIX-compliant pax interchange format archives. 
+Programs that correctly read ustar format (see below) will also be +able to read this format; any extended attributes will be extracted as +separate files stored in +_PaxHeader_ +directories. +</dd><dt>*ustar*</dt><dd> +The libarchive library can both read and write this format. +This format has the following limitations: +<ul> +<li> +Device major and minor numbers are limited to 21 bits. +Nodes with larger numbers will not be added to the archive. +</li><li> +Path names in the archive are limited to 255 bytes. +(Shorter if there is no / character in exactly the right place.) +</li><li> +Symbolic links and hard links are stored in the archive with +the name of the referenced file. +This name is limited to 100 bytes. +</li><li> +Extended attributes, file flags, and other extended +security information cannot be stored. +</li><li> +Archive entries are limited to 8 gigabytes in size. +</li></ul> +Note that the pax interchange format has none of these restrictions. +</dd></dl> + +The libarchive library also reads a variety of commonly-used extensions to +the basic tar format. +These extensions are recognized automatically whenever they appear. +<dl> +<dt>Numeric extensions.</dt><dd> +The POSIX standards require fixed-length numeric fields to be written with +some character position reserved for terminators. +Libarchive allows these fields to be written without terminator characters. +This extends the allowable range; in particular, ustar archives with this +extension can support entries up to 64 gigabytes in size. +Libarchive also recognizes base-256 values in most numeric fields. +This essentially removes all limitations on file size, modification time, +and device numbers. +</dd><dt>Solaris extensions</dt><dd> +Libarchive recognizes ACL and extended attribute records written +by Solaris tar. +Currently, libarchive only has support for old-style ACLs; the +newer NFSv4 ACLs are recognized but discarded. 
+</dd></dl> + +The first tar program appeared in Seventh Edition Unix in 1979. +The first official standard for the tar file format was the +"ustar" +(Unix Standard Tar) format defined by POSIX in 1988. +POSIX.1-2001 extended the ustar format to create the +"pax interchange" +format. +=== Cpio Formats=== +The libarchive library can read a number of common cpio variants and can write +"odc" +and +"newc" +format archives. +A cpio archive stores each entry as a fixed-size header followed +by a variable-length filename and variable-length data. +Unlike the tar format, the cpio format does only minimal padding +of the header or file data. +There are several cpio variants, which differ primarily in +how they store the initial header: some store the values as +octal or hexadecimal numbers in ASCII, others as binary values of +varying byte order and length. +<dl> +<dt>*binary*</dt><dd> +The libarchive library transparently reads both big-endian and little-endian +variants of the original binary cpio format. +This format used 32-bit binary values for file size and mtime, +and 16-bit binary values for the other fields. +</dd><dt>*odc*</dt><dd> +The libarchive library can both read and write this +POSIX-standard format, which is officially known as the +"cpio interchange format" +or the +"octet-oriented cpio archive format" +and sometimes unofficially referred to as the +"old character format". +This format stores the header contents as octal values in ASCII. +It is standard, portable, and immune from byte-order confusion. +File sizes and mtime are limited to 33 bits (8GB file size), +other fields are limited to 18 bits. +</dd><dt>*SVR4*</dt><dd> +The libarchive library can read both CRC and non-CRC variants of +this format. +The SVR4 format uses eight-digit hexadecimal values for +all header fields. +This limits file size to 4GB, and also limits the mtime and +other fields to 32 bits. 
+The SVR4 format can optionally include a CRC of the file +contents, although libarchive does not currently verify this CRC. +</dd></dl> + +Cpio first appeared in PWB/UNIX 1.0, which was released within +AT&T in 1977. +PWB/UNIX 1.0 formed the basis of System III Unix, released outside +of AT&T in 1981. +This makes cpio older than tar, although cpio was not included +in Version 7 AT&T Unix. +As a result, the tar command became much better known in universities +and research groups that used Version 7. +The combination of the +*find* +and +*cpio* +utilities provided very precise control over file selection. +Unfortunately, the format has many limitations that make it unsuitable +for widespread use. +Only the POSIX format permits files over 4GB, and its 18-bit +limit for most other fields makes it unsuitable for modern systems. +In addition, cpio formats only store numeric UID/GID values (not +usernames and group names), which can make it very difficult to correctly +transfer archives across systems with dissimilar user numbering. +=== Shar Formats=== +A +"shell archive" +is a shell script that, when executed on a POSIX-compliant +system, will recreate a collection of file system objects. +The libarchive library can write two different kinds of shar archives: +<dl> +<dt>*shar*</dt><dd> +The traditional shar format uses a limited set of POSIX +commands, including +*echo*(1), +*mkdir*(1), +and +*sed*(1). +It is suitable for portably archiving small collections of plain text files. +However, it is not generally well-suited for large archives +(many implementations of +*sh*(1) +have limits on the size of a script) nor should it be used with non-text files. +</dd><dt>*shardump*</dt><dd> +This format is similar to shar but encodes files using +*uuencode*(1) +so that the result will be a plain text file regardless of the file contents. +It also includes additional shell commands that attempt to reproduce as +many file attributes as possible, including owner, mode, and flags. 
+The additional commands used to restore file attributes make +shardump archives less portable than plain shar archives. +</dd></dl> +=== ISO9660 format=== +Libarchive can read and extract from files containing ISO9660-compliant +CDROM images. +In many cases, this can remove the need to burn a physical CDROM +just in order to read the files contained in an ISO9660 image. +It also avoids security and complexity issues that come with +virtual mounts and loopback devices. +Libarchive supports the most common Rockridge extensions and has partial +support for Joliet extensions. +If both extensions are present, the Joliet extensions will be +used and the Rockridge extensions will be ignored. +In particular, this can create problems with hardlinks and symlinks, +which are supported by Rockridge but not by Joliet. +=== Zip format=== +Libarchive can read and write zip format archives that have +uncompressed entries and entries compressed with the +"deflate" +algorithm. +Older zip compression algorithms are not supported. +It can extract jar archives, archives that use Zip64 extensions and many +self-extracting zip archives. +Libarchive reads Zip archives as they are being streamed, +which allows it to read archives of arbitrary size. +It currently does not use the central directory; this +limits libarchive's ability to support some self-extracting +archives and ones that have been modified in certain ways. +=== Archive (library) file format=== +The Unix archive format (commonly created by the +*ar*(1) +archiver) is a general-purpose format which is +used almost exclusively for object files to be +read by the link editor +*ld*(1). +The ar format has never been standardised. +There are two common variants: +the GNU format derived from SVR4, +and the BSD format, which first appeared in 4.4BSD. 
+The two differ primarily in their handling of filenames +longer than 15 characters: +the GNU/SVR4 variant writes a filename table at the beginning of the archive; +the BSD format stores each long filename in an extension +area adjacent to the entry. +Libarchive can read both extensions, +including archives that may include both types of long filenames. +Programs using libarchive can write GNU/SVR4 format +if they provide a filename table to be written into +the archive before any of the entries. +Any entries whose names are not in the filename table +will be written using BSD-style long filenames. +This can cause problems for programs such as +GNU ld that do not support the BSD-style long filenames. +=== mtree=== +Libarchive can read and write files in +*mtree*(5) +format. +This format is not a true archive format, but rather a textual description +of a file hierarchy in which each line specifies the name of a file and +provides specific metadata about that file. +Libarchive can read all of the keywords supported by both +the NetBSD and FreeBSD versions of +*mtree*(1), +although many of the keywords cannot currently be stored in an +*archive_entry* +object. +When writing, libarchive supports use of the +*archive_write_set_options*(3) +interface to specify which keywords should be included in the +output. +If libarchive was compiled with access to suitable +cryptographic libraries (such as the OpenSSL libraries), +it can compute hash entries such as +*sha512* +or +*md5* +from file data being written to the mtree writer. + +When reading an mtree file, libarchive will locate the corresponding +files on disk using the +*contents* +keyword if present or the regular filename. +If it can locate and open the file on disk, it will use that +to fill in any metadata that is missing from the mtree file +and will read the file contents and return those to the program +using libarchive. 
+If it cannot locate and open the file on disk, libarchive +will return an error for any attempt to read the entry +body. +== SEE ALSO == +*ar*(1), +*cpio*(1), +*mkisofs*(1), +*shar*(1), +*tar*(1), +*zip*(1), +*zlib*(3), +*cpio*(5), +*mtree*(5), +*tar*(5) diff --git a/archivers/libarchive/files/doc/wiki/ManPageLibarchiveInternals3.wiki b/archivers/libarchive/files/doc/wiki/ManPageLibarchiveInternals3.wiki new file mode 100644 index 00000000000..b21fedbab1c --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageLibarchiveInternals3.wiki @@ -0,0 +1,337 @@ +#summary LIBARCHIVE 3 manual page +== NAME == +*libarchive_internals* +- description of libarchive internal interfaces +== OVERVIEW == +The +*libarchive* +library provides a flexible interface for reading and writing +streaming archive files such as tar and cpio. +Internally, it follows a modular layered design that should +make it easy to add new archive and compression formats. +== GENERAL ARCHITECTURE == +Externally, libarchive exposes most operations through an +opaque, object-style interface. +The +*archive_entry*(1) +objects store information about a single filesystem object. +The rest of the library provides facilities to write +*archive_entry*(1) +objects to archive files, +read them from archive files, +and write them to disk. +(There are plans to add a facility to read +*archive_entry*(1) +objects from disk as well.) + +The read and write APIs each have four layers: a public API +layer, a format layer that understands the archive file format, +a compression layer, and an I/O layer. +The I/O layer is completely exposed to clients who can replace +it entirely with their own functions. + +In order to provide as much consistency as possible for clients, +some public functions are virtualized. +Eventually, it should be possible for clients to open +an archive or disk writer, and then use a single set of +code to select and write entries, regardless of the target. 
+== READ ARCHITECTURE == +From the outside, clients use the +*archive_read*(3) +API to manipulate an +*archive* +object to read entries and bodies from an archive stream. +Internally, the +*archive* +object is cast to an +*archive_read* +object, which holds all read-specific data. +The API has four layers: +The lowest layer is the I/O layer. +This layer can be overridden by clients, but most clients use +the packaged I/O callbacks provided, for example, by +*archive_read_open_memory*(3), +and +*archive_read_open_fd*(3). +The compression layer calls the I/O layer to +read bytes and decompresses them for the format layer. +The format layer unpacks a stream of uncompressed bytes and +creates +*archive_entry* +objects from the incoming data. +The API layer tracks overall state +(for example, it prevents clients from reading data before reading a header) +and invokes the format and compression layer operations +through registered function pointers. +In particular, the API layer drives the format-detection process: +When opening the archive, it reads an initial block of data +and offers it to each registered compression handler. +The one with the highest bid is initialized with the first block. +Similarly, the format handlers are polled to see which handler +is the best for each archive. +(Prior to 2.4.0, the format bidders were invoked for each +entry, but this design hindered error recovery.) +=== I/O Layer and Client Callbacks=== +The read API goes to some lengths to be nice to clients. +As a result, there are few restrictions on the behavior of +the client callbacks. + +The client read callback is expected to provide a block +of data on each call. +A zero-length return does indicate end of file, but otherwise +blocks may be as small as one byte or as large as the entire file. +In particular, blocks may be of different sizes. + +The client skip callback returns the number of bytes actually +skipped, which may be much smaller than the skip requested. 
+The only requirement is that the skip not be larger. +In particular, clients are allowed to return zero for any +skip that they don't want to handle. +The skip callback must never be invoked with a negative value. + +Keep in mind that not all clients are reading from disk: +clients reading from networks may provide different-sized +blocks on every request and cannot skip at all; +advanced clients may use +*mmap*(2) +to read the entire file into memory at once and return the +entire file to libarchive as a single block; +other clients may begin asynchronous I/O operations for the +next block on each request. +=== Decompression Layer=== +The decompression layer not only handles decompression, +it also buffers data so that the format handlers see a +much nicer I/O model. +The decompression API is a two-stage peek/consume model. +A read_ahead request specifies a minimum read amount; +the decompression layer must provide a pointer to at least +that much data. +If more data is immediately available, it should return more: +the format layer handles bulk data reads by asking for a minimum +of one byte and then copying as much data as is available. + +A subsequent call to the +*consume*() +function advances the read pointer. +Note that data returned from a +*read_ahead*() +call is guaranteed to remain in place until +the next call to +*read_ahead*(). +Intervening calls to +*consume*() +should not cause the data to move. + +Skip requests must always be handled exactly. +Decompression handlers that cannot seek forward should +not register a skip handler; +the API layer fills in a generic skip handler that reads and discards data. + +A decompression handler has a specific lifecycle: +<dl> +<dt>Registration/Configuration</dt><dd> +When the client invokes the public support function, +the decompression handler invokes the internal +*__archive_read_register_compression*() +function to provide bid and initialization functions.
+This function returns +*NULL* +on error or else a pointer to a +*struct* decompressor_t. +This structure contains a +_void_ * config +slot that can be used for storing any customization information. +</dd><dt>Bid</dt><dd> +The bid function is invoked with a pointer and size of a block of data. +The decompressor can access its config data +through the +_decompressor_ +element of the +*archive_read* +object. +The bid function is otherwise stateless. +In particular, it must not perform any I/O operations. + +The value returned by the bid function indicates its suitability +for handling this data stream. +A bid of zero will ensure that this decompressor is never invoked. +Return zero if magic number checks fail. +Otherwise, your initial implementation should return the number of bits +actually checked. +For example, if you verify two full bytes and three bits of another +byte, bid 19. +Note that the initial block may be very short; +be careful to only inspect the data you are given. +(The current decompressors require two bytes for correct bidding.) +</dd><dt>Initialize</dt><dd> +The winning bidder will have its init function called. +This function should initialize the remaining slots of the +_struct_ decompressor_t +object pointed to by the +_decompressor_ +element of the +_archive_read_ +object. +In particular, it should allocate any working data it needs +in the +_data_ +slot of that structure. +The init function is called with the block of data that +was used for tasting. +At this point, the decompressor is responsible for all I/O +requests to the client callbacks. +The decompressor is free to read more data as and when +necessary. +</dd><dt>Satisfy I/O requests</dt><dd> +The format handler will invoke the +_read_ahead_, +_consume_, +and +_skip_ +functions as needed. +</dd><dt>Finish</dt><dd> +The finish method is called only once when the archive is closed. +It should release anything stored in the +_data_ +and +_config_ +slots of the +_decompressor_ +object. 
+It should not invoke the client close callback. +</dd></dl> +=== Format Layer=== +The read formats have a similar lifecycle to the decompression handlers: +<dl> +<dt>Registration</dt><dd> +Allocate your private data and initialize your pointers. +</dd><dt>Bid</dt><dd> +Formats bid by invoking the +*read_ahead*() +decompression method but not calling the +*consume*() +method. +This allows each bidder to look ahead in the input stream. +Bidders should not look further ahead than necessary, as long +look aheads put pressure on the decompression layer to buffer +lots of data. +Most formats only require a few hundred bytes of look ahead; +look aheads of a few kilobytes are reasonable. +(The ISO9660 reader sometimes looks ahead by 48k, which +should be considered an upper limit.) +</dd><dt>Read header</dt><dd> +The header read is usually the most complex part of any format. +There are a few strategies worth mentioning: +For formats such as tar or cpio, reading and parsing the header is +straightforward since headers alternate with data. +For formats that store all header data at the beginning of the file, +the first header read request may have to read all headers into +memory and store that data, sorted by the location of the file +data. +Subsequent header read requests will skip forward to the +beginning of the file data and return the corresponding header. +</dd><dt>Read Data</dt><dd> +The read data interface supports sparse files; this requires that +each call return a block of data specifying the file offset and +size. +This may require you to carefully track the location so that you +can return accurate file offsets for each read. +Remember that the decompressor will return as much data as it has. +Generally, you will want to request one byte, +examine the return value to see how much data is available, and +possibly trim that to the amount you can use. +You should invoke consume for each block just before you return it. 
+</dd><dt>Skip All Data</dt><dd> +The skip data call should skip over all file data and trailing padding. +This is called automatically by the API layer just before each +header read. +It is also called in response to the client calling the public +*data_skip*() +function. +</dd><dt>Cleanup</dt><dd> +On cleanup, the format should release all of its allocated memory. +</dd></dl> +=== API Layer=== +XXX to do XXX +== WRITE ARCHITECTURE == +The write API has a similar set of four layers: +an API layer, a format layer, a compression layer, and an I/O layer. +The registration here is much simpler because only +one format and one compression can be registered at a time. +=== I/O Layer and Client Callbacks=== +XXX To be written XXX +=== Compression Layer=== +XXX To be written XXX +=== Format Layer=== +XXX To be written XXX +=== API Layer=== +XXX To be written XXX +== WRITE_DISK ARCHITECTURE == +The write_disk API is intended to look just like the write API +to clients. +Since it does not handle multiple formats or compression, it +is not layered internally. +== GENERAL SERVICES == +The +*archive_read*, +*archive_write*, +and +*archive_write_disk* +objects all contain an initial +*archive* +object which provides common support for a set of standard services. +(Recall that ANSI/ISO C90 guarantees that you can cast freely between +a pointer to a structure and a pointer to the first element of that +structure.) +The +*archive* +object has a magic value that indicates which API this object +is associated with, +slots for storing error information, +and function pointers for virtualized API functions. +== MISCELLANEOUS NOTES == +Connecting existing archiving libraries into libarchive is generally +quite difficult. +In particular, many existing libraries strongly assume that you +are reading from a file; they seek forwards and backwards as necessary +to locate various pieces of information. 
+In contrast, libarchive never seeks backwards in its input, which +sometimes requires very different approaches. + +For example, libarchive's ISO9660 support operates very differently +from most ISO9660 readers. +The libarchive support utilizes a work-queue design that +keeps a list of known entries sorted by their location in the input. +Whenever libarchive's ISO9660 implementation is asked for the next +header, checks this list to find the next item on the disk. +Directories are parsed when they are encountered and new +items are added to the list. +This design relies heavily on the ISO9660 image being optimized so that +directories always occur earlier on the disk than the files they +describe. + +Depending on the specific format, such approaches may not be possible. +The ZIP format specification, for example, allows archivers to store +key information only at the end of the file. +In theory, it is possible to create ZIP archives that cannot +be read without seeking. +Fortunately, such archives are very rare, and libarchive can read +most ZIP archives, though it cannot always extract as much information +as a dedicated ZIP program. +== SEE ALSO == +*archive*(3), +*archive_entry*(3), +*archive_read*(3), +*archive_write*(3), +*archive_write_disk*(3) +== HISTORY == +The +*libarchive* +library first appeared in +FreeBSD 5.3. +== AUTHORS == +The +*libarchive* +library was written by +Tim Kientzle <kientzle@acm.org.> +== BUGS == diff --git a/archivers/libarchive/files/doc/wiki/ManPageMtree5.wiki b/archivers/libarchive/files/doc/wiki/ManPageMtree5.wiki new file mode 100644 index 00000000000..fd49e3061d8 --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageMtree5.wiki @@ -0,0 +1,237 @@ +#summary MTREE 5 manual page +== NAME == +*mtree* +- format of mtree dir hierarchy files +== DESCRIPTION == +The +*mtree* +format is a textual format that describes a collection of filesystem objects. +Such files are typically used to create or verify directory hierarchies. 
+=== General Format=== +An +*mtree* +file consists of a series of lines, each providing information +about a single filesystem object. +Leading whitespace is always ignored. + +When encoding file or pathnames, any backslash character or +character outside of the 95 printable ASCII characters must be +encoded as a a backslash followed by three +octal digits. +When reading mtree files, any appearance of a backslash +followed by three octal digits should be converted into the +corresponding character. + +Each line is interpreted independently as one of the following types: +<dl> +<dt>Signature</dt><dd> +The first line of any mtree file must begin with +"#mtree". +If a file contains any full path entries, the first line should +begin with +"#mtree v2.0", +otherwise, the first line should begin with +"#mtree v1.0". +</dd><dt>Blank</dt><dd> +Blank lines are ignored. +</dd><dt>Comment</dt><dd> +Lines beginning with +*#* +are ignored. +</dd><dt>Special</dt><dd> +Lines beginning with +*/* +are special commands that influence +the interpretation of later lines. +</dd><dt>Relative</dt><dd> +If the first whitespace-delimited word has no +*/* +characters, +it is the name of a file in the current directory. +Any relative entry that describes a directory changes the +current directory. +</dd><dt>dot-dot</dt><dd> +As a special case, a relative entry with the filename +_.._ +changes the current directory to the parent directory. +Options on dot-dot entries are always ignored. +</dd><dt>Full</dt><dd> +If the first whitespace-delimited word has a +*/* +character after +the first character, it is the pathname of a file relative to the +starting directory. +There can be multiple full entries describing the same file. +</dd></dl> + +Some tools that process +*mtree* +files may require that multiple lines describing the same file +occur consecutively. +It is not permitted for the same file to be mentioned using +both a relative and a full file specification. 
+=== Special commands=== +Two special commands are currently defined: +<dl> +<dt>*/set*</dt><dd> +This command defines default values for one or more keywords. +It is followed on the same line by one or more whitespace-separated +keyword definitions. +These definitions apply to all following files that do not specify +a value for that keyword. +</dd><dt>*/unset*</dt><dd> +This command removes any default value set by a previous +*/set* +command. +It is followed on the same line by one or more keywords +separated by whitespace. +</dd></dl> +=== Keywords=== +After the filename, a full or relative entry consists of zero +or more whitespace-separated keyword definitions. +Each such definition consists of a key from the following +list immediately followed by an '=' sign +and a value. +Software programs reading mtree files should warn about +unrecognized keywords. + +Currently supported keywords are as follows: +<dl> +<dt>*cksum*</dt><dd> +The checksum of the file using the default algorithm specified by +the +*cksum*(1) +utility. +</dd><dt>*contents*</dt><dd> +The full pathname of a file that holds the contents of this file. +</dd><dt>*flags*</dt><dd> +The file flags as a symbolic name. +See +*chflags*(1) +for information on these names. +If no flags are to be set the string +"none" +may be used to override the current default. +</dd><dt>*gid*</dt><dd> +The file group as a numeric value. +</dd><dt>*gname*</dt><dd> +The file group as a symbolic name. +</dd><dt>*ignore*</dt><dd> +Ignore any file hierarchy below this file. +</dd><dt>*link*</dt><dd> +The target of the symbolic link when type=link. +</dd><dt>*md5*</dt><dd> +The MD5 message digest of the file. +</dd><dt>*md5digest*</dt><dd> +A synonym for +*md5*. +</dd><dt>*mode*</dt><dd> +The current file's permissions as a numeric (octal) or symbolic +value. +</dd><dt>*nlink*</dt><dd> +The number of hard links the file is expected to have. 
+</dd><dt>*nochange*</dt><dd> +Make sure this file or directory exists but otherwise ignore all attributes. +</dd><dt>*ripemd160digest*</dt><dd> +The +*RIPEMD160* +message digest of the file. +</dd><dt>*rmd160*</dt><dd> +A synonym for +*ripemd160digest*. +</dd><dt>*rmd160digest*</dt><dd> +A synonym for +*ripemd160digest*. +</dd><dt>*sha1*</dt><dd> +The +*FIPS* +160-1 +("Tn SHA-1") +message digest of the file. +</dd><dt>*sha1digest*</dt><dd> +A synonym for +*sha1*. +</dd><dt>*sha256*</dt><dd> +The +*FIPS* +180-2 +("Tn SHA-256") +message digest of the file. +</dd><dt>*sha256digest*</dt><dd> +A synonym for +*sha256*. +</dd><dt>*size*</dt><dd> +The size, in bytes, of the file. +</dd><dt>*time*</dt><dd> +The last modification time of the file. +</dd><dt>*type*</dt><dd> +The type of the file; may be set to any one of the following: + +<dl> +<dt>*block*</dt><dd> +block special device +</dd><dt>*char*</dt><dd> +character special device +</dd><dt>*dir*</dt><dd> +directory +</dd><dt>*fifo*</dt><dd> +fifo +</dd><dt>*file*</dt><dd> +regular file +</dd><dt>*link*</dt><dd> +symbolic link +</dd><dt>*socket*</dt><dd> +socket +</dd></dl> +</dd><dt>*uid*</dt><dd> +The file owner as a numeric value. +</dd><dt>*uname*</dt><dd> +The file owner as a symbolic name. +</dd></dl> + +== SEE ALSO == +*cksum*(1), +*find*(1), +*mtree*(8) +== BUGS == +The +FreeBSD +implementation of mtree does not currently support +the +*mtree* +2.0 +format. +The requirement for a +"#mtree" +signature line is new and not yet widely implemented. +== HISTORY == +The +*mtree* +utility appeared in +BSD 4.3 Reno. +The +*MD5* +digest capability was added in +FreeBSD 2.1, +in response to the widespread use of programs which can spoof +*cksum*(1). +The +*SHA-1* +and +*RIPEMD160* +digests were added in +FreeBSD 4.0, +as new attacks have demonstrated weaknesses in +*MD5 .* +The +*SHA-256* +digest was added in +FreeBSD 6.0. +Support for file flags was added in +FreeBSD 4.0, +and mostly comes from +NetBSD. 
+The +"full" +entry format was added by +NetBSD. diff --git a/archivers/libarchive/files/doc/wiki/ManPageTar5.wiki b/archivers/libarchive/files/doc/wiki/ManPageTar5.wiki new file mode 100644 index 00000000000..12fd514562f --- /dev/null +++ b/archivers/libarchive/files/doc/wiki/ManPageTar5.wiki @@ -0,0 +1,805 @@ +#summary tar 5 manual page +== NAME == +*tar* +- format of tape archive files +== DESCRIPTION == +The +*tar* +archive format collects any number of files, directories, and other +file system objects (symbolic links, device nodes, etc.) into a single +stream of bytes. +The format was originally designed to be used with +tape drives that operate with fixed-size blocks, but is widely used as +a general packaging mechanism. +=== General Format=== +A +*tar* +archive consists of a series of 512-byte records. +Each file system object requires a header record which stores basic metadata +(pathname, owner, permissions, etc.) and zero or more records containing any +file data. +The end of the archive is indicated by two records consisting +entirely of zero bytes. + +For compatibility with tape drives that use fixed block sizes, +programs that read or write tar files always read or write a fixed +number of records with each I/O operation. +These +"blocks" +are always a multiple of the record size. +The maximum block size supported by early +implementations was 10240 bytes or 20 records. +This is still the default for most implementations +although block sizes of 1MiB (2048 records) or larger are +commonly used with modern high-speed tape drives. +(Note: the terms +"block" +and +"record" +here are not entirely standard; this document follows the +convention established by John Gilmore in documenting +*pdtar*.) +=== Old-Style Archive Format=== +The original tar archive format has been extended many times to +include additional information that various implementors found +necessary. 
+This section describes the variant implemented by the tar command +included in +At v7, +which seems to be the earliest widely-used version of the tar program. + +The header record for an old-style +*tar* +archive consists of the following: +{{{ +struct header_old_tar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char linkflag[1]; + char linkname[100]; + char pad[255]; +}; +}}} +All unused bytes in the header record are filled with nulls. +<dl> +<dt>_name_</dt><dd> +Pathname, stored as a null-terminated string. +Early tar implementations only stored regular files (including +hardlinks to those files). +One common early convention used a trailing "/" character to indicate +a directory name, allowing directory permissions and owner information +to be archived and restored. +</dd><dt>_mode_</dt><dd> +File mode, stored as an octal number in ASCII. +</dd><dt>_uid_, _gid_</dt><dd> +User id and group id of owner, as octal numbers in ASCII. +</dd><dt>_size_</dt><dd> +Size of file, as octal number in ASCII. +For regular files only, this indicates the amount of data +that follows the header. +In particular, this field was ignored by early tar implementations +when extracting hardlinks. +Modern writers should always store a zero length for hardlink entries. +</dd><dt>_mtime_</dt><dd> +Modification time of file, as an octal number in ASCII. +This indicates the number of seconds since the start of the epoch, +00:00:00 UTC January 1, 1970. +Note that negative values should be avoided +here, as they are handled inconsistently. +</dd><dt>_checksum_</dt><dd> +Header checksum, stored as an octal number in ASCII. +To compute the checksum, set the checksum field to all spaces, +then sum all bytes in the header using unsigned arithmetic. +This field should be stored as six octal digits followed by a null and a space +character. 
+Note that many early implementations of tar used signed arithmetic +for the checksum field, which can cause interoperability problems +when transferring archives between systems. +Modern robust readers compute the checksum both ways and accept the +header if either computation matches. +</dd><dt>_linkflag_, _linkname_</dt><dd> +In order to preserve hardlinks and conserve tape, a file +with multiple links is only written to the archive the first +time it is encountered. +The next time it is encountered, the +_linkflag_ +is set to an ASCII +Sq 1 +and the +_linkname_ +field holds the first name under which this file appears. +(Note that regular files have a null value in the +_linkflag_ +field.) +</dd></dl> + +Early tar implementations varied in how they terminated these fields. +The tar command in +At v7 +used the following conventions (this is also documented in early BSD manpages): +the pathname must be null-terminated; +the mode, uid, and gid fields must end in a space and a null byte; +the size and mtime fields must end in a space; +the checksum is terminated by a null and a space. +Early implementations filled the numeric fields with leading spaces. +This seems to have been common practice until the +IEEE Std 1003.1-1988 (``POSIX.1'') +standard was released. +For best portability, modern implementations should fill the numeric +fields with leading zeros. +=== Pre-POSIX Archives=== +An early draft of +IEEE Std 1003.1-1988 (``POSIX.1'') +served as the basis for John Gilmore's +*pdtar* +program and many system implementations from the late 1980s +and early 1990s. +These archives generally follow the POSIX ustar +format described below with the following variations: +<ul> +<li> +The magic value is +"ustar\ \&" +(note the following space). +The version field contains a space character followed by a null. +</li><li> +The numeric fields are generally filled with leading spaces +(not leading zeros as recommended in the final standard). 
+</li><li> +The prefix field is often not used, limiting pathnames to +the 100 characters of old-style archives. +</li></ul> +=== POSIX ustar Archives=== +IEEE Std 1003.1-1988 (``POSIX.1'') +defined a standard tar file format to be read and written +by compliant implementations of +*tar*(1). +This format is often called the +"ustar" +format, after the magic value used +in the header. +(The name is an acronym for +"Unix Standard TAR".) +It extends the historic format with new fields: +{{{ +struct header_posix_ustar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; + char pad[12]; +}; +}}} +<dl> +<dt>_typeflag_</dt><dd> +Type of entry. +POSIX extended the earlier +_linkflag_ +field with several new type values: +<dl> +<dt>"0"</dt><dd> +Regular file. +NUL should be treated as a synonym, for compatibility purposes. +</dd><dt>"1"</dt><dd> +Hard link. +</dd><dt>"2"</dt><dd> +Symbolic link. +</dd><dt>"3"</dt><dd> +Character device node. +</dd><dt>"4"</dt><dd> +Block device node. +</dd><dt>"5"</dt><dd> +Directory. +</dd><dt>"6"</dt><dd> +FIFO node. +</dd><dt>"7"</dt><dd> +Reserved. +</dd><dt>Other</dt><dd> +A POSIX-compliant implementation must treat any unrecognized typeflag value +as a regular file. +In particular, writers should ensure that all entries +have a valid filename so that they can be restored by readers that do not +support the corresponding extension. +Uppercase letters "A" through "Z" are reserved for custom extensions. +Note that sockets and whiteout entries are not archivable. +</dd></dl> +It is worth noting that the +_size_ +field, in particular, has different meanings depending on the type. +For regular files, of course, it indicates the amount of data +following the header. 
+For directories, it may be used to indicate the total size of all +files in the directory, for use by operating systems that pre-allocate +directory space. +For all other types, it should be set to zero by writers and ignored +by readers. +</dd><dt>_magic_</dt><dd> +Contains the magic value +"ustar" +followed by a NUL byte to indicate that this is a POSIX standard archive. +Full compliance requires the uname and gname fields be properly set. +</dd><dt>_version_</dt><dd> +Version. +This should be +"00" +(two copies of the ASCII digit zero) for POSIX standard archives. +</dd><dt>_uname_, _gname_</dt><dd> +User and group names, as null-terminated ASCII strings. +These should be used in preference to the uid/gid values +when they are set and the corresponding names exist on +the system. +</dd><dt>_devmajor_, _devminor_</dt><dd> +Major and minor numbers for character device or block device entry. +</dd><dt>_name_, _prefix_</dt><dd> +If the pathname is too long to fit in the 100 bytes provided by the standard +format, it can be split at any +_/_ +character with the first portion going into the prefix field. +If the prefix field is not empty, the reader will prepend +the prefix value and a +_/_ +character to the regular name field to obtain the full pathname. +The standard does not require a trailing +_/_ +character on directory names, though most implementations still +include this for compatibility reasons. +</dd></dl> + +Note that all unused bytes must be set to +NUL. + +Field termination is specified slightly differently by POSIX +than by previous implementations. +The +_magic_, +_uname_, +and +_gname_ +fields must have a trailing +NUL. +The +_pathname_, +_linkname_, +and +_prefix_ +fields must have a trailing +NUL +unless they fill the entire field. +(In particular, it is possible to store a 256-character pathname if it +happens to have a +_/_ +as the 156th character.) 
+POSIX requires numeric fields to be zero-padded in the front, and requires +them to be terminated with either space or +NUL +characters. + +Currently, most tar implementations comply with the ustar +format, occasionally extending it by adding new fields to the +blank area at the end of the header record. +=== Pax Interchange Format=== +There are many attributes that cannot be portably stored in a +POSIX ustar archive. +IEEE Std 1003.1-2001 (``POSIX.1'') +defined a +"pax interchange format" +that uses two new types of entries to hold text-formatted +metadata that applies to following entries. +Note that a pax interchange format archive is a ustar archive in every +respect. +The new data is stored in ustar-compatible archive entries that use the +"x" +or +"g" +typeflag. +In particular, older implementations that do not fully support these +extensions will extract the metadata into regular files, where the +metadata can be examined as necessary. + +An entry in a pax interchange format archive consists of one or +two standard ustar entries, each with its own header and data. +The first optional entry stores the extended attributes +for the following entry. +This optional first entry has an "x" typeflag and a size field that +indicates the total size of the extended attributes. +The extended attributes themselves are stored as a series of text-format +lines encoded in the portable UTF-8 encoding. +Each line consists of a decimal number, a space, a key string, an equals +sign, a value string, and a new line. +The decimal number indicates the length of the entire line, including the +initial length field and the trailing newline. +An example of such a field is: +{{{ +25 ctime=1084839148.1212\en +}}} +Keys in all lowercase are standard keys. +Vendors can add their own keys by prefixing them with an all uppercase +vendor name and a period. +Note that, unlike the historic header, numeric values are stored using +decimal, not octal. 
+A description of some common keys follows: +<dl> +<dt>*atime*, *ctime*, *mtime*</dt><dd> +File access, inode change, and modification times. +These fields can be negative or include a decimal point and a fractional value. +</dd><dt>*uname*, *uid*, *gname*, *gid*</dt><dd> +User name, group name, and numeric UID and GID values. +The user name and group name stored here are encoded in UTF8 +and can thus include non-ASCII characters. +The UID and GID fields can be of arbitrary length. +</dd><dt>*linkpath*</dt><dd> +The full path of the linked-to file. +Note that this is encoded in UTF8 and can thus include non-ASCII characters. +</dd><dt>*path*</dt><dd> +The full pathname of the entry. +Note that this is encoded in UTF8 and can thus include non-ASCII characters. +</dd><dt>*realtime.`*`*, *security.`*`*</dt><dd> +These keys are reserved and may be used for future standardization. +</dd><dt>*size*</dt><dd> +The size of the file. +Note that there is no length limit on this field, allowing conforming +archives to store files much larger than the historic 8GB limit. +</dd><dt>*SCHILY.`*`*</dt><dd> +Vendor-specific attributes used by Joerg Schilling's +*star* +implementation. +</dd><dt>*SCHILY.acl.access*, *SCHILY.acl.default*</dt><dd> +Stores the access and default ACLs as textual strings in a format +that is an extension of the format specified by POSIX.1e draft 17. +In particular, each user or group access specification can include a fourth +colon-separated field with the numeric UID or GID. +This allows ACLs to be restored on systems that may not have complete +user or group information available (such as when NIS/YP or LDAP services +are temporarily unavailable). +</dd><dt>*SCHILY.devminor*, *SCHILY.devmajor*</dt><dd> +The full minor and major numbers for device nodes. +</dd><dt>*SCHILY.fflags*</dt><dd> +The file flags. +</dd><dt>*SCHILY.realsize*</dt><dd> +The full size of the file on disk. +XXX explain? 
XXX +</dd><dt>*SCHILY.dev,* *SCHILY.ino*, *SCHILY.nlinks*</dt><dd> +The device number, inode number, and link count for the entry. +In particular, note that a pax interchange format archive using Joerg +Schilling's +*SCHILY.`*`* +extensions can store all of the data from +_struct_ stat. +</dd><dt>*LIBARCHIVE.xattr.*_namespace_._key_</dt><dd> +Libarchive stores POSIX.1e-style extended attributes using +keys of this form. +The +_key_ +value is URL-encoded: +All non-ASCII characters and the two special characters +"=" +and +"%" +are encoded as +"%" +followed by two uppercase hexadecimal digits. +The value of this key is the extended attribute value +encoded in base 64. +XXX Detail the base-64 format here XXX +</dd><dt>*VENDOR.`*`*</dt><dd> +XXX document other vendor-specific extensions XXX +</dd></dl> + +Any values stored in an extended attribute override the corresponding +values in the regular tar header. +Note that compliant readers should ignore the regular fields when they +are overridden. +This is important, as existing archivers are known to store non-compliant +values in the standard header fields in this situation. +There are no limits on length for any of these fields. +In particular, numeric fields can be arbitrarily large. +All text fields are encoded in UTF8. +Compliant writers should store only portable 7-bit ASCII characters in +the standard ustar header and use extended +attributes whenever a text value contains non-ASCII characters. + +In addition to the +*x* +entry described above, the pax interchange format +also supports a +*g* +entry. +The +*g* +entry is identical in format, but specifies attributes that serve as +defaults for all subsequent archive entries. +The +*g* +entry is not widely used. + +Besides the new +*x* +and +*g* +entries, the pax interchange format has a few other minor variations +from the earlier ustar format. +The most troubling one is that hardlinks are permitted to have +data following them. 
+This allows readers to restore any hardlink to a file without +having to rewind the archive to find an earlier entry. +However, it creates complications for robust readers, as it is no longer +clear whether or not they should ignore the size field for hardlink entries. +=== GNU Tar Archives=== +The GNU tar program started with a pre-POSIX format similar to that +described earlier and has extended it using several different mechanisms: +It added new fields to the empty space in the header (some of which was later +used by POSIX for conflicting purposes); +it allowed the header to be continued over multiple records; +and it defined new entries that modify following entries +(similar in principle to the +*x* +entry described above, but each GNU special entry is single-purpose, +unlike the general-purpose +*x* +entry). +As a result, GNU tar archives are not POSIX compatible, although +more lenient POSIX-compliant readers can successfully extract most +GNU tar archives. +{{{ +struct header_gnu_tar { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char atime[12]; + char ctime[12]; + char offset[12]; + char longnames[4]; + char unused[1]; + struct { + char offset[12]; + char numbytes[12]; + } sparse[4]; + char isextended[1]; + char realsize[12]; + char pad[17]; +}; +}}} +<dl> +<dt>_typeflag_</dt><dd> +GNU tar uses the following special entry types, in addition to +those defined by POSIX: +<dl> +<dt>7</dt><dd> +GNU tar treats type "7" records identically to type "0" records, +except on one obscure RTOS where they are used to indicate the +pre-allocation of a contiguous file on disk. +</dd><dt>D</dt><dd> +This indicates a directory entry. 
+Unlike the POSIX-standard "5" +typeflag, the header is followed by data records listing the names +of files in this directory. +Each name is preceded by an ASCII "Y" +if the file is stored in this archive or "N" if the file is not +stored in this archive. +Each name is terminated with a null, and +an extra null marks the end of the name list. +The purpose of this +entry is to support incremental backups; a program restoring from +such an archive may wish to delete files on disk that did not exist +in the directory when the archive was made. + +Note that the "D" typeflag specifically violates POSIX, which requires +that unrecognized typeflags be restored as normal files. +In this case, restoring the "D" entry as a file could interfere +with subsequent creation of the like-named directory. +</dd><dt>K</dt><dd> +The data for this entry is a long linkname for the following regular entry. +</dd><dt>L</dt><dd> +The data for this entry is a long pathname for the following regular entry. +</dd><dt>M</dt><dd> +This is a continuation of the last file on the previous volume. +GNU multi-volume archives guarantee that each volume begins with a valid +entry header. +To ensure this, a file may be split, with part stored at the end of one volume, +and part stored at the beginning of the next volume. +The "M" typeflag indicates that this entry continues an existing file. +Such entries can only occur as the first or second entry +in an archive (the latter only if the first entry is a volume label). +The +_size_ +field specifies the size of this entry. +The +_offset_ +field at bytes 369-380 specifies the offset where this file fragment +begins. +The +_realsize_ +field specifies the total size of the file (which must equal +_size_ +plus +_offset_). +When extracting, GNU tar checks that the header file name is the one it is +expecting, that the header offset is in the correct sequence, and that +the sum of offset and size is equal to realsize. 
+</dd><dt>N</dt><dd> +Type "N" records are no longer generated by GNU tar. +They contained a +list of files to be renamed or symlinked after extraction; this was +originally used to support long names. +The contents of this record +are a text description of the operations to be done, in the form +"Rename %s to %s\en" +or +"Symlink %s to %s\en ;" +in either case, both +filenames are escaped using K&R C syntax. +Due to security concerns, "N" records are now generally ignored +when reading archives. +</dd><dt>S</dt><dd> +This is a +"sparse" +regular file. +Sparse files are stored as a series of fragments. +The header contains a list of fragment offset/length pairs. +If more than four such entries are required, the header is +extended as necessary with +"extra" +header extensions (an older format that is no longer used), or +"sparse" +extensions. +</dd><dt>V</dt><dd> +The +_name_ +field should be interpreted as a tape/volume header name. +This entry should generally be ignored on extraction. +</dd></dl> +</dd><dt>_magic_</dt><dd> +The magic field holds the five characters +"ustar" +followed by a space. +Note that POSIX ustar archives have a trailing null. +</dd><dt>_version_</dt><dd> +The version field holds a space character followed by a null. +Note that POSIX ustar archives use two copies of the ASCII digit +"0". +</dd><dt>_atime_, _ctime_</dt><dd> +The time the file was last accessed and the time of +last change of file information, stored in octal as with +_mtime_. +</dd><dt>_longnames_</dt><dd> +This field is apparently no longer used. +</dd><dt>Sparse _offset_ / _numbytes_</dt><dd> +Each such structure specifies a single fragment of a sparse +file. +The two fields store values as octal numbers. +The fragments are each padded to a multiple of 512 bytes +in the archive. +On extraction, the list of fragments is collected from the +header (including any extension headers), and the data +is then read and written to the file at appropriate offsets. 
+</dd><dt>_isextended_</dt><dd> +If this is set to non-zero, the header will be followed by additional +"sparse header" +records. +Each such record contains information about as many as 21 additional +sparse blocks as shown here: +{{{ +struct gnu_sparse_header { + struct { + char offset[12]; + char numbytes[12]; + } sparse[21]; + char isextended[1]; + char padding[7]; +}; +}}} +</dd><dt>_realsize_</dt><dd> +A binary representation of the file's complete size, with a much larger range +than the POSIX file size. +In particular, with +*M* +type files, the current entry is only a portion of the file. +In that case, the POSIX size field will indicate the size of this +entry; the +_realsize_ +field will indicate the total size of the file. +</dd></dl> +=== GNU tar pax archives=== +GNU tar 1.14 (XXX check this XXX) and later will write +pax interchange format archives when you specify the +*--posix* +flag. +This format uses custom keywords to store sparse file information. +There have been three iterations of this support, referred to +as +"0.0", +"0.1", +and +"1.0". +<dl> +<dt>*GNU.sparse.numblocks*, *GNU.sparse.offset*, *GNU.sparse.numbytes*, *GNU.sparse.size*</dt><dd> +The +"0.0" +format used an initial +*GNU.sparse.numblocks* +attribute to indicate the number of blocks in the file, a pair of +*GNU.sparse.offset* +and +*GNU.sparse.numbytes* +to indicate the offset and size of each block, +and a single +*GNU.sparse.size* +to indicate the full size of the file. +This is not the same as the size in the tar header because the +latter value does not include the size of any holes. +This format required that the order of attributes be preserved and +relied on readers accepting multiple appearances of the same attribute +names, which is not officially permitted by the standards. +</dd><dt>*GNU.sparse.map*</dt><dd> +The +"0.1" +format used a single attribute that stored a comma-separated +list of decimal numbers. 
+Each pair of numbers indicated the offset and size, respectively, +of a block of data. +This does not work well if the archive is extracted by an archiver +that does not recognize this extension, since many pax implementations +simply discard unrecognized attributes. +</dd><dt>*GNU.sparse.major*, *GNU.sparse.minor*, *GNU.sparse.name*, *GNU.sparse.realsize*</dt><dd> +The +"1.0" +format stores the sparse block map in one or more 512-byte blocks +prepended to the file data in the entry body. +The pax attributes indicate the existence of this map +(via the +*GNU.sparse.major* +and +*GNU.sparse.minor* +fields) +and the full size of the file. +The +*GNU.sparse.name* +holds the true name of the file. +To avoid confusion, the name stored in the regular tar header +is a modified name so that extraction errors will be apparent +to users. +</dd></dl> +=== Solaris Tar=== +XXX More Details Needed XXX + +Solaris tar (beginning with SunOS XXX 5.7 ?? XXX) supports an +"extended" +format that is fundamentally similar to pax interchange format, +with the following differences: +<ul> +<li> +Extended attributes are stored in an entry whose type is +*X*, +not +*x*, +as used by pax interchange format. +The detailed format of this entry appears to be the same +as detailed above for the +*x* +entry. +</li><li> +An additional +*A* +entry is used to store an ACL for the following regular entry. +The body of this entry contains a seven-digit octal number +followed by a zero byte, followed by the +textual ACL description. +The octal value is the number of ACL entries +plus a constant that indicates the ACL type: 01000000 +for POSIX.1e ACLs and 03000000 for NFSv4 ACLs. +</li></ul> +=== AIX Tar=== +XXX More details needed XXX +=== Mac OS X Tar=== +The tar distributed with Apple's Mac OS X stores most regular files +as two separate entries in the tar archive. +The two entries have the same name except that the first +one has +"._" +added to the beginning of the name. 
+This first entry stores the +"resource fork" +with additional attributes for the file. +The Mac OS X +*CopyFile*() +API is used to separate a file on disk into separate +resource and data streams and to reassemble those separate +streams when the file is restored to disk. +=== Other Extensions=== +One obvious extension to increase the size of files is to +eliminate the terminating characters from the various +numeric fields. +For example, the standard only allows the size field to contain +11 octal digits, reserving the twelfth byte for a trailing +NUL character. +Allowing 12 octal digits allows file sizes up to 64 GB. + +Another extension, utilized by GNU tar, star, and other newer +*tar* +implementations, permits binary numbers in the standard numeric fields. +This is flagged by setting the high bit of the first byte. +This permits 95-bit values for the length and time fields +and 63-bit values for the uid, gid, and device numbers. +GNU tar supports this extension for the +length, mtime, ctime, and atime fields. +Joerg Schilling's star program supports this extension for +all numeric fields. +Note that this extension is largely obsoleted by the extended attribute +record provided by the pax interchange format. + +Another early GNU extension allowed base-64 values rather than octal. +This extension was short-lived and is no longer supported by any +implementation. +== SEE ALSO == +*ar*(1), +*pax*(1), +*tar*(1) +== STANDARDS == +The +*tar* +utility is no longer a part of POSIX or the Single UNIX Specification. +It last appeared in +Version 2 of the Single UNIX Specification ("SUSv2"). +It has been supplanted in subsequent standards by +*pax*(1). +The ustar format is currently part of the specification for the +*pax*(1) +utility. +The pax interchange file format is new with +IEEE Std 1003.1-2001 ("POSIX.1"). +== HISTORY == +A +*tar* +command appeared in Seventh Edition Unix, which was released in January, 1979. 
+It replaced the +*tp* +program from Fourth Edition Unix which in turn replaced the +*tap* +program from First Edition Unix. +John Gilmore's +*pdtar* +public-domain implementation (circa 1987) was highly influential +and formed the basis of +*GNU* tar +(circa 1988). +Joerg Schilling's +*star* +archiver is another open-source (GPL) archiver (originally developed +circa 1985) which features complete support for pax interchange +format. + +This documentation was written as part of the +*libarchive* +and +*bsdtar* +project by +Tim Kientzle <kientzle@FreeBSD.org>. |