From c3fa22fc8023ad21ad32b13f934c3324756c9236 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Mon, 25 Jan 2010 17:03:28 +0200 Subject: [PATCH] Read POSIX multivolume archives split at the header boundary. * src/common.h (read_header_mode): New enum. (read_header): Change type of the 3rd argument. * src/list.c (read_header): Change type of the 3rd argument. All callers updated. * src/buffer.c (try_new_volume): Allow for volumes split at the extended/ustar header boundary. This is against POSIX specs, but we must be able to read such archives anyway. * tests/multiv07.at: New test case. * tests/Makefile.am: Add multiv07.at * tests/testsuite.at: Likewise. * src/compare.c: Update calls to read_header. * src/delete.c: Likewise. * src/update.c: Likewise. --- src/buffer.c | 45 +++++++++++++++++++++++++++++++++++---------- src/common.h | 13 ++++++++++++- src/compare.c | 8 +++++--- src/delete.c | 5 +++-- src/list.c | 36 ++++++++++++++++++++++++------------ src/update.c | 3 ++- tests/Makefile.am | 1 + tests/multiv07.at | 43 +++++++++++++++++++++++++++++++++++++++++++ tests/testsuite.at | 3 ++- 9 files changed, 127 insertions(+), 30 deletions(-) create mode 100644 tests/multiv07.at diff --git a/src/buffer.c b/src/buffer.c index f419dd7..b47b773 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1165,7 +1165,7 @@ read_header0 (struct tar_stat_info *info) enum read_header rc; tar_stat_init (info); - rc = read_header (¤t_header, info, false); + rc = read_header (¤t_header, info, read_header_auto); if (rc == HEADER_SUCCESS) { set_next_block_after (current_header); @@ -1213,17 +1213,42 @@ try_new_volume () { case XGLTYPE: { - if (!read_header0 (&dummy)) - return false; + tar_stat_init (&dummy); + if (read_header (&header, &dummy, read_header_x_global) + != HEADER_SUCCESS_EXTENDED) + { + ERROR ((0, 0, _("This does not look like a tar archive"))); + return false; + } + xheader_decode (&dummy); /* decodes values from the global header */ tar_stat_destroy (&dummy); - if (!real_s_name) - { - /* We have read the extended header of the first member in - this volume. Put it back, so next read_header works as - expected. */ - current_block = record_start; - } + + /* The initial global header must be immediately followed by + an extended PAX header for the first member in this volume. + However, in some cases tar may split volumes in the middle + of a PAX header. This is incorrect, and should be fixed + in the future versions. In the meantime we must be + prepared to correctly list and extract such archives. + + If this happens, the following call to read_header returns + HEADER_FAILURE, which is ignored. + + See also tests/multiv07.at */ + + switch (read_header (&header, &dummy, read_header_auto)) + { + case HEADER_SUCCESS: + set_next_block_after (header); + break; + + case HEADER_FAILURE: + break; + + default: + ERROR ((0, 0, _("This does not look like a tar archive"))); + return false; + } break; } diff --git a/src/common.h b/src/common.h index 2712115..d2de528 100644 --- a/src/common.h +++ b/src/common.h @@ -548,6 +548,17 @@ enum read_header HEADER_FAILURE /* ill-formed header, or bad checksum */ }; +/* Operation mode for read_header: */ + +enum read_header_mode +{ + read_header_auto, /* process extended headers automatically */ + read_header_x_raw, /* return raw extended headers (return + HEADER_SUCCESS_EXTENDED) */ + read_header_x_global /* when POSIX global extended header is read, + decode it and return + HEADER_SUCCESS_EXTENDED */ +}; extern union block *current_header; extern enum archive_format current_format; extern size_t recent_long_name_blocks; @@ -586,7 +597,7 @@ void print_header (struct tar_stat_info *st, union block *blk, void read_and (void (*do_something) (void)); enum read_header read_header (union block **return_block, struct tar_stat_info *info, - bool raw_extended_headers); + enum read_header_mode m); enum read_header tar_checksum (union block *header, bool silent); void skip_file (off_t size); void skip_member (void); diff --git a/src/compare.c b/src/compare.c index 9385d40..f4e92da 100644 --- a/src/compare.c +++ b/src/compare.c @@ -579,7 +579,8 @@ verify_volume (void) while (1) { enum read_header status = read_header (¤t_header, - ¤t_stat_info, false); + ¤t_stat_info, + read_header_auto); if (status == HEADER_FAILURE) { @@ -590,7 +591,7 @@ verify_volume (void) counter++; set_next_block_after (current_header); status = read_header (¤t_header, ¤t_stat_info, - false); + read_header_auto); } while (status == HEADER_FAILURE); @@ -608,7 +609,8 @@ verify_volume (void) { char buf[UINTMAX_STRSIZE_BOUND]; - status = read_header (¤t_header, ¤t_stat_info, false); + status = read_header (¤t_header, ¤t_stat_info, + read_header_auto); if (status == HEADER_ZERO_BLOCK) break; WARNOPT (WARN_ALONE_ZERO_BLOCK, diff --git a/src/delete.c b/src/delete.c index 8f729ba..75dc01f 100644 --- a/src/delete.c +++ b/src/delete.c @@ -167,7 +167,7 @@ delete_archive_members (void) { enum read_header status = read_header (¤t_header, ¤t_stat_info, - true); + read_header_x_raw); switch (status) { @@ -262,7 +262,8 @@ delete_archive_members (void) if (current_block == record_end) flush_archive (); - status = read_header (¤t_header, ¤t_stat_info, false); + status = read_header (¤t_header, ¤t_stat_info, + read_header_auto); xheader_decode (¤t_stat_info); diff --git a/src/list.c b/src/list.c index 5a341aa..716c0b4 100644 --- a/src/list.c +++ b/src/list.c @@ -78,7 +78,8 @@ read_and (void (*do_something) (void)) prev_status = status; tar_stat_destroy (¤t_stat_info); - status = read_header (¤t_header, ¤t_stat_info, false); + status = read_header (¤t_header, ¤t_stat_info, + read_header_auto); switch (status) { case HEADER_STILL_UNREAD: @@ -139,7 +140,8 @@ read_and (void (*do_something) (void)) { char buf[UINTMAX_STRSIZE_BOUND]; - status = read_header (¤t_header, ¤t_stat_info, false); + status = read_header (¤t_header, ¤t_stat_info, + read_header_auto); if (status == HEADER_ZERO_BLOCK) break; WARNOPT (WARN_ALONE_ZERO_BLOCK, @@ -282,21 +284,29 @@ tar_checksum (union block *header, bool silent) } /* Read a block that's supposed to be a header block. Return its - address in "current_header", and if it is good, the file's size - and names (file name, link name) in *info. + address in *RETURN_BLOCK, and if it is good, the file's size + and names (file name, link name) in *INFO. - Return 1 for success, 0 if the checksum is bad, EOF on eof, 2 for a - block full of zeros (EOF marker). + Return one of enum read_header describing the status of the + operation. - If RAW_EXTENDED_HEADERS is nonzero, do not automagically fold the - GNU long name and link headers into later headers. + The MODE parameter instructs read_header what to do with special + header blocks, i.e.: extended POSIX, GNU long name or long link, + etc.: - You must always set_next_block_after(current_header) to skip past + read_header_auto process them automatically, + read_header_x_raw when a special header is read, return + HEADER_SUCCESS_EXTENDED without actually + processing the header, + read_header_x_global when a POSIX global header is read, + decode it and return HEADER_SUCCESS_EXTENDED. + + You must always set_next_block_after(*return_block) to skip past the header which this routine reads. */ enum read_header read_header (union block **return_block, struct tar_stat_info *info, - bool raw_extended_headers) + enum read_header_mode mode) { union block *header; union block *header_copy; @@ -333,7 +343,7 @@ read_header (union block **return_block, struct tar_stat_info *info, || header->header.typeflag == XGLTYPE || header->header.typeflag == SOLARIS_XHDTYPE) { - if (raw_extended_headers) + if (mode == read_header_x_raw) return HEADER_SUCCESS_EXTENDED; else if (header->header.typeflag == GNUTYPE_LONGNAME || header->header.typeflag == GNUTYPE_LONGLINK) @@ -405,6 +415,8 @@ read_header (union block **return_block, struct tar_stat_info *info, OFF_FROM_HEADER (header->header.size)); xheader_decode_global (&xhdr); xheader_destroy (&xhdr); + if (mode == read_header_x_global) + return HEADER_SUCCESS_EXTENDED; } /* Loop! */ @@ -1397,7 +1409,7 @@ test_archive_label () name_gather (); open_archive (ACCESS_READ); - if (read_header (¤t_header, ¤t_stat_info, false) + if (read_header (¤t_header, ¤t_stat_info, read_header_auto) == HEADER_SUCCESS) { char *s = NULL; diff --git a/src/update.c b/src/update.c index 468c645..b015175 100644 --- a/src/update.c +++ b/src/update.c @@ -115,7 +115,8 @@ update_archive (void) while (!found_end) { enum read_header status = read_header (¤t_header, - ¤t_stat_info, false); + ¤t_stat_info, + read_header_auto); switch (status) { diff --git a/tests/Makefile.am b/tests/Makefile.am index 1b3b2c1..e9b753c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -105,6 +105,7 @@ TESTSUITE_AT = \ multiv04.at\ multiv05.at\ multiv06.at\ + multiv07.at\ old.at\ options.at\ options02.at\ diff --git a/tests/multiv07.at b/tests/multiv07.at new file mode 100644 index 0000000..ff965d6 --- /dev/null +++ b/tests/multiv07.at @@ -0,0 +1,43 @@ +# Test suite for GNU tar. -*- Autotest -*- +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Description: When creating POSIX multivolume archives, tar may in +# some cases write an extended header at the end of one volume, and +# the corresponding ustar header at the beginning of the next volume. +# Such archives do not fully comply with the POSIX specs, but tar must +# be able to read them anyway. This is what this script tests. +# +# See function try_new_volume, in file src/buffer.c near line 1227 +# for additional details. + +AT_SETUP([volumes split at an extended header]) +AT_KEYWORDS([multivolume multiv multiv07 xsplit]) + +AT_CHECK([ +AT_XFAIL_IF(test -f $[]XFAILFILE) +AT_TARBALL_PREREQ([xsplit-1.tar],[0e008c84c517e48fbf23ca6a7033cde6]) +AT_TARBALL_PREREQ([xsplit-2.tar],[03150b9852d285458f43734e9e0b9a45]) + +cd $TEST_DATA_DIR +tar -t -M -fxsplit-1.tar -fxsplit-2.tar +], +[0], +[Archive volumes split at an extended header Volume 1 +foo +bar +]) + +AT_CLEANUP diff --git a/tests/testsuite.at b/tests/testsuite.at index 6dbd6b3..f581071 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -69,7 +69,7 @@ m4_define([AT_TARBALL_PREREQ],[ test -z "$[]TEST_DATA_DIR" && AT_SKIP_TEST tarball_prereq $1 $2 $[]TEST_DATA_DIR $[]TEST_DATA_URL || AT_SKIP_TEST]) -dnl AT_TARBALL_PREREQ(tarball, md5sum) - Same for star testfiles +dnl AT_STAR_PREREQ(tarball, md5sum) - Same for star testfiles m4_define([AT_STAR_PREREQ],[ test -z "$STAR_TESTSCRIPTS" && AT_SKIP_TEST tarball_prereq $1 $2 $[]STAR_TESTSCRIPTS $[]STAR_DATA_URL || AT_SKIP_TEST @@ -191,6 +191,7 @@ m4_include([multiv03.at]) m4_include([multiv04.at]) m4_include([multiv05.at]) m4_include([multiv06.at]) +m4_include([multiv07.at]) m4_include([old.at]) -- 2.45.2