From 090d1d36ae038a82d2b4bb54a8464bb7629188b7 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Fri, 23 Jun 2006 15:21:48 +0000 Subject: [PATCH] Implement sparse format versioning. Implement new version (1.0) of PAX sparse format. (pax_sparse_member_p): Fix condition (pax_dump_header): A dispatcher function (pax_dump_header_0,pax_dump_header_1): New functions. (pax_optab): Update (oldgnu_dump_header): Minor fix: make sure sparse_header.isextended is set before calling set_next_block_after --- src/sparse.c | 283 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 254 insertions(+), 29 deletions(-) diff --git a/src/sparse.c b/src/sparse.c index a8192be..799a00f 100644 --- a/src/sparse.c +++ b/src/sparse.c @@ -1,6 +1,6 @@ /* Functions for dealing with sparse files - Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -740,11 +740,9 @@ oldgnu_dump_header (struct tar_sparse_file *file) oldgnu_store_sparse_info (file, &i, blk->sparse_header.sp, SPARSES_IN_SPARSE_HEADER); - set_next_block_after (blk); if (i < file->stat_info->sparse_map_avail) blk->sparse_header.isextended = 1; - else - break; + set_next_block_after (blk); } return true; } @@ -843,16 +841,16 @@ static struct tar_sparse_optab const star_optab = { }; -/* GNU PAX sparse file format. The sparse file map is stored in - x header: +/* GNU PAX sparse file format. There are several versions: + + + * 0.0 + + The initial version of sparse format used by tar 1.14-1.15.1. + The sparse file map is stored in x header: GNU.sparse.size Real size of the stored file GNU.sparse.numblocks Number of blocks in the sparse map - GNU.sparse.map Map of non-null data chunks. A string consisting - of comma-separated values "offset,size[,offset,size]..." - - Tar versions 1.14-1.15.1 instead of the latter used: - repeat numblocks time GNU.sparse.offset Offset of the next data block GNU.sparse.numbytes Size of the next data block @@ -860,26 +858,73 @@ static struct tar_sparse_optab const star_optab = { This has been reported as conflicting with the POSIX specs. The reason is that offsets and sizes of non-zero data blocks were stored in multiple - instances of GNU.sparse.offset/GNU.sparse.numbytes variables. However, + instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas POSIX requires the latest occurrence of the variable to override all previous occurrences. - - To avoid this incompatibility new keyword GNU.sparse.map was introduced - in tar 1.15.2. Some people might still need the 1.14 way of handling - sparse files for the compatibility reasons: it can be achieved by - specifying `--pax-option delete=GNU.sparse.map' in the command line. + + To avoid this incompatibility two following versions were introduced. + + * 0.1 + + Used by tar 1.15.2 -- 1.15.91 (alpha releases). + + The sparse file map is stored in + x header: + + GNU.sparse.size Real size of the stored file + GNU.sparse.numblocks Number of blocks in the sparse map + GNU.sparse.map Map of non-null data chunks. A string consisting + of comma-separated values "offset,size[,offset,size]..." + + The resulting GNU.sparse.map string can be *very* long. While POSIX does not + impose any limit on the length of a x header variable, this can confuse some + tars. + + * 1.0 - See FIXME-1.14-1.15.1-1.20, below. + Starting from this version, the exact sparse format version is specified explicitely + in the header using the following variables: + + GNU.sparse.major Major version + GNU.sparse.minor Minor version + + X header keeps the following variables: + + GNU.sparse.name Real file name of the sparse file + GNU.sparse.realsize Real size of the stored file (corresponds to the old + GNU.sparse.size variable) + + The name field of the ustar header is constructed using the pattern + "%d/GNUSparseFile.%p/%f". + + The sparse map itself is stored in the file data block, preceding the actual + file data. It consists of a series of octal numbers of arbitrary length, delimited + by newlines. The map is padded with nulls to the nearest block boundary. + + The first number gives the number of entries in the map. Following are map entries, + each one consisting of two numbers giving the offset and size of the + data block it describes. + + The format is designed in such a way that non-posix aware tars and tars not + supporting GNU.sparse.* keywords will extract each sparse file in its condensed + form with the file map attached and will place it into a separate directory. + Then, using a simple program it would be possible to expand the file to its + original form even without GNU tar. + + Bu default, v.1.0 archives are created. To use other formats, --sparse-version + option is provided. Additionally, v.0.0 can be obtained by deleting GNU.sparse.map + from 0.1 format: --sparse-version 0.1 --pax-option delete=GNU.sparse.map */ static bool pax_sparse_member_p (struct tar_sparse_file *file) { - return file->stat_info->sparse_map_avail > 0; + return file->stat_info->sparse_map_avail > 0 + || file->stat_info->sparse_major > 0; } static bool -pax_dump_header (struct tar_sparse_file *file) +pax_dump_header_0 (struct tar_sparse_file *file) { off_t block_ordinal = current_block_ordinal (); union block *blk; @@ -892,13 +937,8 @@ pax_dump_header (struct tar_sparse_file *file) xheader_store ("GNU.sparse.size", file->stat_info, NULL); xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL); - /* FIXME-1.14-1.15.1-1.20: See the comment above. - Starting with 1.17 this should display a warning about POSIX-incompatible - keywords being generated. In 1.20, the true branch of the if block below - will be removed and GNU.sparse.map will be marked in xhdr_tab as - protected. */ - - if (xheader_keyword_deleted_p ("GNU.sparse.map")) + if (xheader_keyword_deleted_p ("GNU.sparse.map") + || tar_sparse_minor == 0) { for (i = 0; i < file->stat_info->sparse_map_avail; i++) { @@ -936,13 +976,198 @@ pax_dump_header (struct tar_sparse_file *file) return true; } +static bool +pax_dump_header_1 (struct tar_sparse_file *file) +{ + off_t block_ordinal = current_block_ordinal (); + union block *blk; + char *p, *q; + size_t i; + char nbuf[UINTMAX_STRSIZE_BOUND]; + off_t size = 0; + struct sp_array *map = file->stat_info->sparse_map; + char *save_file_name = file->stat_info->file_name; + +#define COPY_STRING(b,dst,src) do \ + { \ + char *endp = b->buffer + BLOCKSIZE; \ + char *srcp = src; \ + while (*srcp) \ + { \ + if (dst == endp) \ + { \ + set_next_block_after (b); \ + b = find_next_block (); \ + dst = b->buffer; \ + endp = b->buffer + BLOCKSIZE; \ + } \ + *dst++ = *srcp++; \ + } \ + } while (0) + + /* Compute stored file size */ + p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); + size += strlen (p) + 1; + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + p = umaxtostr (map[i].offset, nbuf); + size += strlen (p) + 1; + p = umaxtostr (map[i].numbytes, nbuf); + size += strlen (p) + 1; + } + size = (size + BLOCKSIZE - 1) / BLOCKSIZE; + file->stat_info->archive_file_size += size * BLOCKSIZE; + + /* Store sparse file identification */ + xheader_store ("GNU.sparse.major", file->stat_info, NULL); + xheader_store ("GNU.sparse.minor", file->stat_info, NULL); + xheader_store ("GNU.sparse.name", file->stat_info, NULL); + xheader_store ("GNU.sparse.realsize", file->stat_info, NULL); + + file->stat_info->file_name = xheader_format_name (file->stat_info, + "%d/GNUSparseFile.%p/%f", 0); + + blk = start_header (file->stat_info); + /* Store the effective (shrunken) file size */ + OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size); + finish_header (file->stat_info, blk, block_ordinal); + free (file->stat_info->file_name); + file->stat_info->file_name = save_file_name; + + blk = find_next_block (); + q = blk->buffer; + p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + for (i = 0; i < file->stat_info->sparse_map_avail; i++) + { + p = umaxtostr (map[i].offset, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + p = umaxtostr (map[i].numbytes, nbuf); + COPY_STRING (blk, q, p); + COPY_STRING (blk, q, "\n"); + } + memset (q, 0, BLOCKSIZE - (q - blk->buffer)); + set_next_block_after (blk); + return true; +} + +static bool +pax_dump_header (struct tar_sparse_file *file) +{ + file->stat_info->sparse_major = tar_sparse_major; + file->stat_info->sparse_minor = tar_sparse_minor; + + if (file->stat_info->sparse_major == 0) + pax_dump_header_0 (file); + else + pax_dump_header_1 (file); +} + +static bool +decode_num (uintmax_t *num, char const *arg, uintmax_t maxval) +{ + uintmax_t u; + char *arg_lim; + + if (!ISDIGIT (*arg)) + return false; + + u = strtoumax (arg, &arg_lim, 10); + + if (! (u <= maxval && errno != ERANGE) || *arg_lim) + return false; + + *num = u; + return true; +} + +static bool +pax_decode_header (struct tar_sparse_file *file) +{ + if (file->stat_info->sparse_major > 0) + { + uintmax_t u; + char nbuf[UINTMAX_STRSIZE_BOUND]; + union block *blk; + char *p; + size_t i; + +#define COPY_BUF(b,buf,src) do \ + { \ + char *endp = b->buffer + BLOCKSIZE; \ + char *dst = buf; \ + do \ + { \ + if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \ + { \ + ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \ + file->stat_info->orig_file_name)); \ + return false; \ + } \ + if (src == endp) \ + { \ + set_next_block_after (b); \ + b = find_next_block (); \ + src = b->buffer; \ + endp = b->buffer + BLOCKSIZE; \ + } \ + *dst = *src++; \ + } \ + while (*dst++ != '\n'); \ + dst[-1] = 0; \ + } while (0) + + set_next_block_after (current_header); + blk = find_next_block (); + p = blk->buffer; + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + file->stat_info->sparse_map_size = u; + file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size, + sizeof (*file->stat_info->sparse_map)); + file->stat_info->sparse_map_avail = 0; + for (i = 0; i < file->stat_info->sparse_map_size; i++) + { + struct sp_array sp; + + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + sp.offset = u; + COPY_BUF (blk,nbuf,p); + if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t))) + { + ERROR ((0, 0, _("%s: malformed sparse archive member"), + file->stat_info->orig_file_name)); + return false; + } + sp.numbytes = u; + sparse_add_map (file->stat_info, &sp); + } + set_next_block_after (blk); + } + + return true; +} + static struct tar_sparse_optab const pax_optab = { NULL, /* No init function */ NULL, /* No done function */ pax_sparse_member_p, pax_dump_header, - NULL, /* No decode_header function */ - NULL, /* No fixup_header function */ + NULL, + pax_decode_header, NULL, /* No scan_block function */ sparse_dump_region, sparse_extract_region, -- 2.45.2