/* Functions for dealing with sparse files

   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any later
   version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
   Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
/* Forward declarations: the operation table below refers to the
   per-file state, and tar_sparse_init needs the format selector
   before its definition.  */
struct tar_sparse_file;

static bool sparse_select_optab (struct tar_sparse_file *file);
/* Phase passed to the scan_block hook while a file is being scanned:
   once before the first block, once per block read, and once after
   the last block.  */
enum sparse_scan_state
  {
    scan_begin,
    scan_block,
    scan_end
  };
/* Table of format-specific operations.  Any entry may be NULL; the
   tar_sparse_* wrappers supply a default result in that case.  */
struct tar_sparse_optab
{
  /* Set up and tear down format-specific state.  */
  bool (*init) (struct tar_sparse_file *);
  bool (*done) (struct tar_sparse_file *);

  /* Tell whether the current archive member is a sparse one.  */
  bool (*sparse_member_p) (struct tar_sparse_file *);

  /* Header handling: write on dump, fix up and decode on read.  */
  bool (*dump_header) (struct tar_sparse_file *);
  bool (*fixup_header) (struct tar_sparse_file *);
  bool (*decode_header) (struct tar_sparse_file *);

  /* Called by the scanner with the scan phase and the block just read
     (NULL when no data block is being passed).  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
                      void *);

  /* Dump or extract the sparse map entry with the given index.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t);
  bool (*extract_region) (struct tar_sparse_file *, size_t);
};
/* State shared by all operations on one sparse file.  */
struct tar_sparse_file
{
  int fd;                           /* File descriptor */
  bool seekable;                    /* Is fd seekable? */
  off_t offset;                     /* Current offset in fd; consulted
                                       when seekable == false */
  off_t dumped_size;                /* Number of data bytes of the
                                       member processed so far */
  struct tar_stat_info *stat_info;  /* Information about the file */
  struct tar_sparse_optab const *optab; /* Format-specific operations */
  void *closure;                    /* Any additional data optab calls
                                       might need */
};
62 /* Dump zeros to file->fd until offset is reached. It is used instead of
63 lseek if the output file is not seekable */
65 dump_zeros (struct tar_sparse_file
*file
, off_t offset
)
67 static char const zero_buf
[BLOCKSIZE
];
69 if (offset
< file
->offset
)
75 while (file
->offset
< offset
)
77 size_t size
= (BLOCKSIZE
< offset
- file
->offset
79 : offset
- file
->offset
);
82 wrbytes
= write (file
->fd
, zero_buf
, size
);
89 file
->offset
+= wrbytes
;
96 tar_sparse_member_p (struct tar_sparse_file
*file
)
98 if (file
->optab
->sparse_member_p
)
99 return file
->optab
->sparse_member_p (file
);
104 tar_sparse_init (struct tar_sparse_file
*file
)
106 memset (file
, 0, sizeof *file
);
108 if (!sparse_select_optab (file
))
111 if (file
->optab
->init
)
112 return file
->optab
->init (file
);
118 tar_sparse_done (struct tar_sparse_file
*file
)
120 if (file
->optab
->done
)
121 return file
->optab
->done (file
);
126 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
129 if (file
->optab
->scan_block
)
130 return file
->optab
->scan_block (file
, state
, block
);
135 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
137 if (file
->optab
->dump_region
)
138 return file
->optab
->dump_region (file
, i
);
143 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
145 if (file
->optab
->extract_region
)
146 return file
->optab
->extract_region (file
, i
);
151 tar_sparse_dump_header (struct tar_sparse_file
*file
)
153 if (file
->optab
->dump_header
)
154 return file
->optab
->dump_header (file
);
159 tar_sparse_decode_header (struct tar_sparse_file
*file
)
161 if (file
->optab
->decode_header
)
162 return file
->optab
->decode_header (file
);
167 tar_sparse_fixup_header (struct tar_sparse_file
*file
)
169 if (file
->optab
->fixup_header
)
170 return file
->optab
->fixup_header (file
);
176 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
)
179 ? lseek (file
->fd
, offset
, SEEK_SET
) < 0
180 : ! dump_zeros (file
, offset
))
182 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
/* Return true if the SIZE bytes at BUFFER are all zero, i.e. the block
   carries no useful data.  The scan stops at the first nonzero byte.
   An empty buffer (SIZE == 0) counts as all zeros.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  while (size--)
    if (*buffer++)
      return false;
  return true;
}
201 sparse_add_map (struct tar_stat_info
*st
, struct sp_array
const *sp
)
203 struct sp_array
*sparse_map
= st
->sparse_map
;
204 size_t avail
= st
->sparse_map_avail
;
205 if (avail
== st
->sparse_map_size
)
206 st
->sparse_map
= sparse_map
=
207 x2nrealloc (sparse_map
, &st
->sparse_map_size
, sizeof *sparse_map
);
208 sparse_map
[avail
] = *sp
;
209 st
->sparse_map_avail
= avail
+ 1;
212 /* Scan the sparse file and create its map */
214 sparse_scan_file (struct tar_sparse_file
*file
)
216 struct tar_stat_info
*st
= file
->stat_info
;
218 char buffer
[BLOCKSIZE
];
221 struct sp_array sp
= {0, 0};
223 if (!lseek_or_error (file
, 0))
226 st
->archive_file_size
= 0;
228 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
231 while ((count
= safe_read (fd
, buffer
, sizeof buffer
)) != 0
232 && count
!= SAFE_READ_ERROR
)
234 /* Analyze the block. */
235 if (zero_block_p (buffer
, count
))
239 sparse_add_map (st
, &sp
);
241 if (!tar_sparse_scan (file
, scan_block
, NULL
))
247 if (sp
.numbytes
== 0)
249 sp
.numbytes
+= count
;
250 st
->archive_file_size
+= count
;
251 if (!tar_sparse_scan (file
, scan_block
, buffer
))
258 if (sp
.numbytes
== 0)
261 sparse_add_map (st
, &sp
);
262 st
->archive_file_size
+= count
;
263 return tar_sparse_scan (file
, scan_end
, NULL
);
266 static struct tar_sparse_optab
const oldgnu_optab
;
267 static struct tar_sparse_optab
const star_optab
;
268 static struct tar_sparse_optab
const pax_optab
;
271 sparse_select_optab (struct tar_sparse_file
*file
)
273 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
280 case GNU_FORMAT
: /*FIXME: This one should disappear? */
281 file
->optab
= &oldgnu_optab
;
285 file
->optab
= &pax_optab
;
289 file
->optab
= &star_optab
;
299 sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
302 off_t bytes_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
304 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
307 while (bytes_left
> 0)
309 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
312 blk
= find_next_block ();
313 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
314 if (bytes_read
== SAFE_READ_ERROR
)
316 read_diag_details (file
->stat_info
->orig_file_name
,
317 (file
->stat_info
->sparse_map
[i
].offset
318 + file
->stat_info
->sparse_map
[i
].numbytes
324 memset (blk
->buffer
+ bytes_read
, 0, BLOCKSIZE
- bytes_read
);
325 bytes_left
-= bytes_read
;
326 file
->dumped_size
+= bytes_read
;
327 set_next_block_after (blk
);
334 sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
338 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
341 write_size
= file
->stat_info
->sparse_map
[i
].numbytes
;
345 /* Last block of the file is a hole */
346 if (file
->seekable
&& sys_truncate (file
->fd
))
347 truncate_warn (file
->stat_info
->orig_file_name
);
349 else while (write_size
> 0)
352 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
353 union block
*blk
= find_next_block ();
356 ERROR ((0, 0, _("Unexpected EOF in archive")));
359 set_next_block_after (blk
);
360 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
362 file
->dumped_size
+= count
;
363 file
->offset
+= count
;
364 if (count
!= wrbytes
)
366 write_error_details (file
->stat_info
->orig_file_name
,
376 /* Interface functions */
378 sparse_dump_file (int fd
, struct tar_stat_info
*st
)
381 struct tar_sparse_file file
;
383 if (!tar_sparse_init (&file
))
384 return dump_status_not_implemented
;
388 file
.seekable
= true; /* File *must* be seekable for dump to work */
390 rc
= sparse_scan_file (&file
);
391 if (rc
&& file
.optab
->dump_region
)
393 tar_sparse_dump_header (&file
);
399 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
400 rc
= tar_sparse_dump_region (&file
, i
);
404 pad_archive (file
.stat_info
->archive_file_size
- file
.dumped_size
);
405 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
408 /* Returns true if the file represented by stat is a sparse one */
410 sparse_file_p (struct tar_stat_info
*st
)
412 return (ST_NBLOCKS (st
->stat
)
413 < (st
->stat
.st_size
/ ST_NBLOCKSIZE
414 + (st
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
418 sparse_member_p (struct tar_stat_info
*st
)
420 struct tar_sparse_file file
;
422 if (!tar_sparse_init (&file
))
425 return tar_sparse_member_p (&file
);
429 sparse_fixup_header (struct tar_stat_info
*st
)
431 struct tar_sparse_file file
;
433 if (!tar_sparse_init (&file
))
436 return tar_sparse_fixup_header (&file
);
440 sparse_extract_file (int fd
, struct tar_stat_info
*st
, off_t
*size
)
443 struct tar_sparse_file file
;
446 if (!tar_sparse_init (&file
))
447 return dump_status_not_implemented
;
451 file
.seekable
= lseek (fd
, 0, SEEK_SET
) == 0;
454 rc
= tar_sparse_decode_header (&file
);
455 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
456 rc
= tar_sparse_extract_region (&file
, i
);
457 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
458 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
462 sparse_skip_file (struct tar_stat_info
*st
)
465 struct tar_sparse_file file
;
467 if (!tar_sparse_init (&file
))
468 return dump_status_not_implemented
;
473 rc
= tar_sparse_decode_header (&file
);
474 skip_file (file
.stat_info
->archive_file_size
);
475 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
480 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
482 if (!lseek_or_error (file
, beg
))
488 size_t rdsize
= BLOCKSIZE
< end
- beg
? BLOCKSIZE
: end
- beg
;
489 char diff_buffer
[BLOCKSIZE
];
491 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
492 if (bytes_read
== SAFE_READ_ERROR
)
494 read_diag_details (file
->stat_info
->orig_file_name
,
499 if (!zero_block_p (diff_buffer
, bytes_read
))
501 char begbuf
[INT_BUFSIZE_BOUND (off_t
)];
502 report_difference (file
->stat_info
,
503 _("File fragment at %s is not a hole"),
504 offtostr (beg
, begbuf
));
514 check_data_region (struct tar_sparse_file
*file
, size_t i
)
518 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
520 size_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
521 while (size_left
> 0)
524 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
525 char diff_buffer
[BLOCKSIZE
];
527 union block
*blk
= find_next_block ();
530 ERROR ((0, 0, _("Unexpected EOF in archive")));
533 set_next_block_after (blk
);
534 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
535 if (bytes_read
== SAFE_READ_ERROR
)
537 read_diag_details (file
->stat_info
->orig_file_name
,
538 (file
->stat_info
->sparse_map
[i
].offset
539 + file
->stat_info
->sparse_map
[i
].numbytes
544 file
->dumped_size
+= bytes_read
;
545 size_left
-= bytes_read
;
546 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
548 report_difference (file
->stat_info
, _("Contents differ"));
556 sparse_diff_file (int fd
, struct tar_stat_info
*st
)
559 struct tar_sparse_file file
;
563 if (!tar_sparse_init (&file
))
564 return dump_status_not_implemented
;
569 rc
= tar_sparse_decode_header (&file
);
570 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
572 rc
= check_sparse_region (&file
,
573 offset
, file
.stat_info
->sparse_map
[i
].offset
)
574 && check_data_region (&file
, i
);
575 offset
= file
.stat_info
->sparse_map
[i
].offset
576 + file
.stat_info
->sparse_map
[i
].numbytes
;
580 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
582 tar_sparse_done (&file
);
/* Old GNU Format.  The sparse file information is stored in the
   oldgnu_header in the following manner:

   The header is marked with type 'S'.  Its `size' field contains
   the cumulative size of all non-empty blocks of the file.  The
   actual file size is stored in `realsize' member of oldgnu_header.

   The map of the file is stored in a list of `struct sparse'.
   Each struct contains offset to the block of data and its
   size (both as octal numbers).  The first file header contains
   at most 4 such structs (SPARSES_IN_OLDGNU_HEADER).  If the map
   contains more structs, then the field `isextended' of the main
   header is set to 1 (binary) and the `struct sparse_header'
   header follows, containing at most 21 following structs
   (SPARSES_IN_SPARSE_HEADER).  If more structs follow, `isextended'
   field of the extended header is set and the next extension header
   follows, and so on.  */
/* Outcome of decoding one `struct sparse' entry from a header.  */
enum oldgnu_add_status
  {
    add_ok,       /* Entry was valid and has been added to the map */
    add_finish,   /* Entry is empty: the end of the map was reached */
    add_fail      /* Entry is invalid */
  };
613 oldgnu_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
615 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
618 /* Add a sparse item to the sparse file and its obstack */
619 static enum oldgnu_add_status
620 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
624 if (s
->numbytes
[0] == '\0')
626 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
627 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
629 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
630 || file
->stat_info
->archive_file_size
< 0)
633 sparse_add_map (file
->stat_info
, &sp
);
638 oldgnu_fixup_header (struct tar_sparse_file
*file
)
640 /* NOTE! st_size was initialized from the header
641 which actually contains archived size. The following fixes it */
642 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
643 file
->stat_info
->stat
.st_size
=
644 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
648 /* Convert old GNU format sparse data to internal representation */
650 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
653 union block
*h
= current_header
;
655 enum oldgnu_add_status rc
;
657 file
->stat_info
->sparse_map_avail
= 0;
658 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
660 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
665 for (ext_p
= h
->oldgnu_header
.isextended
;
666 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
668 h
= find_next_block ();
671 ERROR ((0, 0, _("Unexpected EOF in archive")));
674 set_next_block_after (h
);
675 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
676 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
681 ERROR ((0, 0, _("%s: invalid sparse archive member"),
682 file
->stat_info
->orig_file_name
));
689 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
690 struct sparse
*sp
, size_t sparse_size
)
692 for (; *pindex
< file
->stat_info
->sparse_map_avail
693 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
695 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
697 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
703 oldgnu_dump_header (struct tar_sparse_file
*file
)
705 off_t block_ordinal
= current_block_ordinal ();
709 blk
= start_header (file
->stat_info
);
710 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
711 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
712 blk
->oldgnu_header
.isextended
= 1;
714 /* Store the real file size */
715 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
716 /* Store the effective (shrunken) file size */
717 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
720 oldgnu_store_sparse_info (file
, &i
,
721 blk
->oldgnu_header
.sp
,
722 SPARSES_IN_OLDGNU_HEADER
);
723 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
724 finish_header (file
->stat_info
, blk
, block_ordinal
);
726 while (i
< file
->stat_info
->sparse_map_avail
)
728 blk
= find_next_block ();
729 memset (blk
->buffer
, 0, BLOCKSIZE
);
730 oldgnu_store_sparse_info (file
, &i
,
731 blk
->sparse_header
.sp
,
732 SPARSES_IN_SPARSE_HEADER
);
733 set_next_block_after (blk
);
734 if (i
< file
->stat_info
->sparse_map_avail
)
735 blk
->sparse_header
.isextended
= 1;
742 static struct tar_sparse_optab
const oldgnu_optab
= {
743 NULL
, /* No init function */
744 NULL
, /* No done function */
745 oldgnu_sparse_member_p
,
748 oldgnu_get_sparse_info
,
749 NULL
, /* No scan_block function */
751 sparse_extract_region
,
758 star_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
760 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
764 star_fixup_header (struct tar_sparse_file
*file
)
766 /* NOTE! st_size was initialized from the header
767 which actually contains archived size. The following fixes it */
768 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
769 file
->stat_info
->stat
.st_size
=
770 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
774 /* Convert STAR format sparse data to internal representation */
776 star_get_sparse_info (struct tar_sparse_file
*file
)
779 union block
*h
= current_header
;
781 enum oldgnu_add_status rc
= add_ok
;
783 file
->stat_info
->sparse_map_avail
= 0;
785 if (h
->star_in_header
.prefix
[0] == '\0'
786 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
788 /* Old star format */
789 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
791 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
795 ext_p
= h
->star_in_header
.isextended
;
800 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
802 h
= find_next_block ();
805 ERROR ((0, 0, _("Unexpected EOF in archive")));
808 set_next_block_after (h
);
809 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
810 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
815 ERROR ((0, 0, _("%s: invalid sparse archive member"),
816 file
->stat_info
->orig_file_name
));
823 static struct tar_sparse_optab
const star_optab
= {
824 NULL
, /* No init function */
825 NULL
, /* No done function */
826 star_sparse_member_p
,
829 star_get_sparse_info
,
830 NULL
, /* No scan_block function */
831 NULL
, /* No dump region function */
832 sparse_extract_region
,
/* GNU PAX sparse file format.  The sparse file map is stored in
   the extended header using the following keywords:

   GNU.sparse.size      Real size of the stored file
   GNU.sparse.numblocks Number of blocks in the sparse map
   repeat numblocks times
     GNU.sparse.offset    Offset of the next data block
     GNU.sparse.numbytes  Size of the next data block
   end repeat  */
848 pax_sparse_member_p (struct tar_sparse_file
*file
)
850 return file
->stat_info
->sparse_map_avail
> 0;
854 pax_dump_header (struct tar_sparse_file
*file
)
856 off_t block_ordinal
= current_block_ordinal ();
860 /* Store the real file size */
861 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
862 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
863 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
865 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
866 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
869 blk
= start_header (file
->stat_info
);
870 /* Store the effective (shrunken) file size */
871 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
872 finish_header (file
->stat_info
, blk
, block_ordinal
);
876 static struct tar_sparse_optab
const pax_optab
= {
877 NULL
, /* No init function */
878 NULL
, /* No done function */
881 NULL
, /* No decode_header function */
882 NULL
, /* No fixup_header function */
883 NULL
, /* No scan_block function */
885 sparse_extract_region
,