1 /* Functions for dealing with sparse files
3 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Forward declaration so that struct tar_sparse_optab can mention the type
   before it is defined.  */
24 struct tar_sparse_file
;
/* Phase handed to the scan_block hook.  sparse_scan_file passes scan_begin
   before it starts reading, scan_block for each block it reads, and
   scan_end when it is finished.  */
26 enum sparse_scan_state
/* Table of format-specific operations on a sparse file.  The tables defined
   in this file leave some entries NULL; the tar_sparse_* wrappers test an
   entry before calling through it.

   init, done         run through tar_sparse_init and tar_sparse_done
   sparse_member_p    tells whether the member is a sparse one
   dump_header        writes the member header when a file is dumped
   fixup_header       corrects the size fields taken from the header
   decode_header      fills the sparse map from the archive
   scan_block         notified by sparse_scan_file for each scan phase
   dump_region,
   extract_region     handle the sparse map entry whose index is passed as
                      the second argument  */
33 struct tar_sparse_optab
35 bool (*init
) (struct tar_sparse_file
*);
36 bool (*done
) (struct tar_sparse_file
*);
37 bool (*sparse_member_p
) (struct tar_sparse_file
*);
38 bool (*dump_header
) (struct tar_sparse_file
*);
39 bool (*fixup_header
) (struct tar_sparse_file
*);
40 bool (*decode_header
) (struct tar_sparse_file
*);
41 bool (*scan_block
) (struct tar_sparse_file
*, enum sparse_scan_state
,
43 bool (*dump_region
) (struct tar_sparse_file
*, size_t);
44 bool (*extract_region
) (struct tar_sparse_file
*, size_t);
/* Working state for one sparse file while it is scanned, dumped, extracted
   or compared.  */
struct tar_sparse_file
{
  int fd;                               /* Descriptor the file data is read
                                           from or written to */
  bool seekable;                        /* True if lseek may be used on FD */
  off_t offset;                         /* Position in FD, tracked by hand
                                           when SEEKABLE is false */
  off_t dumped_size;                    /* Running count of data bytes
                                           transferred so far */
  struct tar_stat_info *stat_info;      /* Metadata of the file, including
                                           its sparse map */
  struct tar_sparse_optab const *optab; /* Operations of the selected
                                           archive format */
  void *closure;                        /* Extra data for use by the OPTAB
                                           hooks */
};
61 /* Dump zeros to file->fd until offset is reached. It is used instead of
62 lseek if the output file is not seekable */
/* FILE supplies the descriptor (file->fd) and the running position
   (file->offset); OFFSET is the position to reach.  A request for a
   position before file->offset is tested for first.  Zeros are taken from a
   static BLOCKSIZE-byte buffer and written in pieces no larger than
   BLOCKSIZE; file->offset advances by the count that write reports.  */
64 dump_zeros (struct tar_sparse_file
*file
, off_t offset
)
66 static char const zero_buf
[BLOCKSIZE
];
68 if (offset
< file
->offset
)
74 while (file
->offset
< offset
)
76 size_t size
= (BLOCKSIZE
< offset
- file
->offset
78 : offset
- file
->offset
);
81 wrbytes
= write (file
->fd
, zero_buf
, size
);
88 file
->offset
+= wrbytes
;
/* Wrapper for the sparse_member_p hook: when FILE's operation table supplies
   one, call it with FILE and return its answer.  */
95 tar_sparse_member_p (struct tar_sparse_file
*file
)
97 if (file
->optab
->sparse_member_p
)
98 return file
->optab
->sparse_member_p (file
);
/* Wrapper for the init hook: when FILE's operation table supplies one, call
   it with FILE and return its result.  */
103 tar_sparse_init (struct tar_sparse_file
*file
)
105 if (file
->optab
->init
)
106 return file
->optab
->init (file
);
/* Wrapper for the done hook: when FILE's operation table supplies one, call
   it with FILE and return its result.  */
111 tar_sparse_done (struct tar_sparse_file
*file
)
113 if (file
->optab
->done
)
114 return file
->optab
->done (file
);
/* Wrapper for the scan_block hook: when FILE's operation table supplies one,
   pass it FILE, the scan phase STATE and BLOCK, and return its result.
   sparse_scan_file passes either NULL or the buffer just read as BLOCK.  */
119 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
122 if (file
->optab
->scan_block
)
123 return file
->optab
->scan_block (file
, state
, block
);
/* Wrapper for the dump_region hook: when FILE's operation table supplies
   one, call it for sparse map entry I and return its result.  */
128 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
130 if (file
->optab
->dump_region
)
131 return file
->optab
->dump_region (file
, i
);
/* Wrapper for the extract_region hook: when FILE's operation table supplies
   one, call it for sparse map entry I and return its result.  */
136 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
138 if (file
->optab
->extract_region
)
139 return file
->optab
->extract_region (file
, i
);
/* Wrapper for the dump_header hook: when FILE's operation table supplies
   one, call it with FILE and return its result.  */
144 tar_sparse_dump_header (struct tar_sparse_file
*file
)
146 if (file
->optab
->dump_header
)
147 return file
->optab
->dump_header (file
);
/* Wrapper for the decode_header hook: when FILE's operation table supplies
   one, call it with FILE and return its result.  */
152 tar_sparse_decode_header (struct tar_sparse_file
*file
)
154 if (file
->optab
->decode_header
)
155 return file
->optab
->decode_header (file
);
/* Wrapper for the fixup_header hook: when FILE's operation table supplies
   one, call it with FILE and return its result.  */
160 tar_sparse_fixup_header (struct tar_sparse_file
*file
)
162 if (file
->optab
->fixup_header
)
163 return file
->optab
->fixup_header (file
);
/* Move FILE to OFFSET by one of two methods: an lseek on file->fd measured
   from the start of the file, or dump_zeros, which writes zeros up to OFFSET
   and is meant for descriptors that cannot seek.  When the chosen method
   fails, the problem is reported through seek_diag_details together with
   the file's original name and OFFSET.  */
169 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
)
172 ? lseek (file
->fd
, offset
, SEEK_SET
) < 0
173 : ! dump_zeros (file
, offset
))
175 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
181 /* Takes a blockful of data and basically cruises through it to see if
182 it's made *entirely* of zeros, returning a 0 the instant it finds
183 something that is a nonzero, i.e., useful data. */
/* BUFFER points to the data to examine and SIZE is its length in bytes;
   callers pass the byte count returned by their last read.  */
185 zero_block_p (char const *buffer
, size_t size
)
194 sparse_add_map (struct tar_stat_info
*st
, struct sp_array
const *sp
)
196 struct sp_array
*sparse_map
= st
->sparse_map
;
197 size_t avail
= st
->sparse_map_avail
;
198 if (avail
== st
->sparse_map_size
)
199 st
->sparse_map
= sparse_map
=
200 x2nrealloc (sparse_map
, &st
->sparse_map_size
, sizeof *sparse_map
);
201 sparse_map
[avail
] = *sp
;
202 st
->sparse_map_avail
= avail
+ 1;
205 /* Scan the sparse file and create its map */
/* Starting from offset 0, read the file in BLOCKSIZE-sized pieces with
   safe_read and test each piece with zero_block_p.  SP describes the run of
   data currently being collected; it is appended to ST's sparse map with
   sparse_add_map, and its numbytes and ST's archive_file_size grow by the
   size of each piece that is counted as data.  The format's scan hook is
   notified through tar_sparse_scan: scan_begin before the loop, scan_block
   for every piece (with NULL for a piece of zeros, with the buffer
   otherwise), and scan_end at the end, whose result is the return value.

   NOTE(review): the loop ends only when COUNT is 0 or SAFE_READ_ERROR, so
   the final `archive_file_size += count' adds either nothing or the error
   sentinel -- confirm how a read error is meant to be handled here.  */
207 sparse_scan_file (struct tar_sparse_file
*file
)
209 struct tar_stat_info
*st
= file
->stat_info
;
211 char buffer
[BLOCKSIZE
];
214 struct sp_array sp
= {0, 0};
216 if (!lseek_or_error (file
, 0))
219 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
222 while ((count
= safe_read (fd
, buffer
, sizeof buffer
)) != 0
223 && count
!= SAFE_READ_ERROR
)
225 /* Analyze the block. */
226 if (zero_block_p (buffer
, count
))
230 sparse_add_map (st
, &sp
);
232 if (!tar_sparse_scan (file
, scan_block
, NULL
))
238 if (sp
.numbytes
== 0)
240 sp
.numbytes
+= count
;
241 st
->archive_file_size
+= count
;
242 if (!tar_sparse_scan (file
, scan_block
, buffer
))
249 if (sp
.numbytes
== 0)
252 sparse_add_map (st
, &sp
);
253 st
->archive_file_size
+= count
;
254 return tar_sparse_scan (file
, scan_end
, NULL
);
257 static struct tar_sparse_optab
const oldgnu_optab
;
258 static struct tar_sparse_optab
const star_optab
;
259 static struct tar_sparse_optab
const pax_optab
;
/* Install in FILE the operation table that matches the archive format in
   effect: current_format, or archive_format when current_format is
   DEFAULT_FORMAT.  The assignments below select oldgnu_optab (used for
   GNU_FORMAT), pax_optab or star_optab.  */
262 sparse_select_optab (struct tar_sparse_file
*file
)
264 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
271 case GNU_FORMAT
: /*FIXME: This one should disappear? */
272 file
->optab
= &oldgnu_optab
;
276 file
->optab
= &pax_optab
;
280 file
->optab
= &star_optab
;
/* Copy the data of sparse map entry I from FILE into the archive.  After
   positioning at the entry's offset with lseek_or_error, each iteration
   takes an archive block from find_next_block, reads at most BLOCKSIZE
   bytes into it with safe_read, clears the unused tail of the block and
   releases it with set_next_block_after.  bytes_left shrinks and
   file->dumped_size grows by the number of bytes read.  A read error is
   reported through read_diag_details.

   NOTE(review): a read that returns 0 leaves bytes_left unchanged --
   confirm that a file which shrank cannot keep this loop running.  */
290 sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
293 off_t bytes_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
295 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
298 while (bytes_left
> 0)
300 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
303 blk
= find_next_block ();
304 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
305 if (bytes_read
== SAFE_READ_ERROR
)
307 read_diag_details (file
->stat_info
->orig_file_name
,
308 (file
->stat_info
->sparse_map
[i
].offset
309 + file
->stat_info
->sparse_map
[i
].numbytes
315 memset (blk
->buffer
+ bytes_read
, 0, BLOCKSIZE
- bytes_read
);
316 bytes_left
-= bytes_read
;
317 file
->dumped_size
+= bytes_read
;
318 set_next_block_after (blk
);
/* Write the data of sparse map entry I to file->fd.  The file is first
   positioned at the entry's offset with lseek_or_error.  In the branch
   marked as a trailing hole, a seekable file is passed to sys_truncate and
   truncate_warn is issued if that call returns nonzero.  Otherwise the
   entry's numbytes are copied from successive archive blocks with
   full_write, at most BLOCKSIZE bytes at a time; dumped_size and offset
   both grow by the count written.  The diagnostics in this function are
   "Unexpected EOF in archive" and, when fewer bytes were written than
   requested, a call to write_error_details.  */
325 sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
329 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
332 write_size
= file
->stat_info
->sparse_map
[i
].numbytes
;
336 /* Last block of the file is a hole */
337 if (file
->seekable
&& sys_truncate (file
->fd
))
338 truncate_warn (file
->stat_info
->orig_file_name
);
340 else while (write_size
> 0)
343 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
344 union block
*blk
= find_next_block ();
347 ERROR ((0, 0, _("Unexpected EOF in archive")));
350 set_next_block_after (blk
);
351 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
353 file
->dumped_size
+= count
;
354 file
->offset
+= count
;
355 if (count
!= wrbytes
)
357 write_error_details (file
->stat_info
->orig_file_name
,
367 /* Interface functions */
/* Archive the sparse file open on FD and described by ST.  The working
   structure starts zeroed and is marked seekable.  If no operation table
   can be selected or the init hook fails, dump_status_not_implemented is
   returned.  The file is scanned with sparse_scan_file; when that succeeds
   and the format has a dump_region hook, the header is written (the result
   of tar_sparse_dump_header is not checked) and the map entries are dumped
   in order until one fails.  The archive is then padded by
   archive_file_size minus dumped_size.  The result is dump_status_ok only
   if both the done hook and the preceding steps succeeded, otherwise
   dump_status_short.  */
369 sparse_dump_file (int fd
, struct tar_stat_info
*st
)
372 struct tar_sparse_file file
= { 0, };
376 file
.seekable
= true; /* File *must* be seekable for dump to work */
378 if (!sparse_select_optab (&file
)
379 || !tar_sparse_init (&file
))
380 return dump_status_not_implemented
;
382 rc
= sparse_scan_file (&file
);
383 if (rc
&& file
.optab
->dump_region
)
385 tar_sparse_dump_header (&file
);
391 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
392 rc
= tar_sparse_dump_region (&file
, i
);
396 pad_archive (file
.stat_info
->archive_file_size
- file
.dumped_size
);
397 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
400 /* Returns true if the file represented by stat is a sparse one */
402 sparse_file_p (struct tar_stat_info
*st
)
404 return (ST_NBLOCKS (st
->stat
)
405 < (st
->stat
.st_size
/ ST_NBLOCKSIZE
406 + (st
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
/* Interface function: select the operation table for the archive format in
   effect and return the answer of its sparse_member_p hook, obtained
   through tar_sparse_member_p.  A failure to select a table is handled
   before the hook is consulted.  */
410 sparse_member_p (struct tar_stat_info
*st
)
412 struct tar_sparse_file file
;
414 if (!sparse_select_optab (&file
))
417 return tar_sparse_member_p (&file
);
/* Interface function: select the operation table for the archive format in
   effect and run its fixup_header hook through tar_sparse_fixup_header,
   returning that result.  A failure to select a table is handled before the
   hook is run.  */
421 sparse_fixup_header (struct tar_stat_info
*st
)
423 struct tar_sparse_file file
;
425 if (!sparse_select_optab (&file
))
428 return tar_sparse_fixup_header (&file
);
432 sparse_extract_file (int fd
, struct tar_stat_info
*st
, off_t
*size
)
435 struct tar_sparse_file file
;
440 file
.seekable
= lseek (fd
, 0, SEEK_SET
) == 0;
443 if (!sparse_select_optab (&file
)
444 || !tar_sparse_init (&file
))
445 return dump_status_not_implemented
;
447 rc
= tar_sparse_decode_header (&file
);
448 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
449 rc
= tar_sparse_extract_region (&file
, i
);
450 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
451 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* Skip over the sparse member described by ST without extracting it.  After
   selecting the operation table and running the init hook (a failure of
   either yields dump_status_not_implemented), the member's header is
   decoded and skip_file is called with archive_file_size.  The result is
   dump_status_ok only if both the done hook and the decoding succeeded,
   otherwise dump_status_short.  */
455 sparse_skip_file (struct tar_stat_info
*st
)
458 struct tar_sparse_file file
;
463 if (!sparse_select_optab (&file
)
464 || !tar_sparse_init (&file
))
465 return dump_status_not_implemented
;
467 rc
= tar_sparse_decode_header (&file
);
468 skip_file (file
.stat_info
->archive_file_size
);
469 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* Check that FILE reads back as zeros between BEG and END; sparse_diff_file
   uses this for the gap in front of each mapped region.  The file is
   positioned at BEG, then read into a local buffer in pieces of at most
   BLOCKSIZE bytes, never asking for more than END - BEG.  A read error is
   reported through read_diag_details; a piece that is not all zeros is
   reported as "File fragment at %s is not a hole" with BEG as the
   position.  */
474 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
476 if (!lseek_or_error (file
, beg
))
482 size_t rdsize
= BLOCKSIZE
< end
- beg
? BLOCKSIZE
: end
- beg
;
483 char diff_buffer
[BLOCKSIZE
];
485 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
486 if (bytes_read
== SAFE_READ_ERROR
)
488 read_diag_details (file
->stat_info
->orig_file_name
,
493 if (!zero_block_p (diff_buffer
, bytes_read
))
495 char begbuf
[INT_BUFSIZE_BOUND (off_t
)];
496 report_difference (file
->stat_info
,
497 _("File fragment at %s is not a hole"),
498 offtostr (beg
, begbuf
));
/* Compare sparse map entry I of FILE with the matching data in the archive.
   The file is positioned at the entry's offset; then, while bytes of the
   entry remain, an archive block is fetched with find_next_block and
   released, up to BLOCKSIZE bytes are read from the file into a local
   buffer, and the two are compared with memcmp.  dumped_size grows and
   size_left shrinks by the number of bytes read.  The diagnostics are
   "Unexpected EOF in archive", a read_diag_details call after a read error,
   and "Contents differ" when the comparison fails.

   NOTE(review): memcmp compares RDSIZE bytes even when safe_read returned
   fewer, and diff_buffer is a fresh uninitialized array on each iteration
   -- confirm that a short read cannot produce a false difference.
   NOTE(review): a read that returns 0 leaves size_left unchanged -- confirm
   that the loop still ends when the file is shorter than the map says.  */
508 check_data_region (struct tar_sparse_file
*file
, size_t i
)
512 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
514 size_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
515 while (size_left
> 0)
518 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
519 char diff_buffer
[BLOCKSIZE
];
521 union block
*blk
= find_next_block ();
524 ERROR ((0, 0, _("Unexpected EOF in archive")));
527 set_next_block_after (blk
);
528 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
529 if (bytes_read
== SAFE_READ_ERROR
)
531 read_diag_details (file
->stat_info
->orig_file_name
,
532 (file
->stat_info
->sparse_map
[i
].offset
533 + file
->stat_info
->sparse_map
[i
].numbytes
538 file
->dumped_size
+= bytes_read
;
539 size_left
-= bytes_read
;
540 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
542 report_difference (file
->stat_info
, _("Contents differ"));
/* Compare the sparse member described by ST with the file open on FD.
   After the operation table is selected and the init hook run, the sparse
   map is decoded.  For each map entry in turn, check_sparse_region verifies
   the gap between the end of the previous entry (OFFSET) and the start of
   this one, and check_data_region compares the entry's data; the loop stops
   at the first failure.  skip_file is called with archive_file_size minus
   dumped_size, and the done hook is run before returning.

   NOTE(review): `file' has no initializer here, unlike in sparse_dump_file,
   while dumped_size is read below and added to by check_data_region --
   confirm that it is given a starting value.
   NOTE(review): the early exit returns dump_status_not_implemented, an
   enumeration constant, in a function whose other users treat the result as
   a plain success flag -- confirm the intended return type.  */
550 sparse_diff_file (int fd
, struct tar_stat_info
*st
)
553 struct tar_sparse_file file
;
560 if (!sparse_select_optab (&file
)
561 || !tar_sparse_init (&file
))
562 return dump_status_not_implemented
;
564 rc
= tar_sparse_decode_header (&file
);
565 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
567 rc
= check_sparse_region (&file
,
568 offset
, file
.stat_info
->sparse_map
[i
].offset
)
569 && check_data_region (&file
, i
);
570 offset
= file
.stat_info
->sparse_map
[i
].offset
571 + file
.stat_info
->sparse_map
[i
].numbytes
;
575 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
577 tar_sparse_done (&file
);
582 /* Old GNU Format. The sparse file information is stored in the
583 oldgnu_header in the following manner:
585 The header is marked with type 'S'. Its `size' field contains
586 the cumulative size of all non-empty blocks of the file. The
587 actual file size is stored in `realsize' member of oldgnu_header.
589 The map of the file is stored in a list of `struct sparse'.
590 Each struct contains offset to the block of data and its
591 size (both as octal numbers). The first file header contains
592 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
593 contains more structs, then the field `isextended' of the main
594 header is set to 1 (binary) and the `struct sparse_header'
595 header follows, containing at most 21 following structs
596 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
597 field of the extended header is set and the next extension header follows, and so on. */
/* Result of oldgnu_add_sparse.  The header decoders keep adding entries only
   while the value is add_ok.  */
600 enum oldgnu_add_status
608 oldgnu_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
610 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
613 /* Add a sparse item to the sparse file and its obstack */
/* S is one sparse descriptor as stored in the archive.  A descriptor whose
   numbytes field begins with a NUL byte is tested for first.  Otherwise the
   offset and size are decoded with OFF_FROM_HEADER and SIZE_FROM_HEADER,
   checked against the file's st_size and archive_file_size, and the
   resulting entry is appended to the file's sparse map.

   NOTE(review): `sp.offset + sp.numbytes' is computed from values read from
   the archive -- confirm that the sum cannot overflow before it is compared
   with st_size.  */
614 static enum oldgnu_add_status
615 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
619 if (s
->numbytes
[0] == '\0')
621 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
622 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
624 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
625 || file
->stat_info
->archive_file_size
< 0)
628 sparse_add_map (file
->stat_info
, &sp
);
/* fixup_header hook for the old GNU format: the size taken from the header
   is moved into archive_file_size, and st_size is replaced by the real file
   size decoded from the realsize field of the old GNU header.  */
633 oldgnu_fixup_header (struct tar_sparse_file
*file
)
635 /* NOTE! st_size was initialized from the header
636 which actually contains archived size. The following fixes it */
637 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
638 file
->stat_info
->stat
.st_size
=
639 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
643 /* Convert old GNU format sparse data to internal representation */
/* decode_header hook for the old GNU format.  The sparse map is emptied and
   refilled, first from the SPARSES_IN_OLDGNU_HEADER descriptors in the
   current header and then, for as long as the isextended flag of the block
   just processed is set and adding entries still yields add_ok, from the
   SPARSES_IN_SPARSE_HEADER descriptors of each extension block fetched with
   find_next_block.  The diagnostics are "Unexpected EOF in archive" and
   "%s: invalid sparse archive member" with the member's original name.  */
645 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
648 union block
*h
= current_header
;
650 enum oldgnu_add_status rc
;
652 file
->stat_info
->sparse_map_avail
= 0;
653 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
655 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
660 for (ext_p
= h
->oldgnu_header
.isextended
;
661 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
663 h
= find_next_block ();
666 ERROR ((0, 0, _("Unexpected EOF in archive")));
669 set_next_block_after (h
);
670 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
671 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
676 ERROR ((0, 0, _("%s: invalid sparse archive member"),
677 file
->stat_info
->orig_file_name
));
/* Encode sparse map entries into the descriptor array SP, which has room
   for SPARSE_SIZE descriptors.  Encoding starts at map index *PINDEX and
   stops when either the map or the array is used up; *PINDEX is advanced
   past every entry stored, so that a later call continues where this one
   stopped.  Offsets are written with OFF_TO_CHARS and sizes with
   SIZE_TO_CHARS.  */
684 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
685 struct sparse
*sp
, size_t sparse_size
)
687 for (; *pindex
< file
->stat_info
->sparse_map_avail
688 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
690 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
692 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
698 oldgnu_dump_header (struct tar_sparse_file
*file
)
700 off_t block_ordinal
= current_block_ordinal ();
704 blk
= start_header (file
->stat_info
);
705 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
706 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
707 blk
->oldgnu_header
.isextended
= 1;
709 /* Store the real file size */
710 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
711 /* Store the effective (shrunken) file size */
712 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
715 oldgnu_store_sparse_info (file
, &i
,
716 blk
->oldgnu_header
.sp
,
717 SPARSES_IN_OLDGNU_HEADER
);
718 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
719 finish_header (file
->stat_info
, blk
, block_ordinal
);
721 while (i
< file
->stat_info
->sparse_map_avail
)
723 blk
= find_next_block ();
724 memset (blk
->buffer
, 0, BLOCKSIZE
);
725 oldgnu_store_sparse_info (file
, &i
,
726 blk
->sparse_header
.sp
,
727 SPARSES_IN_SPARSE_HEADER
);
728 set_next_block_after (blk
);
729 if (i
< file
->stat_info
->sparse_map_avail
)
730 blk
->sparse_header
.isextended
= 1;
/* Operation table for the old GNU sparse format.  The entries follow the
   member order of struct tar_sparse_optab.  */
737 static struct tar_sparse_optab
const oldgnu_optab
= {
738 NULL
, /* No init function */
739 NULL
, /* No done function */
740 oldgnu_sparse_member_p
,
743 oldgnu_get_sparse_info
,
744 NULL
, /* No scan_block function */
746 sparse_extract_region
,
753 star_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
755 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
/* fixup_header hook for the star format: the size taken from the header is
   moved into archive_file_size, and st_size is replaced by the real file
   size decoded from the realsize field of the star header.  */
759 star_fixup_header (struct tar_sparse_file
*file
)
761 /* NOTE! st_size was initialized from the header
762 which actually contains archived size. The following fixes it */
763 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
764 file
->stat_info
->stat
.st_size
=
765 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
769 /* Convert STAR format sparse data to internal representation */
/* decode_header hook for the star format.  The sparse map is emptied first.
   When the header's prefix field is empty and byte 10 of the first
   descriptor's offset field is not NUL, the header is treated as the old
   star layout: its SPARSES_IN_STAR_HEADER descriptors are added with
   oldgnu_add_sparse and its isextended flag decides whether extension
   blocks follow.  Extension blocks are then read with find_next_block for
   as long as the flag is set and adding entries still yields add_ok; each
   contributes up to SPARSES_IN_STAR_EXT_HEADER descriptors.  The
   diagnostics are "Unexpected EOF in archive" and
   "%s: invalid sparse archive member" with the member's original name.  */
771 star_get_sparse_info (struct tar_sparse_file
*file
)
774 union block
*h
= current_header
;
776 enum oldgnu_add_status rc
= add_ok
;
778 file
->stat_info
->sparse_map_avail
= 0;
780 if (h
->star_in_header
.prefix
[0] == '\0'
781 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
783 /* Old star format */
784 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
786 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
790 ext_p
= h
->star_in_header
.isextended
;
795 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
797 h
= find_next_block ();
800 ERROR ((0, 0, _("Unexpected EOF in archive")));
803 set_next_block_after (h
);
804 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
805 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
810 ERROR ((0, 0, _("%s: invalid sparse archive member"),
811 file
->stat_info
->orig_file_name
));
/* Operation table for the star sparse format.  The entries follow the
   member order of struct tar_sparse_optab; there is no dump_region hook, so
   sparse_dump_file does not write regions in this format.  */
818 static struct tar_sparse_optab
const star_optab
= {
819 NULL
, /* No init function */
820 NULL
, /* No done function */
821 star_sparse_member_p
,
824 star_get_sparse_info
,
825 NULL
, /* No scan_block function */
826 NULL
, /* No dump region function */
827 sparse_extract_region
,
831 /* GNU PAX sparse file format. The sparse file map is stored in
the pax extended header, using the following keywords:
834 GNU.sparse.size Real size of the stored file
835 GNU.sparse.numblocks Number of blocks in the sparse map
836 repeat numblocks times
837 GNU.sparse.offset Offset of the next data block
838 GNU.sparse.numbytes Size of the next data block
end repeat */
843 pax_sparse_member_p (struct tar_sparse_file
*file
)
845 return file
->stat_info
->archive_file_size
!= file
->stat_info
->stat
.st_size
;
/* dump_header hook for the pax format.  The sparse map is recorded through
   xheader_store: GNU.sparse.size and GNU.sparse.numblocks once, then
   GNU.sparse.offset and GNU.sparse.numbytes for every map entry, with the
   address of the entry index as the last argument.  The member header is
   then started, its size field set to archive_file_size, and finished.  */
849 pax_dump_header (struct tar_sparse_file
*file
)
851 off_t block_ordinal
= current_block_ordinal ();
855 /* Store the real file size */
856 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
857 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
858 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
860 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
861 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
864 blk
= start_header (file
->stat_info
);
865 /* Store the effective (shrunken) file size */
866 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
867 finish_header (file
->stat_info
, blk
, block_ordinal
);
/* Operation table for the pax sparse format.  The entries follow the member
   order of struct tar_sparse_optab, in which fixup_header comes before
   decode_header; both are NULL here.  */
871 static struct tar_sparse_optab
const pax_optab
= {
872 NULL
, /* No init function */
873 NULL
, /* No done function */
876 NULL
, /* No fixup_header function */
877 NULL
, /* No decode_header function */
878 NULL
, /* No scan_block function */
880 sparse_extract_region
,