1 /* Functions for dealing with sparse files
3 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Forward declaration: the hook prototypes in struct tar_sparse_optab take
   pointers to this type. */
24 struct tar_sparse_file
;
26 enum sparse_scan_state
/* Table of per-format hooks. Each tar_sparse_* wrapper in this file tests
   the corresponding pointer and calls it only when it is non-null. */
33 struct tar_sparse_optab
/* Called from tar_sparse_init, after dumped_size has been reset. */
35 bool (*init
) (struct tar_sparse_file
*);
/* Called from tar_sparse_done. */
36 bool (*done
) (struct tar_sparse_file
*);
/* Called from tar_sparse_member_p. */
37 bool (*sparse_member_p
) (struct tar_sparse_file
*);
/* Called from tar_sparse_dump_header. */
38 bool (*dump_header
) (struct tar_sparse_file
*);
/* Called from tar_sparse_fixup_header. */
39 bool (*fixup_header
) (struct tar_sparse_file
*);
/* Called from tar_sparse_decode_header. */
40 bool (*decode_header
) (struct tar_sparse_file
*);
/* Called from tar_sparse_scan with the scan state and the block being
   scanned. */
41 bool (*scan_block
) (struct tar_sparse_file
*, enum sparse_scan_state
,
/* Called from tar_sparse_dump_region; the size_t is an index into
   stat_info->sparse_map. */
43 bool (*dump_region
) (struct tar_sparse_file
*, size_t);
/* Called from tar_sparse_extract_region; the size_t is an index into
   stat_info->sparse_map. */
44 bool (*extract_region
) (struct tar_sparse_file
*, size_t);
/* Per-member working state passed to every routine and hook in this file. */
47 struct tar_sparse_file
49 int fd
; /* File descriptor */
50 bool seekable
; /* Is fd seekable? */
51 off_t offset
; /* Current offset in fd if seekable==false.
     Advanced by dump_zeros and sparse_extract_region. */
53 off_t dumped_size
; /* Number of bytes actually written.
     Reset to 0 by tar_sparse_init. */
55 struct tar_stat_info
*stat_info
; /* Information about the file */
/* Format-specific hook table, assigned by sparse_select_optab. */
56 struct tar_sparse_optab
const *optab
;
57 void *closure
; /* Any additional data optab calls might require */
61 /* Dump zeros to file->fd until offset is reached. It is used instead of
62 lseek if the output file is not seekable */
/* FILE supplies the descriptor and the tracked position (file->offset);
   OFFSET is the absolute position to reach. */
64 dump_zeros (struct tar_sparse_file
*file
, off_t offset
)
66 static char const zero_buf
[BLOCKSIZE
];
/* The target lies before the current tracked position. */
68 if (offset
< file
->offset
)
/* Keep writing until the tracked position reaches the target. */
74 while (file
->offset
< offset
)
/* Chunk size: the remaining distance, compared against BLOCKSIZE so that a
   single write never exceeds the size of zero_buf. */
76 size_t size
= (BLOCKSIZE
< offset
- file
->offset
78 : offset
- file
->offset
);
81 wrbytes
= write (file
->fd
, zero_buf
, size
);
/* Advance by the amount write() reported. */
88 file
->offset
+= wrbytes
;
/* Forward to the format's sparse_member_p hook when the optab provides one.
   The result used when the hook is absent is on a line not shown here. */
95 tar_sparse_member_p (struct tar_sparse_file
*file
)
97 if (file
->optab
->sparse_member_p
)
98 return file
->optab
->sparse_member_p (file
);
/* Reset the running byte counter, then forward to the format's init hook
   when the optab provides one. The result used when the hook is absent is
   on a line not shown here. */
103 tar_sparse_init (struct tar_sparse_file
*file
)
105 file
->dumped_size
= 0;
106 if (file
->optab
->init
)
107 return file
->optab
->init (file
);
/* Forward to the format's done hook when the optab provides one. The
   result used when the hook is absent is on a line not shown here. */
112 tar_sparse_done (struct tar_sparse_file
*file
)
114 if (file
->optab
->done
)
115 return file
->optab
->done (file
);
/* Forward one scanning event (STATE plus the block, which callers pass as
   NULL for non-data events) to the format's scan_block hook when the optab
   provides one. The result used when the hook is absent is on a line not
   shown here. */
120 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
123 if (file
->optab
->scan_block
)
124 return file
->optab
->scan_block (file
, state
, block
);
/* Forward the request to dump sparse-map entry I to the format's
   dump_region hook when the optab provides one. The result used when the
   hook is absent is on a line not shown here. */
129 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
131 if (file
->optab
->dump_region
)
132 return file
->optab
->dump_region (file
, i
);
/* Forward the request to extract sparse-map entry I to the format's
   extract_region hook when the optab provides one. The result used when
   the hook is absent is on a line not shown here. */
137 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
139 if (file
->optab
->extract_region
)
140 return file
->optab
->extract_region (file
, i
);
/* Forward to the format's dump_header hook when the optab provides one.
   The result used when the hook is absent is on a line not shown here. */
145 tar_sparse_dump_header (struct tar_sparse_file
*file
)
147 if (file
->optab
->dump_header
)
148 return file
->optab
->dump_header (file
);
/* Forward to the format's decode_header hook when the optab provides one.
   The result used when the hook is absent is on a line not shown here. */
153 tar_sparse_decode_header (struct tar_sparse_file
*file
)
155 if (file
->optab
->decode_header
)
156 return file
->optab
->decode_header (file
);
/* Forward to the format's fixup_header hook when the optab provides one.
   The result used when the hook is absent is on a line not shown here. */
161 tar_sparse_fixup_header (struct tar_sparse_file
*file
)
163 if (file
->optab
->fixup_header
)
164 return file
->optab
->fixup_header (file
);
/* Position FILE at OFFSET and diagnose a failure. One arm of the
   conditional uses lseek on file->fd, the other falls back to dump_zeros;
   the selecting condition is on a line not shown here (presumably
   file->seekable -- confirm). */
170 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
)
/* lseek fails when it returns a negative value. */
173 ? lseek (file
->fd
, offset
, SEEK_SET
) < 0
/* dump_zeros fails when it returns false. */
174 : ! dump_zeros (file
, offset
))
/* Report the failed seek against the member's original file name. */
176 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
182 /* Scan the SIZE bytes at BUFFER and report whether they are *entirely*
183 zeros, returning 0 the instant a nonzero byte, i.e., useful data, is
184 found. */
186 zero_block_p (char const *buffer
, size_t size
)
/* Append a copy of *SP to ST's sparse map, growing the array first when it
   is full. */
195 sparse_add_map (struct tar_stat_info
*st
, struct sp_array
const *sp
)
197 struct sp_array
*sparse_map
= st
->sparse_map
;
198 size_t avail
= st
->sparse_map_avail
;
/* Every allocated slot is in use: let x2nrealloc enlarge the array and
   update sparse_map_size through the pointer it is given. */
199 if (avail
== st
->sparse_map_size
)
200 st
->sparse_map
= sparse_map
=
201 x2nrealloc (sparse_map
, &st
->sparse_map_size
, sizeof *sparse_map
);
/* Store the entry in the first free slot and publish the new count. */
202 sparse_map
[avail
] = *sp
;
203 st
->sparse_map_avail
= avail
+ 1;
206 /* Scan the sparse file and create its map */
/* Reads file->fd from the beginning in BLOCKSIZE chunks, records the data
   extents in the stat_info sparse map, accumulates archive_file_size, and
   reports each step to the format through tar_sparse_scan. */
208 sparse_scan_file (struct tar_sparse_file
*file
)
210 struct tar_stat_info
*st
= file
->stat_info
;
212 char buffer
[BLOCKSIZE
];
/* The extent currently being accumulated; numbytes == 0 means none. */
215 struct sp_array sp
= {0, 0};
/* Start scanning from the beginning of the file. */
217 if (!lseek_or_error (file
, 0))
220 st
->archive_file_size
= 0;
/* Tell the format hook that a scan is starting. */
222 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
/* Read until end of file (0) or a read error. */
225 while ((count
= safe_read (fd
, buffer
, sizeof buffer
)) != 0
226 && count
!= SAFE_READ_ERROR
)
228 /* Analyze the block. */
229 if (zero_block_p (buffer
, count
))
/* All-zero chunk: the pending extent is recorded and the hook is notified
   with a NULL block. */
233 sparse_add_map (st
, &sp
);
235 if (!tar_sparse_scan (file
, scan_block
, NULL
))
/* Data chunk: a zero numbytes means a new extent begins here. */
241 if (sp
.numbytes
== 0)
/* Grow the current extent and the archived size, then hand the data to
   the hook. */
243 sp
.numbytes
+= count
;
244 st
->archive_file_size
+= count
;
245 if (!tar_sparse_scan (file
, scan_block
, buffer
))
/* After the loop: flush the last extent (possibly an empty one). */
252 if (sp
.numbytes
== 0)
255 sparse_add_map (st
, &sp
);
/* NOTE(review): count holds the value that ended the loop -- 0 at end of
   file, SAFE_READ_ERROR after a failed read; confirm the latter case is
   intended to be added to archive_file_size. */
256 st
->archive_file_size
+= count
;
257 return tar_sparse_scan (file
, scan_end
, NULL
);
/* Forward declarations of the per-format hook tables. sparse_select_optab
   refers to them before their definitions, which appear later in the file
   after the format-specific functions. */
260 static struct tar_sparse_optab
const oldgnu_optab
;
261 static struct tar_sparse_optab
const star_optab
;
262 static struct tar_sparse_optab
const pax_optab
;
/* Point file->optab at the hook table for the archive format in effect.
   Most case labels and the return statements are on lines not shown. */
265 sparse_select_optab (struct tar_sparse_file
*file
)
/* Use archive_format when current_format is still DEFAULT_FORMAT. */
267 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
274 case GNU_FORMAT
: /*FIXME: This one should disappear? */
275 file
->optab
= &oldgnu_optab
;
/* The case labels selecting the next two tables are not visible here. */
279 file
->optab
= &pax_optab
;
283 file
->optab
= &star_optab
;
/* Copy data extent I of the sparse map from file->fd into archive blocks,
   one block per iteration. */
293 sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
296 off_t bytes_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
/* Position the input at the start of the extent. */
298 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
301 while (bytes_left
> 0)
/* Read at most one archive block's worth of data. */
303 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
306 blk
= find_next_block ();
307 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
308 if (bytes_read
== SAFE_READ_ERROR
)
310 read_diag_details (file
->stat_info
->orig_file_name
,
311 (file
->stat_info
->sparse_map
[i
].offset
312 + file
->stat_info
->sparse_map
[i
].numbytes
/* Zero the part of the archive block beyond the bytes actually read. */
318 memset (blk
->buffer
+ bytes_read
, 0, BLOCKSIZE
- bytes_read
);
/* NOTE(review): progress depends on bytes_read being nonzero; if safe_read
   returns 0 before the extent is exhausted (e.g. the file shrank), no
   visible line ends the loop -- confirm. */
319 bytes_left
-= bytes_read
;
320 file
->dumped_size
+= bytes_read
;
321 set_next_block_after (blk
);
/* Write data extent I of the sparse map from the archive to file->fd. */
328 sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
/* Position the output at the start of the extent. */
332 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
335 write_size
= file
->stat_info
->sparse_map
[i
].numbytes
;
339 /* Last block of the file is a hole */
/* Only a seekable descriptor is truncated; a nonzero result from
   sys_truncate produces a warning. */
340 if (file
->seekable
&& sys_truncate (file
->fd
))
341 truncate_warn (file
->stat_info
->orig_file_name
);
343 else while (write_size
> 0)
/* Consume one archive block and write up to BLOCKSIZE bytes of it. */
346 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
347 union block
*blk
= find_next_block ();
350 ERROR ((0, 0, _("Unexpected EOF in archive")));
353 set_next_block_after (blk
);
354 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
/* Account for whatever was written before testing for a short write. */
356 file
->dumped_size
+= count
;
357 file
->offset
+= count
;
358 if (count
!= wrbytes
)
360 write_error_details (file
->stat_info
->orig_file_name
,
370 /* Interface functions */
/* Archive the sparse file open on FD and described by ST: select the
   format hooks, scan the file to build its map, then write the header and
   each data extent. Returns a dump_status value. */
372 sparse_dump_file (int fd
, struct tar_stat_info
*st
)
375 struct tar_sparse_file file
= { 0, };
379 file
.seekable
= true; /* File *must* be seekable for dump to work */
/* Without a usable hook table or a successful init the format cannot be
   handled here. */
381 if (!sparse_select_optab (&file
)
382 || !tar_sparse_init (&file
))
383 return dump_status_not_implemented
;
385 rc
= sparse_scan_file (&file
);
/* The header is written only after a successful scan and when the format
   provides a dump_region hook. */
386 if (rc
&& file
.optab
->dump_region
)
388 tar_sparse_dump_header (&file
);
/* Dump the extents in map order, stopping at the first failure. */
394 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
395 rc
= tar_sparse_dump_region (&file
, i
);
/* Pad by the difference between the scanned archive size and the bytes
   actually dumped. */
399 pad_archive (file
.stat_info
->archive_file_size
- file
.dumped_size
);
/* tar_sparse_done is always called; either failure yields
   dump_status_short. */
400 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
403 /* Returns true if the file represented by stat is a sparse one */
405 sparse_file_p (struct tar_stat_info
*st
)
/* Sparse when the allocated block count is smaller than st_size divided by
   ST_NBLOCKSIZE, rounded up. */
407 return (ST_NBLOCKS (st
->stat
)
408 < (st
->stat
.st_size
/ ST_NBLOCKSIZE
409 + (st
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
/* Ask the hooks of the selected archive format whether the member
   described by ST is sparse. The lines that fill in the temporary
   tar_sparse_file are not shown here. */
413 sparse_member_p (struct tar_stat_info
*st
)
415 struct tar_sparse_file file
;
417 if (!sparse_select_optab (&file
))
420 return tar_sparse_member_p (&file
);
/* Run the fixup_header hook of the selected archive format for the member
   described by ST. The lines that fill in the temporary tar_sparse_file
   are not shown here. */
424 sparse_fixup_header (struct tar_stat_info
*st
)
426 struct tar_sparse_file file
;
428 if (!sparse_select_optab (&file
))
431 return tar_sparse_fixup_header (&file
);
/* Extract a sparse member to FD: decode its map, then write each data
   extent. *SIZE receives the archived byte count left unconsumed. Returns
   a dump_status value. */
435 sparse_extract_file (int fd
, struct tar_stat_info
*st
, off_t
*size
)
438 struct tar_sparse_file file
;
/* The descriptor counts as seekable when seeking to offset 0 yields 0. */
443 file
.seekable
= lseek (fd
, 0, SEEK_SET
) == 0;
446 if (!sparse_select_optab (&file
)
447 || !tar_sparse_init (&file
))
448 return dump_status_not_implemented
;
450 rc
= tar_sparse_decode_header (&file
);
/* Extract the extents in map order, stopping at the first failure. */
451 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
452 rc
= tar_sparse_extract_region (&file
, i
);
453 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
/* tar_sparse_done is always called; either failure yields
   dump_status_short. */
454 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* Skip over a sparse member in the archive: decode its header, then skip
   archive_file_size bytes of archived data. Returns a dump_status value. */
458 sparse_skip_file (struct tar_stat_info
*st
)
461 struct tar_sparse_file file
;
466 if (!sparse_select_optab (&file
)
467 || !tar_sparse_init (&file
))
468 return dump_status_not_implemented
;
470 rc
= tar_sparse_decode_header (&file
);
/* The skip is performed regardless of the value of rc. */
471 skip_file (file
.stat_info
->archive_file_size
);
472 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* Check that the on-disk data starting at BEG and bounded by END reads
   back as zeros, reporting a difference otherwise. The loop header and the
   return statements are on lines not shown here. */
477 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
479 if (!lseek_or_error (file
, beg
))
/* Read at most BLOCKSIZE bytes, and no more than what remains before
   END. */
485 size_t rdsize
= BLOCKSIZE
< end
- beg
? BLOCKSIZE
: end
- beg
;
486 char diff_buffer
[BLOCKSIZE
];
488 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
489 if (bytes_read
== SAFE_READ_ERROR
)
491 read_diag_details (file
->stat_info
->orig_file_name
,
/* Any nonzero byte means the range is not a hole. */
496 if (!zero_block_p (diff_buffer
, bytes_read
))
498 char begbuf
[INT_BUFSIZE_BOUND (off_t
)];
499 report_difference (file
->stat_info
,
500 _("File fragment at %s is not a hole"),
501 offtostr (beg
, begbuf
));
/* Compare data extent I of the sparse map, as stored in the archive,
   against the same range of file->fd. */
511 check_data_region (struct tar_sparse_file
*file
, size_t i
)
515 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
517 size_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
518 while (size_left
> 0)
/* Compare one archive block's worth of data per iteration. */
521 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
522 char diff_buffer
[BLOCKSIZE
];
524 union block
*blk
= find_next_block ();
527 ERROR ((0, 0, _("Unexpected EOF in archive")));
530 set_next_block_after (blk
);
531 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
532 if (bytes_read
== SAFE_READ_ERROR
)
534 read_diag_details (file
->stat_info
->orig_file_name
,
535 (file
->stat_info
->sparse_map
[i
].offset
536 + file
->stat_info
->sparse_map
[i
].numbytes
541 file
->dumped_size
+= bytes_read
;
542 size_left
-= bytes_read
;
/* NOTE(review): the comparison length is rdsize although only bytes_read
   bytes were placed in diff_buffer; after a short read the tail of
   diff_buffer is not data from the file -- confirm. */
543 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
545 report_difference (file
->stat_info
, _("Contents differ"));
/* Compare a sparse archive member against the file open on FD: decode the
   member's map, then check every hole and every data extent in turn. */
553 sparse_diff_file (int fd
, struct tar_stat_info
*st
)
556 struct tar_sparse_file file
;
563 if (!sparse_select_optab (&file
)
564 || !tar_sparse_init (&file
))
565 return dump_status_not_implemented
;
567 rc
= tar_sparse_decode_header (&file
);
568 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
/* The range from OFFSET up to this extent's start must be a hole; only
   then is the extent's data compared. */
570 rc
= check_sparse_region (&file
,
571 offset
, file
.stat_info
->sparse_map
[i
].offset
)
572 && check_data_region (&file
, i
);
/* The next hole starts where this extent ends. */
573 offset
= file
.stat_info
->sparse_map
[i
].offset
574 + file
.stat_info
->sparse_map
[i
].numbytes
;
/* Skip the archived data that the comparison did not consume. */
578 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
580 tar_sparse_done (&file
);
585 /* Old GNU Format. The sparse file information is stored in the
586 oldgnu_header in the following manner:
588 The header is marked with type 'S'. Its `size' field contains
589 the cumulative size of all non-empty blocks of the file. The
590 actual file size is stored in `realsize' member of oldgnu_header.
592 The map of the file is stored in a list of `struct sparse'.
593 Each struct contains offset to the block of data and its
594 size (both as octal numbers). The first file header contains
595 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
596 contains more structs, then the field `isextended' of the main
597 header is set to 1 (binary) and the `struct sparse_header'
598 header follows, containing at most 21 following structs
599 (SPARSES_IN_SPARSE_HEADER). If more structs follow, the `isextended'
600 field of the extended header is set and the next extension header
follows, and so on. */
603 enum oldgnu_add_status
/* An old GNU member is sparse when the current header's typeflag is
   GNUTYPE_SPARSE. FILE is not consulted. */
611 oldgnu_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
613 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
616 /* Add a sparse item to the sparse file and its obstack */
/* Decodes one on-archive `struct sparse' S, validates it, and appends it
   to FILE's sparse map. The status values returned on each path are on
   lines not shown here. */
617 static enum oldgnu_add_status
618 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
/* A numbytes field that starts with NUL is treated specially (presumably
   it marks the end of the list -- confirm). */
622 if (s
->numbytes
[0] == '\0')
/* Decode the header fields. */
624 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
625 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
/* Validity checks: the extent must not end past the real file size and the
   archived size must not be negative. The first operand of this condition
   is on a line not shown here.
   NOTE(review): sp.offset + sp.numbytes is formed from header-supplied
   values; confirm that overflow is excluded. */
627 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
628 || file
->stat_info
->archive_file_size
< 0)
631 sparse_add_map (file
->stat_info
, &sp
);
/* Separate the archived size from the real file size for an old GNU sparse
   member. */
636 oldgnu_fixup_header (struct tar_sparse_file
*file
)
638 /* NOTE! st_size was initialized from the header
639 which actually contains archived size. The following fixes it */
/* Keep the header's size as the archived size ... */
640 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
/* ... and take the real size from the oldgnu_header realsize field. */
641 file
->stat_info
->stat
.st_size
=
642 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
646 /* Convert old GNU format sparse data to internal representation */
/* Rebuilds FILE's sparse map from the entries in the main header and from
   any extension headers that follow it. */
648 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
651 union block
*h
= current_header
;
653 enum oldgnu_add_status rc
;
/* Start from an empty map. */
655 file
->stat_info
->sparse_map_avail
= 0;
/* Entries stored directly in the main header. */
656 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
658 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
/* Follow the chain of extension headers while entries keep being accepted
   and the isextended flag of the header just processed is set. */
663 for (ext_p
= h
->oldgnu_header
.isextended
;
664 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
666 h
= find_next_block ();
669 ERROR ((0, 0, _("Unexpected EOF in archive")));
672 set_next_block_after (h
);
/* Entries stored in this extension header. */
673 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
674 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
679 ERROR ((0, 0, _("%s: invalid sparse archive member"),
680 file
->stat_info
->orig_file_name
));
/* Encode sparse-map entries, starting at index *PINDEX, into at most
   SPARSE_SIZE consecutive slots of the array SP. *PINDEX is advanced past
   every entry stored, so a later call continues where this one stopped. */
687 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
688 struct sparse
*sp
, size_t sparse_size
)
/* Stop when the map is exhausted or the slots run out. */
690 for (; *pindex
< file
->stat_info
->sparse_map_avail
691 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
/* The destination arguments of both conversions are on lines not shown
   here. */
693 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
695 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
/* Write the old GNU sparse header for FILE, followed by as many extension
   headers as the sparse map requires. */
701 oldgnu_dump_header (struct tar_sparse_file
*file
)
703 off_t block_ordinal
= current_block_ordinal ();
707 blk
= start_header (file
->stat_info
);
708 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
/* More entries than fit in the main header: flag that extension headers
   follow. */
709 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
710 blk
->oldgnu_header
.isextended
= 1;
712 /* Store the real file size */
713 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
714 /* Store the effective (shrunken) file size */
715 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
/* Fill the slots of the main header; i is left at the first entry that did
   not fit. */
718 oldgnu_store_sparse_info (file
, &i
,
719 blk
->oldgnu_header
.sp
,
720 SPARSES_IN_OLDGNU_HEADER
);
/* Recomputed from what was actually stored: set when entries remain. */
721 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
722 finish_header (file
->stat_info
, blk
, block_ordinal
);
/* One extension block per iteration until every entry is stored. */
724 while (i
< file
->stat_info
->sparse_map_avail
)
726 blk
= find_next_block ();
727 memset (blk
->buffer
, 0, BLOCKSIZE
);
728 oldgnu_store_sparse_info (file
, &i
,
729 blk
->sparse_header
.sp
,
730 SPARSES_IN_SPARSE_HEADER
);
/* NOTE(review): isextended is stored into blk after
   set_next_block_after (blk) has been called; confirm the block may still
   be modified at that point. */
731 set_next_block_after (blk
);
732 if (i
< file
->stat_info
->sparse_map_avail
)
733 blk
->sparse_header
.isextended
= 1;
/* Hook table for the old GNU format. The initializers are positional and
   follow the member order of struct tar_sparse_optab; some entries are on
   lines not shown here. */
740 static struct tar_sparse_optab
const oldgnu_optab
= {
741 NULL
, /* No init function */
742 NULL
, /* No done function */
743 oldgnu_sparse_member_p
,
746 oldgnu_get_sparse_info
,
747 NULL
, /* No scan_block function */
749 sparse_extract_region
,
/* A star member is sparse when the current header's typeflag is
   GNUTYPE_SPARSE. FILE is not consulted. */
756 star_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
758 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
/* Separate the archived size from the real file size for a star sparse
   member. */
762 star_fixup_header (struct tar_sparse_file
*file
)
764 /* NOTE! st_size was initialized from the header
765 which actually contains archived size. The following fixes it */
/* Keep the header's size as the archived size ... */
766 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
/* ... and take the real size from the star_in_header realsize field. */
767 file
->stat_info
->stat
.st_size
=
768 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
772 /* Convert STAR format sparse data to internal representation */
/* Rebuilds FILE's sparse map from a star header and its extension headers,
   reusing oldgnu_add_sparse to decode each entry. */
774 star_get_sparse_info (struct tar_sparse_file
*file
)
777 union block
*h
= current_header
;
779 enum oldgnu_add_status rc
= add_ok
;
/* Start from an empty map. */
781 file
->stat_info
->sparse_map_avail
= 0;
/* The main header carries map entries only when its prefix is empty and
   byte 10 of the first entry's offset field is nonzero. */
783 if (h
->star_in_header
.prefix
[0] == '\0'
784 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
786 /* Old star format */
787 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
789 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
793 ext_p
= h
->star_in_header
.isextended
;
/* Follow the chain of extension headers while entries keep being accepted
   and the isextended flag of the header just processed is set. */
798 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
800 h
= find_next_block ();
803 ERROR ((0, 0, _("Unexpected EOF in archive")));
806 set_next_block_after (h
);
/* Entries stored in this extension header. */
807 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
808 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
813 ERROR ((0, 0, _("%s: invalid sparse archive member"),
814 file
->stat_info
->orig_file_name
));
/* Hook table for the star format. The initializers are positional and
   follow the member order of struct tar_sparse_optab; some entries are on
   lines not shown here. */
821 static struct tar_sparse_optab
const star_optab
= {
822 NULL
, /* No init function */
823 NULL
, /* No done function */
824 star_sparse_member_p
,
827 star_get_sparse_info
,
828 NULL
, /* No scan_block function */
829 NULL
, /* No dump region function */
830 sparse_extract_region
,
834 /* GNU PAX sparse file format. The sparse file map is stored in the
extended header using the following keywords:
837 GNU.sparse.size Real size of the stored file
838 GNU.sparse.numblocks Number of blocks in the sparse map
839 repeat numblocks times
840 GNU.sparse.offset Offset of the next data block
841 GNU.sparse.numbytes Size of the next data block
end repeat */
/* A PAX member is sparse when its archived size differs from its real
   size. */
846 pax_sparse_member_p (struct tar_sparse_file
*file
)
848 return file
->stat_info
->archive_file_size
!= file
->stat_info
->stat
.st_size
;
/* Write the header for a PAX sparse member: store the sparse map through
   xheader_store, then emit the header with the archived size. */
852 pax_dump_header (struct tar_sparse_file
*file
)
854 off_t block_ordinal
= current_block_ordinal ();
858 /* Store the real file size */
859 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
860 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
/* One offset/numbytes pair per map entry; the address of the index is
   passed as the last argument to xheader_store. */
861 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
863 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
864 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
867 blk
= start_header (file
->stat_info
);
868 /* Store the effective (shrunken) file size */
869 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
870 finish_header (file
->stat_info
, blk
, block_ordinal
);
/* Hook table for the GNU PAX format. The initializers are positional and
   follow the member order of struct tar_sparse_optab; some entries are on
   lines not shown here. */
874 static struct tar_sparse_optab
const pax_optab
= {
875 NULL
, /* No init function */
876 NULL
, /* No done function */
/* NOTE(review): struct tar_sparse_optab declares fixup_header before
   decode_header, so the next two labels appear in the opposite order to
   the slots they describe; both entries are NULL, so behavior is not
   affected. */
879 NULL
, /* No decode_header function */
880 NULL
, /* No fixup_header function */
881 NULL
, /* No scan_block function */
883 sparse_extract_region
,