1 /* Functions for dealing with sparse files
3 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
24 struct tar_sparse_file
;
26 enum sparse_scan_state
33 struct tar_sparse_optab
35 bool (*init
) (struct tar_sparse_file
*);
36 bool (*done
) (struct tar_sparse_file
*);
37 bool (*sparse_member_p
) (struct tar_sparse_file
*);
38 bool (*dump_header
) (struct tar_sparse_file
*);
39 bool (*fixup_header
) (struct tar_sparse_file
*);
40 bool (*decode_header
) (struct tar_sparse_file
*);
41 bool (*scan_block
) (struct tar_sparse_file
*, enum sparse_scan_state
,
43 bool (*dump_region
) (struct tar_sparse_file
*, size_t);
44 bool (*extract_region
) (struct tar_sparse_file
*, size_t);
47 struct tar_sparse_file
49 int fd
; /* File descriptor */
50 bool seekable
; /* Is fd seekable? */
51 off_t offset
; /* Current offset in fd if seekable==false.
53 off_t dumped_size
; /* Number of bytes actually written
55 struct tar_stat_info
*stat_info
; /* Information about the file */
56 struct tar_sparse_optab
const *optab
;
57 void *closure
; /* Any additional data optab calls might
61 /* Write zeros to file->fd until file->offset reaches OFFSET. This is used
62 instead of lseek when the output file is not seekable. */
64 dump_zeros (struct tar_sparse_file
*file
, off_t offset
)
66 static char const zero_buf
[BLOCKSIZE
];
68 if (offset
< file
->offset
)
74 while (file
->offset
< offset
)
76 size_t size
= (BLOCKSIZE
< offset
- file
->offset
78 : offset
- file
->offset
);
81 wrbytes
= write (file
->fd
, zero_buf
, size
);
88 file
->offset
+= wrbytes
;
95 tar_sparse_member_p (struct tar_sparse_file
*file
)
97 if (file
->optab
->sparse_member_p
)
98 return file
->optab
->sparse_member_p (file
);
103 tar_sparse_init (struct tar_sparse_file
*file
)
105 file
->dumped_size
= 0;
106 if (file
->optab
->init
)
107 return file
->optab
->init (file
);
112 tar_sparse_done (struct tar_sparse_file
*file
)
114 if (file
->optab
->done
)
115 return file
->optab
->done (file
);
120 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
123 if (file
->optab
->scan_block
)
124 return file
->optab
->scan_block (file
, state
, block
);
129 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
131 if (file
->optab
->dump_region
)
132 return file
->optab
->dump_region (file
, i
);
137 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
139 if (file
->optab
->extract_region
)
140 return file
->optab
->extract_region (file
, i
);
145 tar_sparse_dump_header (struct tar_sparse_file
*file
)
147 if (file
->optab
->dump_header
)
148 return file
->optab
->dump_header (file
);
153 tar_sparse_decode_header (struct tar_sparse_file
*file
)
155 if (file
->optab
->decode_header
)
156 return file
->optab
->decode_header (file
);
161 tar_sparse_fixup_header (struct tar_sparse_file
*file
)
163 if (file
->optab
->fixup_header
)
164 return file
->optab
->fixup_header (file
);
170 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
)
173 ? lseek (file
->fd
, offset
, SEEK_SET
) < 0
174 : ! dump_zeros (file
, offset
))
176 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
182 /* Scan SIZE bytes of BUFFER to see whether they are *entirely* zeros,
183 returning 0 the instant a nonzero byte, i.e., useful data, is
184 found. */
186 zero_block_p (char const *buffer
, size_t size
)
195 sparse_add_map (struct tar_stat_info
*st
, struct sp_array
const *sp
)
197 struct sp_array
*sparse_map
= st
->sparse_map
;
198 size_t avail
= st
->sparse_map_avail
;
199 if (avail
== st
->sparse_map_size
)
200 st
->sparse_map
= sparse_map
=
201 x2nrealloc (sparse_map
, &st
->sparse_map_size
, sizeof *sparse_map
);
202 sparse_map
[avail
] = *sp
;
203 st
->sparse_map_avail
= avail
+ 1;
206 /* Scan the sparse file and create its map */
208 sparse_scan_file (struct tar_sparse_file
*file
)
210 struct tar_stat_info
*st
= file
->stat_info
;
212 char buffer
[BLOCKSIZE
];
215 struct sp_array sp
= {0, 0};
217 if (!lseek_or_error (file
, 0))
220 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
223 while ((count
= safe_read (fd
, buffer
, sizeof buffer
)) != 0
224 && count
!= SAFE_READ_ERROR
)
226 /* Analyze the block. */
227 if (zero_block_p (buffer
, count
))
231 sparse_add_map (st
, &sp
);
233 if (!tar_sparse_scan (file
, scan_block
, NULL
))
239 if (sp
.numbytes
== 0)
241 sp
.numbytes
+= count
;
242 st
->archive_file_size
+= count
;
243 if (!tar_sparse_scan (file
, scan_block
, buffer
))
250 if (sp
.numbytes
== 0)
253 sparse_add_map (st
, &sp
);
254 st
->archive_file_size
+= count
;
255 return tar_sparse_scan (file
, scan_end
, NULL
);
258 static struct tar_sparse_optab
const oldgnu_optab
;
259 static struct tar_sparse_optab
const star_optab
;
260 static struct tar_sparse_optab
const pax_optab
;
263 sparse_select_optab (struct tar_sparse_file
*file
)
265 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
272 case GNU_FORMAT
: /*FIXME: This one should disappear? */
273 file
->optab
= &oldgnu_optab
;
277 file
->optab
= &pax_optab
;
281 file
->optab
= &star_optab
;
291 sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
294 off_t bytes_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
296 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
299 while (bytes_left
> 0)
301 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
304 blk
= find_next_block ();
305 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
306 if (bytes_read
== SAFE_READ_ERROR
)
308 read_diag_details (file
->stat_info
->orig_file_name
,
309 (file
->stat_info
->sparse_map
[i
].offset
310 + file
->stat_info
->sparse_map
[i
].numbytes
316 memset (blk
->buffer
+ bytes_read
, 0, BLOCKSIZE
- bytes_read
);
317 bytes_left
-= bytes_read
;
318 file
->dumped_size
+= bytes_read
;
319 set_next_block_after (blk
);
326 sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
330 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
333 write_size
= file
->stat_info
->sparse_map
[i
].numbytes
;
337 /* Last block of the file is a hole */
338 if (file
->seekable
&& sys_truncate (file
->fd
))
339 truncate_warn (file
->stat_info
->orig_file_name
);
341 else while (write_size
> 0)
344 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
345 union block
*blk
= find_next_block ();
348 ERROR ((0, 0, _("Unexpected EOF in archive")));
351 set_next_block_after (blk
);
352 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
354 file
->dumped_size
+= count
;
355 file
->offset
+= count
;
356 if (count
!= wrbytes
)
358 write_error_details (file
->stat_info
->orig_file_name
,
368 /* Interface functions */
370 sparse_dump_file (int fd
, struct tar_stat_info
*st
)
373 struct tar_sparse_file file
= { 0, };
377 file
.seekable
= true; /* File *must* be seekable for dump to work */
379 if (!sparse_select_optab (&file
)
380 || !tar_sparse_init (&file
))
381 return dump_status_not_implemented
;
383 rc
= sparse_scan_file (&file
);
384 if (rc
&& file
.optab
->dump_region
)
386 tar_sparse_dump_header (&file
);
392 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
393 rc
= tar_sparse_dump_region (&file
, i
);
397 pad_archive (file
.stat_info
->archive_file_size
- file
.dumped_size
);
398 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
401 /* Return true if ST describes a sparse file: its ST_NBLOCKS count is less than st_size rounded up to whole ST_NBLOCKSIZE units */
403 sparse_file_p (struct tar_stat_info
*st
)
405 return (ST_NBLOCKS (st
->stat
)
406 < (st
->stat
.st_size
/ ST_NBLOCKSIZE
407 + (st
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
411 sparse_member_p (struct tar_stat_info
*st
)
413 struct tar_sparse_file file
;
415 if (!sparse_select_optab (&file
))
418 return tar_sparse_member_p (&file
);
422 sparse_fixup_header (struct tar_stat_info
*st
)
424 struct tar_sparse_file file
;
426 if (!sparse_select_optab (&file
))
429 return tar_sparse_fixup_header (&file
);
433 sparse_extract_file (int fd
, struct tar_stat_info
*st
, off_t
*size
)
436 struct tar_sparse_file file
;
441 file
.seekable
= lseek (fd
, 0, SEEK_SET
) == 0;
444 if (!sparse_select_optab (&file
)
445 || !tar_sparse_init (&file
))
446 return dump_status_not_implemented
;
448 rc
= tar_sparse_decode_header (&file
);
449 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
450 rc
= tar_sparse_extract_region (&file
, i
);
451 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
452 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
456 sparse_skip_file (struct tar_stat_info
*st
)
459 struct tar_sparse_file file
;
464 if (!sparse_select_optab (&file
)
465 || !tar_sparse_init (&file
))
466 return dump_status_not_implemented
;
468 rc
= tar_sparse_decode_header (&file
);
469 skip_file (file
.stat_info
->archive_file_size
);
470 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
475 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
477 if (!lseek_or_error (file
, beg
))
483 size_t rdsize
= BLOCKSIZE
< end
- beg
? BLOCKSIZE
: end
- beg
;
484 char diff_buffer
[BLOCKSIZE
];
486 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
487 if (bytes_read
== SAFE_READ_ERROR
)
489 read_diag_details (file
->stat_info
->orig_file_name
,
494 if (!zero_block_p (diff_buffer
, bytes_read
))
496 char begbuf
[INT_BUFSIZE_BOUND (off_t
)];
497 report_difference (file
->stat_info
,
498 _("File fragment at %s is not a hole"),
499 offtostr (beg
, begbuf
));
509 check_data_region (struct tar_sparse_file
*file
, size_t i
)
513 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
515 size_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
516 while (size_left
> 0)
519 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
520 char diff_buffer
[BLOCKSIZE
];
522 union block
*blk
= find_next_block ();
525 ERROR ((0, 0, _("Unexpected EOF in archive")));
528 set_next_block_after (blk
);
529 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
530 if (bytes_read
== SAFE_READ_ERROR
)
532 read_diag_details (file
->stat_info
->orig_file_name
,
533 (file
->stat_info
->sparse_map
[i
].offset
534 + file
->stat_info
->sparse_map
[i
].numbytes
539 file
->dumped_size
+= bytes_read
;
540 size_left
-= bytes_read
;
541 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
543 report_difference (file
->stat_info
, _("Contents differ"));
551 sparse_diff_file (int fd
, struct tar_stat_info
*st
)
554 struct tar_sparse_file file
;
561 if (!sparse_select_optab (&file
)
562 || !tar_sparse_init (&file
))
563 return dump_status_not_implemented
;
565 rc
= tar_sparse_decode_header (&file
);
566 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
568 rc
= check_sparse_region (&file
,
569 offset
, file
.stat_info
->sparse_map
[i
].offset
)
570 && check_data_region (&file
, i
);
571 offset
= file
.stat_info
->sparse_map
[i
].offset
572 + file
.stat_info
->sparse_map
[i
].numbytes
;
576 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
578 tar_sparse_done (&file
);
583 /* Old GNU Format. The sparse file information is stored in the
584 oldgnu_header in the following manner:
586 The header is marked with type 'S'. Its `size' field contains
587 the cumulative size of all non-empty blocks of the file. The
588 actual file size is stored in `realsize' member of oldgnu_header.
590 The map of the file is stored in a list of `struct sparse'.
591 Each struct contains offset to the block of data and its
592 size (both as octal numbers). The first file header contains
593 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
594 contains more structs, then the field `isextended' of the main
595 header is set to 1 (binary) and the `struct sparse_header'
596 header follows, containing at most 21 following structs
597 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
598 field of the extended header is set and the next extension header
601 enum oldgnu_add_status
609 oldgnu_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
611 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
614 /* Decode the sparse item S and add it to the sparse map of FILE */
615 static enum oldgnu_add_status
616 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
620 if (s
->numbytes
[0] == '\0')
622 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
623 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
625 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
626 || file
->stat_info
->archive_file_size
< 0)
629 sparse_add_map (file
->stat_info
, &sp
);
634 oldgnu_fixup_header (struct tar_sparse_file
*file
)
636 /* NOTE! st_size was initialized from the header
637 which actually contains archived size. The following fixes it */
638 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
639 file
->stat_info
->stat
.st_size
=
640 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
644 /* Convert old GNU format sparse data to internal representation */
646 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
649 union block
*h
= current_header
;
651 enum oldgnu_add_status rc
;
653 file
->stat_info
->sparse_map_avail
= 0;
654 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
656 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
661 for (ext_p
= h
->oldgnu_header
.isextended
;
662 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
664 h
= find_next_block ();
667 ERROR ((0, 0, _("Unexpected EOF in archive")));
670 set_next_block_after (h
);
671 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
672 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
677 ERROR ((0, 0, _("%s: invalid sparse archive member"),
678 file
->stat_info
->orig_file_name
));
685 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
686 struct sparse
*sp
, size_t sparse_size
)
688 for (; *pindex
< file
->stat_info
->sparse_map_avail
689 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
691 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
693 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
699 oldgnu_dump_header (struct tar_sparse_file
*file
)
701 off_t block_ordinal
= current_block_ordinal ();
705 blk
= start_header (file
->stat_info
);
706 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
707 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
708 blk
->oldgnu_header
.isextended
= 1;
710 /* Store the real file size */
711 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
712 /* Store the effective (shrunken) file size */
713 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
716 oldgnu_store_sparse_info (file
, &i
,
717 blk
->oldgnu_header
.sp
,
718 SPARSES_IN_OLDGNU_HEADER
);
719 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
720 finish_header (file
->stat_info
, blk
, block_ordinal
);
722 while (i
< file
->stat_info
->sparse_map_avail
)
724 blk
= find_next_block ();
725 memset (blk
->buffer
, 0, BLOCKSIZE
);
726 oldgnu_store_sparse_info (file
, &i
,
727 blk
->sparse_header
.sp
,
728 SPARSES_IN_SPARSE_HEADER
);
729 set_next_block_after (blk
);
730 if (i
< file
->stat_info
->sparse_map_avail
)
731 blk
->sparse_header
.isextended
= 1;
738 static struct tar_sparse_optab
const oldgnu_optab
= {
739 NULL
, /* No init function */
740 NULL
, /* No done function */
741 oldgnu_sparse_member_p
,
744 oldgnu_get_sparse_info
,
745 NULL
, /* No scan_block function */
747 sparse_extract_region
,
754 star_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
756 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
760 star_fixup_header (struct tar_sparse_file
*file
)
762 /* NOTE! st_size was initialized from the header
763 which actually contains archived size. The following fixes it */
764 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
765 file
->stat_info
->stat
.st_size
=
766 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
770 /* Convert STAR format sparse data to internal representation */
772 star_get_sparse_info (struct tar_sparse_file
*file
)
775 union block
*h
= current_header
;
777 enum oldgnu_add_status rc
= add_ok
;
779 file
->stat_info
->sparse_map_avail
= 0;
781 if (h
->star_in_header
.prefix
[0] == '\0'
782 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
784 /* Old star format */
785 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
787 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
791 ext_p
= h
->star_in_header
.isextended
;
796 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
798 h
= find_next_block ();
801 ERROR ((0, 0, _("Unexpected EOF in archive")));
804 set_next_block_after (h
);
805 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
806 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
811 ERROR ((0, 0, _("%s: invalid sparse archive member"),
812 file
->stat_info
->orig_file_name
));
819 static struct tar_sparse_optab
const star_optab
= {
820 NULL
, /* No init function */
821 NULL
, /* No done function */
822 star_sparse_member_p
,
825 star_get_sparse_info
,
826 NULL
, /* No scan_block function */
827 NULL
, /* No dump region function */
828 sparse_extract_region
,
832 /* GNU PAX sparse file format. The sparse file map is stored in
835 GNU.sparse.size Real size of the stored file
836 GNU.sparse.numblocks Number of blocks in the sparse map
837 repeat numblocks times
838 GNU.sparse.offset Offset of the next data block
839 GNU.sparse.numbytes Size of the next data block
844 pax_sparse_member_p (struct tar_sparse_file
*file
)
846 return file
->stat_info
->archive_file_size
!= file
->stat_info
->stat
.st_size
;
850 pax_dump_header (struct tar_sparse_file
*file
)
852 off_t block_ordinal
= current_block_ordinal ();
856 /* Store the real file size */
857 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
858 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
859 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
861 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
862 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
865 blk
= start_header (file
->stat_info
);
866 /* Store the effective (shrunken) file size */
867 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
868 finish_header (file
->stat_info
, blk
, block_ordinal
);
872 static struct tar_sparse_optab
const pax_optab
= {
873 NULL
, /* No init function */
874 NULL
, /* No done function */
877 NULL
, /* No decode_header function */
878 NULL
, /* No fixup_header function */
879 NULL
, /* No scan_block function */
881 sparse_extract_region
,