1 /* Functions for dealing with sparse files
3 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Forward declaration: the hook prototypes in struct tar_sparse_optab take
   a pointer to this structure before its members are defined.  */
23 struct tar_sparse_file
;
/* Scan phase handed to the scan_block hook through tar_sparse_scan.
   The enumerator list is not visible in this listing; sparse_scan_file
   passes scan_begin, scan_block and scan_end.  */
25 enum sparse_scan_state
/* Table of operations for one sparse archive format.  Every member is a
   function pointer returning bool whose first argument is the
   struct tar_sparse_file being processed.  The tar_sparse_* wrappers test
   a member for NULL before calling it, so a format may leave entries
   unset.  The size_t argument of dump_region and extract_region is passed
   by callers as an index into the file's sparse map.  */
32 struct tar_sparse_optab
34 bool (*init
) (struct tar_sparse_file
*);
35 bool (*done
) (struct tar_sparse_file
*);
36 bool (*sparse_member_p
) (struct tar_sparse_file
*);
37 bool (*dump_header
) (struct tar_sparse_file
*);
38 bool (*fixup_header
) (struct tar_sparse_file
*);
39 bool (*decode_header
) (struct tar_sparse_file
*);
40 bool (*scan_block
) (struct tar_sparse_file
*, enum sparse_scan_state
,
42 bool (*dump_region
) (struct tar_sparse_file
*, size_t);
43 bool (*extract_region
) (struct tar_sparse_file
*, size_t);
/* Working state for one sparse file: the descriptor that is read from or
   written to, position bookkeeping, the file's tar_stat_info (which holds
   the sparse map), and the operations table chosen by
   sparse_select_optab.  */
46 struct tar_sparse_file
48 int fd
; /* File descriptor */
49 bool seekable
; /* Is fd seekable? */
50 size_t offset
; /* Current offset in fd if seekable==false.
52 size_t dumped_size
; /* Number of bytes actually written
54 struct tar_stat_info
*stat_info
; /* Information about the file */
55 struct tar_sparse_optab
*optab
;
56 void *closure
; /* Any additional data optab calls might
/* Writes a zero-filled buffer to FILE->fd in pieces of at most sizeof buf
   bytes and adds the result of each write to FILE->offset until it
   reaches OFFSET.  A target behind the current position is tested for
   first; the statements guarded by that test and the handling of a
   failed write are not visible in this listing.
   NOTE(review): FILE->offset is declared size_t while OFFSET is off_t.
   Where size_t is at least as wide as off_t the difference is computed
   unsigned, so the `< 0' test can never be true -- confirm.  */
60 /* Dump zeros to file->fd until offset is reached. It is used instead of
61 lseek if the output file is not seekable */
63 dump_zeros (struct tar_sparse_file
*file
, off_t offset
)
67 if (offset
- file
->offset
< 0)
73 memset (buf
, 0, sizeof buf
);
74 while (file
->offset
< offset
)
76 size_t size
= offset
- file
->offset
;
79 if (size
> sizeof buf
)
81 wrbytes
= write (file
->fd
, buf
, size
);
88 file
->offset
+= wrbytes
;
/* Return the answer of the format's sparse_member_p hook for FILE when
   that hook is set.  NOTE(review): the value returned when the hook is
   NULL is not visible in this listing.  */
94 tar_sparse_member_p (struct tar_sparse_file
*file
)
96 if (file
->optab
->sparse_member_p
)
97 return file
->optab
->sparse_member_p (file
);
/* Reset FILE->dumped_size to zero, then run the format's init hook, if
   any, and return its result.  NOTE(review): the value returned when no
   init hook is set is not visible in this listing.  */
102 tar_sparse_init (struct tar_sparse_file
*file
)
104 file
->dumped_size
= 0;
105 if (file
->optab
->init
)
106 return file
->optab
->init (file
);
/* Run the format's done hook on FILE, if one is set, and return its
   result.  NOTE(review): the value returned when the hook is NULL is not
   visible in this listing.  */
111 tar_sparse_done (struct tar_sparse_file
*file
)
113 if (file
->optab
->done
)
114 return file
->optab
->done (file
);
/* Forward FILE, STATE and the block pointer to the format's scan_block
   hook when it is set, returning the hook's result.  The declaration of
   the third parameter and the value returned without a hook are not
   visible in this listing.  */
119 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
122 if (file
->optab
->scan_block
)
123 return file
->optab
->scan_block (file
, state
, block
);
/* Run the format's dump_region hook for index I of FILE when the hook is
   set, returning its result.  NOTE(review): the value returned when the
   hook is NULL is not visible in this listing.  */
128 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
130 if (file
->optab
->dump_region
)
131 return file
->optab
->dump_region (file
, i
);
/* Run the format's extract_region hook for index I of FILE when the hook
   is set, returning its result.  NOTE(review): the value returned when
   the hook is NULL is not visible in this listing.  */
136 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
138 if (file
->optab
->extract_region
)
139 return file
->optab
->extract_region (file
, i
);
/* Run the format's dump_header hook on FILE when it is set, returning
   its result.  NOTE(review): the value returned when the hook is NULL is
   not visible in this listing.  */
144 tar_sparse_dump_header (struct tar_sparse_file
*file
)
146 if (file
->optab
->dump_header
)
147 return file
->optab
->dump_header (file
);
/* Run the format's decode_header hook on FILE when it is set, returning
   its result.  NOTE(review): the value returned when the hook is NULL is
   not visible in this listing.  */
152 tar_sparse_decode_header (struct tar_sparse_file
*file
)
154 if (file
->optab
->decode_header
)
155 return file
->optab
->decode_header (file
);
/* Run the format's fixup_header hook on FILE when it is set, returning
   its result.  NOTE(review): the value returned when the hook is NULL is
   not visible in this listing.  */
160 tar_sparse_fixup_header (struct tar_sparse_file
*file
)
162 if (file
->optab
->fixup_header
)
163 return file
->optab
->fixup_header (file
);
/* Position FILE at OFFSET, either with lseek (SEEK_SET) on FILE->fd or by
   calling dump_zeros; the result of whichever is used is stored in `off'.
   seek_diag_details is called with the file's original name and OFFSET.
   NOTE(review): the condition choosing between the two paths (presumably
   FILE->seekable) and the test that triggers the diagnostic are not
   visible in this listing -- confirm against the full source.  */
169 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
)
174 off
= lseek (file
->fd
, offset
, SEEK_SET
);
176 off
= dump_zeros (file
, offset
);
180 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
/* BUFFER is the data to examine and SIZE the number of bytes in it;
   callers in this file pass the byte count returned by safe_read.  Only
   the signature is visible in this listing.  */
186 /* Takes a blockful of data and basically cruises through it to see if
187 it's made *entirely* of zeros, returning a 0 the instant it finds
188 something that is a nonzero, i.e., useful data. */
190 zero_block_p (char *buffer
, size_t size
)
/* Zero the BLOCKSIZE bytes starting at P.  The expansion is a single
   expression with no trailing semicolon, so `clear_block (x);' is exactly
   one statement and may be used as the body of an if that has an else.
   The argument is parenthesized so that any pointer expression is passed
   through intact.  */
#define clear_block(p) memset ((p), 0, BLOCKSIZE)
/* Number of sparse map entries sparse_add_map allocates the first time
   it runs for a file.  */
200 #define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
/* Append a copy of *SP to FILE->stat_info->sparse_map.  When the map
   pointer is NULL it is allocated with room for SPARSES_INIT_COUNT
   entries; when sparse_map_avail has caught up with sparse_map_size the
   size is doubled and the array grown with xrealloc.  sparse_map_avail
   is post-incremented as the entry is stored.  This function does not
   reset sparse_map_avail; the callers visible in this file set it to 0
   before adding entries.  */
203 sparse_add_map (struct tar_sparse_file
*file
, struct sp_array
*sp
)
205 if (file
->stat_info
->sparse_map
== NULL
)
207 file
->stat_info
->sparse_map
=
208 xmalloc (SPARSES_INIT_COUNT
* sizeof file
->stat_info
->sparse_map
[0]);
209 file
->stat_info
->sparse_map_size
= SPARSES_INIT_COUNT
;
211 else if (file
->stat_info
->sparse_map_avail
== file
->stat_info
->sparse_map_size
)
213 file
->stat_info
->sparse_map_size
*= 2;
214 file
->stat_info
->sparse_map
=
215 xrealloc (file
->stat_info
->sparse_map
,
216 file
->stat_info
->sparse_map_size
217 * sizeof file
->stat_info
->sparse_map
[0]);
219 file
->stat_info
->sparse_map
[file
->stat_info
->sparse_map_avail
++] = *sp
;
/* Seeks FILE to offset 0, zeroes the map counters (sparse_map_avail and
   archive_file_size), announces scan_begin to the format's scan hook and
   then reads FILE->fd in BLOCKSIZE pieces with safe_read until it
   returns 0 or SAFE_READ_ERROR.  Each piece is classified with
   zero_block_p.  `sp' accumulates the current data run (numbytes grows
   by the bytes read) and is handed to sparse_add_map; archive_file_size
   grows by the size of each data piece, and the piece is reported to the
   scan hook.  After the loop the pending `sp' is added and the result of
   the scan_end hook call is returned.
   The branch structure between these statements is only partly visible
   in this listing.
   NOTE(review): when the loop exits, count is either 0 or
   SAFE_READ_ERROR, yet it is still added to archive_file_size --
   confirm the error case is handled in lines not shown.  */
222 /* Scan the sparse file and create its map */
224 sparse_scan_file (struct tar_sparse_file
*file
)
226 static char buffer
[BLOCKSIZE
];
229 struct sp_array sp
= {0, 0};
231 if (!lseek_or_error (file
, 0))
233 clear_block (buffer
);
235 file
->stat_info
->sparse_map_avail
= 0;
236 file
->stat_info
->archive_file_size
= 0;
238 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
241 while ((count
= safe_read (file
->fd
, buffer
, sizeof buffer
)) != 0
242 && count
!= SAFE_READ_ERROR
)
244 /* Analyze the block */
245 if (zero_block_p (buffer
, count
))
249 sparse_add_map (file
, &sp
);
251 if (!tar_sparse_scan (file
, scan_block
, NULL
))
257 if (sp
.numbytes
== 0)
259 sp
.numbytes
+= count
;
260 file
->stat_info
->archive_file_size
+= count
;
261 if (!tar_sparse_scan (file
, scan_block
, buffer
))
266 clear_block (buffer
);
269 if (sp
.numbytes
== 0)
272 sparse_add_map (file
, &sp
);
273 file
->stat_info
->archive_file_size
+= count
;
274 return tar_sparse_scan (file
, scan_end
, NULL
);
/* Declarations of the three format tables, so that sparse_select_optab
   can take their addresses before the initialized definitions that appear
   later in the file.  */
277 static struct tar_sparse_optab oldgnu_optab
;
278 static struct tar_sparse_optab star_optab
;
279 static struct tar_sparse_optab pax_optab
;
/* Set FILE->optab according to the archive format.  The switch is on
   current_format, or on archive_format when current_format is
   DEFAULT_FORMAT.  GNU_FORMAT selects oldgnu_optab; pax_optab and
   star_optab are assigned under case labels that are not visible in this
   listing, as are the return statements.  Callers treat a false result
   as "format not supported".  */
282 sparse_select_optab (struct tar_sparse_file
*file
)
284 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
291 case GNU_FORMAT
: /*FIXME: This one should disappear? */
292 file
->optab
= &oldgnu_optab
;
296 file
->optab
= &pax_optab
;
300 file
->optab
= &star_optab
;
/* Copy data region I of FILE into the archive.  Seeks to
   sparse_map[I].offset, then, while bytes remain in the region, takes an
   archive block from find_next_block, zeroes all BLOCKSIZE bytes of it,
   fills it with up to BLOCKSIZE bytes read from FILE->fd via safe_read
   and commits it with set_next_block_after.  bytes_left is decreased and
   FILE->dumped_size increased by the number of bytes read.  A
   SAFE_READ_ERROR result is reported through read_diag_details; the
   remaining arguments of that call and the return statements are not
   visible in this listing.  */
310 sparse_dump_region (struct tar_sparse_file
*file
, size_t i
)
313 off_t bytes_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
315 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
318 while (bytes_left
> 0)
320 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
323 blk
= find_next_block ();
324 memset (blk
->buffer
, 0, BLOCKSIZE
);
325 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
326 if (bytes_read
== SAFE_READ_ERROR
)
328 read_diag_details (file
->stat_info
->orig_file_name
,
329 file
->stat_info
->sparse_map
[i
].offset
330 + file
->stat_info
->sparse_map
[i
].numbytes
336 bytes_left
-= bytes_read
;
337 file
->dumped_size
+= bytes_read
;
338 set_next_block_after (blk
);
/* Write data region I from the archive into FILE->fd.  After seeking to
   sparse_map[I].offset, write_size is set to the region's numbytes.  One
   branch (its condition is not visible here) handles a trailing hole by
   calling sys_truncate when FILE is seekable and truncate_warn if that
   call returns non-zero.  Otherwise archive blocks obtained from
   find_next_block are written with full_write in pieces of at most
   BLOCKSIZE bytes, adding the count written to both FILE->dumped_size
   and FILE->offset.  A count different from the requested size is
   reported via write_error_details, and the ERROR call reports an
   unexpected end of archive.  Return statements are not visible in this
   listing.
   NOTE(review): set_next_block_after (blk) runs before blk->buffer is
   passed to full_write -- presumably the buffer stays valid until the
   next find_next_block; confirm.  */
345 sparse_extract_region (struct tar_sparse_file
*file
, size_t i
)
349 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
352 write_size
= file
->stat_info
->sparse_map
[i
].numbytes
;
356 /* Last block of the file is a hole */
357 if (file
->seekable
&& sys_truncate (file
->fd
))
358 truncate_warn (file
->stat_info
->orig_file_name
);
360 else while (write_size
> 0)
363 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
364 union block
*blk
= find_next_block ();
367 ERROR ((0, 0, _("Unexpected EOF in archive")));
370 set_next_block_after (blk
);
371 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
373 file
->dumped_size
+= count
;
374 file
->offset
+= count
;
375 if (count
!= wrbytes
)
377 write_error_details (file
->stat_info
->orig_file_name
,
387 /* Interface functions */
/* Archive the sparse file described by FD and ST.  Returns
   dump_status_not_implemented when no format table can be selected or
   the format's init step fails.  Otherwise the file is mapped with
   sparse_scan_file; if that succeeded and the format has a dump_region
   hook, the header is written with tar_sparse_dump_header and regions
   are dumped in map order for as long as rc stays true.  pad_archive
   receives archive_file_size minus the bytes already dumped.  The result
   is dump_status_ok only when both tar_sparse_done and rc are true,
   otherwise dump_status_short.  The lines that copy FD and ST into
   `file' and the exact nesting of the statements are not visible in this
   listing.  */
389 sparse_dump_file (int fd
, struct tar_stat_info
*st
)
392 struct tar_sparse_file file
;
396 file
.seekable
= true; /* File *must* be seekable for dump to work */
399 if (!sparse_select_optab (&file
)
400 || !tar_sparse_init (&file
))
401 return dump_status_not_implemented
;
403 rc
= sparse_scan_file (&file
);
404 if (rc
&& file
.optab
->dump_region
)
406 tar_sparse_dump_header (&file
);
412 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
413 rc
= tar_sparse_dump_region (&file
, i
);
417 pad_archive(file
.stat_info
->archive_file_size
- file
.dumped_size
);
418 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* The test compares the block count reported by ST_NBLOCKS for ST->stat
   with st_size divided by ST_NBLOCKSIZE, rounded up: a file occupying
   fewer blocks than its size requires is considered sparse.  */
421 /* Returns true if the file represented by stat is a sparse one */
423 sparse_file_p (struct tar_stat_info
*st
)
425 return (ST_NBLOCKS (st
->stat
)
426 < (st
->stat
.st_size
/ ST_NBLOCKSIZE
427 + (st
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
/* Build a temporary struct tar_sparse_file, select the format table for
   it and return the answer of tar_sparse_member_p.  The lines that store
   ST in `file' and the value returned when no table can be selected are
   not visible in this listing.  */
431 sparse_member_p (struct tar_stat_info
*st
)
433 struct tar_sparse_file file
;
435 if (!sparse_select_optab (&file
))
438 return tar_sparse_member_p (&file
);
/* Build a temporary struct tar_sparse_file, select the format table for
   it and return the result of tar_sparse_fixup_header.  The lines that
   store ST in `file' and the value returned when no table can be
   selected are not visible in this listing.  */
442 sparse_fixup_header (struct tar_stat_info
*st
)
444 struct tar_sparse_file file
;
446 if (!sparse_select_optab (&file
))
449 return tar_sparse_fixup_header (&file
);
/* Extract a sparse member to FD.  `file.seekable' records whether
   lseek (fd, 0, SEEK_SET) returned 0.  Returns
   dump_status_not_implemented when no format table can be selected or
   init fails.  Otherwise the sparse map is decoded from the header and
   each region is extracted in order while rc stays true.  *SIZE receives
   archive_file_size minus the bytes already written.  The result is
   dump_status_ok only when both tar_sparse_done and rc are true,
   otherwise dump_status_short.  The lines that copy FD and ST into
   `file' are not visible in this listing.  */
453 sparse_extract_file (int fd
, struct tar_stat_info
*st
, off_t
*size
)
456 struct tar_sparse_file file
;
461 file
.seekable
= lseek (fd
, 0, SEEK_SET
) == 0;
464 if (!sparse_select_optab (&file
)
465 || !tar_sparse_init (&file
))
466 return dump_status_not_implemented
;
468 rc
= tar_sparse_decode_header (&file
);
469 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
470 rc
= tar_sparse_extract_region (&file
, i
);
471 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
472 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* Skip over a sparse member in the archive.  Returns
   dump_status_not_implemented when no format table can be selected or
   init fails.  Otherwise the header is decoded and skip_file is called
   with archive_file_size.  The result is dump_status_ok only when both
   tar_sparse_done and the decode step succeeded, otherwise
   dump_status_short.  The lines that store ST in `file' are not visible
   in this listing.  */
476 sparse_skip_file (struct tar_stat_info
*st
)
479 struct tar_sparse_file file
;
484 if (!sparse_select_optab (&file
)
485 || !tar_sparse_init (&file
))
486 return dump_status_not_implemented
;
488 rc
= tar_sparse_decode_header (&file
);
489 skip_file (file
.stat_info
->archive_file_size
);
490 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
/* One-block scratch buffer shared by check_sparse_region and
   check_data_region for data read back from the file on disk.  */
494 static char diff_buffer
[BLOCKSIZE
];
/* Check the part of FILE between BEG and END, which the caller passes as
   the gap before a data region.  Seeks to BEG, then reads into
   diff_buffer (cleared before each read) with a request size derived
   from END - BEG and tested against BLOCKSIZE.  A SAFE_READ_ERROR is
   reported via read_diag_details, and data that zero_block_p rejects is
   reported through report_difference.  The loop header, the clamping
   statement and the return statements are not visible in this listing.
   NOTE(review): BEG is an off_t but is formatted with %lu -- confirm the
   types match on all supported platforms or add a conversion.  */
497 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
499 if (!lseek_or_error (file
, beg
))
505 size_t rdsize
= end
- beg
;
507 if (rdsize
> BLOCKSIZE
)
509 clear_block (diff_buffer
);
510 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
511 if (bytes_read
== SAFE_READ_ERROR
)
513 read_diag_details (file
->stat_info
->orig_file_name
,
518 if (!zero_block_p (diff_buffer
, bytes_read
))
520 report_difference (file
->stat_info
,
521 _("File fragment at %lu is not a hole"), beg
);
/* Compare data region I of the file on disk with the archive.  Seeks to
   sparse_map[I].offset and, while bytes of the region remain, takes the
   next archive block, reads up to BLOCKSIZE bytes from FILE->fd into
   diff_buffer and compares the two with memcmp.  FILE->dumped_size grows
   and size_left shrinks by the bytes read.  Read errors go to
   read_diag_details, a mismatch to report_difference, and the ERROR call
   reports an unexpected end of archive.  Return statements are not
   visible in this listing.
   NOTE(review): memcmp covers rdsize bytes rather than bytes_read, and
   diff_buffer is not cleared before the read in the visible lines --
   confirm that short reads are handled.  */
531 check_data_region (struct tar_sparse_file
*file
, size_t i
)
535 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[i
].offset
))
537 size_left
= file
->stat_info
->sparse_map
[i
].numbytes
;
538 while (size_left
> 0)
541 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
543 union block
*blk
= find_next_block ();
546 ERROR ((0, 0, _("Unexpected EOF in archive")));
549 set_next_block_after (blk
);
550 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
551 if (bytes_read
== SAFE_READ_ERROR
)
553 read_diag_details (file
->stat_info
->orig_file_name
,
554 file
->stat_info
->sparse_map
[i
].offset
555 + file
->stat_info
->sparse_map
[i
].numbytes
560 file
->dumped_size
+= bytes_read
;
561 size_left
-= bytes_read
;
562 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
564 report_difference (file
->stat_info
, _("Contents differ"));
/* Compare a sparse archive member with the file open on FD.  Returns
   dump_status_not_implemented when no format table can be selected or
   init fails.  After decoding the header, each map entry is checked in
   order while rc stays true: first the gap from `offset' to the entry's
   offset with check_sparse_region, then the entry's data with
   check_data_region.  `offset' is then moved to the end of the entry.
   skip_file is called with archive_file_size minus the bytes already
   consumed, followed by tar_sparse_done.  The condition guarding the
   skip_file call, the initialisation of `file' and `offset', and the
   final return are not visible in this listing.  */
572 sparse_diff_file (int fd
, struct tar_stat_info
*st
)
575 struct tar_sparse_file file
;
582 if (!sparse_select_optab (&file
)
583 || !tar_sparse_init (&file
))
584 return dump_status_not_implemented
;
586 rc
= tar_sparse_decode_header (&file
);
587 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
589 rc
= check_sparse_region (&file
,
590 offset
, file
.stat_info
->sparse_map
[i
].offset
)
591 && check_data_region (&file
, i
);
592 offset
= file
.stat_info
->sparse_map
[i
].offset
593 + file
.stat_info
->sparse_map
[i
].numbytes
;
597 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
599 tar_sparse_done (&file
);
604 /* Old GNU Format. The sparse file information is stored in the
605 oldgnu_header in the following manner:
607 The header is marked with type 'S'. Its `size' field contains
608 the cumulative size of all non-empty blocks of the file. The
609 actual file size is stored in `realsize' member of oldgnu_header.
611 The map of the file is stored in a list of `struct sparse'.
612 Each struct contains offset to the block of data and its
613 size (both as octal numbers). The first file header contains
614 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
615 contains more structs, then the field `isextended' of the main
616 header is set to 1 (binary) and the `struct sparse_header'
617 header follows, containing at most 21 following structs
618 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
619 field of the extended header is set and the next extension header follows. */
/* Result type of oldgnu_add_sparse.  The enumerator list is not visible
   in this listing; add_ok is the value the header decoders test for to
   keep reading entries.  */
622 enum oldgnu_add_status
/* In the old GNU format a member is sparse when the typeflag of
   current_header is GNUTYPE_SPARSE.  FILE is not used.  */
630 oldgnu_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
632 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
/* Decode one on-disk entry S and append it to FILE's sparse map.  An
   entry whose numbytes field begins with a NUL byte is tested for first
   (the action taken is not visible in this listing).  Otherwise offset
   and numbytes are converted with OFF_FROM_HEADER and SIZE_FROM_HEADER.
   A validity test, only partly visible, compares offset + numbytes with
   st_size and archive_file_size with 0, and the entry is stored through
   sparse_add_map.
   NOTE(review): the original comment below mentions an obstack, but the
   visible code stores the entry only through sparse_add_map.  */
635 /* Add a sparse item to the sparse file and its obstack */
636 static enum oldgnu_add_status
637 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
641 if (s
->numbytes
[0] == '\0')
643 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
644 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
646 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
647 || file
->stat_info
->archive_file_size
< 0)
650 sparse_add_map (file
, &sp
);
/* Swap the size fields into place for an old GNU sparse member: the
   value currently in st_size is saved as archive_file_size, and st_size
   is then replaced by the realsize field of the old GNU header.  */
655 oldgnu_fixup_header (struct tar_sparse_file
*file
)
657 /* NOTE! st_size was initialized from the header
658 which actually contains archived size. The following fixes it */
659 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
660 file
->stat_info
->stat
.st_size
=
661 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
/* Resets sparse_map_avail to 0, then feeds the SPARSES_IN_OLDGNU_HEADER
   entries of the main header to oldgnu_add_sparse.  While the result is
   add_ok and the isextended flag of the header just processed is set,
   the next archive block is fetched and up to SPARSES_IN_SPARSE_HEADER
   further entries are decoded from it.  The ERROR calls report an
   unexpected end of archive and an invalid sparse member; their guarding
   conditions and the return statements are not visible in this listing.
   NOTE(review): rc has static storage duration although it is assigned
   before each visible use -- confirm whether `static' is intentional.
   NOTE(review): h is still read after set_next_block_after (h) --
   presumably the block stays valid until the next find_next_block;
   confirm.  */
665 /* Convert old GNU format sparse data to internal representation */
667 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
670 union block
*h
= current_header
;
672 static enum oldgnu_add_status rc
;
674 file
->stat_info
->sparse_map_avail
= 0;
675 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
677 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
682 for (ext_p
= h
->oldgnu_header
.isextended
;
683 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
685 h
= find_next_block ();
688 ERROR ((0, 0, _("Unexpected EOF in archive")));
691 set_next_block_after (h
);
692 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
693 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
698 ERROR ((0, 0, _("%s: invalid sparse archive member"),
699 file
->stat_info
->orig_file_name
));
/* Encode sparse map entries, starting at index *PINDEX, into at most
   SPARSE_SIZE consecutive header slots beginning at SP.  The loop stops
   when either the map or the slots run out.  *PINDEX is advanced once
   per entry, so a later call continues where this one stopped.  The
   offset is encoded with OFF_TO_CHARS and numbytes with SIZE_TO_CHARS;
   the destination arguments of those calls are not visible in this
   listing.  */
706 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
707 struct sparse
*sp
, size_t sparse_size
)
709 for (; *pindex
< file
->stat_info
->sparse_map_avail
710 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
712 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
714 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
/* Write the old GNU header(s) for FILE.  The main header is started with
   start_header and marked GNUTYPE_SPARSE.  realsize receives st_size and
   the ordinary size field receives archive_file_size.  The first
   SPARSES_IN_OLDGNU_HEADER map entries go into the main header, whose
   isextended flag ends up true exactly when entries remain, and the
   header is completed with finish_header.  Remaining entries are written
   SPARSES_IN_SPARSE_HEADER at a time into zeroed extension blocks, with
   isextended set to 1 on a block when more entries follow.  The
   initialisation of `i' and the return statements are not visible in
   this listing.
   NOTE(review): the early `isextended = 1' assignment is overwritten by
   the later `isextended = i < sparse_map_avail'.
   NOTE(review): in the extension loop, isextended is written to blk
   after set_next_block_after (blk) has released the block -- confirm
   that the write still reaches the archive when the block was the last
   one in the record.  */
720 oldgnu_dump_header (struct tar_sparse_file
*file
)
722 off_t block_ordinal
= current_block_ordinal ();
726 blk
= start_header (file
->stat_info
);
727 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
728 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
729 blk
->oldgnu_header
.isextended
= 1;
731 /* Store the real file size */
732 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
733 /* Store the effective (shrunken) file size */
734 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
737 oldgnu_store_sparse_info (file
, &i
,
738 blk
->oldgnu_header
.sp
,
739 SPARSES_IN_OLDGNU_HEADER
);
740 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
741 finish_header (file
->stat_info
, blk
, block_ordinal
);
743 while (i
< file
->stat_info
->sparse_map_avail
)
745 blk
= find_next_block ();
746 memset (blk
->buffer
, 0, BLOCKSIZE
);
747 oldgnu_store_sparse_info (file
, &i
,
748 blk
->sparse_header
.sp
,
749 SPARSES_IN_SPARSE_HEADER
);
750 set_next_block_after (blk
);
751 if (i
< file
->stat_info
->sparse_map_avail
)
752 blk
->sparse_header
.isextended
= 1;
/* Operations table for the old GNU sparse format.  The initializer is
   positional, so entries follow the member order of
   struct tar_sparse_optab.  This format has no init, done or scan_block
   hook.  Some initializer lines are not visible in this listing.  */
759 static struct tar_sparse_optab oldgnu_optab
= {
760 NULL
, /* No init function */
761 NULL
, /* No done function */
762 oldgnu_sparse_member_p
,
765 oldgnu_get_sparse_info
,
766 NULL
, /* No scan_block function */
768 sparse_extract_region
,
/* For the star format a member is sparse when the typeflag of
   current_header is GNUTYPE_SPARSE.  FILE is not used.  */
775 star_sparse_member_p (struct tar_sparse_file
*file
__attribute__ ((unused
)))
777 return current_header
->header
.typeflag
== GNUTYPE_SPARSE
;
/* Swap the size fields into place for a star sparse member: the value
   currently in st_size is saved as archive_file_size, and st_size is
   then replaced by the realsize field of the star header.  */
781 star_fixup_header (struct tar_sparse_file
*file
)
783 /* NOTE! st_size was initialized from the header
784 which actually contains archived size. The following fixes it */
785 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
786 file
->stat_info
->stat
.st_size
=
787 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
/* Resets sparse_map_avail to 0.  When the header's prefix field is empty
   and byte 10 of the first entry's offset field is non-NUL, the header
   is treated as "old star format": its SPARSES_IN_STAR_HEADER in-header
   entries are decoded with oldgnu_add_sparse and ext_p is taken from the
   header's isextended flag.  While the result is add_ok and ext_p is
   set, extension blocks are fetched and up to SPARSES_IN_STAR_EXT_HEADER
   entries are decoded from each.  The ERROR calls report an unexpected
   end of archive and an invalid sparse member.  The alternative branch
   of the format test, the guarding conditions of the ERROR calls and the
   return statements are not visible in this listing.
   NOTE(review): rc has static storage duration -- confirm whether
   `static' is intentional.  */
791 /* Convert STAR format sparse data to internal representation */
793 star_get_sparse_info (struct tar_sparse_file
*file
)
796 union block
*h
= current_header
;
798 static enum oldgnu_add_status rc
;
800 file
->stat_info
->sparse_map_avail
= 0;
802 if (h
->star_in_header
.prefix
[0] == '\0'
803 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
805 /* Old star format */
806 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
808 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
812 ext_p
= h
->star_in_header
.isextended
;
817 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
819 h
= find_next_block ();
822 ERROR ((0, 0, _("Unexpected EOF in archive")));
825 set_next_block_after (h
);
826 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
827 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
832 ERROR ((0, 0, _("%s: invalid sparse archive member"),
833 file
->stat_info
->orig_file_name
));
/* Operations table for the star sparse format.  The initializer is
   positional, so entries follow the member order of
   struct tar_sparse_optab.  This format has no init, done, scan_block or
   dump_region hook.  Some initializer lines are not visible in this
   listing.  */
840 static struct tar_sparse_optab star_optab
= {
841 NULL
, /* No init function */
842 NULL
, /* No done function */
843 star_sparse_member_p
,
846 star_get_sparse_info
,
847 NULL
, /* No scan_block function */
848 NULL
, /* No dump region function */
849 sparse_extract_region
,
853 /* GNU PAX sparse file format. The sparse file map is stored in
the extended header (see the xheader_store calls) under these keywords:
856 GNU.sparse.size Real size of the stored file
857 GNU.sparse.numblocks Number of blocks in the sparse map
858 repeat numblocks times
859 GNU.sparse.offset Offset of the next data block
860 GNU.sparse.numbytes Size of the next data block
end repeat */
/* For the PAX format a member is reported as sparse when its
   archive_file_size differs from st_size.  */
865 pax_sparse_member_p (struct tar_sparse_file
*file
)
867 return file
->stat_info
->archive_file_size
!= file
->stat_info
->stat
.st_size
;
/* Write the PAX header for FILE.  GNU.sparse.size and
   GNU.sparse.numblocks are stored through xheader_store, followed by one
   GNU.sparse.offset / GNU.sparse.numbytes pair per sparse map entry,
   with the address of the entry index passed as the last argument.  The
   header is then started with start_header, its size field is set from
   archive_file_size, and it is completed with finish_header using the
   block ordinal captured on entry.  Local declarations and the return
   statement are not visible in this listing.  */
871 pax_dump_header (struct tar_sparse_file
*file
)
873 off_t block_ordinal
= current_block_ordinal ();
877 /* Store the real file size */
878 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
879 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
880 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
882 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
883 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
886 blk
= start_header (file
->stat_info
);
887 /* Store the effective (shrunken) file size */
888 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
889 finish_header (file
->stat_info
, blk
, block_ordinal
);
893 static struct tar_sparse_optab pax_optab
= {
894 NULL
, /* No init function */
895 NULL
, /* No done function */
898 NULL
, /* No decode_header function */
899 NULL
, /* No fixup_header function */
900 NULL
, /* No scan_block function */
902 sparse_extract_region
,