/* Functions for dealing with sparse files

   Copyright (C) 2003 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any later
   version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
   Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
23 struct tar_sparse_file
;
25 enum sparse_scan_state
/* Table of format-specific hooks.  Every member may be NULL: each
   tar_sparse_* wrapper in this file tests its member before calling
   through it.  */
32 struct tar_sparse_optab
/* Called by tar_sparse_init.  */
34 bool (*init
) (struct tar_sparse_file
*);
/* Called by tar_sparse_done.  */
35 bool (*done
) (struct tar_sparse_file
*);
/* Called by tar_sparse_dump_header.  */
36 bool (*dump_header
) (struct tar_sparse_file
*);
/* Called by tar_sparse_decode_header.  */
37 bool (*decode_header
) (struct tar_sparse_file
*);
/* Called by tar_sparse_scan with the scan state and a block pointer
   (the last parameter is on a line missing from this excerpt).  */
38 bool (*scan_block
) (struct tar_sparse_file
*, enum sparse_scan_state
,
/* Called by tar_sparse_dump_region with an index into the sparse map.  */
40 bool (*dump_region
) (struct tar_sparse_file
*, size_t index
);
/* Called by tar_sparse_extract_region with an index into the sparse map.  */
41 bool (*extract_region
) (struct tar_sparse_file
*, size_t index
);
/* State shared by the routines in this file while one sparse file is
   being processed.  */
44 struct tar_sparse_file
46 int fd
; /* File descriptor */
47 size_t dumped_size
; /* Number of bytes actually written; reset by tar_sparse_init */
49 struct tar_stat_info
*stat_info
; /* Information about the file */
50 struct tar_sparse_optab
*optab
; /* Hooks chosen by sparse_select_optab */
51 void *closure
; /* Any additional data optab calls might need */
/* Reset FILE's dumped_size counter, then call the format's init hook if
   the operations table has one and return its result.  The value returned
   when the hook is NULL is on a line missing from this excerpt.  */
56 tar_sparse_init (struct tar_sparse_file
*file
)
58 file
->dumped_size
= 0;
59 if (file
->optab
->init
)
60 return file
->optab
->init (file
);
/* Call the format's done hook for FILE if the operations table has one
   and return its result.  The value returned when the hook is NULL is on
   a line missing from this excerpt.  */
65 tar_sparse_done (struct tar_sparse_file
*file
)
67 if (file
->optab
->done
)
68 return file
->optab
->done (file
);
/* Forward FILE, STATE and the block pointer to the format's scan_block
   hook if the operations table has one, and return its result.  The final
   parameter's declaration and the value returned when the hook is NULL
   are on lines missing from this excerpt.  */
73 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
76 if (file
->optab
->scan_block
)
77 return file
->optab
->scan_block (file
, state
, block
);
/* Call the format's dump_region hook for entry INDEX of FILE's sparse map
   if the operations table has one, and return its result.  The value
   returned when the hook is NULL is on a line missing from this
   excerpt.  */
82 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t index
)
84 if (file
->optab
->dump_region
)
85 return file
->optab
->dump_region (file
, index
);
/* Call the format's extract_region hook for entry INDEX of FILE's sparse
   map if the operations table has one, and return its result.  The value
   returned when the hook is NULL is on a line missing from this
   excerpt.  */
90 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t index
)
92 if (file
->optab
->extract_region
)
93 return file
->optab
->extract_region (file
, index
);
/* Call the format's dump_header hook for FILE if the operations table has
   one and return its result.  The value returned when the hook is NULL is
   on a line missing from this excerpt.  */
98 tar_sparse_dump_header (struct tar_sparse_file
*file
)
100 if (file
->optab
->dump_header
)
101 return file
->optab
->dump_header (file
);
/* Call the format's decode_header hook for FILE if the operations table
   has one and return its result.  The value returned when the hook is
   NULL is on a line missing from this excerpt.  */
106 tar_sparse_decode_header (struct tar_sparse_file
*file
)
108 if (file
->optab
->decode_header
)
109 return file
->optab
->decode_header (file
);
/* Reposition FILE's descriptor with lseek (OFFSET, WHENCE).  A negative
   lseek result is reported through seek_diag_details, which is given the
   file's orig_file_name and OFFSET.  The return statements are on lines
   missing from this excerpt; callers test the result with `!'.  */
115 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
, int whence
)
117 if (lseek (file
->fd
, offset
, whence
) < 0)
119 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
125 /* Takes a blockful of data and basically cruises through it to see if
126 it's made *entirely* of zeros, returning a 0 the instant it finds
127 something that is a nonzero, i.e., useful data. */
129 zero_block_p (char *buffer
, size_t size
)
/* Zero the BLOCKSIZE bytes starting at P.  The expansion deliberately has
   no trailing semicolon, so `clear_block (x);' is a single statement and
   can be used as the body of an if/else.  */
#define clear_block(p) memset ((p), 0, BLOCKSIZE)
139 #define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
142 sparse_add_map (struct tar_sparse_file
*file
, struct sp_array
*sp
)
144 if (file
->stat_info
->sparse_map
== NULL
)
146 file
->stat_info
->sparse_map
=
147 xmalloc (SPARSES_INIT_COUNT
* sizeof file
->stat_info
->sparse_map
[0]);
148 file
->stat_info
->sparse_map_size
= SPARSES_INIT_COUNT
;
150 else if (file
->stat_info
->sparse_map_avail
== file
->stat_info
->sparse_map_size
)
152 file
->stat_info
->sparse_map_size
*= 2;
153 file
->stat_info
->sparse_map
=
154 xrealloc (file
->stat_info
->sparse_map
,
155 file
->stat_info
->sparse_map_size
156 * sizeof file
->stat_info
->sparse_map
[0]);
158 file
->stat_info
->sparse_map
[file
->stat_info
->sparse_map_avail
++] = *sp
;
161 /* Scan the sparse file and create its map */
/* Read FILE from its start in pieces of at most BLOCKSIZE bytes and build
   its sparse map.  Pieces that zero_block_p accepts are treated as holes;
   other pieces extend the current data region SP and are added to
   archive_file_size.  tar_sparse_scan is called with scan_begin before
   the loop, with scan_block inside it, and with scan_end for the result.
   Several declarations, return statements and braces are on lines missing
   from this excerpt.  */
163 sparse_scan_file (struct tar_sparse_file
*file
)
165 static char buffer
[BLOCKSIZE
];
168 struct sp_array sp
= {0, 0};
170 if (!lseek_or_error (file
, 0, SEEK_SET
))
172 clear_block (buffer
);
174 file
->stat_info
->sparse_map_size
= 0;
175 file
->stat_info
->archive_file_size
= 0;
177 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
180 while ((count
= safe_read (file
->fd
, buffer
, sizeof buffer
)) > 0)
182 /* Analyze the block */
183 if (zero_block_p (buffer
, count
))
187 sparse_add_map (file
, &sp
);
189 if (!tar_sparse_scan (file
, scan_block
, NULL
))
195 if (sp
.numbytes
== 0)
197 sp
.numbytes
+= count
;
198 file
->stat_info
->archive_file_size
+= count
;
199 if (!tar_sparse_scan (file
, scan_block
, buffer
))
204 clear_block (buffer
);
/* NOTE(review): when no data region is pending, the final entry is placed
   at offset - 1; confirm what that yields for an empty file (the handling
   of offset is on lines missing from this excerpt).  */
207 if (sp
.numbytes
== 0)
209 sp
.offset
= offset
- 1;
212 sparse_add_map (file
, &sp
);
/* NOTE(review): here count holds the safe_read result that ended the loop,
   i.e. a value that failed the `> 0' test; confirm that adding it is
   intended, in particular if safe_read can report an error through it.  */
213 file
->stat_info
->archive_file_size
+= count
;
214 return tar_sparse_scan (file
, scan_end
, NULL
);
217 static struct tar_sparse_optab oldgnu_optab
;
218 static struct tar_sparse_optab star_optab
;
219 static struct tar_sparse_optab pax_optab
;
/* Point FILE->optab at the hook table for the archive format in use.
   The format is archive_format when current_format is DEFAULT_FORMAT and
   current_format otherwise.  The tables assigned are oldgnu_optab,
   pax_optab and star_optab; apart from GNU_FORMAT, the case labels that
   select them, and the return statements, are on lines missing from this
   excerpt.  */
222 sparse_select_optab (struct tar_sparse_file
*file
)
224 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
231 case GNU_FORMAT
: /*FIXME: This one should disappear? */
232 file
->optab
= &oldgnu_optab
;
236 file
->optab
= &pax_optab
;
240 file
->optab
= &star_optab
;
/* Copy data region INDEX of FILE's sparse map into the archive.  The file
   is positioned at the region's offset, then read in pieces of at most
   BLOCKSIZE bytes into blocks obtained from find_next_block.  Each block
   is zeroed before the read, so a short final piece is zero-padded.
   dumped_size grows by the number of bytes read.  The test that leads to
   read_diag_details, the loop opener and the return statements are on
   lines missing from this excerpt.  */
250 sparse_dump_region (struct tar_sparse_file
*file
, size_t index
)
253 off_t bytes_left
= file
->stat_info
->sparse_map
[index
].numbytes
;
255 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[index
].offset
,
261 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
264 blk
= find_next_block ();
265 memset (blk
->buffer
, 0, BLOCKSIZE
);
266 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
269 read_diag_details (file
->stat_info
->orig_file_name
,
270 file
->stat_info
->sparse_map
[index
].offset
271 + file
->stat_info
->sparse_map
[index
].numbytes
/* NOTE(review): if a read returns 0 before the region is exhausted,
   bytes_left does not shrink; confirm how end of file is handled on the
   lines missing from this excerpt.  */
277 bytes_left
-= bytes_read
;
278 file
->dumped_size
+= bytes_read
;
279 set_next_block_after (blk
);
281 while (bytes_left
> 0);
/* Write data region INDEX of FILE's sparse map from the archive to the
   file.  The file is positioned at the region's offset; archive blocks
   are then fetched with find_next_block and written with full_write in
   pieces of at most BLOCKSIZE bytes.  dumped_size grows by the number of
   bytes written.  A write that transfers fewer bytes than requested is
   reported through write_error_details.  The test that leads to the
   "Unexpected EOF" error, the update of write_size and the return
   statements are on lines missing from this excerpt.  */
286 sparse_extract_region (struct tar_sparse_file
*file
, size_t index
)
290 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[index
].offset
,
293 write_size
= file
->stat_info
->sparse_map
[index
].numbytes
;
294 while (write_size
> 0)
297 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
298 union block
*blk
= find_next_block ();
301 ERROR ((0, 0, _("Unexpected EOF in archive")));
304 set_next_block_after (blk
);
305 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
307 file
->dumped_size
+= count
;
308 if (count
!= wrbytes
)
310 write_error_details (file
->stat_info
->orig_file_name
,
320 /* Interface functions */
/* Dump the sparse file described by STAT.  Steps visible here: select the
   format hooks and run tar_sparse_init (failure of either returns
   dump_status_not_implemented); scan the file to build its map; when the
   scan succeeded and the format has a dump_region hook, write the header;
   dump the mapped regions until one fails; pad the archive by
   archive_file_size - dumped_size.  The result is dump_status_ok only if
   tar_sparse_done and all earlier steps succeeded, and dump_status_short
   otherwise.  How FD is used, and how the statements nest, is on lines
   missing from this excerpt.  */
322 sparse_dump_file (int fd
, struct tar_stat_info
*stat
)
325 struct tar_sparse_file file
;
327 file
.stat_info
= stat
;
330 if (!sparse_select_optab (&file
)
331 || !tar_sparse_init (&file
))
332 return dump_status_not_implemented
;
334 rc
= sparse_scan_file (&file
);
335 if (rc
&& file
.optab
->dump_region
)
/* NOTE(review): the result of tar_sparse_dump_header is not used; confirm
   that a failed header dump may be ignored.  */
337 tar_sparse_dump_header (&file
);
343 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
344 rc
= tar_sparse_dump_region (&file
, i
);
348 pad_archive(file
.stat_info
->archive_file_size
- file
.dumped_size
);
349 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
352 /* Returns true if the file represented by stat is a sparse one */
354 sparse_file_p (struct tar_stat_info
*stat
)
356 return (ST_NBLOCKS (stat
->stat
)
357 < (stat
->stat
.st_size
/ ST_NBLOCKSIZE
358 + (stat
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
/* Extract the sparse archive member described by STAT.  Steps visible
   here: select the format hooks and run tar_sparse_init (failure of
   either returns dump_status_not_implemented); decode the sparse header;
   extract the mapped regions until one fails; store
   archive_file_size - dumped_size in *SIZE.  The result is dump_status_ok
   only if tar_sparse_done and all earlier steps succeeded, and
   dump_status_short otherwise.  How FD is used is on lines missing from
   this excerpt.  */
362 sparse_extract_file (int fd
, struct tar_stat_info
*stat
, off_t
*size
)
365 struct tar_sparse_file file
;
368 file
.stat_info
= stat
;
371 if (!sparse_select_optab (&file
)
372 || !tar_sparse_init (&file
))
373 return dump_status_not_implemented
;
375 rc
= tar_sparse_decode_header (&file
);
376 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
377 rc
= tar_sparse_extract_region (&file
, i
);
378 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
379 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
383 static char diff_buffer
[BLOCKSIZE
];
/* Check that the part of FILE from offset BEG up to END reads back as
   zeros, i.e. that it can stand for a hole.  The file is positioned at
   BEG and read into diff_buffer in pieces limited to BLOCKSIZE bytes.  A
   piece that zero_block_p rejects is reported with report_difference.
   The loop header, the read-error test and the return statements are on
   lines missing from this excerpt.  */
386 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
388 if (!lseek_or_error (file
, beg
, SEEK_SET
))
/* NOTE(review): END - BEG is an off_t difference converted to size_t
   before it is compared with BLOCKSIZE.  Where size_t is narrower than
   off_t the value can be truncated first; clamping before the conversion
   would avoid that.  */
394 size_t rdsize
= end
- beg
;
396 if (rdsize
> BLOCKSIZE
)
398 clear_block (diff_buffer
);
399 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
402 read_diag_details (file
->stat_info
->orig_file_name
,
407 if (!zero_block_p (diff_buffer
, bytes_read
))
/* NOTE(review): %lu expects an unsigned long, but BEG is an off_t; the
   argument needs a cast or a different conversion.  */
409 report_difference (file
->stat_info
,
410 _("File fragment at %lu is not a hole"), beg
);
/* Compare data region INDEX of FILE's sparse map with the archive.  The
   file is positioned at the region's offset; for each piece of at most
   BLOCKSIZE bytes an archive block is fetched with find_next_block, the
   same number of bytes is requested from the file into diff_buffer, and
   the two are compared with memcmp.  dumped_size grows by the number of
   bytes read.  A mismatch is reported as "Contents differ".  The tests
   that lead to the EOF and read-error reports, and the return statements,
   are on lines missing from this excerpt.  */
420 check_data_region (struct tar_sparse_file
*file
, size_t index
)
424 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[index
].offset
,
427 size_left
= file
->stat_info
->sparse_map
[index
].numbytes
;
428 while (size_left
> 0)
431 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
433 union block
*blk
= find_next_block ();
436 ERROR ((0, 0, _("Unexpected EOF in archive")));
439 set_next_block_after (blk
);
440 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
443 read_diag_details (file
->stat_info
->orig_file_name
,
444 file
->stat_info
->sparse_map
[index
].offset
445 + file
->stat_info
->sparse_map
[index
].numbytes
450 file
->dumped_size
+= bytes_read
;
451 size_left
-= bytes_read
;
/* NOTE(review): the comparison covers rdsize bytes even when the read
   returned fewer, so bytes left in diff_buffer by an earlier read take
   part in it; a read of 0 bytes also leaves size_left unchanged.  Confirm
   how a file shorter than the region is meant to be handled.  */
452 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
454 report_difference (file
->stat_info
, _("Contents differ"));
/* Compare the sparse archive member described by STAT with the file.
   Steps visible here: select the format hooks and run tar_sparse_init
   (failure of either returns dump_status_not_implemented); decode the
   sparse header; for each map entry, check that the gap from the end of
   the previous entry to this entry's offset is a hole and that the
   entry's data matches; skip the archive data not consumed
   (archive_file_size - dumped_size); call tar_sparse_done, whose result
   is not used.  The initial value of offset, how FD is used and the
   final return are on lines missing from this excerpt.  */
462 sparse_diff_file (int fd
, struct tar_stat_info
*stat
)
465 struct tar_sparse_file file
;
469 file
.stat_info
= stat
;
472 if (!sparse_select_optab (&file
)
473 || !tar_sparse_init (&file
))
474 return dump_status_not_implemented
;
476 rc
= tar_sparse_decode_header (&file
);
477 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
479 rc
= check_sparse_region (&file
,
480 offset
, file
.stat_info
->sparse_map
[i
].offset
)
481 && check_data_region (&file
, i
);
482 offset
= file
.stat_info
->sparse_map
[i
].offset
483 + file
.stat_info
->sparse_map
[i
].numbytes
;
487 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
489 tar_sparse_done (&file
);
/* Old GNU Format.  The sparse file information is stored in the
   oldgnu_header in the following manner:

   The header is marked with type 'S'.  Its `size' field contains
   the cumulative size of all non-empty blocks of the file.  The
   actual file size is stored in the `realsize' member of oldgnu_header.

   The map of the file is stored in a list of `struct sparse'.
   Each struct contains the offset of a block of data and its
   size (both as octal numbers).  The first file header contains
   at most 4 such structs (SPARSES_IN_OLDGNU_HEADER).  If the map
   contains more structs, then the field `isextended' of the main
   header is set to 1 (binary) and a `struct sparse_header'
   header follows, containing at most 21 more structs
   (SPARSES_IN_SPARSE_HEADER).  If still more structs follow, the
   `isextended' field of the extension header is set and another
   extension header follows, and so on.  */
512 enum oldgnu_add_status
519 /* Decode the header entry S and append it to FILE's sparse map.
   An entry whose numbytes field starts with a NUL byte is tested first,
   before anything is decoded.  Otherwise offset and numbytes are decoded
   with OFF_FROM_HEADER and SIZE_FROM_HEADER and checked against the
   file's st_size and archive_file_size before sparse_add_map is called.
   The first operand of that check and every return statement are on
   lines missing from this excerpt.  */
520 static enum oldgnu_add_status
521 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
525 if (s
->numbytes
[0] == '\0')
527 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
528 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
/* NOTE(review): sp.offset + sp.numbytes is computed from archive-supplied
   values; confirm that the sum cannot overflow before it is compared.  */
530 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
531 || file
->stat_info
->archive_file_size
< 0)
534 sparse_add_map (file
, &sp
);
538 /* Convert old GNU format sparse data to internal representation.
   The entries of the main header are added first; while the isextended
   flag of the block just processed is set and no entry has been rejected,
   further blocks are fetched with find_next_block and their entries are
   added.  The tests that lead to the two ERROR calls and the return
   statements are on lines missing from this excerpt.
539 FIXME: Clobbers current_header! */
541 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
544 union block
*h
= current_header
;
/* NOTE(review): rc is declared static although it is assigned before the
   uses visible here; confirm whether static storage is needed.  */
546 static enum oldgnu_add_status rc
;
548 /* FIXME: note this! st_size was initialized from the header
549 which actually contains archived size. The following fixes it */
550 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
551 file
->stat_info
->stat
.st_size
=
552 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
554 file
->stat_info
->sparse_map_size
= 0;
555 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
557 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
562 for (ext_p
= h
->oldgnu_header
.isextended
;
563 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
565 h
= find_next_block ();
568 ERROR ((0, 0, _("Unexpected EOF in archive")));
571 set_next_block_after (h
);
572 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
573 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
578 ERROR ((0, 0, _("%s: invalid sparse archive member"),
579 file
->stat_info
->orig_file_name
));
586 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
587 struct sparse
*sp
, size_t sparse_size
)
589 for (; *pindex
< file
->stat_info
->sparse_map_avail
590 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
592 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
594 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
600 oldgnu_dump_header (struct tar_sparse_file
*file
)
602 off_t block_ordinal
= current_block_ordinal ();
606 blk
= start_header (file
->stat_info
);
607 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
608 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
609 blk
->oldgnu_header
.isextended
= 1;
611 /* Store the real file size */
612 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
613 /* Store the effective (shrunken) file size */
614 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
617 oldgnu_store_sparse_info (file
, &i
,
618 blk
->oldgnu_header
.sp
,
619 SPARSES_IN_OLDGNU_HEADER
);
620 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
621 finish_header (file
->stat_info
, blk
, block_ordinal
);
623 while (i
< file
->stat_info
->sparse_map_avail
)
625 blk
= find_next_block ();
626 memset (blk
->buffer
, 0, BLOCKSIZE
);
627 oldgnu_store_sparse_info (file
, &i
,
628 blk
->sparse_header
.sp
,
629 SPARSES_IN_SPARSE_HEADER
);
630 set_next_block_after (blk
);
631 if (i
< file
->stat_info
->sparse_map_avail
)
632 blk
->sparse_header
.isextended
= 1;
639 static struct tar_sparse_optab oldgnu_optab
= {
640 NULL
, /* No init function */
641 NULL
, /* No done function */
643 oldgnu_get_sparse_info
,
644 NULL
, /* No scan_block function */
646 sparse_extract_region
,
652 /* Convert STAR format sparse data to internal representation.
   When the header's prefix field is empty and byte 10 of its first sparse
   entry's offset field is nonzero, the header is treated as old star
   format and the entries stored in it are added.  Extension blocks are
   then fetched with find_next_block while ext_p is set and no entry has
   been rejected.  The alternative branch, the tests that lead to the two
   ERROR calls and the return statements are on lines missing from this
   excerpt.
653 FIXME: Clobbers current_header! */
655 star_get_sparse_info (struct tar_sparse_file
*file
)
658 union block
*h
= current_header
;
/* NOTE(review): rc is declared static; confirm that it is assigned on
   every path before the loop below reads it.  */
660 static enum oldgnu_add_status rc
;
662 /* FIXME: note this! st_size was initialized from the header
663 which actually contains archived size. The following fixes it */
664 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
665 file
->stat_info
->stat
.st_size
=
666 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
668 file
->stat_info
->sparse_map_size
= 0;
670 if (h
->star_in_header
.prefix
[0] == '\0'
671 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
673 /* Old star format */
674 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
676 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
680 ext_p
= h
->star_in_header
.isextended
;
685 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
687 h
= find_next_block ();
690 ERROR ((0, 0, _("Unexpected EOF in archive")));
693 set_next_block_after (h
);
694 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
695 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
700 ERROR ((0, 0, _("%s: invalid sparse archive member"),
701 file
->stat_info
->orig_file_name
));
708 static struct tar_sparse_optab star_optab
= {
709 NULL
, /* No init function */
710 NULL
, /* No done function */
712 star_get_sparse_info
,
713 NULL
, /* No scan_block function */
714 NULL
, /* No dump region function */
715 sparse_extract_region
,
/* GNU PAX sparse file format.  The sparse file map is stored in the
   extended header under the following keywords:

   GNU.sparse.size      Real size of the stored file
   GNU.sparse.numblocks Number of blocks in the sparse map
   repeat numblocks times
     GNU.sparse.offset    Offset of the next data block
     GNU.sparse.numbytes  Size of the next data block
   end repeat
*/
731 pax_dump_header (struct tar_sparse_file
*file
)
733 off_t block_ordinal
= current_block_ordinal ();
737 /* Store the real file size */
738 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
739 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
740 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
742 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
743 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
746 blk
= start_header (file
->stat_info
);
747 /* Store the effective (shrunken) file size */
748 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
749 finish_header (file
->stat_info
, blk
, block_ordinal
);
754 pax_decode_header (struct tar_sparse_file
*file
)
756 /* Restore actual size */
757 size_t s
= file
->stat_info
->archive_file_size
;
758 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
759 file
->stat_info
->stat
.st_size
= s
;
763 static struct tar_sparse_optab pax_optab
= {
764 NULL
, /* No init function */
765 NULL
, /* No done function */
768 NULL
, /* No scan_block function */
770 sparse_extract_region
,