1 /* Functions for dealing with sparse files
3 Copyright (C) 2003 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 struct tar_sparse_file
;
25 enum sparse_scan_state
32 struct tar_sparse_optab
34 bool (*init
) (struct tar_sparse_file
*);
35 bool (*done
) (struct tar_sparse_file
*);
36 bool (*dump_header
) (struct tar_sparse_file
*);
37 bool (*decode_header
) (struct tar_sparse_file
*);
38 bool (*scan_block
) (struct tar_sparse_file
*, enum sparse_scan_state
,
40 bool (*dump_region
) (struct tar_sparse_file
*, size_t index
);
41 bool (*extract_region
) (struct tar_sparse_file
*, size_t index
);
44 struct tar_sparse_file
46 int fd
; /* File descriptor */
47 size_t dumped_size
; /* Number of bytes actually written
49 struct tar_stat_info
*stat_info
; /* Information about the file */
50 struct tar_sparse_optab
*optab
;
51 void *closure
; /* Any additional data optab calls might
56 tar_sparse_init (struct tar_sparse_file
*file
)
58 file
->dumped_size
= 0;
59 if (file
->optab
->init
)
60 return file
->optab
->init (file
);
65 tar_sparse_done (struct tar_sparse_file
*file
)
67 if (file
->optab
->done
)
68 return file
->optab
->done (file
);
73 tar_sparse_scan (struct tar_sparse_file
*file
, enum sparse_scan_state state
,
76 if (file
->optab
->scan_block
)
77 return file
->optab
->scan_block (file
, state
, block
);
82 tar_sparse_dump_region (struct tar_sparse_file
*file
, size_t index
)
84 if (file
->optab
->dump_region
)
85 return file
->optab
->dump_region (file
, index
);
90 tar_sparse_extract_region (struct tar_sparse_file
*file
, size_t index
)
92 if (file
->optab
->extract_region
)
93 return file
->optab
->extract_region (file
, index
);
98 tar_sparse_dump_header (struct tar_sparse_file
*file
)
100 if (file
->optab
->dump_header
)
101 return file
->optab
->dump_header (file
);
106 tar_sparse_decode_header (struct tar_sparse_file
*file
)
108 if (file
->optab
->decode_header
)
109 return file
->optab
->decode_header (file
);
115 lseek_or_error (struct tar_sparse_file
*file
, off_t offset
, int whence
)
117 if (lseek (file
->fd
, offset
, whence
) < 0)
119 seek_diag_details (file
->stat_info
->orig_file_name
, offset
);
125 /* Scan a blockful of data to see whether it is made *entirely* of
126 zeros, returning 0 the instant a nonzero byte (i.e., useful data)
127 is found. */
129 zero_block_p (char *buffer
, size_t size
)
/* Zero-fill the BLOCKSIZE-byte buffer P.  Expands to a single expression
   with no trailing semicolon, so `clear_block (buf);' can be used as the
   body of an unbraced if/else without producing an empty statement.  */
#define clear_block(p) memset ((p), 0, BLOCKSIZE)
139 #define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
142 sparse_add_map (struct tar_sparse_file
*file
, struct sp_array
*sp
)
144 if (file
->stat_info
->sparse_map
== NULL
)
146 file
->stat_info
->sparse_map
=
147 xmalloc (SPARSES_INIT_COUNT
* sizeof file
->stat_info
->sparse_map
[0]);
148 file
->stat_info
->sparse_map_size
= SPARSES_INIT_COUNT
;
150 else if (file
->stat_info
->sparse_map_avail
== file
->stat_info
->sparse_map_size
)
152 file
->stat_info
->sparse_map_size
*= 2;
153 file
->stat_info
->sparse_map
=
154 xrealloc (file
->stat_info
->sparse_map
,
155 file
->stat_info
->sparse_map_size
156 * sizeof file
->stat_info
->sparse_map
[0]);
158 file
->stat_info
->sparse_map
[file
->stat_info
->sparse_map_avail
++] = *sp
;
161 /* Scan the sparse file and create its map */
163 sparse_scan_file (struct tar_sparse_file
*file
)
165 static char buffer
[BLOCKSIZE
];
168 struct sp_array sp
= {0, 0};
170 if (!lseek_or_error (file
, 0, SEEK_SET
))
172 clear_block (buffer
);
174 file
->stat_info
->sparse_map_size
= 0;
175 file
->stat_info
->archive_file_size
= 0;
177 if (!tar_sparse_scan (file
, scan_begin
, NULL
))
180 while ((count
= safe_read (file
->fd
, buffer
, sizeof buffer
)) > 0)
182 /* Analyze the block */
183 if (zero_block_p (buffer
, count
))
187 sparse_add_map (file
, &sp
);
189 if (!tar_sparse_scan (file
, scan_block
, NULL
))
195 if (sp
.numbytes
== 0)
197 sp
.numbytes
+= count
;
198 file
->stat_info
->archive_file_size
+= count
;
199 if (!tar_sparse_scan (file
, scan_block
, buffer
))
204 clear_block (buffer
);
207 if (sp
.numbytes
== 0)
210 sparse_add_map (file
, &sp
);
211 file
->stat_info
->archive_file_size
+= count
;
212 return tar_sparse_scan (file
, scan_end
, NULL
);
215 static struct tar_sparse_optab oldgnu_optab
;
216 static struct tar_sparse_optab star_optab
;
217 static struct tar_sparse_optab pax_optab
;
220 sparse_select_optab (struct tar_sparse_file
*file
)
222 switch (current_format
== DEFAULT_FORMAT
? archive_format
: current_format
)
229 case GNU_FORMAT
: /*FIXME: This one should disappear? */
230 file
->optab
= &oldgnu_optab
;
234 file
->optab
= &pax_optab
;
238 file
->optab
= &star_optab
;
248 sparse_dump_region (struct tar_sparse_file
*file
, size_t index
)
251 off_t bytes_left
= file
->stat_info
->sparse_map
[index
].numbytes
;
253 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[index
].offset
,
257 while (bytes_left
> 0)
259 size_t bufsize
= (bytes_left
> BLOCKSIZE
) ? BLOCKSIZE
: bytes_left
;
262 blk
= find_next_block ();
263 memset (blk
->buffer
, 0, BLOCKSIZE
);
264 bytes_read
= safe_read (file
->fd
, blk
->buffer
, bufsize
);
267 read_diag_details (file
->stat_info
->orig_file_name
,
268 file
->stat_info
->sparse_map
[index
].offset
269 + file
->stat_info
->sparse_map
[index
].numbytes
275 bytes_left
-= bytes_read
;
276 file
->dumped_size
+= bytes_read
;
277 set_next_block_after (blk
);
284 sparse_extract_region (struct tar_sparse_file
*file
, size_t index
)
288 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[index
].offset
,
292 write_size
= file
->stat_info
->sparse_map
[index
].numbytes
;
296 /* Last block of the file is a hole */
297 if (sys_truncate (file
->fd
))
298 truncate_warn (file
->stat_info
->orig_file_name
);
300 else while (write_size
> 0)
303 size_t wrbytes
= (write_size
> BLOCKSIZE
) ? BLOCKSIZE
: write_size
;
304 union block
*blk
= find_next_block ();
307 ERROR ((0, 0, _("Unexpected EOF in archive")));
310 set_next_block_after (blk
);
311 count
= full_write (file
->fd
, blk
->buffer
, wrbytes
);
313 file
->dumped_size
+= count
;
314 if (count
!= wrbytes
)
316 write_error_details (file
->stat_info
->orig_file_name
,
326 /* Interface functions */
328 sparse_dump_file (int fd
, struct tar_stat_info
*stat
)
331 struct tar_sparse_file file
;
333 file
.stat_info
= stat
;
336 if (!sparse_select_optab (&file
)
337 || !tar_sparse_init (&file
))
338 return dump_status_not_implemented
;
340 rc
= sparse_scan_file (&file
);
341 if (rc
&& file
.optab
->dump_region
)
343 tar_sparse_dump_header (&file
);
349 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
350 rc
= tar_sparse_dump_region (&file
, i
);
354 pad_archive(file
.stat_info
->archive_file_size
- file
.dumped_size
);
355 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
358 /* Returns true if the file represented by stat is a sparse one */
360 sparse_file_p (struct tar_stat_info
*stat
)
362 return (ST_NBLOCKS (stat
->stat
)
363 < (stat
->stat
.st_size
/ ST_NBLOCKSIZE
364 + (stat
->stat
.st_size
% ST_NBLOCKSIZE
!= 0)));
368 sparse_extract_file (int fd
, struct tar_stat_info
*stat
, off_t
*size
)
371 struct tar_sparse_file file
;
374 file
.stat_info
= stat
;
377 if (!sparse_select_optab (&file
)
378 || !tar_sparse_init (&file
))
379 return dump_status_not_implemented
;
381 rc
= tar_sparse_decode_header (&file
);
382 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
383 rc
= tar_sparse_extract_region (&file
, i
);
384 *size
= file
.stat_info
->archive_file_size
- file
.dumped_size
;
385 return (tar_sparse_done (&file
) && rc
) ? dump_status_ok
: dump_status_short
;
389 static char diff_buffer
[BLOCKSIZE
];
392 check_sparse_region (struct tar_sparse_file
*file
, off_t beg
, off_t end
)
394 if (!lseek_or_error (file
, beg
, SEEK_SET
))
400 size_t rdsize
= end
- beg
;
402 if (rdsize
> BLOCKSIZE
)
404 clear_block (diff_buffer
);
405 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
408 read_diag_details (file
->stat_info
->orig_file_name
,
413 if (!zero_block_p (diff_buffer
, bytes_read
))
415 report_difference (file
->stat_info
,
416 _("File fragment at %lu is not a hole"), beg
);
426 check_data_region (struct tar_sparse_file
*file
, size_t index
)
430 if (!lseek_or_error (file
, file
->stat_info
->sparse_map
[index
].offset
,
433 size_left
= file
->stat_info
->sparse_map
[index
].numbytes
;
434 while (size_left
> 0)
437 size_t rdsize
= (size_left
> BLOCKSIZE
) ? BLOCKSIZE
: size_left
;
439 union block
*blk
= find_next_block ();
442 ERROR ((0, 0, _("Unexpected EOF in archive")));
445 set_next_block_after (blk
);
446 bytes_read
= safe_read (file
->fd
, diff_buffer
, rdsize
);
449 read_diag_details (file
->stat_info
->orig_file_name
,
450 file
->stat_info
->sparse_map
[index
].offset
451 + file
->stat_info
->sparse_map
[index
].numbytes
456 file
->dumped_size
+= bytes_read
;
457 size_left
-= bytes_read
;
458 if (memcmp (blk
->buffer
, diff_buffer
, rdsize
))
460 report_difference (file
->stat_info
, _("Contents differ"));
468 sparse_diff_file (int fd
, struct tar_stat_info
*stat
)
471 struct tar_sparse_file file
;
475 file
.stat_info
= stat
;
478 if (!sparse_select_optab (&file
)
479 || !tar_sparse_init (&file
))
480 return dump_status_not_implemented
;
482 rc
= tar_sparse_decode_header (&file
);
483 for (i
= 0; rc
&& i
< file
.stat_info
->sparse_map_avail
; i
++)
485 rc
= check_sparse_region (&file
,
486 offset
, file
.stat_info
->sparse_map
[i
].offset
)
487 && check_data_region (&file
, i
);
488 offset
= file
.stat_info
->sparse_map
[i
].offset
489 + file
.stat_info
->sparse_map
[i
].numbytes
;
493 skip_file (file
.stat_info
->archive_file_size
- file
.dumped_size
);
495 tar_sparse_done (&file
);
500 /* Old GNU Format. The sparse file information is stored in the
501 oldgnu_header in the following manner:
503 The header is marked with type 'S'. Its `size' field contains
504 the cumulative size of all non-empty blocks of the file. The
505 actual file size is stored in `realsize' member of oldgnu_header.
507 The map of the file is stored in a list of `struct sparse'.
508 Each struct contains offset to the block of data and its
509 size (both as octal numbers). The first file header contains
510 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
511 contains more structs, then the field `isextended' of the main
512 header is set to 1 (binary) and the `struct sparse_header'
513 header follows, containing at most 21 following structs
514 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
515 field of the extended header is set and the next extension header
518 enum oldgnu_add_status
525 /* Add a sparse item to the sparse file and its obstack */
526 static enum oldgnu_add_status
527 oldgnu_add_sparse (struct tar_sparse_file
*file
, struct sparse
*s
)
531 if (s
->numbytes
[0] == '\0')
533 sp
.offset
= OFF_FROM_HEADER (s
->offset
);
534 sp
.numbytes
= SIZE_FROM_HEADER (s
->numbytes
);
536 || file
->stat_info
->stat
.st_size
< sp
.offset
+ sp
.numbytes
537 || file
->stat_info
->archive_file_size
< 0)
540 sparse_add_map (file
, &sp
);
544 /* Convert old GNU format sparse data to internal representation
545 FIXME: Clobbers current_header! */
547 oldgnu_get_sparse_info (struct tar_sparse_file
*file
)
550 union block
*h
= current_header
;
552 static enum oldgnu_add_status rc
;
554 /* FIXME: note this! st_size was initialized from the header
555 which actually contains archived size. The following fixes it */
556 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
557 file
->stat_info
->stat
.st_size
=
558 OFF_FROM_HEADER (current_header
->oldgnu_header
.realsize
);
560 file
->stat_info
->sparse_map_size
= 0;
561 for (i
= 0; i
< SPARSES_IN_OLDGNU_HEADER
; i
++)
563 rc
= oldgnu_add_sparse (file
, &h
->oldgnu_header
.sp
[i
]);
568 for (ext_p
= h
->oldgnu_header
.isextended
;
569 rc
== add_ok
&& ext_p
; ext_p
= h
->sparse_header
.isextended
)
571 h
= find_next_block ();
574 ERROR ((0, 0, _("Unexpected EOF in archive")));
577 set_next_block_after (h
);
578 for (i
= 0; i
< SPARSES_IN_SPARSE_HEADER
&& rc
== add_ok
; i
++)
579 rc
= oldgnu_add_sparse (file
, &h
->sparse_header
.sp
[i
]);
584 ERROR ((0, 0, _("%s: invalid sparse archive member"),
585 file
->stat_info
->orig_file_name
));
592 oldgnu_store_sparse_info (struct tar_sparse_file
*file
, size_t *pindex
,
593 struct sparse
*sp
, size_t sparse_size
)
595 for (; *pindex
< file
->stat_info
->sparse_map_avail
596 && sparse_size
> 0; sparse_size
--, sp
++, ++*pindex
)
598 OFF_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].offset
,
600 SIZE_TO_CHARS (file
->stat_info
->sparse_map
[*pindex
].numbytes
,
606 oldgnu_dump_header (struct tar_sparse_file
*file
)
608 off_t block_ordinal
= current_block_ordinal ();
612 blk
= start_header (file
->stat_info
);
613 blk
->header
.typeflag
= GNUTYPE_SPARSE
;
614 if (file
->stat_info
->sparse_map_avail
> SPARSES_IN_OLDGNU_HEADER
)
615 blk
->oldgnu_header
.isextended
= 1;
617 /* Store the real file size */
618 OFF_TO_CHARS (file
->stat_info
->stat
.st_size
, blk
->oldgnu_header
.realsize
);
619 /* Store the effective (shrunken) file size */
620 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
623 oldgnu_store_sparse_info (file
, &i
,
624 blk
->oldgnu_header
.sp
,
625 SPARSES_IN_OLDGNU_HEADER
);
626 blk
->oldgnu_header
.isextended
= i
< file
->stat_info
->sparse_map_avail
;
627 finish_header (file
->stat_info
, blk
, block_ordinal
);
629 while (i
< file
->stat_info
->sparse_map_avail
)
631 blk
= find_next_block ();
632 memset (blk
->buffer
, 0, BLOCKSIZE
);
633 oldgnu_store_sparse_info (file
, &i
,
634 blk
->sparse_header
.sp
,
635 SPARSES_IN_SPARSE_HEADER
);
636 set_next_block_after (blk
);
637 if (i
< file
->stat_info
->sparse_map_avail
)
638 blk
->sparse_header
.isextended
= 1;
645 static struct tar_sparse_optab oldgnu_optab
= {
646 NULL
, /* No init function */
647 NULL
, /* No done function */
649 oldgnu_get_sparse_info
,
650 NULL
, /* No scan_block function */
652 sparse_extract_region
,
658 /* Convert STAR format sparse data to internal representation
659 FIXME: Clobbers current_header! */
661 star_get_sparse_info (struct tar_sparse_file
*file
)
664 union block
*h
= current_header
;
666 static enum oldgnu_add_status rc
;
668 /* FIXME: note this! st_size was initialized from the header
669 which actually contains archived size. The following fixes it */
670 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
671 file
->stat_info
->stat
.st_size
=
672 OFF_FROM_HEADER (current_header
->star_in_header
.realsize
);
674 file
->stat_info
->sparse_map_size
= 0;
676 if (h
->star_in_header
.prefix
[0] == '\0'
677 && h
->star_in_header
.sp
[0].offset
[10] != '\0')
679 /* Old star format */
680 for (i
= 0; i
< SPARSES_IN_STAR_HEADER
; i
++)
682 rc
= oldgnu_add_sparse (file
, &h
->star_in_header
.sp
[i
]);
686 ext_p
= h
->star_in_header
.isextended
;
691 for (; rc
== add_ok
&& ext_p
; ext_p
= h
->star_ext_header
.isextended
)
693 h
= find_next_block ();
696 ERROR ((0, 0, _("Unexpected EOF in archive")));
699 set_next_block_after (h
);
700 for (i
= 0; i
< SPARSES_IN_STAR_EXT_HEADER
&& rc
== add_ok
; i
++)
701 rc
= oldgnu_add_sparse (file
, &h
->star_ext_header
.sp
[i
]);
706 ERROR ((0, 0, _("%s: invalid sparse archive member"),
707 file
->stat_info
->orig_file_name
));
714 static struct tar_sparse_optab star_optab
= {
715 NULL
, /* No init function */
716 NULL
, /* No done function */
718 star_get_sparse_info
,
719 NULL
, /* No scan_block function */
720 NULL
, /* No dump region function */
721 sparse_extract_region
,
725 /* GNU PAX sparse file format. The sparse file map is stored in
728 GNU.sparse.size Real size of the stored file
729 GNU.sparse.numblocks Number of blocks in the sparse map
730 repeat numblocks times
731 GNU.sparse.offset Offset of the next data block
732 GNU.sparse.numbytes Size of the next data block
737 pax_dump_header (struct tar_sparse_file
*file
)
739 off_t block_ordinal
= current_block_ordinal ();
743 /* Store the real file size */
744 xheader_store ("GNU.sparse.size", file
->stat_info
, NULL
);
745 xheader_store ("GNU.sparse.numblocks", file
->stat_info
, NULL
);
746 for (i
= 0; i
< file
->stat_info
->sparse_map_avail
; i
++)
748 xheader_store ("GNU.sparse.offset", file
->stat_info
, &i
);
749 xheader_store ("GNU.sparse.numbytes", file
->stat_info
, &i
);
752 blk
= start_header (file
->stat_info
);
753 /* Store the effective (shrunken) file size */
754 OFF_TO_CHARS (file
->stat_info
->archive_file_size
, blk
->header
.size
);
755 finish_header (file
->stat_info
, blk
, block_ordinal
);
760 pax_decode_header (struct tar_sparse_file
*file
)
762 /* Restore actual size */
763 size_t s
= file
->stat_info
->archive_file_size
;
764 file
->stat_info
->archive_file_size
= file
->stat_info
->stat
.st_size
;
765 file
->stat_info
->stat
.st_size
= s
;
769 static struct tar_sparse_optab pax_optab
= {
770 NULL
, /* No init function */
771 NULL
, /* No done function */
774 NULL
, /* No scan_block function */
776 sparse_extract_region
,