]> Dogcows Code - chaz/tar/blob - scripts/tar-snapshot-edit
Bugfix: entries read from the -T file did not get proper matching_flag.
[chaz/tar] / scripts / tar-snapshot-edit
1 #! /usr/bin/perl -w
2 # Display and edit the 'dev' field in tar's snapshots
3 # Copyright 2007, 2011, 2013-2014 Free Software Foundation, Inc.
4
5 # This file is part of GNU tar.
6
7 # GNU tar is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
11
12 # GNU tar is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20
21 # tar-snapshot-edit
22 #
23 # This script is capable of replacing values in the 'dev' field of an
24 # incremental backup 'snapshot' file. This is useful when the device
25 # used to store files in a tar archive changes, without the files
26 # themselves changing. This may happen when, for example, a device
27 # driver changes major or minor numbers.
28 #
29 # It can also run a check on all the field values found in the
30 # snapshot file, printing out a detailed message when it finds values
31 # that would cause an "Unexpected field value in snapshot file",
32 # "Numerical result out of range", or "Invalid argument" error
33 # if tar were run using that snapshot file as input. (See the
34 # comments included in the definition of the check_field_values
35 # routine for more detailed information regarding these checks.)
36 #
37 #
38 #
39 # Author: Dustin J. Mitchell <dustin@zmanda.com>
40 #
41 # Modified Aug 25, 2011 by Nathan Stratton Treadway <nathanst AT ontko.com>:
42 # * update Perl syntax to work correctly with more recent versions of
43 # Perl. (The original code worked with in the v5.8 timeframe but
44 # not with Perl v5.10.1 and later.)
45 # * added a "-c" option to check the snapshot file for invalid field values.
46 # * handle NFS indicator character ("+") in version 0 and 1 files
47 # * preserve the original header/version line when editing version 1
48 # or 2 files.
49 # * tweak output formatting
50 #
51 # Modified March 13, 2013 by Nathan Stratton Treadway <nathanst AT ontko.com>:
52 # * configure field ranges used for -c option based on the system
53 # architecture (in response to the December 2012 update to GNU tar
54 # enabling support for systems with signed dev_t values).
55 # * when printing the list of device ids found in the snapshot file
56 # (when run in the default mode), print the raw device id values
57 # instead of the hex-string version in those cases where they
58 # can't be converted successfully.
59
60 use Getopt::Std;
61 use Config;
62
63 my %snapshot_field_ranges; # used in check_field_values function
64
65 ## reading
66
67 sub read_incr_db ($) {
68 my $filename = shift;
69 open(my $file, "<$filename") || die "Could not open '$filename' for reading";
70
71 my $header_str = <$file>;
72 my $file_version;
73 if ($header_str =~ /^GNU tar-[^-]*-([0-9]+)\n$/) {
74 $file_version = $1+0;
75 } else {
76 $file_version = 0;
77 }
78
79 print "\nFile: $filename\n";
80 print " Detected snapshot file version: $file_version\n\n";
81
82 if ($file_version == 0) {
83 return read_incr_db_0($file, $header_str);
84 } elsif ($file_version == 1) {
85 return read_incr_db_1($file, $header_str);
86 } elsif ($file_version == 2) {
87 return read_incr_db_2($file, $header_str);
88 } else {
89 die "Unrecognized snapshot version in header '$header_str'";
90 }
91 }
92
93 sub read_incr_db_0 ($$) {
94 my $file = shift;
95 my $header_str = shift;
96
97 my $hdr_timestamp_sec = $header_str;
98 chop $hdr_timestamp_sec;
99 my $hdr_timestamp_nsec = ''; # not present in file format 0
100
101 my $nfs;
102 my @dirs;
103
104 while (<$file>) {
105 /^(\+?)([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
106
107 if ( $1 eq "+" ) {
108 $nfs="1";
109 } else {
110 $nfs="0";
111 }
112 push @dirs, { nfs=>$nfs,
113 dev=>$2,
114 ino=>$3,
115 name=>$4 };
116 }
117
118 close($file);
119
120 # file version, timestamp, timestamp, dir list, file header line
121 return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, ""];
122 }
123
124 sub read_incr_db_1 ($$) {
125 my $file = shift;
126 my $header_str = shift;
127
128
129 my $timestamp = <$file>; # "sec nsec"
130 my ($hdr_timestamp_sec, $hdr_timestamp_nsec) = ($timestamp =~ /([0-9]*) ([0-9]*)/);
131
132 my $nfs;
133 my @dirs;
134
135 while (<$file>) {
136 /^(\+?)([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
137
138 if ( $1 eq "+" ) {
139 $nfs="1";
140 } else {
141 $nfs="0";
142 }
143
144 push @dirs, { nfs=>$nfs,
145 timestamp_sec=>$2,
146 timestamp_nsec=>$3,
147 dev=>$4,
148 ino=>$5,
149 name=>$6 };
150 }
151
152 close($file);
153
154 # file version, timestamp, timestamp, dir list, file header line
155 return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str ];
156 }
157
158 sub read_incr_db_2 ($$) {
159 my $file = shift;
160 my $header_str = shift;
161
162 $/="\0"; # $INPUT_RECORD_SEPARATOR
163 my $hdr_timestamp_sec = <$file>;
164 chop $hdr_timestamp_sec;
165 my $hdr_timestamp_nsec = <$file>;
166 chop $hdr_timestamp_nsec;
167 my @dirs;
168
169 while (1) {
170 last if eof($file);
171
172 my $nfs = <$file>;
173 my $timestamp_sec = <$file>;
174 my $timestamp_nsec = <$file>;
175 my $dev = <$file>;
176 my $ino = <$file>;
177 my $name = <$file>;
178
179 # get rid of trailing NULs
180 chop $nfs;
181 chop $timestamp_sec;
182 chop $timestamp_nsec;
183 chop $dev;
184 chop $ino;
185 chop $name;
186
187 my @dirents;
188 while (my $dirent = <$file>) {
189 chop $dirent;
190 push @dirents, $dirent;
191 last if ($dirent eq "");
192 }
193 die "missing terminator" unless (<$file> eq "\0");
194
195 push @dirs, { nfs=>$nfs,
196 timestamp_sec=>$timestamp_sec,
197 timestamp_nsec=>$timestamp_nsec,
198 dev=>$dev,
199 ino=>$ino,
200 name=>$name,
201 dirents=>\@dirents };
202 }
203
204 close($file);
205 $/ = "\n"; # reset to normal
206
207 # file version, timestamp, timestamp, dir list, file header line
208 return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str];
209 }
210
211 ## display
212
213 sub show_device_counts ($) {
214 my $info = shift;
215 my %devices;
216 foreach my $dir (@{$info->[3]}) {
217 my $dev = $dir->{'dev'};
218 $devices{$dev}++;
219 }
220
221 my $devstr;
222 foreach $dev (sort {$a <=> $b} keys %devices) {
223 $devstr = sprintf ("0x%04x", $dev);
224 if ( $dev > 0xffffffff or $dev < 0 or hex($devstr) != $dev ) {
225 # sprintf "%x" will not return a useful value for device ids
226 # that are negative or which overflow the integer size on this
227 # instance of Perl, so we convert the hex string back to a
228 # number, and if it doesn't (numerically) equal the original
229 # device id value, we know the hex conversion hasn't worked.
230 #
231 # Unfortunately, since we're running in "-w" mode, Perl will
232 # also print a warning message if the hex() routine is called
233 # on anything larger than "0xffffffff", even in 64-bit Perl
234 # where such values are actually supported... so we have to
235 # avoid calling hex() at all if the device id is too large or
236 # negative. (If it's negative, the conversion to an unsigned
237 # integer for the "%x" specifier will mean the result will
238 # always trigger hex()'s warning on a 64-bit machine.)
239 #
240 # These situations don't seem to occur very often, so for now
241 # when they do occur, we simply print the original text value
242 # that was read from the snapshot file; it will look a bit
243 # funny next to the values that do print in hex, but that's
244 # preferable to printing values that aren't actually correct.
245 $devstr = $dev;
246 }
247 printf " Device %s occurs $devices{$dev} times.\n", $devstr;
248 }
249 }
250
251 ## check field values
252
253 # initializes the global %snapshot_field_ranges hash, based on the "-a"
254 # command-line option if given, otherwise based on the "archname" of
255 # the current system.
256 #
257 # Each value in the hash is a two-element array containing the minimum
258 # and maximum allowed values, respectively, for that field in the snapshot
259 # file. GNU tar's allowed values for each architecture are determined
260 # in the incremen.c source file, where the TYPE_MIN and TYPE_MAX
261 # pre-processor expressions are used to determine the range that can be
262 # expressed by the C data type used for each field; the values in the
263 # array defined below should match those calculations. (For tar v1.27
264 # and later, the valid ranges for a particular tar binary can easily
265 # be determined using the "tar --show-snapshot-field-ranges" command.)
266
267 sub choose_architecture ($) {
268 my $opt_a = shift;
269
270 my $arch = $opt_a ? $opt_a : $Config{'archname'};
271
272 # These ranges apply to Linux 2.4/2.6 on iX86 systems, but are used
273 # by default on unrecognized/unsupported systems, too.
274 %iX86_linux_field_ranges = (
275 timestamp_sec => [ -2147483648, 2147483647 ], # min/max of time_t
276 timestamp_nsec => [ 0, 999999999 ], # 0 to BILLION-1
277 nfs => [ 0, 1 ],
278 dev => [ 0, 18446744073709551615 ], # min/max of dev_t
279 ino => [ 0, 4294967295 ], # min/max of ino_t
280 );
281
282
283 if ( $arch =~ m/^i[\dxX]86-linux/i ) {
284 %snapshot_field_ranges = %iX86_linux_field_ranges;
285 print "Checking snapshot field values using \"iX86-linux\" ranges.\n\n";
286 } elsif ( $arch =~ m/^x86_64-linux/i ) {
287 %snapshot_field_ranges = (
288 timestamp_sec => [ -9223372036854775808, 9223372036854775807 ],
289 timestamp_nsec => [ 0, 999999999 ],
290 nfs => [ 0, 1 ],
291 dev => [ 0, 18446744073709551615 ],
292 ino => [ 0, 18446744073709551615 ],
293 );
294 print "Checking snapshot field values using \"x86_64-linux\" ranges.\n\n";
295 } elsif ( $arch =~ m/^IA64.ARCHREV_0/i ) {
296 # HP/UX running on Itanium/ia64 architecture
297 %snapshot_field_ranges = (
298 timestamp_sec => [ -2147483648, 2147483647 ],
299 timestamp_nsec => [ 0, 999999999 ],
300 nfs => [ 0, 1 ],
301 dev => [ -2147483648, 2147483647 ],
302 ino => [ 0, 4294967295 ],
303 );
304 print "Checking snapshot field values using \"IA64.ARCHREV_0\" (HP/UX) ranges.\n\n";
305 } else {
306 %snapshot_field_ranges = %iX86_linux_field_ranges;
307 print "Unrecognized architecture \"$arch\"; defaulting to \"iX86-linux\".\n";
308 print "(Use -a option to override.)\n" unless $opt_a;
309 print "\n";
310 }
311
312 if ( ref(1) ne "" ) {
313 print "(\"bignum\" mode is in effect; skipping 64-bit-integer check.)\n\n"
314 } else {
315 # find the largest max value in the current set of ranges
316 my $maxmax = 0;
317 for $v (values %snapshot_field_ranges ) {
318 $maxmax = $v->[1] if ($v->[1] > $maxmax);
319 }
320
321 # "~0" translates into a platform-native integer with all bits turned
322 # on -- that is, the largest value that can be represented as
323 # an integer. We print a warning if our $maxmax value is greater
324 # than that largest integer, since in that case Perl will switch
325 # to using floats for those large max values. The wording of
326 # the message assumes that the only way this situation can exist
327 # is that the platform uses 32-bit integers but some of the
328 # snapshot-file fields have 64-bit values.
329 if ( ~0 < $maxmax ) {
330 print <<EOF
331 Note: this version of Perl uses 32-bit integers, which means that it
332 will switch to using floating-point numbers when checking the ranges
333 for 64-bit snapshot-file fields. This normally will work fine, but
334 might fail to detect cases where the value in the input field value is
335 only slightly out of range. (For example, a "9223372036854775808"
336 might not be recognized as being larger than 9223372036854775807.)
337 If you suspect you are experiencing this problem, you can try running
338 the program using the "-Mbignum" option, as in
339 \$ perl $0 -Mbignum -c [FILES]
340 (but doing so will make the program run *much* slower).
341
342 EOF
343 }
344 }
345
346
347 }
348
349 # returns a warning message if $field_value isn't a valid string
350 # representation of an integer, or if the resulting integer is out of range
351 # defined by the two-element array retrieved using up the $field_name key in
352 # the global %snapshot_field_ranges hash.
353 sub validate_integer_field ($$) {
354 my $field_value = shift;
355 my $field_name = shift;
356
357 my ($min, $max) = @{$snapshot_field_ranges{$field_name}};
358
359 my $msg = "";
360
361 if ( not $field_value =~ /^-?\d+$/ ) {
362 $msg = " $field_name value contains invalid characters: \"$field_value\"\n";
363 } else {
364 if ( $field_value < $min ) {
365 $msg = " $field_name value too low: \"$field_value\" < $min \n";
366 } elsif ( $field_value > $max ) {
367 $msg = " $field_name value too high: \"$field_value\" > $max \n";
368 }
369 }
370 return $msg;
371 }
372
373
374 # This routine loops through each directory entry in the $info data
375 # structure and prints a warning message if tar would abort with an
376 # "Unexpected field value in snapshot file", "Numerical result out of
377 # range", or "Invalid argument" error upon reading this snapshot file.
378 #
379 # (Note that the "Unexpected field value in snapshot file" error message
380 # was introduced along with the change to snapshot file format "2",
381 # starting with tar v1.16 [or, more precisely, v1.15.91], while the
382 # other two were introduced in v1.27.)
383 #
384 # The checks here are intended to match those found in the incremen.c
385 # source file. See the choose_architecture() function (above) for more
386 # information on how to configure the range of values considered valid
387 # by this script.
388 #
389 # (Note: the checks here are taken from the code that processes
390 # version 2 snapshot files, but to keep things simple we apply those
391 # same checks to files having earlier versions -- but only for
392 # the fields that actually exist in those input files.)
393
394 sub check_field_values ($) {
395 my $info = shift;
396
397 my $msg;
398 my $error_found = 0;
399
400 print " Checking field values in snapshot file...\n";
401
402 $snapver = $info->[0];
403
404 $msg = "";
405 $msg .= validate_integer_field($info->[1], 'timestamp_sec');
406 if ($snapver >= 1) {
407 $msg .= validate_integer_field($info->[2], 'timestamp_nsec');
408 }
409 if ( $msg ne "" ) {
410 $error_found = 1;
411 print "\n shapshot file header:\n";
412 print $msg;
413 }
414
415
416 foreach my $dir (@{$info->[3]}) {
417
418 $msg = "";
419
420 $msg .= validate_integer_field($dir->{'nfs'}, 'nfs');
421 if ($snapver >= 1) {
422 $msg .= validate_integer_field($dir->{'timestamp_sec'}, 'timestamp_sec');
423 $msg .= validate_integer_field($dir->{'timestamp_nsec'}, 'timestamp_nsec');
424 }
425 $msg .= validate_integer_field($dir->{'dev'}, 'dev');
426 $msg .= validate_integer_field($dir->{'ino'}, 'ino');
427
428 if ( $msg ne "" ) {
429 $error_found = 1;
430 print "\n directory: $dir->{'name'}\n";
431 print $msg;
432 }
433 }
434
435 print "\n Snapshot field value check complete" ,
436 $error_found ? "" : ", no errors found" ,
437 ".\n";
438 }
439
440 ## editing
441
442 sub replace_device_number ($@) {
443 my $info = shift(@_);
444 my @repl = @_;
445
446 my $count = 0;
447
448 foreach my $dir (@{$info->[3]}) {
449 foreach $x (@repl) {
450 if ($dir->{'dev'} eq $$x[0]) {
451 $dir->{'dev'} = $$x[1];
452 $count++;
453 last;
454 }
455 }
456 }
457 print " Updated $count records.\n"
458 }
459
460 ## writing
461
462 sub write_incr_db ($$) {
463 my $info = shift;
464 my $filename = shift;
465 my $file_version = $$info[0];
466
467 open($file, ">$filename") || die "Could not open '$filename' for writing";
468
469 if ($file_version == 0) {
470 write_incr_db_0($info, $file);
471 } elsif ($file_version == 1) {
472 write_incr_db_1($info, $file);
473 } elsif ($file_version == 2) {
474 write_incr_db_2($info, $file);
475 } else {
476 die "Unknown file version $file_version.";
477 }
478
479 close($file);
480 }
481
482 sub write_incr_db_0 ($$) {
483 my $info = shift;
484 my $file = shift;
485
486 my $timestamp_sec = $info->[1];
487 print $file "$timestamp_sec\n";
488
489 foreach my $dir (@{$info->[3]}) {
490 if ($dir->{'nfs'}) {
491 print $file '+'
492 }
493 print $file "$dir->{'dev'} ";
494 print $file "$dir->{'ino'} ";
495 print $file "$dir->{'name'}\n";
496 }
497 }
498
499
500 sub write_incr_db_1 ($$) {
501 my $info = shift;
502 my $file = shift;
503
504 print $file $info->[4];
505
506 my $timestamp_sec = $info->[1];
507 my $timestamp_nsec = $info->[2];
508 print $file "$timestamp_sec $timestamp_nsec\n";
509
510 foreach my $dir (@{$info->[3]}) {
511 if ($dir->{'nfs'}) {
512 print $file '+'
513 }
514 print $file "$dir->{'timestamp_sec'} ";
515 print $file "$dir->{'timestamp_nsec'} ";
516 print $file "$dir->{'dev'} ";
517 print $file "$dir->{'ino'} ";
518 print $file "$dir->{'name'}\n";
519 }
520 }
521
522
523 sub write_incr_db_2 ($$) {
524 my $info = shift;
525 my $file = shift;
526
527 print $file $info->[4];
528
529 my $timestamp_sec = $info->[1];
530 my $timestamp_nsec = $info->[2];
531 print $file $timestamp_sec . "\0";
532 print $file $timestamp_nsec . "\0";
533
534 foreach my $dir (@{$info->[3]}) {
535 print $file $dir->{'nfs'} . "\0";
536 print $file $dir->{'timestamp_sec'} . "\0";
537 print $file $dir->{'timestamp_nsec'} . "\0";
538 print $file $dir->{'dev'} . "\0";
539 print $file $dir->{'ino'} . "\0";
540 print $file $dir->{'name'} . "\0";
541 foreach my $dirent (@{$dir->{'dirents'}}) {
542 print $file $dirent . "\0";
543 }
544 print $file "\0";
545 }
546 }
547
548 ## main
549
550 sub main {
551 our ($opt_b, $opt_r, $opt_h, $opt_c, $opt_a);
552 getopts('br:hca:');
553 HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) ||
554 ($opt_a && !$opt_c) || ($opt_r && $opt_c) );
555
556 my @repl;
557 if ($opt_r) {
558 foreach my $spec (split(/,/, $opt_r)) {
559 ($spec =~ /^([^-]+)-([^-]+)/) || die "Invalid replacement specification '$opt_r'";
560 push @repl, [interpret_dev($1), interpret_dev($2)];
561 }
562 }
563
564 choose_architecture($opt_a) if ($opt_c);
565
566 foreach my $snapfile (@ARGV) {
567 my $info = read_incr_db($snapfile);
568 if ($opt_r) {
569 if ($opt_b) {
570 rename($snapfile, $snapfile . "~") || die "Could not rename '$snapfile' to backup";
571 }
572
573 replace_device_number($info, @repl);
574 write_incr_db($info, $snapfile);
575 } elsif ($opt_c) {
576 check_field_values($info);
577 } else {
578 show_device_counts($info);
579 }
580 }
581 }
582
583 sub HELP_MESSAGE {
584 print <<EOF;
585
586 Usage:
587 tar-snapshot-edit SNAPFILE [SNAPFILE [...]]
588 tar-snapshot-edit -r 'DEV1-DEV2[,DEV3-DEV4...]' [-b] SNAPFILE [SNAPFILE [...]]
589 tar-snapshot-edit -c [-aARCH] SNAPFILE [SNAPFILE [...]]
590
591 With no options specified: print a summary of the 'device' values
592 found in each SNAPFILE.
593
594 With -r: replace occurrences of DEV1 with DEV2 in each SNAPFILE.
595 DEV1 and DEV2 may be specified in hex (e.g., 0xfe01), decimal (e.g.,
596 65025), or MAJ:MIN (e.g., 254:1). To replace multiple occurrences,
597 separate them with commas. If -b is also specified, backup files
598 (ending with '~') will be created.
599
600 With -c: Check the field values in each SNAPFILE and print warning
601 messages if any invalid values are found. (An invalid value is one
602 that would cause \"tar\" to abort with an error message such as
603 Unexpected field value in snapshot file
604 Numerical result out of range
605 or
606 Invalid argument
607 as it processed the snapshot file.)
608
609 Normally the program automatically chooses the valid ranges for
610 the fields based on the current system's architecture, but the
611 -a option can be used to override the selection, e.g. in order
612 to validate a snapshot file generated on a some other system.
613 (Currently only three architectures are supported, "iX86-linux",
614 "x86_64-linux", and "IA64.ARCHREV_0" [HP/UX running on Itanium/ia64],
615 and if the current system isn't recognized, then the iX86-linux
616 values are used by default.)
617
618 EOF
619 exit 1;
620 }
621
622 sub interpret_dev ($) {
623 my $dev = shift;
624
625 if ($dev =~ /^([0-9]+):([0-9]+)$/) {
626 return $1 * 256 + $2;
627 } elsif ($dev =~ /^0x[0-9a-fA-F]+$/) {
628 return oct $dev;
629 } elsif ($dev =~ /^[0-9]+$/) {
630 return $dev+0;
631 } else {
632 die "Invalid device specification '$dev'";
633 }
634 }
635
636 main
This page took 0.078255 seconds and 4 git commands to generate.