From: Sergey Poznyakoff Date: Fri, 7 Aug 2009 11:52:55 +0000 (+0300) Subject: Improve listed incremental dumps. X-Git-Url: https://git.brokenzipper.com/gitweb?a=commitdiff_plain;h=1bcbbcf1ff2c537ffa970dbf82e3843d4ad110e5;p=chaz%2Ftar Improve listed incremental dumps. The modified algorithm tries to avoid dumping the same directory twice and ensures the order of the directories in the resulting archive is the same, whatever their order on the command line. It also fixes the operation of --listed-incremental -C. * gnulib.modules: Add canonicalize * src/common.h (incremental_level): New global. (check_exclusion_tags): first argument is const. (get_directory_contents): Add third argument. (zap_slashes, normalize_filename): New prototypes. (chdir_count): New prototype. (WARN_VERBOSE_WARNINGS): New define. (WARN_ALL): Exclude WARN_VERBOSE_WARNINGS. * src/compare.c (diff_dumpdir): Update the call to get_directory_contents. * src/create.c (check_exclusion_tags): First argument is const. Use ISSLASH and DIRECTORY_SEPARATOR instead of referring to '/'. * src/incremen.c (struct directory): New member `caname'. (hash_directory_name): Rename to hash_directory_canonical_name. Operate on the canonical name. (compare_directory_names): Rename to compare_directory_canonical_names. Operate on the canonical name. (make_directory): Take two arguments. (free_directory): Free caname. (attach_directory): Create caname. (find_directory): Use caname for lookups. (PD_VERBOSE): Remove. (PD_FORCE_INIT): New define. (procdir): First argument is const. Reinitialize directory if PD_FORCE_INIT bit is set. Do not use PD_VERBOSE or verbose_option for issuing warnings. Rely on WARNOPT instead. Always set *entry. (scan_directory): Take three arguments. The third one is a boolean which is true if the directory is explicitly mentioned on the command line. (get_directory_contents): Remove. Use scan_directory instead. All callers updated. (read_directory_file): Truncate the file if --level=0 is given. 
* src/misc.c: Include canonicalize.h. (zap_slashes, normalize_filename): New functions. (chdir_count): New function. * src/names.c (add_hierarchy_to_namelist): Take three arguments, as get_directory_contents and scan_directory. (collect_and_sort_names): Allow at most one -C, before file name arguments. Read the directory file after changing to another directory, if one was requested. Avoid adding the same directory under different pathnames to the list. * src/tar.c: New option --level. * tests/incr03.at, tests/incr04.at, tests/listed01.at, tests/listed02.at, tests/rename01.at, tests/rename02.at, tests/rename03.at: Update for new tar behavior. * tests/multiv01.at: Do not use --listed-incremental. --- diff --git a/gnulib.modules b/gnulib.modules index 858967b..9ec354f 100644 --- a/gnulib.modules +++ b/gnulib.modules @@ -5,6 +5,7 @@ alloca argmatch argp backupfile +canonicalize closeout configmake dirname diff --git a/src/common.h b/src/common.h index 2e048a3..d734b64 100644 --- a/src/common.h +++ b/src/common.h @@ -1,7 +1,7 @@ /* Common declarations for the tar program. Copyright (C) 1988, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -185,6 +185,8 @@ GLOBAL enum old_files old_files_option; /* Specified file name for incremental list. */ GLOBAL const char *listed_incremental_option; +/* Incremental dump level */ +GLOBAL int incremental_level; /* Check device numbers when doing incremental dumps. */ GLOBAL bool check_device_option; @@ -322,15 +324,22 @@ GLOBAL char const *index_file_name; struct name { struct name *next; /* Link to the next element */ + struct name *prev; /* Link to the previous element */ int change_dir; /* Number of the directory to change to. Set with the -C option. 
*/ uintmax_t found_count; /* number of times a matching file has been found */ int matching_flags; /* this name is a regexp, not literal */ - char const *dir_contents; /* for incremental_option */ - + size_t length; /* cached strlen(name) */ - char name[1]; + char *name; + + /* The following members are used for incremental dumps only */ + char const *dir_contents; /* directory contents */ + struct name *parent; /* pointer to the parent hierarchy */ + struct name *child; /* pointer to the first child */ + struct name *sibling; /* pointer to the next sibling */ + char *caname; /* canonical name */ }; /* Obnoxious test to see if dimwit is trying to dump the archive. */ @@ -440,7 +449,7 @@ void write_eot (void); void check_links (void); void exclusion_tag_warning (const char *dirname, const char *tagname, const char *message); -enum exclusion_tag_type check_exclusion_tags (char *dirname, +enum exclusion_tag_type check_exclusion_tags (const char *dirname, const char **tag_file_name); #define GID_TO_CHARS(val, where) gid_to_chars (val, where, sizeof (where)) @@ -497,7 +506,7 @@ char *dumpdir_next (dumpdir_iter_t itr); char *dumpdir_first (dumpdir_t dump, int all, dumpdir_iter_t *pitr); -const char *get_directory_contents (char *dir_name, dev_t device); +const char *scan_directory (char *dir_name, dev_t device, bool cmdline); const char *append_incremental_renames (const char *dump); void read_directory_file (void); void write_directory_file (void); @@ -565,6 +574,8 @@ void skip_member (void); void assign_string (char **dest, const char *src); char *quote_copy_string (const char *str); int unquote_string (char *str); +char *zap_slashes (char *name); +char *normalize_filename (const char *name); void code_ns_fraction (int ns, char *p); char const *code_timespec (struct timespec ts, char *sbuf); @@ -594,6 +605,7 @@ int deref_stat (bool deref, char const *name, struct stat *buf); int chdir_arg (char const *dir); void chdir_do (int dir); +int chdir_count (void); void 
close_diag (char const *name); void open_diag (char const *name); @@ -629,7 +641,7 @@ void name_add_dir (const char *name); void name_term (void); const char *name_next (int change_dirs); void name_gather (void); -struct name *addname (char const *string, int change_dir); +struct name *addname (char const *string, int change_dir, struct name *parent); bool name_match (const char *name); void names_notfound (void); void collect_and_sort_names (void); @@ -774,7 +786,10 @@ void checkpoint_run (bool do_write); #define WARN_UNKNOWN_KEYWORD 0x00020000 #define WARN_XDEV 0x00040000 -#define WARN_ALL 0xffffffff +/* The warnings composing WARN_VERBOSE_WARNINGS are enabled by default + in verbose mode */ +#define WARN_VERBOSE_WARNINGS (WARN_RENAME_DIRECTORY|WARN_NEW_DIRECTORY) +#define WARN_ALL (0xffffffff & ~WARN_VERBOSE_WARNINGS) void set_warning_option (const char *arg); diff --git a/src/compare.c b/src/compare.c index da03d86..b45c616 100644 --- a/src/compare.c +++ b/src/compare.c @@ -1,7 +1,7 @@ /* Diff files from a tar archive. Copyright (C) 1988, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2003, 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. Written by John Gilmore, on 1987-04-30. @@ -380,7 +380,7 @@ diff_dumpdir (void) else dev = stat_data.st_dev; - dumpdir_buffer = get_directory_contents (current_stat_info.file_name, dev); + dumpdir_buffer = scan_directory (current_stat_info.file_name, dev, false); if (dumpdir_buffer) { diff --git a/src/create.c b/src/create.c index 2724fc4..072732a 100644 --- a/src/create.c +++ b/src/create.c @@ -1,7 +1,7 @@ /* Create a tar archive. Copyright (C) 1985, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2003, 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. Written by John Gilmore, on 1985-08-25. 
@@ -72,13 +72,13 @@ exclusion_tag_warning (const char *dirname, const char *tagname, } enum exclusion_tag_type -check_exclusion_tags (char *dirname, const char **tag_file_name) +check_exclusion_tags (const char *dirname, const char **tag_file_name) { static char *tagname; static size_t tagsize; struct exclusion_tag *tag; size_t dlen = strlen (dirname); - int addslash = dirname[dlen-1] != '/'; + int addslash = !ISSLASH (dirname[dlen-1]); char *nptr = NULL; for (tag = exclusion_tags; tag; tag = tag->next) @@ -1303,7 +1303,7 @@ create_archive (void) } memcpy (buffer, p, plen); if (! ISSLASH (buffer[plen - 1])) - buffer[plen++] = '/'; + buffer[plen++] = DIRECTORY_SEPARATOR; q = gnu_list_name->dir_contents; if (q) while (*q) @@ -1533,10 +1533,10 @@ dump_file0 (struct tar_stat_info *st, const char *p, /* See if we want only new files, and check if this one is too old to put in the archive. - + This check is omitted if incremental_option is set *and* the requested file is not explicitely listed in the command line. */ - + if (!(incremental_option && !is_individual_file (p)) && !S_ISDIR (st->stat.st_mode) && OLDER_TAR_STAT_TIME (*st, m) diff --git a/src/incremen.c b/src/incremen.c index 025701b..5641217 100644 --- a/src/incremen.c +++ b/src/incremen.c @@ -1,7 +1,7 @@ /* GNU dump extensions to tar. Copyright (C) 1988, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -73,6 +73,7 @@ struct directory the original directory structure */ const char *tagfile; /* Tag file, if the directory falls under exclusion_tag_under */ + char *caname; /* canonical name */ char *name; /* file name of directory */ }; @@ -212,19 +213,19 @@ static Hash_table *directory_meta_table; /* Calculate the hash of a directory. */ static size_t -hash_directory_name (void const *entry, size_t n_buckets) +hash_directory_canonical_name (void const *entry, size_t n_buckets) { struct directory const *directory = entry; - return hash_string (directory->name, n_buckets); + return hash_string (directory->caname, n_buckets); } /* Compare two directories for equality of their names. */ static bool -compare_directory_names (void const *entry1, void const *entry2) +compare_directory_canonical_names (void const *entry1, void const *entry2) { struct directory const *directory1 = entry1; struct directory const *directory2 = entry2; - return strcmp (directory1->name, directory2->name) == 0; + return strcmp (directory1->caname, directory2->caname) == 0; } static size_t @@ -245,9 +246,11 @@ compare_directory_meta (void const *entry1, void const *entry2) && directory1->inode_number == directory2->inode_number; } -/* Make a directory entry for given NAME */ +/* Make a directory entry for given relative NAME and canonical name CANAME. + The latter is "stolen", i.e. the returned directory contains pointer to + it. 
*/ static struct directory * -make_directory (const char *name) +make_directory (const char *name, char *caname) { size_t namelen = strlen (name); struct directory *directory = xmalloc (sizeof (*directory)); @@ -260,6 +263,7 @@ make_directory (const char *name) directory->name = xmalloc (namelen + 1); memcpy (directory->name, name, namelen); directory->name[namelen] = 0; + directory->caname = caname; directory->tagfile = NULL; return directory; } @@ -267,6 +271,7 @@ make_directory (const char *name) static void free_directory (struct directory *dir) { + free (dir->caname); free (dir->name); free (dir); } @@ -274,7 +279,8 @@ free_directory (struct directory *dir) static struct directory * attach_directory (const char *name) { - struct directory *dir = make_directory (name); + char *cname = normalize_filename (name); + struct directory *dir = make_directory (name, cname); if (dirtail) dirtail->next = dir; else @@ -338,8 +344,8 @@ note_directory (char const *name, struct timespec mtime, if (! ((directory_table || (directory_table = hash_initialize (0, 0, - hash_directory_name, - compare_directory_names, 0))) + hash_directory_canonical_name, + compare_directory_canonical_names, 0))) && hash_insert (directory_table, directory))) xalloc_die (); @@ -362,13 +368,51 @@ find_directory (const char *name) return 0; else { - struct directory *dir = make_directory (name); + char *caname = normalize_filename (name); + struct directory *dir = make_directory (name, caname); struct directory *ret = hash_lookup (directory_table, dir); free_directory (dir); return ret; } } +#if 0 +/* Remove directory entry for the given CANAME */ +void +remove_directory (const char *caname) +{ + struct directory *dir = make_directory (caname, xstrdup (caname)); + struct directory *ret = hash_delete (directory_table, dir); + if (ret) + free_directory (ret); + free_directory (dir); +} +#endif + +/* Find a directory entry for NAME. 
If first OLD_PREFIX_LEN + bytes of its name match OLD_PREFIX, replace them with + NEW_PREFIX. */ +void +rebase_directory (const char *name, size_t old_prefix_len, + const char *old_prefix, + const char *new_prefix) +{ + struct directory *dir = find_directory (name); + if (dir) + { + size_t len = strlen (dir->name); + if (len > old_prefix_len + && memcmp (dir->name, old_prefix, old_prefix_len) == 0) + { + char *newp = xmalloc (len - old_prefix_len + strlen (new_prefix)); + strcpy (newp, new_prefix); + strcat (newp, dir->name + old_prefix_len); + free (dir->name); + dir->name = newp; + } + } +} + /* Return a directory entry for a given combination of device and inode numbers, or zero if none found. */ static struct directory * @@ -378,7 +422,7 @@ find_directory_meta (dev_t dev, ino_t ino) return 0; else { - struct directory *dir = make_directory (""); + struct directory *dir = make_directory ("", NULL); struct directory *ret; dir->device_number = dev; dir->inode_number = ino; @@ -407,12 +451,12 @@ update_parent_directory (const char *name) free (p); } -#define PD_VERBOSE 0x10 -#define PD_FORCE_CHILDREN 0x20 +#define PD_FORCE_CHILDREN 0x10 +#define PD_FORCE_INIT 0x20 #define PD_CHILDREN(f) ((f) & 3) static struct directory * -procdir (char *name_buffer, struct stat *stat_data, +procdir (const char *name_buffer, struct stat *stat_data, dev_t device, int flag, char *entry) @@ -423,15 +467,31 @@ procdir (char *name_buffer, struct stat *stat_data, if ((directory = find_directory (name_buffer)) != NULL) { if (DIR_IS_INITED (directory)) - return directory; + { + if (flag & PD_FORCE_INIT) + { + assign_string (&directory->name, name_buffer); + } + else + { + *entry = 'N'; /* Avoid duplicating this directory */ + return directory; + } + } + if (strcmp (directory->name, name_buffer)) + { + *entry = 'N'; + return directory; + } + /* With NFS, the same file can have two different devices if an NFS directory is mounted in multiple locations, which is relatively common when 
automounting. To avoid spurious incremental redumping of directories, consider all NFS devices as equal, relying on the i-node to establish differences. */ - + if (! ((!check_device_option || (DIR_IS_NFS (directory) && nfs) || directory->device_number == stat_data->st_dev) @@ -444,12 +504,11 @@ procdir (char *name_buffer, struct stat *stat_data, { if (strcmp (d->name, name_buffer)) { - if (verbose_option) - WARNOPT (WARN_RENAME_DIRECTORY, - (0, 0, - _("%s: Directory has been renamed from %s"), - quotearg_colon (name_buffer), - quote_n (1, d->name))); + WARNOPT (WARN_RENAME_DIRECTORY, + (0, 0, + _("%s: Directory has been renamed from %s"), + quotearg_colon (name_buffer), + quote_n (1, d->name))); directory->orig = d; DIR_SET_FLAG (directory, DIRF_RENAMED); dirlist_replace_prefix (d->name, name_buffer); @@ -458,10 +517,9 @@ procdir (char *name_buffer, struct stat *stat_data, } else { - if (verbose_option) - WARNOPT (WARN_RENAME_DIRECTORY, - (0, 0, _("%s: Directory has been renamed"), - quotearg_colon (name_buffer))); + WARNOPT (WARN_RENAME_DIRECTORY, + (0, 0, _("%s: Directory has been renamed"), + quotearg_colon (name_buffer))); directory->children = ALL_CHILDREN; directory->device_number = stat_data->st_dev; directory->inode_number = stat_data->st_ino; @@ -471,14 +529,14 @@ procdir (char *name_buffer, struct stat *stat_data, } else directory->children = CHANGED_CHILDREN; - + DIR_SET_FLAG (directory, DIRF_FOUND); } else { struct directory *d = find_directory_meta (stat_data->st_dev, stat_data->st_ino); - + directory = note_directory (name_buffer, get_stat_mtime(stat_data), stat_data->st_dev, @@ -491,11 +549,10 @@ procdir (char *name_buffer, struct stat *stat_data, { if (strcmp (d->name, name_buffer)) { - if (flag & PD_VERBOSE) - WARNOPT (WARN_RENAME_DIRECTORY, - (0, 0, _("%s: Directory has been renamed from %s"), - quotearg_colon (name_buffer), - quote_n (1, d->name))); + WARNOPT (WARN_RENAME_DIRECTORY, + (0, 0, _("%s: Directory has been renamed from %s"), + 
quotearg_colon (name_buffer), + quote_n (1, d->name))); directory->orig = d; DIR_SET_FLAG (directory, DIRF_RENAMED); dirlist_replace_prefix (d->name, name_buffer); @@ -505,10 +562,9 @@ procdir (char *name_buffer, struct stat *stat_data, else { DIR_SET_FLAG (directory, DIRF_NEW); - if (flag & PD_VERBOSE) - WARNOPT (WARN_NEW_DIRECTORY, - (0, 0, _("%s: Directory is new"), - quotearg_colon (name_buffer))); + WARNOPT (WARN_NEW_DIRECTORY, + (0, 0, _("%s: Directory is new"), + quotearg_colon (name_buffer))); directory->children = (listed_incremental_option || (OLDER_STAT_TIME (*stat_data, m) @@ -546,8 +602,7 @@ procdir (char *name_buffer, struct stat *stat_data, an exclusion tag. */ exclusion_tag_warning (name_buffer, tag_file_name, _("directory not dumped")); - if (entry) - *entry = 'N'; + *entry = 'N'; directory->children = NO_CHILDREN; break; @@ -648,9 +703,12 @@ makedumpdir (struct directory *directory, const char *dir) free (array); } -/* Recursively scan the given directory. */ -static const char * -scan_directory (char *dir, dev_t device) +/* Recursively scan the given directory DIR. + DEVICE is the device number where DIR resides (for --one-file-system). + If CMDLINE is true, the directory name was explicitly listed in the + command line. */ +const char * +scan_directory (char *dir, dev_t device, bool cmdline) { char *dirp = savedir (dir); /* for scanning directory */ char *name_buffer; /* directory, `/', and directory member */ @@ -658,6 +716,7 @@ scan_directory (char *dir, dev_t device) size_t name_length; /* used length in name_buffer */ struct stat stat_data; struct directory *directory; + char ch; if (! dirp) savedir_error (dir); @@ -665,10 +724,8 @@ scan_directory (char *dir, dev_t device) name_buffer_size = strlen (dir) + NAME_FIELD_SIZE; name_buffer = xmalloc (name_buffer_size + 2); strcpy (name_buffer, dir); - if (! 
ISSLASH (dir[strlen (dir) - 1])) - strcat (name_buffer, "/"); - name_length = strlen (name_buffer); - + zap_slashes (name_buffer); + if (deref_stat (dereference_option, name_buffer, &stat_data)) { stat_diag (name_buffer); @@ -680,7 +737,17 @@ scan_directory (char *dir, dev_t device) return NULL; } - directory = procdir (name_buffer, &stat_data, device, 0, NULL); + directory = procdir (name_buffer, &stat_data, device, + (cmdline ? PD_FORCE_INIT : 0), + &ch); + + name_length = strlen (name_buffer); + if (! ISSLASH (name_buffer[name_length - 1])) + { + name_buffer[name_length] = DIRECTORY_SEPARATOR; + /* name_buffer has been allocated an extra slot */ + name_buffer[++name_length] = 0; + } if (dirp && directory->children != NO_CHILDREN) { @@ -719,7 +786,7 @@ scan_directory (char *dir, dev_t device) if (S_ISDIR (stat_data.st_mode)) { - int pd_flag = (verbose_option ? PD_VERBOSE : 0); + int pd_flag = 0; if (!recursion_option) pd_flag |= PD_FORCE_CHILDREN | NO_CHILDREN; else if (directory->children == ALL_CHILDREN) @@ -755,9 +822,9 @@ scan_directory (char *dir, dev_t device) } const char * -get_directory_contents (char *dir, dev_t device) +get_directory_contents (char *dir, dev_t device, bool force) { - return scan_directory (dir, device); + return scan_directory (dir, device, force); } @@ -1236,11 +1303,14 @@ read_directory_file (void) int fd; char *buf = 0; size_t bufsize; + int flags = O_RDWR | O_CREAT; + if (incremental_level == 0) + flags |= O_TRUNC; /* Open the file for both read and write. That way, we can write it later without having to reopen it, and don't have to worry if we chdir in the meantime. */ - fd = open (listed_incremental_option, O_RDWR | O_CREAT, MODE_RW); + fd = open (listed_incremental_option, flags, MODE_RW); if (fd < 0) { open_error (listed_incremental_option); diff --git a/src/misc.c b/src/misc.c index b609b86..b56a916 100644 --- a/src/misc.c +++ b/src/misc.c @@ -1,7 +1,7 @@ /* Miscellaneous functions, not really specific to GNU tar. 
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2003, 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -25,6 +25,7 @@ #include #include #include +#include #if HAVE_STROPTS_H # include @@ -214,6 +215,27 @@ unquote_string (char *string) *destination = '\0'; return result; } + +/* Zap trailing slashes. */ +char * +zap_slashes (char *name) +{ + char *q; + + if (!name || *name == 0) + return name; + q = name + strlen (name) - 1; + while (q > name && ISSLASH (*q)) + *q-- = '\0'; + return name; +} + +char * +normalize_filename (const char *name) +{ + return zap_slashes (canonicalize_filename_mode (name, CAN_MISSING)); +} + /* Handling numbers. */ @@ -532,17 +554,25 @@ struct wd static struct wd *wd; /* The number of working directories in the vector. */ -static size_t wds; +static size_t wd_count; /* The allocated size of the vector. */ static size_t wd_alloc; +int +chdir_count () +{ + if (wd_count == 0) + return wd_count; + return wd_count - 1; +} + /* DIR is the operand of a -C option; add it to vector of chdir targets, and return the index of its location. */ int chdir_arg (char const *dir) { - if (wds == wd_alloc) + if (wd_count == wd_alloc) { if (wd_alloc == 0) { @@ -552,11 +582,11 @@ chdir_arg (char const *dir) else wd = x2nrealloc (wd, &wd_alloc, sizeof *wd); - if (! wds) + if (! wd_count) { - wd[wds].name = "."; - wd[wds].saved = 0; - wds++; + wd[wd_count].name = "."; + wd[wd_count].saved = 0; + wd_count++; } } @@ -568,12 +598,12 @@ chdir_arg (char const *dir) for (dir += 2; ISSLASH (*dir); dir++) continue; if (! 
dir[dir[0] == '.']) - return wds - 1; + return wd_count - 1; } - wd[wds].name = dir; - wd[wds].saved = 0; - return wds++; + wd[wd_count].name = dir; + wd[wd_count].saved = 0; + return wd_count++; } /* Change to directory I. If I is 0, change to the initial working diff --git a/src/names.c b/src/names.c index 05f89b1..eaa94d2 100644 --- a/src/names.c +++ b/src/names.c @@ -1,7 +1,7 @@ /* Various processing of names. Copyright (C) 1988, 1992, 1994, 1996, 1997, 1998, 1999, 2000, 2001, - 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + 2003, 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -178,6 +178,29 @@ gname_to_gid (char const *gname, gid_t *gidp) return 1; } + +struct name * +make_name (const char *file_name) +{ + struct name *p = xzalloc (sizeof (*p)); + if (!file_name) + file_name = ""; + p->name = xstrdup (file_name); + p->length = strlen (p->name); + return p; +} + +void +free_name (struct name *p) +{ + if (p) + { + free (p->name); + free (p->caname); + free (p); + } +} + /* Names from the command call. */ @@ -376,8 +399,7 @@ void name_gather (void) { /* Buffer able to hold a single name. */ - static struct name *buffer; - static size_t allocated_size; + static struct name *buffer = NULL; struct name_elt *ep; @@ -385,44 +407,25 @@ name_gather (void) { static int change_dir; - if (allocated_size == 0) - { - allocated_size = offsetof (struct name, name) + NAME_FIELD_SIZE + 1; - buffer = xzalloc (allocated_size); - } - while ((ep = name_next_elt (0)) && ep->type == NELT_CHDIR) change_dir = chdir_arg (xstrdup (ep->v.name)); if (ep) { - size_t needed_size; - - buffer->length = strlen (ep->v.name); - needed_size = offsetof (struct name, name) + buffer->length + 1; - if (allocated_size < needed_size) - { - do - { - allocated_size *= 2; - if (! 
allocated_size) - xalloc_die (); - } - while (allocated_size < needed_size); - - buffer = xrealloc (buffer, allocated_size); - } + free_name (buffer); + buffer = make_name (ep->v.name); buffer->change_dir = change_dir; - strcpy (buffer->name, ep->v.name); buffer->next = 0; buffer->found_count = 0; buffer->matching_flags = matching_flags; + buffer->dir_contents = NULL; + buffer->parent = NULL; namelist = buffer; nametail = &namelist->next; } else if (change_dir) - addname (0, change_dir); + addname (0, change_dir, NULL); } else { @@ -436,11 +439,11 @@ name_gather (void) change_dir = chdir_arg (xstrdup (ep->v.name)); if (ep) - addname (ep->v.name, change_dir); + addname (ep->v.name, change_dir, NULL); else { if (change_dir != change_dir0) - addname (0, change_dir); + addname (0, change_dir, NULL); break; } } @@ -449,23 +452,18 @@ name_gather (void) /* Add a name to the namelist. */ struct name * -addname (char const *string, int change_dir) +addname (char const *string, int change_dir, struct name *parent) { - size_t length = string ? strlen (string) : 0; - struct name *name = xmalloc (offsetof (struct name, name) + length + 1); - - if (string) - strcpy (name->name, string); - else - name->name[0] = 0; + struct name *name = make_name (string); + name->prev = *nametail; name->next = NULL; - name->length = length; name->found_count = 0; name->matching_flags = matching_flags; name->change_dir = change_dir; name->dir_contents = NULL; - + name->parent = parent; + *nametail = name; nametail = &name->next; return name; @@ -488,6 +486,22 @@ namelist_match (char const *file_name, size_t length) return NULL; } +void +remname (struct name *name) +{ + struct name *p; + + if ((p = name->prev) != NULL) + p->next = name->next; + else + namelist = name->next; + + if ((p = name->next) != NULL) + p->prev = name->prev; + else + nametail = &name->prev; +} + /* Return true if and only if name FILE_NAME (from an archive) matches any name from the namelist. 
*/ bool @@ -635,15 +649,18 @@ names_notfound (void) /* Sorting name lists. */ -/* Sort linked LIST of names, of given LENGTH, using COMPARE to order - names. Return the sorted list. Apart from the type `struct name' - and the definition of SUCCESSOR, this is a generic list-sorting - function, but it's too painful to make it both generic and portable +/* Sort *singly* linked LIST of names, of given LENGTH, using COMPARE + to order names. Return the sorted list. Note that after calling + this function, the `prev' links in list elements are messed up. + + Apart from the type `struct name' and the definition of SUCCESSOR, + this is a generic list-sorting function, but it's too painful to + make it both generic and portable in C. */ static struct name * -merge_sort (struct name *list, int length, - int (*compare) (struct name const*, struct name const*)) +merge_sort_sll (struct name *list, int length, + int (*compare) (struct name const*, struct name const*)) { struct name *first_list; struct name *second_list; @@ -681,8 +698,8 @@ merge_sort (struct name *list, int length, second_list = SUCCESSOR (cursor); SUCCESSOR (cursor) = 0; - first_list = merge_sort (first_list, first_length, compare); - second_list = merge_sort (second_list, second_length, compare); + first_list = merge_sort_sll (first_list, first_length, compare); + second_list = merge_sort_sll (second_list, second_length, compare); merge_point = &result; while (first_list && second_list) @@ -710,30 +727,54 @@ merge_sort (struct name *list, int length, #undef SUCCESSOR } +/* Sort doubly linked LIST of names, of given LENGTH, using COMPARE + to order names. Return the sorted list. 
*/ +static struct name * +merge_sort (struct name *list, int length, + int (*compare) (struct name const*, struct name const*)) +{ + struct name *head, *p, *prev; + head = merge_sort_sll (list, length, compare); + /* Fixup prev pointers */ + for (prev = NULL, p = head; p; prev = p, p = p->next) + p->prev = prev; + return head; +} + /* A comparison function for sorting names. Put found names last; break ties by string comparison. */ static int -compare_names (struct name const *n1, struct name const *n2) +compare_names_found (struct name const *n1, struct name const *n2) { - int found_diff = WASFOUND(n2) - WASFOUND(n1); + int found_diff = WASFOUND (n2) - WASFOUND (n1); return found_diff ? found_diff : strcmp (n1->name, n2->name); } + +/* Simple comparison by names. */ +static int +compare_names (struct name const *n1, struct name const *n2) +{ + return strcmp (n1->name, n2->name); +} + /* Add all the dirs under NAME, which names a directory, to the namelist. If any of the files is a directory, recurse on the subdirectory. - DEVICE is the device not to leave, if the -l option is specified. */ + DEVICE is the device not to leave, if the -l option is specified. + CMDLINE is true, if the NAME appeared on the command line. */ static void -add_hierarchy_to_namelist (struct name *name, dev_t device) +add_hierarchy_to_namelist (struct name *name, dev_t device, bool cmdline) { char *file_name = name->name; - const char *buffer = get_directory_contents (file_name, device); - + const char *buffer = scan_directory (file_name, device, cmdline); + if (! buffer) name->dir_contents = "\0\0\0\0"; else { + struct name *child_head = NULL, *child_tail = NULL; size_t name_length = name->length; size_t allocated_length = (name_length >= NAME_FIELD_SIZE ? 
name_length + NAME_FIELD_SIZE @@ -772,15 +813,64 @@ add_hierarchy_to_namelist (struct name *name, dev_t device) namebuf = xrealloc (namebuf, allocated_length + 1); } strcpy (namebuf + name_length, string + 1); - np = addname (namebuf, change_dir); - add_hierarchy_to_namelist (np, device); + np = addname (namebuf, change_dir, name); + if (!child_head) + child_head = np; + else + child_tail->sibling = np; + child_tail = np; + add_hierarchy_to_namelist (np, device, false); } } free (namebuf); + name->child = child_head; } } +/* Auxiliary functions for hashed table of struct name's. */ + +static size_t +name_hash (void const *entry, size_t n_buckets) +{ + struct name const *name = entry; + return hash_string (name->caname, n_buckets); +} + +/* Compare two directories for equality of their names. */ +static bool +name_compare (void const *entry1, void const *entry2) +{ + struct name const *name1 = entry1; + struct name const *name2 = entry2; + return strcmp (name1->caname, name2->caname) == 0; +} + + +/* Rebase `name' member of CHILD and all its siblings to + the new PARENT. */ +static void +rebase_child_list (struct name *child, struct name *parent) +{ + size_t old_prefix_len = child->parent->length; + size_t new_prefix_len = parent->length; + char *new_prefix = parent->name; + + for (; child; child = child->sibling) + { + size_t size = child->length - old_prefix_len + new_prefix_len; + char *newp = xmalloc (size + 1); + strcpy (newp, new_prefix); + strcat (newp, child->name + old_prefix_len); + free (child->name); + child->name = newp; + child->length = size; + + rebase_directory (child->name, old_prefix_len, child->parent->name, + new_prefix); + } +} + /* Collect all the names from argv[] (or whatever), expand them into a directory tree, and sort them. This gets only subdirectories, not all files. 
*/ @@ -789,18 +879,39 @@ void collect_and_sort_names (void) { struct name *name; - struct name *next_name; + struct name *next_name, *prev_name; int num_names; struct stat statbuf; - + Hash_table *nametab; + name_gather (); - if (listed_incremental_option) - read_directory_file (); - if (!namelist) - addname (".", 0); + addname (".", 0, NULL); + if (listed_incremental_option) + { + switch (chdir_count ()) + { + case 0: + break; + + case 1: + if (namelist->change_dir == 0) + USAGE_ERROR ((0, 0, + _("Using -C option inside file list is not " + "allowed with --listed-incremental"))); + break; + + default: + USAGE_ERROR ((0, 0, + _("Only one -C option is allowed with " + "--listed-incremental"))); + } + chdir_do (namelist->change_dir); + read_directory_file (); + } + for (name = namelist; name; name = next_name) { next_name = name->next; @@ -811,6 +922,7 @@ collect_and_sort_names (void) /* FIXME: just skip regexps for now */ continue; chdir_do (name->change_dir); + if (name->name[0] == 0) continue; @@ -822,17 +934,60 @@ collect_and_sort_names (void) if (S_ISDIR (statbuf.st_mode)) { name->found_count++; - add_hierarchy_to_namelist (name, statbuf.st_dev); + if (name->found_count == 1) + add_hierarchy_to_namelist (name, statbuf.st_dev, true); } } num_names = 0; for (name = namelist; name; name = name->next) num_names++; + namelist = merge_sort (namelist, num_names, compare_names); - for (name = namelist; name; name = name->next) - name->found_count = 0; + num_names = 0; + nametab = hash_initialize (0, 0, + name_hash, + name_compare, NULL); + for (name = namelist; name; name = next_name) + { + next_name = name->next; + name->caname = normalize_filename (name->name); + if (prev_name) + { + struct name *p = hash_lookup (nametab, name); + if (p) + { + /* Keep the one listed in the command line */ + if (!name->parent) + { + if (p->child) + rebase_child_list (p->child, name); + /* FIXME: remove_directory (p->caname); ? 
*/ + remname (p); + free_name (p); + num_names--; + } + else + { + if (name->child) + rebase_child_list (name->child, p); + /* FIXME: remove_directory (name->caname); ? */ + remname (name); + free_name (name); + continue; + } + } + } + name->found_count = 0; + hash_insert (nametab, name); + prev_name = name; + num_names++; + } + nametail = &prev_name; + hash_free (nametab); + + namelist = merge_sort (namelist, num_names, compare_names_found); if (listed_incremental_option) { @@ -953,12 +1108,12 @@ static void register_individual_file (char const *name) { struct stat st; - + if (deref_stat (dereference_option, name, &st) != 0) return; /* Will be complained about later */ if (S_ISDIR (st.st_mode)) return; - + hash_string_insert (&individual_file_table, name); } diff --git a/src/tar.c b/src/tar.c index b11edad..8d23a4f 100644 --- a/src/tar.c +++ b/src/tar.c @@ -272,6 +272,7 @@ enum IGNORE_FAILED_READ_OPTION, INDEX_FILE_OPTION, KEEP_NEWER_FILES_OPTION, + LEVEL_OPTION, LZMA_OPTION, LZOP_OPTION, MODE_OPTION, @@ -406,6 +407,8 @@ static struct argp_option options[] = { N_("handle old GNU-format incremental backup"), GRID+1 }, {"listed-incremental", 'g', N_("FILE"), 0, N_("handle new GNU-format incremental backup"), GRID+1 }, + {"level", LEVEL_OPTION, N_("NUMBER"), 0, + N_("dump level for created listed-incremental archive"), GRID+1 }, {"ignore-failed-read", IGNORE_FAILED_READ_OPTION, 0, 0, N_("do not exit with nonzero on unreadable files"), GRID+1 }, {"occurrence", OCCURRENCE_OPTION, N_("NUMBER"), OPTION_ARG_OPTIONAL, @@ -1385,7 +1388,7 @@ parse_opt (int key, char *arg, struct argp_state *state) case 'K': starting_file_option = true; - addname (arg, 0); + addname (arg, 0, NULL); break; case ONE_FILE_SYSTEM_OPTION: @@ -1409,6 +1412,15 @@ parse_opt (int key, char *arg, struct argp_state *state) } break; + case LEVEL_OPTION: + { + char *p; + incremental_level = strtoul (arg, &p, 10); + if (*p) + USAGE_ERROR ((0, 0, _("Invalid incremental level value"))); + } + break; + case 
LZMA_OPTION: set_use_compress_program_option ("lzma"); break; @@ -1538,6 +1550,7 @@ parse_opt (int key, char *arg, struct argp_state *state) case 'v': verbose_option++; + warning_option |= WARN_VERBOSE_WARNINGS; break; case 'V': @@ -2117,6 +2130,8 @@ decode_options (int argc, char **argv) group_option = -1; check_device_option = true; + + incremental_level = -1; /* Convert old-style tar call by exploding option element and rearranging options accordingly. */ @@ -2282,7 +2297,10 @@ decode_options (int argc, char **argv) && NEWER_OPTION_INITIALIZED (newer_mtime_option)) USAGE_ERROR ((0, 0, _("Cannot combine --listed-incremental with --newer"))); - + if (incremental_level != -1 && !listed_incremental_option) + WARN ((0, 0, + _("--level is meaningless without --listed-incremental"))); + if (volume_label_option) { if (archive_format == GNU_FORMAT || archive_format == OLDGNU_FORMAT) diff --git a/tests/incr03.at b/tests/incr03.at index 08421bb..673191e 100644 --- a/tests/incr03.at +++ b/tests/incr03.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -39,6 +39,7 @@ sleep 1 tar -cf archive.1 -g db directory mv directory/x directory/z +cp db db.old tar -cf archive.2 -g db directory mv directory orig diff --git a/tests/incr04.at b/tests/incr04.at index 885b0ef..5658ac9 100644 --- a/tests/incr04.at +++ b/tests/incr04.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. 
# This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -56,7 +56,8 @@ Incremental dump a/ a/c/ ], -[tar: a/b: Directory is new +[tar: a: Directory is new +tar: a/b: Directory is new tar: a/c: Directory has been renamed from `a/b' ],[],[],[gnu, oldgnu, posix]) diff --git a/tests/listed01.at b/tests/listed01.at index b9fc2e4..5884f8c 100644 --- a/tests/listed01.at +++ b/tests/listed01.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ sleep 2 genfile --length 10240 --pattern zeros --file directory/file2 echo "separator" - +cp listing listing.old tar --create \ --file=archive.2 \ --listed-incremental=listing \ diff --git a/tests/listed02.at b/tests/listed02.at index 3ab19a8..2f1f070 100644 --- a/tests/listed02.at +++ b/tests/listed02.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ done sleep 1 echo Creating main archive +echo >&2 "Creating main archive" tar -c -v --listed-incremental=tart.incr1 -f archive.1 tart 2> err || exit 1 # The above prints two lines to stderr announcing the new directories c0 and c1. 
@@ -66,6 +67,7 @@ find tart -print | sort 2>/dev/null sleep 1 echo Creating incremental archive +echo >&2 "Creating incremental archive" cp -p tart.incr1 tart.incr2 tar -c -v --listed-incremental=tart.incr2 -f archive.2 tart || exit 1 @@ -73,6 +75,7 @@ sleep 1 rm -rf tart/* echo Extracting main archive +echo >&2 "Extracting main archive" tar -x -v --listed-incremental=tart.incr1 -f archive.1 || exit 1 echo Extracting incremental archive # This command should produce three messages about deletion @@ -141,9 +144,13 @@ tart/c2/ca1 tart/c2/ca2 tart/c2/ca3 ], -[tar: tart/c0: Directory is new +[Creating main archive +tar: tart/c0: Directory is new tar: tart/c1: Directory is new +tar: tart: Directory is new +Creating incremental archive tar: tart/c2: Directory has been renamed from `tart/c1' +Extracting main archive ], [],[],[gnu, oldgnu]) diff --git a/tests/multiv01.at b/tests/multiv01.at index 5c37e03..2b39558 100644 --- a/tests/multiv01.at +++ b/tests/multiv01.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2004, 2006, 2007, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -46,7 +46,6 @@ else fi tar -c --multi-volume --tape-length=$TAPE_LENGTH \ - --listed-incremental=t.snar \ -f t1-pipe.tar -f t2-pipe.tar ./file1 ./file2 || exit 1 mkdir extract-dir-pipe diff --git a/tests/rename01.at b/tests/rename01.at index 4e2a6aa..30d1350 100644 --- a/tests/rename01.at +++ b/tests/rename01.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. 
# This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -79,7 +79,8 @@ foo/file1 foo/file2 End directory listing 2 ], -[tar: foo/bar: Directory is new +[tar: foo: Directory is new +tar: foo/bar: Directory is new tar: foo/baz: Directory has been renamed from `foo/bar' ], [],[],[gnu, oldgnu, posix]) diff --git a/tests/rename02.at b/tests/rename02.at index 2c8ca0d..d20f2b0 100644 --- a/tests/rename02.at +++ b/tests/rename02.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -93,7 +93,8 @@ foo/file1 foo/file2 End directory listing 2 ], -[tar: foo/bar: Directory is new +[tar: foo: Directory is new +tar: foo/bar: Directory is new tar: foo/bar/baz: Directory is new tar: foo/baz: Directory has been renamed from `foo/bar/baz' ], diff --git a/tests/rename03.at b/tests/rename03.at index d08c9fc..c79df8b 100644 --- a/tests/rename03.at +++ b/tests/rename03.at @@ -1,7 +1,7 @@ # Process this file with autom4te to create testsuite. -*- Autotest -*- # Test suite for GNU tar. -# Copyright (C) 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -116,6 +116,7 @@ End directory listing 2 tar: foo/a: Directory is new tar: foo/b: Directory is new tar: foo/c: Directory is new +tar: foo: Directory is new Second dump tar: foo/a: Directory has been renamed from `foo/c' tar: foo/b: Directory has been renamed from `foo/a'