From: Paul Eggert Date: Mon, 6 Sep 2010 20:38:44 +0000 (-0700) Subject: tar: more reliable directory traversal when creating archives X-Git-Url: https://git.brokenzipper.com/gitweb?a=commitdiff_plain;h=de328a580ab6f5ff4a3237ce21f1ef0b7dd12984;p=chaz%2Ftar tar: more reliable directory traversal when creating archives * NEWS: Document this. * gnulib.modules: Add openat, readlinkat. * src/common.h (open_read_flags, fstatat_flags): New global variables. (cachedir_file_p, dump_file, check_exclusion_tags, scan_directory): Adjust to new signatures, described below. (name_fill_directory): Remove. * src/compare.c (diff_file, diff_multivol): Use open_read_flags. * src/create.c (struct exclusion_tag): Exclusion predicates now take a file descriptor, not a file name. (add_exclusion_tag): Likewise. All uses changed. (cachedir_file_p): Likewise. (check_exclusion_tags): The directory is now a file descriptor, not a file name. All uses changed. Use openat for better traversal. (file_dumpable_p): Arg is now a struct stat, not a struct tar_stat_info. All uses changed. Check the arg's file types too. (dump_dir0, dump_dir, dump_file0, dump_file): Omit top_level and parent_device args, since st->parent tells us that now. All uses changed. (dump_dir): Likewise. Also, omit fd arg for similar reasons. Apply fdsavedir to a dup of the file descriptor, since we need a file descriptor for openat etc. as well, and fdsavedir (perhaps unwisely) consumes its file descriptor when successful. Do not consume st->fd when successful; this simplifies the caller. (create_archive): Allocate a file descriptor when retraversing a directory, during incremental dumps. (dump_file0): Use fstatat, openat, and readlinkat for better traversal. When opening a file, use the result of fstat on the file descriptor rather than the fstatat on the directory entry, to avoid some race conditions. No need to reopen the directory since we now no longer close it. Change "did we open the file?" 
test from 0 <= fd to 0 < fd since fd == 0 now represents uninitialized. (dump_file): Now accepts struct tar_stat_info describing parent, not parent_device. Also, accept basename and fullname of entry. All uses changed. * src/incremen.c (update_parent_directory): Accept struct tar_stat_info for parent, not name. All callers changed. Use fstatat for safer directory traversal. (procdir): Accept struct tar_stat_info, not struct stat and dev_t, for info about directory. All callers changed. (scan_directory): Accept struct tar_stat_info, not name, device, and cmdline, for info about directory. All callers changed. Do not consume the file descriptor, since caller might need it. Use fstatat and openat for safer directory traversal; also, use fstat after opening to double-check. (name_fill_directory): Remove. * src/names.c (add_hierarchy_to_namelist): Accept struct tar_stat_info instead of device and cmdline. All callers changed. When descending into a subdirectory, use openat and fstat for safer directory traversal. (collect_and_sort_names): Use open and fstat for safer directory traversal. Set up struct tar_stat_info for callee's new API. * src/tar.c (decode_options): Initialize open_read_flags and fstatat_flags. (tar_stat_destroy): Close st->fd if it is positive (not zero!). * src/tar.h (struct tar_stat_info): New members parent, fd. * src/update.c (update_archive): Adjust to dump_file's API change. * tests/filerem02.at: Ignore stderr since its contents now depend on the file system implementation. --- diff --git a/NEWS b/NEWS index d6712d0..b305f26 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU tar NEWS - User visible changes. 2010-07-16 +GNU tar NEWS - User visible changes. 2010-09-06 Please send GNU tar bug reports to @@ -11,6 +11,14 @@ time stamps to the full resolution. * Bugfixes. +** More reliable directory traversal when creating archives + +Tar now checks for inconsistencies caused when a file system is +modified while tar is creating an archive. 
The new checks are +implemented via the openat, fstatat, and readlinkat calls standardized +by POSIX.1-2008. On an older system that lacks these calls, tar +emulates them at some cost in efficiency and reliability. + ** Spurious error diagnostics on broken pipe. When receiving SIGPIPE, tar would exit with error status and diff --git a/gnulib.modules b/gnulib.modules index 12b788e..6d25072 100644 --- a/gnulib.modules +++ b/gnulib.modules @@ -32,10 +32,12 @@ localcharset mkdtemp modechange obstack +openat priv-set progname quote quotearg +readlinkat rpmatch safe-read save-cwd diff --git a/src/common.h b/src/common.h index ce77005..5be282c 100644 --- a/src/common.h +++ b/src/common.h @@ -357,6 +357,10 @@ struct name GLOBAL dev_t ar_dev; GLOBAL ino_t ar_ino; +/* Flags for reading and fstatatting arbitrary files. */ +GLOBAL int open_read_flags; +GLOBAL int fstatat_flags; + GLOBAL int seek_option; GLOBAL bool seekable_archive; @@ -446,12 +450,13 @@ enum dump_status }; void add_exclusion_tag (const char *name, enum exclusion_tag_type type, - bool (*)(const char*)); -bool cachedir_file_p (const char *name); + bool (*predicate) (int)); +bool cachedir_file_p (int fd); void create_archive (void); void pad_archive (off_t size_left); -void dump_file (const char *st, bool top_level, dev_t parent_device); +void dump_file (struct tar_stat_info *parent, char const *name, + char const *fullname); union block *start_header (struct tar_stat_info *st); void finish_header (struct tar_stat_info *st, union block *header, off_t block_ordinal); @@ -463,7 +468,7 @@ void write_eot (void); void check_links (void); void exclusion_tag_warning (const char *dirname, const char *tagname, const char *message); -enum exclusion_tag_type check_exclusion_tags (const char *dirname, +enum exclusion_tag_type check_exclusion_tags (int dirfd, const char **tag_file_name); #define OFF_TO_CHARS(val, where) off_to_chars (val, where, sizeof (where)) @@ -493,8 +498,7 @@ void delete_archive_members (void); /* 
Module incremen.c. */ -struct directory *scan_directory (char *dir, dev_t device, bool cmdline); -void name_fill_directory (struct name *name, dev_t device, bool cmdline); +struct directory *scan_directory (struct tar_stat_info *st); const char *directory_contents (struct directory *dir); const char *safe_directory_contents (struct directory *dir); @@ -507,7 +511,7 @@ void read_directory_file (void); void write_directory_file (void); void purge_directory (char const *directory_name); void list_dumpdir (char *buffer, size_t size); -void update_parent_directory (const char *name); +void update_parent_directory (struct tar_stat_info *st); size_t dumpdir_size (const char *p); bool is_dumpdir (struct tar_stat_info *stat_info); diff --git a/src/compare.c b/src/compare.c index 2a314da..b74793f 100644 --- a/src/compare.c +++ b/src/compare.c @@ -217,12 +217,7 @@ diff_file (void) } else { - int atime_flag = - (atime_preserve_option == system_atime_preserve - ? O_NOATIME - : 0); - - diff_handle = open (file_name, O_RDONLY | O_BINARY | atime_flag); + diff_handle = open (file_name, open_read_flags); if (diff_handle < 0) { @@ -379,8 +374,7 @@ diff_dumpdir (void) else dev = stat_data.st_dev; - dumpdir_buffer = directory_contents - (scan_directory (current_stat_info.file_name, dev, false)); + dumpdir_buffer = directory_contents (scan_directory (&current_stat_info)); if (dumpdir_buffer) { @@ -422,7 +416,7 @@ diff_multivol (void) return; } - fd = open (current_stat_info.file_name, O_RDONLY | O_BINARY); + fd = open (current_stat_info.file_name, open_read_flags); if (fd < 0) { diff --git a/src/create.c b/src/create.c index 9a4a282..e137325 100644 --- a/src/create.c +++ b/src/create.c @@ -39,7 +39,7 @@ struct exclusion_tag const char *name; size_t length; enum exclusion_tag_type type; - bool (*predicate) (const char *name); + bool (*predicate) (int fd); struct exclusion_tag *next; }; @@ -47,7 +47,7 @@ static struct exclusion_tag *exclusion_tags; void add_exclusion_tag (const char *name, enum
exclusion_tag_type type, - bool (*predicate) (const char *name)) + bool (*predicate) (int fd)) { struct exclusion_tag *tag = xmalloc (sizeof tag[0]); tag->next = exclusion_tags; @@ -72,38 +72,23 @@ exclusion_tag_warning (const char *dirname, const char *tagname, } enum exclusion_tag_type -check_exclusion_tags (const char *dirname, const char **tag_file_name) +check_exclusion_tags (int fd, char const **tag_file_name) { - static char *tagname; - static size_t tagsize; struct exclusion_tag *tag; - size_t dlen = strlen (dirname); - int addslash = !ISSLASH (dirname[dlen-1]); - size_t noff = 0; for (tag = exclusion_tags; tag; tag = tag->next) { - size_t size = dlen + addslash + tag->length + 1; - if (size > tagsize) + int tagfd = openat (fd, tag->name, open_read_flags); + if (0 <= tagfd) { - tagsize = size; - tagname = xrealloc (tagname, tagsize); - } - - if (noff == 0) - { - strcpy (tagname, dirname); - noff = dlen; - if (addslash) - tagname[noff++] = '/'; - } - strcpy (tagname + noff, tag->name); - if (access (tagname, F_OK) == 0 - && (!tag->predicate || tag->predicate (tagname))) - { - if (tag_file_name) - *tag_file_name = tag->name; - return tag->type; + bool satisfied = !tag->predicate || tag->predicate (tagfd); + close (tagfd); + if (satisfied) + { + if (tag_file_name) + *tag_file_name = tag->name; + return tag->type; + } } } @@ -121,22 +106,13 @@ check_exclusion_tags (const char *dirname, const char **tag_file_name) #define CACHEDIR_SIGNATURE_SIZE (sizeof CACHEDIR_SIGNATURE - 1) bool -cachedir_file_p (const char *name) +cachedir_file_p (int fd) { - bool tag_present = false; - int fd = open (name, O_RDONLY); - if (fd >= 0) - { - static char tagbuf[CACHEDIR_SIGNATURE_SIZE]; + char tagbuf[CACHEDIR_SIGNATURE_SIZE]; - if (read (fd, tagbuf, CACHEDIR_SIGNATURE_SIZE) - == CACHEDIR_SIGNATURE_SIZE - && memcmp (tagbuf, CACHEDIR_SIGNATURE, CACHEDIR_SIGNATURE_SIZE) == 0) - tag_present = true; - - close (fd); - } - return tag_present; + return + (read (fd, tagbuf, 
CACHEDIR_SIGNATURE_SIZE) == CACHEDIR_SIGNATURE_SIZE + && memcmp (tagbuf, CACHEDIR_SIGNATURE, CACHEDIR_SIGNATURE_SIZE) == 0); } @@ -482,7 +458,9 @@ string_to_chars (char const *str, char *p, size_t s) } -/* A file is considered dumpable if it is sparse and both --sparse and --totals +/* A directory is always considered dumpable. + Otherwise, only regular and contiguous files are considered dumpable. + Such a file is dumpable if it is sparse and both --sparse and --totals are specified. Otherwise, it is dumpable unless any of the following conditions occur: @@ -490,12 +468,15 @@ string_to_chars (char const *str, char *p, size_t s) b) current archive is /dev/null */ static bool -file_dumpable_p (struct tar_stat_info *st) +file_dumpable_p (struct stat const *st) { + if (S_ISDIR (st->st_mode)) + return true; + if (! (S_ISREG (st->st_mode) || S_ISCTG (st->st_mode))) + return false; if (dev_null_output) - return totals_option && sparse_option && ST_IS_SPARSE (st->stat); - return !(st->archive_file_size == 0 - && (st->stat.st_mode & MODE_R) == MODE_R); + return totals_option && sparse_option && ST_IS_SPARSE (*st); + return ! (st->st_size == 0 && (st->st_mode & MODE_R) == MODE_R); } @@ -1089,11 +1070,13 @@ dump_regular_file (int fd, struct tar_stat_info *st) } +/* Copy info from the directory identified by ST into the archive. + DIRECTORY contains the directory's entries. */ + static void -dump_dir0 (char *directory, - struct tar_stat_info *st, bool top_level, dev_t parent_device) +dump_dir0 (struct tar_stat_info *st, char const *directory) { - dev_t our_device = st->stat.st_dev; + bool top_level = ! 
st->parent; const char *tag_file_name; union block *blk = NULL; off_t block_ordinal = current_block_ordinal (); @@ -1163,7 +1146,7 @@ dump_dir0 (char *directory, if (one_file_system_option && !top_level - && parent_device != st->stat.st_dev) + && st->parent->stat.st_dev != st->stat.st_dev) { if (verbose_option) WARNOPT (WARN_XDEV, @@ -1176,7 +1159,7 @@ dump_dir0 (char *directory, char *name_buf; size_t name_size; - switch (check_exclusion_tags (st->orig_file_name, &tag_file_name)) + switch (check_exclusion_tags (st->fd, &tag_file_name)) { case exclusion_tag_all: /* Handled in dump_file0 */ @@ -1192,7 +1175,6 @@ dump_dir0 (char *directory, name_size = name_len = strlen (name_buf); /* Now output all the files in the directory. */ - /* FIXME: Should speed this up by cd-ing into the dir. */ for (entry = directory; (entry_len = strlen (entry)) != 0; entry += entry_len + 1) { @@ -1203,7 +1185,7 @@ dump_dir0 (char *directory, } strcpy (name_buf + name_len, entry); if (!excluded_name (name_buf)) - dump_file (name_buf, false, our_device); + dump_file (st, entry, name_buf); } free (name_buf); @@ -1217,7 +1199,7 @@ dump_dir0 (char *directory, name_buf = xmalloc (name_size); strcpy (name_buf, st->orig_file_name); strcat (name_buf, tag_file_name); - dump_file (name_buf, false, our_device); + dump_file (st, tag_file_name, name_buf); free (name_buf); break; @@ -1243,17 +1225,27 @@ ensure_slash (char **pstr) } static bool -dump_dir (int fd, struct tar_stat_info *st, bool top_level, - dev_t parent_device) +dump_dir (struct tar_stat_info *st) { - char *directory = fdsavedir (fd); - if (!directory) + char *directory = 0; + int dupfd = dup (st->fd); + if (0 <= dupfd) + { + directory = fdsavedir (dupfd); + if (! directory) + { + int e = errno; + close (dupfd); + errno = e; + } + } + if (! 
directory) { savedir_diag (st->orig_file_name); return false; } - dump_dir0 (directory, st, top_level, parent_device); + dump_dir0 (st, directory); free (directory); return true; @@ -1288,12 +1280,13 @@ create_archive (void) while ((p = name_from_list ()) != NULL) if (!excluded_name (p->name)) - dump_file (p->name, p->cmdline, (dev_t) 0); + dump_file (0, p->name, p->name); blank_name_list (); while ((p = name_from_list ()) != NULL) if (!excluded_name (p->name)) { + struct tar_stat_info st; size_t plen = strlen (p->name); if (buffer_size <= plen) { @@ -1304,6 +1297,7 @@ create_archive (void) memcpy (buffer, p->name, plen); if (! ISSLASH (buffer[plen - 1])) buffer[plen++] = DIRECTORY_SEPARATOR; + tar_stat_init (&st); q = directory_contents (gnu_list_name->directory); if (q) while (*q) @@ -1311,6 +1305,24 @@ create_archive (void) size_t qlen = strlen (q); if (*q == 'Y') { + if (! st.orig_file_name) + { + st.orig_file_name = xstrdup (p->name); + st.fd = open (st.orig_file_name, + ((open_read_flags - O_RDONLY + + O_SEARCH) + | O_DIRECTORY)); + if (st.fd < 0) + { + open_diag (p->name); + break; + } + if (fstat (st.fd, &st.stat) != 0) + { + stat_diag (p->name); + break; + } + } if (buffer_size < plen + qlen) { while ((buffer_size *=2 ) < plen + qlen) @@ -1318,10 +1330,11 @@ create_archive (void) buffer = xrealloc (buffer, buffer_size); } strcpy (buffer + plen, q + 1); - dump_file (buffer, false, (dev_t) 0); + dump_file (&st, q + 1, buffer); } q += qlen + 1; } + tar_stat_destroy (&st); } free (buffer); } @@ -1330,7 +1343,7 @@ create_archive (void) const char *name; while ((name = name_next (1)) != NULL) if (!excluded_name (name)) - dump_file (name, true, (dev_t) 0); + dump_file (0, name, name); } write_eot (); @@ -1479,18 +1492,15 @@ check_links (void) } } -/* Dump a single file, recursing on directories. P is the file name - to dump. 
TOP_LEVEL tells whether this is a top-level call; zero - means no, positive means yes, and negative means the top level - of an incremental dump. PARENT_DEVICE is the device of P's - parent directory; it is examined only if TOP_LEVEL is zero. */ +/* Dump a single file, recursing on directories. ST is the file's + status info, NAME its name relative to the parent directory, and P + its full name (which may be relative to the working directory). */ /* FIXME: One should make sure that for *every* path leading to setting exit_status to failure, a clear diagnostic has been issued. */ static void -dump_file0 (struct tar_stat_info *st, const char *p, - bool top_level, dev_t parent_device) +dump_file0 (struct tar_stat_info *st, char const *name, char const *p) { union block *header; char type; @@ -1498,7 +1508,11 @@ dump_file0 (struct tar_stat_info *st, const char *p, struct timespec original_ctime; struct timespec restore_times[2]; off_t block_ordinal = -1; + int fd = -1; bool is_dir; + bool top_level = ! st->parent; + int parentfd = top_level ? 
AT_FDCWD : st->parent->fd; + void (*diag) (char const *) = 0; if (interactive_option && !confirm ("add", p)) return; @@ -1509,11 +1523,22 @@ dump_file0 (struct tar_stat_info *st, const char *p, transform_name (&st->file_name, XFORM_REGFILE); - if (deref_stat (dereference_option, p, &st->stat) != 0) + if (fstatat (parentfd, name, &st->stat, fstatat_flags) != 0) + diag = stat_diag; + else if (file_dumpable_p (&st->stat)) { - file_removed_diag (p, top_level, stat_diag); + fd = st->fd = openat (parentfd, name, open_read_flags); + if (fd < 0) + diag = open_diag; + else if (fstat (fd, &st->stat) != 0) + diag = stat_diag; + } + if (diag) + { + file_removed_diag (p, top_level, diag); return; } + st->archive_file_size = original_size = st->stat.st_size; st->atime = restore_times[0] = get_stat_atime (&st->stat); st->mtime = restore_times[1] = get_stat_mtime (&st->stat); @@ -1567,45 +1592,22 @@ dump_file0 (struct tar_stat_info *st, const char *p, if (is_dir || S_ISREG (st->stat.st_mode) || S_ISCTG (st->stat.st_mode)) { bool ok; - int fd = -1; struct stat final_stat; - if (is_dir || file_dumpable_p (st)) - { - fd = open (p, - (O_RDONLY | O_BINARY - | (is_dir ? O_DIRECTORY | O_NONBLOCK : 0) - | (atime_preserve_option == system_atime_preserve - ? O_NOATIME - : 0))); - if (fd < 0) - { - file_removed_diag (p, top_level, open_diag); - return; - } - } - if (is_dir) { const char *tag_file_name; ensure_slash (&st->orig_file_name); ensure_slash (&st->file_name); - if (check_exclusion_tags (st->orig_file_name, &tag_file_name) - == exclusion_tag_all) + if (check_exclusion_tags (fd, &tag_file_name) == exclusion_tag_all) { exclusion_tag_warning (st->orig_file_name, tag_file_name, _("directory not dumped")); - if (fd >= 0) - close (fd); return; } - ok = dump_dir (fd, st, top_level, parent_device); - - /* dump_dir consumes FD if successful. 
*/ - if (ok) - fd = -1; + ok = dump_dir (st); } else { @@ -1639,15 +1641,8 @@ dump_file0 (struct tar_stat_info *st, const char *p, if (ok) { - /* If possible, reopen a directory if we are preserving - atimes, so that we can set just the atime on systems with - _FIOSATIME. */ - if (fd < 0 && is_dir - && atime_preserve_option == replace_atime_preserve) - fd = open (p, O_RDONLY | O_BINARY | O_DIRECTORY | O_NONBLOCK); - if ((fd < 0 - ? deref_stat (dereference_option, p, &final_stat) + ? fstatat (parentfd, name, &final_stat, fstatat_flags) : fstat (fd, &final_stat)) != 0) { @@ -1674,10 +1669,14 @@ dump_file0 (struct tar_stat_info *st, const char *p, utime_error (p); } - if (0 <= fd && close (fd) != 0) + if (0 < fd) { - close_diag (p); - ok = false; + if (close (fd) != 0) + { + close_diag (p); + ok = false; + } + st->fd = 0; } if (ok && remove_files_option) @@ -1694,7 +1693,7 @@ dump_file0 (struct tar_stat_info *st, const char *p, if (linklen != st->stat.st_size || linklen + 1 == 0) xalloc_die (); buffer = (char *) alloca (linklen + 1); - size = readlink (p, buffer, linklen + 1); + size = readlinkat (parentfd, name, buffer, linklen + 1); if (size < 0) { file_removed_diag (p, top_level, readlink_diag); @@ -1773,13 +1772,20 @@ dump_file0 (struct tar_stat_info *st, const char *p, queue_deferred_unlink (p, false); } +/* Dump a file, recursively. PARENT describes the file's parent + directory, NAME is the file's name relative to PARENT, and FULLNAME + its full name, possibly relative to the working directory. NAME + may contain slashes at the top level of invocation. 
*/ + void -dump_file (const char *p, bool top_level, dev_t parent_device) +dump_file (struct tar_stat_info *parent, char const *name, + char const *fullname) { struct tar_stat_info st; tar_stat_init (&st); - dump_file0 (&st, p, top_level, parent_device); - if (listed_incremental_option) - update_parent_directory (p); + st.parent = parent; + dump_file0 (&st, name, fullname); + if (parent && listed_incremental_option) + update_parent_directory (parent); tar_stat_destroy (&st); } diff --git a/src/incremen.c b/src/incremen.c index 96d0e40..afd19af 100644 --- a/src/incremen.c +++ b/src/incremen.c @@ -402,26 +402,17 @@ find_directory_meta (dev_t dev, ino_t ino) } void -update_parent_directory (const char *name) +update_parent_directory (struct tar_stat_info *parent) { - struct directory *directory; - char *p; - - p = dir_name (name); - directory = find_directory (p); + struct directory *directory = find_directory (parent->orig_file_name); if (directory) { struct stat st; - if (deref_stat (dereference_option, p, &st) != 0) - { - if (errno != ENOENT) - stat_diag (directory->name); - /* else: should have been already reported */ - } + if (fstatat (parent->fd, ".", &st, fstatat_flags) != 0) + stat_diag (directory->name); else directory->mtime = get_stat_mtime (&st); } - free (p); } #define PD_FORCE_CHILDREN 0x10 @@ -429,12 +420,14 @@ update_parent_directory (const char *name) #define PD_CHILDREN(f) ((f) & 3) static struct directory * -procdir (const char *name_buffer, struct stat *stat_data, - dev_t device, +procdir (const char *name_buffer, struct tar_stat_info *st, int flag, char *entry) { struct directory *directory; + struct stat *stat_data = &st->stat; + int fd = st->fd; + dev_t device = st->parent ? 
st->parent->stat.st_dev : 0; bool nfs = NFS_FILE_STAT (*stat_data); if ((directory = find_directory (name_buffer)) != NULL) @@ -573,7 +566,7 @@ procdir (const char *name_buffer, struct stat *stat_data, { const char *tag_file_name; - switch (check_exclusion_tags (name_buffer, &tag_file_name)) + switch (check_exclusion_tags (fd, &tag_file_name)) { case exclusion_tag_all: /* This warning can be duplicated by code in dump_file0, but only @@ -682,37 +675,39 @@ makedumpdir (struct directory *directory, const char *dir) free (array); } -/* Recursively scan the given directory DIR. - DEVICE is the device number where DIR resides (for --one-file-system). - If CMDLINE is true, the directory name was explicitly listed in the - command line. - Unless *PDIR is NULL, store there a pointer to the struct directory - describing DIR. */ +/* Recursively scan the directory identified by ST. */ struct directory * -scan_directory (char *dir, dev_t device, bool cmdline) +scan_directory (struct tar_stat_info *st) { - char *dirp = savedir (dir); /* for scanning directory */ + char const *dir = st->orig_file_name; + int fd = st->fd; + char *dirp = 0; + dev_t device = st->stat.st_dev; + bool cmdline = ! st->parent; namebuf_t nbuf; char *tmp; - struct stat stat_data; struct directory *directory; char ch; + int dupfd = dup (fd); + if (0 <= dupfd) + { + dirp = fdsavedir (dupfd); + if (! dirp) + { + int e = errno; + close (dupfd); + errno = e; + } + } + if (! dirp) savedir_error (dir); tmp = xstrdup (dir); zap_slashes (tmp); - if (deref_stat (dereference_option, tmp, &stat_data)) - { - dir_removed_diag (tmp, cmdline, stat_diag); - free (tmp); - free (dirp); - return NULL; - } - - directory = procdir (tmp, &stat_data, device, + directory = procdir (tmp, st, (cmdline ? 
PD_FORCE_INIT : 0), &ch); @@ -739,14 +734,27 @@ scan_directory (char *dir, dev_t device, bool cmdline) *entry = 'N'; else { - if (deref_stat (dereference_option, full_name, &stat_data)) + void (*diag) (char const *) = 0; + struct tar_stat_info stsub; + tar_stat_init (&stsub); + + if (fstatat (fd, entry + 1, &stsub.stat, fstatat_flags) != 0) + diag = stat_diag; + else if (S_ISDIR (stsub.stat.st_mode)) { - file_removed_diag (full_name, false, stat_diag); - *entry = 'N'; - continue; + stsub.fd = openat (fd, entry + 1, open_read_flags); + if (stsub.fd < 0) + diag = open_diag; + else if (fstat (stsub.fd, &stsub.stat) != 0) + diag = stat_diag; } - if (S_ISDIR (stat_data.st_mode)) + if (diag) + { + file_removed_diag (full_name, false, diag); + *entry = 'N'; + } + else if (S_ISDIR (stsub.stat.st_mode)) { int pd_flag = 0; if (!recursion_option) @@ -754,23 +762,21 @@ scan_directory (char *dir, dev_t device, bool cmdline) else if (directory->children == ALL_CHILDREN) pd_flag |= PD_FORCE_CHILDREN | ALL_CHILDREN; *entry = 'D'; - procdir (full_name, &stat_data, device, pd_flag, entry); + procdir (full_name, &stsub, pd_flag, entry); } - - else if (one_file_system_option && device != stat_data.st_dev) + else if (one_file_system_option && device != stsub.stat.st_dev) *entry = 'N'; - else if (*entry == 'Y') /* New entry, skip further checks */; - /* FIXME: if (S_ISHIDDEN (stat_data.st_mode))?? */ - - else if (OLDER_STAT_TIME (stat_data, m) + else if (OLDER_STAT_TIME (stsub.stat, m) && (!after_date_option - || OLDER_STAT_TIME (stat_data, c))) + || OLDER_STAT_TIME (stsub.stat, c))) *entry = 'N'; else *entry = 'Y'; + + tar_stat_destroy (&stsub); } } free (itr); @@ -801,12 +807,6 @@ safe_directory_contents (struct directory *dir) return ret ? 
ret : "\0\0\0\0"; } -void -name_fill_directory (struct name *name, dev_t device, bool cmdline) -{ - name->directory = scan_directory (name->name, device, cmdline); -} - static void obstack_code_rename (struct obstack *stk, char const *from, char const *to) diff --git a/src/names.c b/src/names.c index d2f14b0..2fc751d 100644 --- a/src/names.c +++ b/src/names.c @@ -781,17 +781,15 @@ compare_names (struct name const *n1, struct name const *n2) } -/* Add all the dirs under NAME, which names a directory, to the namelist. - If any of the files is a directory, recurse on the subdirectory. - DEVICE is the device not to leave, if the -l option is specified. - CMDLINE is true, if the NAME appeared on the command line. */ +/* Add all the dirs under ST to the namelist NAME, descending the + directory hierarchy recursively. */ static void -add_hierarchy_to_namelist (struct name *name, dev_t device, bool cmdline) +add_hierarchy_to_namelist (struct tar_stat_info *st, struct name *name) { const char *buffer; - name_fill_directory (name, device, cmdline); + name->directory = scan_directory (st); buffer = directory_contents (name->directory); if (buffer) { @@ -819,6 +817,7 @@ add_hierarchy_to_namelist (struct name *name, dev_t device, bool cmdline) if (*string == 'D') { struct name *np; + struct tar_stat_info subdir; if (allocated_length <= name_length + string_length) { @@ -839,7 +838,27 @@ add_hierarchy_to_namelist (struct name *name, dev_t device, bool cmdline) else child_tail->sibling = np; child_tail = np; - add_hierarchy_to_namelist (np, device, false); + + tar_stat_init (&subdir); + subdir.parent = st; + subdir.fd = openat (st->fd, string + 1, + open_read_flags | O_DIRECTORY); + if (subdir.fd < 0) + open_diag (namebuf); + else if (fstat (subdir.fd, &subdir.stat) != 0) + stat_diag (namebuf); + else if (! 
(O_DIRECTORY || S_ISDIR (subdir.stat.st_mode))) + { + errno = ENOTDIR; + open_diag (namebuf); + } + else + { + subdir.orig_file_name = xstrdup (namebuf); + add_hierarchy_to_namelist (&subdir, np); + } + + tar_stat_destroy (&subdir); } } @@ -902,7 +921,6 @@ collect_and_sort_names (void) struct name *name; struct name *next_name, *prev_name = NULL; int num_names; - struct stat statbuf; Hash_table *nametab; name_gather (); @@ -936,6 +954,8 @@ collect_and_sort_names (void) num_names = 0; for (name = namelist; name; name = name->next, num_names++) { + struct tar_stat_info st; + if (name->found_count || name->directory) continue; if (name->matching_flags & EXCLUDE_WILDCARDS) @@ -947,16 +967,29 @@ collect_and_sort_names (void) if (name->name[0] == 0) continue; - if (deref_stat (dereference_option, name->name, &statbuf) != 0) + tar_stat_init (&st); + + if (deref_stat (dereference_option, name->name, &st.stat) != 0) { stat_diag (name->name); continue; } - if (S_ISDIR (statbuf.st_mode)) + if (S_ISDIR (st.stat.st_mode)) { - name->found_count++; - add_hierarchy_to_namelist (name, statbuf.st_dev, true); + st.fd = open (name->name, open_read_flags | O_DIRECTORY); + if (st.fd < 0) + open_diag (name->name); + else if (fstat (st.fd, &st.stat) != 0) + stat_diag (name->name); + else if (O_DIRECTORY || S_ISDIR (st.stat.st_mode)) + { + st.orig_file_name = xstrdup (name->name); + name->found_count++; + add_hierarchy_to_namelist (&st, name); + } } + + tar_stat_destroy (&st); } namelist = merge_sort (namelist, num_names, compare_names); diff --git a/src/tar.c b/src/tar.c index 782ed3b..ab53cec 100644 --- a/src/tar.c +++ b/src/tar.c @@ -2465,6 +2465,14 @@ decode_options (int argc, char **argv) if (recursive_unlink_option) old_files_option = UNLINK_FIRST_OLD_FILES; + /* Flags for accessing files to be copied into. POSIX says + O_NONBLOCK has unspecified effect on most types of files, but in + practice it never harms and sometimes helps. 
*/ + open_read_flags = + (O_RDONLY | O_BINARY | O_NOCTTY | O_NONBLOCK + | (dereference_option ? 0 : O_NOFOLLOW) + | (atime_preserve_option == system_atime_preserve ? O_NOATIME : 0)); + fstatat_flags = dereference_option ? 0 : AT_SYMLINK_NOFOLLOW; if (subcommand_option == TEST_LABEL_SUBCOMMAND) { @@ -2686,6 +2694,8 @@ tar_stat_destroy (struct tar_stat_info *st) free (st->gname); free (st->sparse_map); free (st->dumpdir); + if (0 < st->fd) + close (st->fd); xheader_destroy (&st->xhdr); memset (st, 0, sizeof (*st)); } diff --git a/src/tar.h b/src/tar.h index fddc83f..c35ba5d 100644 --- a/src/tar.h +++ b/src/tar.h @@ -317,6 +317,18 @@ struct tar_stat_info bool skipped; /* The member contents is already read (for GNUTYPE_DUMPDIR) */ char *dumpdir; /* Contents of the dump directory */ + + /* Parent directory, if creating an archive. This is null if the + file is at the top level. */ + struct tar_stat_info *parent; + + /* File descriptor, if creating an archive, and if a directory or a + regular file or a contiguous file. This is AT_FDCWD if it is the + working directory, which is possible only for a dummy parent node + just above the top level. It may be -1 if the file could not be + opened. Zero represents an otherwise-uninitialized value; + standard input is never used here. 
*/ + int fd; }; union block diff --git a/src/update.c b/src/update.c index 73b9d53..53ce553 100644 --- a/src/update.c +++ b/src/update.c @@ -224,7 +224,7 @@ update_archive (void) if (subcommand_option == CAT_SUBCOMMAND) append_file (file_name); else - dump_file (file_name, 1, (dev_t) 0); + dump_file (0, file_name, file_name); } } diff --git a/tests/filerem02.at b/tests/filerem02.at index 2390298..ed444ad 100644 --- a/tests/filerem02.at +++ b/tests/filerem02.at @@ -39,10 +39,10 @@ genfile --run --checkpoint=3 --exec 'rm -rf dir2' -- \ ], [2], [ignore], -[tar: dir2: Cannot stat: No such file or directory -tar: dir2/file1: File removed before we read it -tar: Exiting with failure status due to previous errors -],[],[],[gnu, posix]) +[ignore],[],[],[gnu, posix]) + +# Ignore stdout and stderr because their contents depend on +# the file system implementation. # Timing information: see filerem01.at