From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 24 Oct 2000 06:18:37 +0000 (+0000)
Subject: Include hash.h.
X-Git-Url: https://git.brokenzipper.com/gitweb?a=commitdiff_plain;h=8111f9d34df33cb06072275b2ec7dd10ef119d08;p=chaz%2Ftar

Include hash.h.
(gnu_list_name): Remove decl.
(struct link): Remove "next" member.
(linklist): Remove.
(start_header): Say "leading `FOO'" rather than "`FOO' prefix" for consistency
with other diagnostics.
(deal_with_sparse): Check for I/O error when closing the file.
(create_archive):
Do not allocate an array of size PATH_MAX, as PATH_MAX might be (size_t) -1.
Instead, allocate an array with the size that's needed.
(hash_link, compare_links): New functions.
(dump_file): Do not exhaust open file descriptors when descending deeply
into a directory, by using savedir rather than opendir/readdir.
Do not zero-fill the name buffer unnecessarily.
Hash the set of links already created, instead of using a linked list.
Fix some bugs in outputting sparse files which caused the sparse tables
to be incorrect.
When a file unexpectedly shrinks, output zeros rather than garbage.
Do not allocate an array of size PATH_MAX, as PATH_MAX might be (size_t) -1.
Instead, allocate an array with the size that's needed.
---

diff --git a/src/create.c b/src/create.c
index 6f38836..30fe90b 100644
--- a/src/create.c
+++ b/src/create.c
@@ -36,25 +36,19 @@ struct utimbuf
 #include <quotearg.h>
 
 #include "common.h"
+#include <hash.h>
 
 #ifndef MSDOS
 extern dev_t ar_dev;
 extern ino_t ar_ino;
 #endif
 
-extern struct name *gnu_list_name;
-
-/* This module is the only one that cares about `struct link's.  */
-
 struct link
   {
-    struct link *next;
     dev_t dev;
     ino_t ino;
     char name[1];
   };
-
-static struct link *linklist;	/* points to first link in list */
 
 /* The maximum uintmax_t value that can be represented with DIGITS digits,
    assuming that each digit is BITS_PER_DIGIT wide.  */
@@ -335,23 +329,17 @@ uintmax_to_chars (uintmax_t v, char *p, size_t s)
 
 /* Writing routines.  */
 
-/*-----------------------------------------------------------------------.
-| Just zeroes out the buffer so we don't confuse ourselves with leftover |
-| data.									 |
-`-----------------------------------------------------------------------*/
-
+/* Zero out the buffer so we don't confuse ourselves with leftover
+   data.  */
 static void
 clear_buffer (char *buffer)
 {
   memset (buffer, 0, BLOCKSIZE);
 }
 
-/*-------------------------------------------------------------------------.
-| Write the EOT block(s).  We zero at least two blocks, through		   |
-| the end of the record.  Old tar, as previous versions of GNU tar, writes |
-| garbage after two zeroed blocks.					   |
-`-------------------------------------------------------------------------*/
-
+/* Write the EOT block(s).  Zero at least two blocks, through the end
+   of the record.  Old tar, as previous versions of GNU tar, writes
+   garbage after two zeroed blocks.  */
 void
 write_eot (void)
 {
@@ -363,9 +351,7 @@ write_eot (void)
   set_next_block_after (pointer);
 }
 
-/*-----------------------------------------------------.
-| Write a GNUTYPE_LONGLINK or GNUTYPE_LONGNAME block.  |
-`-----------------------------------------------------*/
+/* Write a GNUTYPE_LONGLINK or GNUTYPE_LONGNAME block.  */
 
 /* FIXME: Cross recursion between start_header and write_long!  */
 
@@ -424,7 +410,7 @@ start_header (const char *name, struct stat *st)
 	  if (!warned_once)
 	    {
 	      warned_once = 1;
-	      WARN ((0, 0, _("Removing `%.*s' prefix from member names"),
+	      WARN ((0, 0, _("Removing leading `%.*s' from member names"),
 		     (int) prefix_len, name));
 	    }
 	  name += prefix_len;
@@ -546,11 +532,8 @@ start_header (const char *name, struct stat *st)
   return header;
 }
 
-/*-------------------------------------------------------------------------.
-| Finish off a filled-in header block and write it out.  We also print the |
-| file name and/or full info if verbose is on.				   |
-`-------------------------------------------------------------------------*/
-
+/* Finish off a filled-in header block and write it out.  We also
+   print the file name and/or full info if verbose is on.  */
 void
 finish_header (union block *header)
 {
@@ -595,12 +578,9 @@ finish_header (union block *header)
 
 /* Sparse file processing.  */
 
-/*-------------------------------------------------------------------------.
-| Takes a blockful of data and basically cruises through it to see if it's |
-| made *entirely* of zeros, returning a 0 the instant it finds something   |
-| that is a nonzero, i.e., useful data.					   |
-`-------------------------------------------------------------------------*/
-
+/* Takes a blockful of data and basically cruises through it to see if
+   it's made *entirely* of zeros, returning a 0 the instant it finds
+   something that is a nonzero, i.e., useful data.  */
 static int
 zero_block_p (char *buffer)
 {
@@ -612,10 +592,6 @@ zero_block_p (char *buffer)
   return 1;
 }
 
-/*---.
-| ?  |
-`---*/
-
 static void
 init_sparsearray (void)
 {
@@ -633,10 +609,6 @@ init_sparsearray (void)
     }
 }
 
-/*---.
-| ?  |
-`---*/
-
 static off_t
 find_new_file_size (int sparses)
 {
@@ -647,12 +619,11 @@ find_new_file_size (int sparses)
   return s;
 }
 
-/*-----------------------------------------------------------------------.
-| Make one pass over the file NAME, studying where any non-zero data is, |
-| that is, how far into the file each instance of data is, and how many  |
-| bytes are there.  Save this information in the sparsearray, which will |
-| later be translated into header information.                           |
-`-----------------------------------------------------------------------*/
+/* Make one pass over the file NAME, studying where any non-zero data
+   is, that is, how far into the file each instance of data is, and
+   how many bytes are there.  Save this information in the
+   sparsearray, which will later be translated into header
+   information.  */
 
 /* There is little point in trimming small amounts of null data at the head
    and tail of blocks, only avoid dumping full null blocks.  */
@@ -747,14 +718,9 @@ deal_with_sparse (char *name, union block *header)
       sparsearray[sparses++].numbytes = 1;
     }
 
-  close (file);
-  return count < 0 ? 0 : sparses;
+  return close (file) == 0 && 0 <= count ? sparses : 0;
 }
 
-/*---.
-| ?  |
-`---*/
-
 static int
 finish_sparse_file (int file, off_t *sizeleft, off_t fullsize, char *name)
 {
@@ -828,10 +794,6 @@ finish_sparse_file (int file, off_t *sizeleft, off_t fullsize, char *name)
 
 /* Main functions of this module.  */
 
-/*---.
-| ?  |
-`---*/
-
 void
 create_archive (void)
 {
@@ -841,9 +803,9 @@ create_archive (void)
 
   if (incremental_option)
     {
-      char *buffer = xmalloc (PATH_MAX);
+      size_t buffer_size = 1000;
+      char *buffer = xmalloc (buffer_size);
       const char *q;
-      char *bufp;
 
       collect_and_sort_names ();
 
@@ -855,18 +817,34 @@ create_archive (void)
       while (p = name_from_list (), p)
 	if (!excluded_name (p))
 	  {
-	    strcpy (buffer, p);
-	    if (p[strlen (p) - 1] != '/')
-	      strcat (buffer, "/");
-	    bufp = buffer + strlen (buffer);
+	    size_t plen = strlen (p);
+	    if (buffer_size <= plen)
+	      {
+		while ((buffer_size *= 2) <= plen)
+		  continue;
+		buffer = xrealloc (buffer, buffer_size);
+	      }
+	    memcpy (buffer, p, plen);
+	    if (buffer[plen - 1] != '/')
+	      buffer[plen++] = '/';
 	    q = gnu_list_name->dir_contents;
 	    if (q)
-	      for (; *q; q += strlen (q) + 1)
-		if (*q == 'Y')
-		  {
-		    strcpy (bufp, q + 1);
-		    dump_file (buffer, -1, (dev_t) 0);
-		  }
+	      while (*q)
+		{
+		  size_t qlen = strlen (q);
+		  if (*q == 'Y')
+		    {
+		      if (buffer_size < plen + qlen)
+			{
+			  while ((buffer_size *=2 ) < plen + qlen)
+			    continue;
+			  buffer = xrealloc (buffer, buffer_size);
+			}
+		      strcpy (buffer + plen, q + 1);
+		      dump_file (buffer, -1, (dev_t) 0);
+		    }
+		  q += qlen + 1;
+		}
 	  }
       free (buffer);
     }
@@ -884,6 +862,24 @@ create_archive (void)
     write_directory_file ();
 }
 
+
+/* Calculate the hash of a link.  */
+static unsigned
+hash_link (void const *entry, unsigned n_buckets)
+{
+  struct link const *link = entry;
+  return (uintmax_t) (link->dev ^ link->ino) % n_buckets;
+}
+
+/* Compare two links for equality.  */
+static bool
+compare_links (void const *entry1, void const *entry2)
+{
+  struct link const *link1 = entry1;
+  struct link const *link2 = entry2;
+  return ((link1->dev ^ link2->dev) | (link1->ino ^ link2->ino)) == 0;
+}
+
 /* Dump a single file, recursing on directories.  P is the file name
    to dump.  TOP_LEVEL tells whether this is a top-level call; zero
    means no, positive means yes, and negative means an incremental
@@ -905,7 +901,7 @@ dump_file (char *p, int top_level, dev_t parent_device)
   time_t original_ctime;
   struct utimbuf restore_times;
 
-  /* FIXME: `header' and `sparses' might be used uninitialized in this
+  /* FIXME: `header' might be used uninitialized in this
      function.  Reported by Bruno Haible.  */
 
   if (interactive_option && !confirm ("add", p))
@@ -965,8 +961,9 @@ dump_file (char *p, int top_level, dev_t parent_device)
 
   if (S_ISDIR (current_stat.st_mode))
     {
-      DIR *directory;
-      struct dirent *entry;
+      char *directory;
+      char const *entry;
+      size_t entrylen;
       char *namebuf;
       size_t buflen;
       size_t len;
@@ -974,13 +971,13 @@ dump_file (char *p, int top_level, dev_t parent_device)
 
       errno = 0;
 
-      directory = opendir (p);
+      directory = savedir (p, current_stat.st_size);
       if (! directory)
 	{
 	  if (ignore_failed_read_option)
-	    opendir_warn (p);
+	    savedir_warn (p);
 	  else
-	    opendir_error (p);
+	    savedir_error (p);
 	  return;
 	}
 
@@ -989,7 +986,7 @@ dump_file (char *p, int top_level, dev_t parent_device)
       len = strlen (p);
       buflen = len + NAME_FIELD_SIZE;
       namebuf = xmalloc (buflen + 1);
-      strncpy (namebuf, p, buflen);
+      memcpy (namebuf, p, len);
       while (len >= 1 && namebuf[len - 1] == '/')
 	len--;
       namebuf[len++] = '/';
@@ -1105,40 +1102,23 @@ dump_file (char *p, int top_level, dev_t parent_device)
 
       /* FIXME: Should speed this up by cd-ing into the dir.  */
 
-      while (errno = 0, (entry = readdir (directory)))
+      for (entry = directory;
+	   (entrylen = strlen (entry)) != 0;
+	   entry += entrylen + 1)
 	{
-	  /* Skip `.', `..', and excluded file names.  */
-
-	  if (is_dot_or_dotdot (entry->d_name))
-	    continue;
-
-	  if ((int) NAMLEN (entry) + len >= buflen)
+	  if (buflen <= len + entrylen)
 	    {
-	      buflen = len + NAMLEN (entry);
+	      buflen = len + entrylen;
 	      namebuf = xrealloc (namebuf, buflen + 1);
 	    }
-	  strcpy (namebuf + len, entry->d_name);
+	  strcpy (namebuf + len, entry);
 	  if (!excluded_name (namebuf))
 	    dump_file (namebuf, 0, our_device);
 	}
 
-      if (errno)
-	{
-	  if (ignore_failed_read_option)
-	    readdir_warn (p);
-	  else
-	    readdir_error (p);
-	}
-
     finish_dir:
-      if (closedir (directory) != 0)
-	{
-	  if (ignore_failed_read_option)
-	    closedir_warn (p);
-	  else
-	    closedir_error (p);
-	}
 
+      free (directory);
       free (namebuf);
       if (atime_preserve_option)
 	utime (p, &restore_times);
@@ -1150,25 +1130,34 @@ dump_file (char *p, int top_level, dev_t parent_device)
     {
       /* Check for multiple links.
 
-	 We maintain a list of all such files that we've written so far.  Any
-	 time we see another, we check the list and avoid dumping the data
-	 again if we've done it once already.  */
+	 We maintain a table of all such files that we've written so
+	 far.  Any time we see another, we check the table and avoid
+	 dumping the data again if we've done it once already.  */
 
       if (1 < current_stat.st_nlink)
 	{
-	  struct link *lp;
+	  static Hash_table *link_table;
+	  struct link *lp = xmalloc (sizeof *lp + strlen (p));
+	  struct link *dup;
+	  lp->ino = current_stat.st_ino;
+	  lp->dev = current_stat.st_dev;
+	  strcpy (lp->name, p);
 
-	  /* FIXME: First quick and dirty.  Hashing, etc later.  */
+	  if (! ((link_table
+		  || (link_table = hash_initialize (0, 0, hash_link,
+						    compare_links, 0)))
+		 && (dup = hash_insert (link_table, lp))))
+	    xalloc_die ();
 
-	  for (lp = linklist; lp; lp = lp->next)
-	    if (lp->ino == current_stat.st_ino
-		&& lp->dev == current_stat.st_dev)
-	      {
-		char *link_name = lp->name;
+	  if (dup != lp)
+	    {
+	      /* We found a link.  */
+	      char const *link_name = dup->name;
 
-		/* We found a link.  */
+	      free (lp);
 
-		while (!absolute_names_option && *link_name == '/')
+	      if (! absolute_names_option)
+		for (; *link_name == '/'; link_name++)
 		  {
 		    static int warned_once;
 		    if (!warned_once)
@@ -1177,42 +1166,30 @@ dump_file (char *p, int top_level, dev_t parent_device)
 			WARN ((0, 0,
 			       _("Removing leading `/' from link names")));
 		      }
-		    link_name++;
 		  }
-		if (strlen (link_name) >= NAME_FIELD_SIZE)
-		  write_long (link_name, GNUTYPE_LONGLINK);
-		assign_string (&current_link_name, link_name);
-
-		current_stat.st_size = 0;
-		header = start_header (p, &current_stat);
-		strncpy (header->header.linkname,
-			 link_name, NAME_FIELD_SIZE);
 
-		/* Force null truncated.  */
+	      if (NAME_FIELD_SIZE <= strlen (link_name))
+		write_long (link_name, GNUTYPE_LONGLINK);
+	      assign_string (&current_link_name, link_name);
 
-		header->header.linkname[NAME_FIELD_SIZE - 1] = 0;
+	      current_stat.st_size = 0;
+	      header = start_header (p, &current_stat);
+	      strncpy (header->header.linkname, link_name, NAME_FIELD_SIZE);
 
-		header->header.typeflag = LNKTYPE;
-		finish_header (header);
+	      /* Force null termination.  */
+	      header->header.linkname[NAME_FIELD_SIZE - 1] = 0;
 
-		/* FIXME: Maybe remove from list after all links found?  */
+	      header->header.typeflag = LNKTYPE;
+	      finish_header (header);
 
-		if (remove_files_option)
-		  if (unlink (p) == -1)
-		    unlink_error (p);
+	      /* FIXME: Maybe remove from table after all links found?  */
 
-		/* We dumped it.  */
-		return;
-	      }
-
-	  /* Not found.  Add it to the list of possible links.  */
+	      if (remove_files_option && unlink (p) != 0)
+		unlink_error (p);
 
-	  lp = xmalloc (sizeof (struct link) + strlen (p));
-	  lp->ino = current_stat.st_ino;
-	  lp->dev = current_stat.st_dev;
-	  strcpy (lp->name, p);
-	  lp->next = linklist;
-	  linklist = lp;
+	      /* We dumped it.  */
+	      return;
+	    }
 	}
 
       /* This is not a link to a previously dumped file, so dump it.  */
@@ -1227,7 +1204,7 @@ dump_file (char *p, int top_level, dev_t parent_device)
 	  union block *start;
 	  int header_moved;
 	  char isextended = 0;
-	  int sparses;
+	  int sparses = 0;
 
 	  header_moved = 0;
 
@@ -1312,8 +1289,6 @@ dump_file (char *p, int top_level, dev_t parent_device)
 		    }
 		}
 	    }
-	  else
-	    sparses = SPARSES_IN_OLDGNU_HEADER;
 
 	  sizeleft = current_stat.st_size;
 
@@ -1353,30 +1328,29 @@ dump_file (char *p, int top_level, dev_t parent_device)
 	  finish_header (header);
 	  if (isextended)
 	    {
-	      int counter;
-	      /* static */ int index_offset = SPARSES_IN_OLDGNU_HEADER;
+	      int sparses_emitted = SPARSES_IN_OLDGNU_HEADER;
 
-	    extend:
-	      exhdr = find_next_block ();
-	      memset (exhdr->buffer, 0, BLOCKSIZE);
-	      for (counter = 0; counter < SPARSES_IN_SPARSE_HEADER; counter++)
+	      for (;;)
 		{
-		  if (sparses <= counter + index_offset)
+		  int i;
+		  exhdr = find_next_block ();
+		  memset (exhdr->buffer, 0, BLOCKSIZE);
+		  for (i = 0;
+		       (i < SPARSES_IN_SPARSE_HEADER
+			&& sparses_emitted + i < sparses);
+		       i++)
+		    {
+		      SIZE_TO_CHARS (sparsearray[sparses_emitted + i].numbytes,
+				     exhdr->sparse_header.sp[i].numbytes);
+		      OFF_TO_CHARS (sparsearray[sparses_emitted + i].offset,
+				    exhdr->sparse_header.sp[i].offset);
+		    }
+		  set_next_block_after (exhdr);
+		  sparses_emitted += i;
+		  if (sparses == sparses_emitted)
 		    break;
-
-		  SIZE_TO_CHARS (sparsearray[counter + index_offset].numbytes,
-				 exhdr->sparse_header.sp[counter].numbytes);
-		  OFF_TO_CHARS (sparsearray[counter + index_offset].offset,
-				exhdr->sparse_header.sp[counter].offset);
-		}
-	      set_next_block_after (exhdr);
-	      if (counter + index_offset < sparses)
-		{
-		  index_offset += counter;
 		  exhdr->sparse_header.isextended = 1;
-		  goto extend;
 		}
-
 	    }
 	  if (save_typeflag == GNUTYPE_SPARSE)
 	    {
@@ -1419,17 +1393,17 @@ dump_file (char *p, int top_level, dev_t parent_device)
 		      (p, current_stat.st_size - sizeleft, bufsize);
 		    goto padit;
 		  }
-		sizeleft -= count;
+		sizeleft -= bufsize;
 
 		/* This is nonportable (the type of set_next_block_after's arg).  */
 
-		set_next_block_after (start + (count - 1) / BLOCKSIZE);
+		set_next_block_after (start + (bufsize - 1) / BLOCKSIZE);
 
-		if (count == bufsize)
-		  continue;
-		else
+
+		if (count != bufsize)
 		  {
 		    char buf[UINTMAX_STRSIZE_BOUND];
+		    memset (start->buffer + count, 0, bufsize - count);
 		    WARN ((0, 0,
 			   _("%s: File shrank by %s bytes; padding with zeros"),
 			   quotearg_colon (p),
@@ -1502,10 +1476,13 @@ dump_file (char *p, int top_level, dev_t parent_device)
 #ifdef HAVE_READLINK
       else if (S_ISLNK (current_stat.st_mode))
 	{
+	  char *buffer;
 	  int size;
-	  char *buffer = (char *) alloca (PATH_MAX + 1);
-
-	  size = readlink (p, buffer, PATH_MAX + 1);
+	  size_t linklen = current_stat.st_size;
+	  if (linklen != current_stat.st_size || linklen + 1 == 0)
+	    xalloc_die ();
+	  buffer = (char *) alloca (linklen + 1);
+	  size = readlink (p, buffer, linklen + 1);
 	  if (size < 0)
 	    {
 	      if (ignore_failed_read_option)