From 7b5e803963822e69a73d00ba62ac01b1c23f112c Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Sun, 10 Feb 2013 14:40:23 +0200 Subject: [PATCH] Pass command line arguments to external commands. Any option taking a command name as its argument accepts additional arguments as well. * lib/wordsplit.c: New file. * lib/wordsplit.h: New file. * lib/Makefile.am: Add new files. * src/system.c (xexec): New function. (run_decompress_program): Use wordsplit. (sys_child_open_for_compress,sys_exec_command) (sys_exec_info_script) (sys_exec_checkpoint_script): Use xexec to invoke external command. * NEWS: Update. * doc/tar.texi: Update. --- NEWS | 20 +- doc/tar.texi | 180 ++++-- lib/Makefile.am | 11 +- lib/wordsplit.c | 1625 +++++++++++++++++++++++++++++++++++++++++++++++ lib/wordsplit.h | 162 +++++ src/system.c | 70 +- 6 files changed, 1990 insertions(+), 78 deletions(-) create mode 100644 lib/wordsplit.c create mode 100644 lib/wordsplit.h diff --git a/NEWS b/NEWS index 29b4486..3108798 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU tar NEWS - User visible changes. 2013-01-26 +GNU tar NEWS - User visible changes. 2013-02-10 Please send GNU tar bug reports to @@ -50,6 +50,24 @@ Additionally, the options --xattrs-include and --xattrs-exclude allow you to selectively control for which files to store (or extract) the extended attributes. +* Passing command line arguments to external commands. + +Any option taking a command name as its argument now accepts a full +command line as well. Thus, it is now possible to pass additional +arguments to invoked programs. The affected options are: + + --checkpoint-action=exec + -I, --use-compress-program + -F, --info-script + --to-command + +Furthermore, if any additional information is supplied to such a +command via environment variables, these variables can now be used in +the command line itself. 
Care should be taken to escape them, to +prevent them from being expanded too early, for example: + + tar -x -f a.tar --info-script='changevol $TAR_ARCHIVE $TAR_VOLUME' + * New configure option --enable-gcc-warnings, intended for debugging. diff --git a/doc/tar.texi b/doc/tar.texi index 4a49282..480fe89 100644 --- a/doc/tar.texi +++ b/doc/tar.texi @@ -1800,13 +1800,14 @@ and @option{--interactive} options (@pxref{interactive}). * Synopsis:: * using tar options:: * Styles:: -* All Options:: -* help:: -* defaults:: -* verbose:: -* checkpoints:: -* warnings:: -* interactive:: +* All Options:: All @command{tar} Options. +* help:: Where to Get Help. +* defaults:: What are the Default Values. +* verbose:: Checking @command{tar} progress. +* checkpoints:: Checkpoints. +* warnings:: Controlling Warning Messages. +* interactive:: Asking for Confirmation During Operations. +* external:: Running External Commands. @end menu @node Synopsis @@ -2784,14 +2785,14 @@ Send verbose output to @var{file} instead of to standard output. @opsummary{info-script} @opsummary{new-volume-script} -@item --info-script=@var{script-file} -@itemx --new-volume-script=@var{script-file} -@itemx -F @var{script-file} +@item --info-script=@var{command} +@itemx --new-volume-script=@var{command} +@itemx -F @var{command} -When @command{tar} is performing multi-tape backups, @var{script-file} is run -at the end of each tape. If @var{script-file} exits with nonzero status, -@command{tar} fails immediately. @xref{info-script}, for a detailed -discussion of @var{script-file}. +When @command{tar} is performing multi-tape backups, @var{command} is run +at the end of each tape. If it exits with nonzero status, +@command{tar} fails immediately. @xref{info-script}, for a detailed +discussion of this feature. 
@opsummary{interactive} @item --interactive @@ -4004,17 +4005,22 @@ checkpoint: $ @kbd{tar -c --checkpoint=1000 --checkpoint-action=sleep=30} @end smallexample +@anchor{checkpoint exec} @cindex @code{exec}, checkpoint action -Finally, the @code{exec} action executes a given external program. +Finally, the @code{exec} action executes a given external command. For example: @smallexample $ @kbd{tar -c --checkpoint=1000 --checkpoint-action=exec=/sbin/cpoint} @end smallexample -This program is executed using @command{/bin/sh -c}, with no -additional arguments. Its exit code is ignored. It gets a copy of -@command{tar}'s environment plus the following variables: +The supplied command can be any valid command invocation, with or +without additional command line arguments. If it does contain +arguments, don't forget to quote it to prevent it from being split by +the shell. @xref{external, Running External Commands}, for more detail. + +The command gets a copy of @command{tar}'s environment plus the +following variables: @table @env @vrindex TAR_VERSION, checkpoint script environment @@ -4044,6 +4050,18 @@ Format of the archive being processed. @xref{Formats}, for a complete list of archive format names. @end table +These environment variables can also be passed as arguments to the +command, provided that they are properly escaped, for example: + +@smallexample +@kbd{tar -c -f arc.tar \ + --checkpoint-action='exec=/sbin/cpoint $TAR_FILENAME'} +@end smallexample + +@noindent +Notice single quotes to prevent variable names from being expanded by +the shell when invoking @command{tar}. + Any number of actions can be defined, by supplying several @option{--checkpoint-action} options in the command line. For example, the command below displays two messages, pauses @@ -4258,6 +4276,42 @@ named pipe to receive the archive, and having the consumer process to read from that named pipe. 
This has the advantage of letting standard output free to receive verbose output, all separate from errors. +@node external +@section Running External Commands + +Certain @GNUTAR{} operations imply running external commands that you +supply on the command line. One such operation is checkpointing, +described above (@pxref{checkpoint exec}). Another example of this +feature is the @option{-I} option, which allows you to supply the +program to use for compressing or decompressing the archive +(@pxref{use-compress-program}). + +Whenever such an operation is requested, @command{tar} first splits the +supplied command into words much like the shell does. It then treats +the first word as the name of the program or the shell script to execute +and the rest of the words as its command line arguments. The program, +unless given as an absolute file name, is searched for in the shell's +@env{PATH}. + +Any additional information is normally supplied to external commands +in environment variables, specific to each particular operation. For +example, the @option{--checkpoint-action=exec} option sets the +@env{TAR_ARCHIVE} variable to the name of the archive being worked +upon. You can, should the need be, use these variables in the +command line of the external command. For example: + +@smallexample +$ @kbd{tar -x -f archive.tar \ + --checkpoint-action=exec='printf "%04d in %32s\r" $TAR_CHECKPOINT $TAR_ARCHIVE'} +@end smallexample + +@noindent +This command prints for each checkpoint its number and the name of the +archive, using the same output line on the screen. + +Notice the use of single quotes to prevent variable names from being +expanded by the shell when invoking @command{tar}. + @node operations @chapter @GNUTAR{} Operations @@ -5470,11 +5524,13 @@ file to the standard input of an external program: @opindex to-command @item --to-command=@var{command} Extract files and pipe their contents to the standard input of -@var{command}. 
When this option is used, instead of creating the +@var{command}. When this option is used, instead of creating the files specified, @command{tar} invokes @var{command} and pipes the -contents of the files to its standard output. The @var{command} may -contain command line arguments. The program is executed via -@code{sh -c}. Notice, that @var{command} is executed once for each regular file +contents of the files to its standard input. The @var{command} may +contain command line arguments (see @ref{external, Running External Commands}, +for more detail). + +Notice that @var{command} is executed once for each regular file extracted. Non-regular files (directories, etc.) are ignored when this option is used. @end table @@ -5572,6 +5628,20 @@ Format of the archive being processed. @xref{Formats}, for a complete list of archive format names. @end table +These variables are defined prior to executing the command, so you can +pass them as arguments, if you prefer. For example, if the command +@var{proc} takes the member name and size as its arguments, then you +could do: + +@smallexample +$ @kbd{tar -x -f archive.tar \ + --to-command='proc $TAR_FILENAME $TAR_SIZE'} +@end smallexample + +@noindent +Notice the use of single quotes to prevent variable names from being expanded by +the shell when invoking @command{tar}. + If @var{command} exits with a non-0 status, @command{tar} will print an error message similar to the following: @@ -8932,9 +9002,15 @@ environment variable. 
For example, with @command{gzip} you can set @smallexample $ @kbd{GZIP='-9 -n' tar czf archive.tar.gz subdir} @end smallexample +Another way would be to use the @option{-I} option instead (see +below), e.g.: + +@smallexample +$ @kbd{tar -cf archive.tar.gz -I 'gzip -9 -n' subdir} +@end smallexample @noindent -The traditional way to do this is to use a pipe: +Finally, the third, traditional, way to do this is to use a pipe: @smallexample $ @kbd{tar cf - subdir | gzip -9 -n > archive.tar.gz} @@ -8977,20 +9053,29 @@ suffix. The following suffixes are recognized: @item @samp{.xz} @tab @command{xz} @end multitable +@anchor{use-compress-program} @opindex use-compress-program -@item --use-compress-program=@var{prog} -@itemx -I=@var{prog} -Use external compression program @var{prog}. Use this option if you +@item --use-compress-program=@var{command} +@itemx -I=@var{command} +Use external compression program @var{command}. Use this option if you are not happy with the compression program associated with the suffix at compile time or if you have a compression program that @GNUTAR{} -does not support. The program should follow two conventions: +does not support. The @var{command} argument is a valid command +invocation, as you would type it at the command line prompt, with any +additional options as needed. Enclose it in quotes if it contains +white space (see @ref{external, Running External Commands}, for more detail). + +The @var{command} should follow two conventions: -First, when invoked without options, it should read data from standard -input, compress it and output it on standard output. +First, when invoked without additional options, it should read data +from standard input, compress it and output it on standard output. -Secondly, if invoked with the @option{-d} option, it should do exactly -the opposite, i.e., read the compressed data from the standard input -and produce uncompressed data on the standard output. 
+Secondly, if invoked with the additional @option{-d} option, it should +do exactly the opposite, i.e., read the compressed data from the +standard input and produce uncompressed data on the standard output. + +The latter requirement means that you must not use the @option{-d} +option as a part of the @var{command} itself. @end table @cindex gpg, using with tar @@ -10462,10 +10547,10 @@ maximum tape length, you might avoid the problem entirely. @xopindex{info-script, short description} @xopindex{new-volume-script, short description} -@item -F @var{file} -@itemx --info-script=@var{file} -@itemx --new-volume-script=@var{file} -Execute @file{file} at end of each tape. This implies +@item -F @var{command} +@itemx --info-script=@var{command} +@itemx --new-volume-script=@var{command} +Execute @var{command} at end of each tape. This implies @option{--multi-volume} (@option{-M}). @xref{info-script}, for a detailed description of this option. @end table @@ -11345,19 +11430,20 @@ volume, and instruct @command{tar} to use it instead of its normal prompting procedure: @table @option -@item --info-script=@var{script-name} -@itemx --new-volume-script=@var{script-name} -@itemx -F @var{script-name} -Specify the full name of the volume script to use. The script can be -used to eject cassettes, or to broadcast messages such as +@item --info-script=@var{command} +@itemx --new-volume-script=@var{command} +@itemx -F @var{command} +Specify the command to invoke when switching volumes. The @var{command} +can be used to eject cassettes, or to broadcast messages such as @samp{Someone please come change my tape} when performing unattended backups. @end table -The @var{script-name} is executed without any command line -arguments. It inherits @command{tar}'s shell environment. -Additional data is passed to it via the following -environment variables: +The @var{command} can contain additional options, if such are needed. 
+@xref{external, Running External Commands}, for a detailed discussion +of the way @GNUTAR{} runs external commands. It inherits +@command{tar}'s shell environment. Additional data is passed to it +via the following environment variables: @table @env @vrindex TAR_VERSION, info script environment variable @@ -11392,6 +11478,10 @@ File descriptor which can be used to communicate the new volume name to @command{tar}. @end table +These variables can be used in the @var{command} itself, provided that +they are properly quoted to prevent them from being expanded by the +shell that invokes @command{tar}. + The volume script can instruct @command{tar} to use new archive name, by writing in to file descriptor @env{$TAR_FD} (see below for an example). diff --git a/lib/Makefile.am b/lib/Makefile.am index 29b5382..3cbd060 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -30,7 +30,15 @@ CLEANFILES = rmt-command.h rmt-command.h-t AM_CPPFLAGS = -I$(top_srcdir)/gnu -I../ -I../gnu AM_CFLAGS = $(GNULIB_WARN_CFLAGS) $(WERROR_CFLAGS) -noinst_HEADERS = system.h system-ioctl.h rmt.h paxlib.h stdopen.h xattr-at.h +noinst_HEADERS = \ + paxlib.h\ + rmt.h\ + stdopen.h\ + system.h\ + system-ioctl.h\ + wordsplit.h\ + xattr-at.h + libtar_a_SOURCES = \ paxerror.c paxexit-status.c paxlib.h paxnames.c \ prepargs.c prepargs.h \ @@ -38,6 +46,7 @@ libtar_a_SOURCES = \ rmt.h \ stdopen.c stdopen.h \ system.h system-ioctl.h \ + wordsplit.c\ xattr-at.c if !TAR_COND_XATTR_H diff --git a/lib/wordsplit.c b/lib/wordsplit.c new file mode 100644 index 0000000..bd5d59d --- /dev/null +++ b/lib/wordsplit.c @@ -0,0 +1,1625 @@ +/* wordsplit - a word splitter + Copyright (C) 2009-2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see . + + Written by Sergey Poznyakoff +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if ENABLE_NLS +# include +#else +# define gettext(msgid) msgid +#endif +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +#include + +#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') +#define ISDELIM(ws,c) \ + (strchr ((ws)->ws_delim, (c)) != NULL) +#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) +#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') +#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') +#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c)) +#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9') +#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL) +#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c)) +#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127) + +#define ALLOC_INIT 128 +#define ALLOC_INCR 128 + +static void +_wsplt_alloc_die (struct wordsplit *wsp) +{ + wsp->ws_error (_("memory exhausted")); + abort (); +} + +static void +_wsplt_error (const char *fmt, ...) 
+{ + va_list ap; + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + va_end (ap); + fputc ('\n', stderr); +} + +static void wordsplit_free_nodes (struct wordsplit *); + +static int +_wsplt_nomem (struct wordsplit *wsp) +{ + errno = ENOMEM; + wsp->ws_errno = WRDSE_NOSPACE; + if (wsp->ws_flags & WRDSF_ENOMEMABRT) + wsp->ws_alloc_die (wsp); + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + if (!(wsp->ws_flags & WRDSF_REUSE)) + wordsplit_free (wsp); + wordsplit_free_nodes (wsp); + return wsp->ws_errno; +} + +static void +wordsplit_init0 (struct wordsplit *wsp) +{ + if (wsp->ws_flags & WRDSF_REUSE) + { + if (!(wsp->ws_flags & WRDSF_APPEND)) + wordsplit_free_words (wsp); + } + else + { + wsp->ws_wordv = NULL; + wsp->ws_wordc = 0; + wsp->ws_wordn = 0; + } + + wsp->ws_errno = 0; + wsp->ws_head = wsp->ws_tail = NULL; +} + +static int +wordsplit_init (struct wordsplit *wsp, const char *input, size_t len, + int flags) +{ + wsp->ws_flags = flags; + + if (!(wsp->ws_flags & WRDSF_ALLOC_DIE)) + wsp->ws_alloc_die = _wsplt_alloc_die; + if (!(wsp->ws_flags & WRDSF_ERROR)) + wsp->ws_error = _wsplt_error; + + if (!(wsp->ws_flags & WRDSF_NOVAR) + && !(wsp->ws_flags & (WRDSF_ENV | WRDSF_GETVAR))) + { + errno = EINVAL; + wsp->ws_errno = WRDSE_USAGE; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + if (!(wsp->ws_flags & WRDSF_NOCMD)) + { + errno = EINVAL; + wsp->ws_errno = WRDSE_NOSUPP; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + if (!(wsp->ws_flags & WRDSF_DEBUG)) + { + if (wsp->ws_flags & WRDSF_ERROR) + wsp->ws_debug = wsp->ws_error; + else if (wsp->ws_flags & WRDSF_SHOWERR) + wsp->ws_debug = _wsplt_error; + else + wsp->ws_flags &= ~WRDSF_SHOWDBG; + } + } + + wsp->ws_input = input; + wsp->ws_len = len; + + if (!(wsp->ws_flags & WRDSF_DOOFFS)) + wsp->ws_offs = 0; + + if (!(wsp->ws_flags & WRDSF_DELIM)) + wsp->ws_delim = " \t\n"; + + 
if (!(wsp->ws_flags & WRDSF_COMMENT)) + wsp->ws_comment = NULL; + + if (!(wsp->ws_flags & WRDSF_CLOSURE)) + wsp->ws_closure = NULL; + + wsp->ws_endp = 0; + + wordsplit_init0 (wsp); + + return 0; +} + +static int +alloc_space (struct wordsplit *wsp, size_t count) +{ + size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0; + char **ptr; + size_t newalloc; + + if (wsp->ws_wordv == NULL) + { + newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT; + ptr = calloc (newalloc, sizeof (ptr[0])); + } + else if (wsp->ws_wordn < offs + wsp->ws_wordc + count) + { + newalloc = offs + wsp->ws_wordc + + (count > ALLOC_INCR ? count : ALLOC_INCR); + ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0])); + } + else + return 0; + + if (ptr) + { + wsp->ws_wordn = newalloc; + wsp->ws_wordv = ptr; + } + else + return _wsplt_nomem (wsp); + return 0; +} + + +/* Node state flags */ +#define _WSNF_NULL 0x01 /* null node (a noop) */ +#define _WSNF_WORD 0x02 /* node contains word in v.word */ +#define _WSNF_QUOTE 0x04 /* text is quoted */ +#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */ +#define _WSNF_JOIN 0x10 /* node must be joined with the next node */ +#define _WSNF_SEXP 0x20 /* is a sed expression */ + +#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that + wordsplit_add_segm must add the + segment even if it is empty */ + +struct wordsplit_node +{ + struct wordsplit_node *prev; /* Previous element */ + struct wordsplit_node *next; /* Next element */ + int flags; /* Node flags */ + union + { + struct + { + size_t beg; /* Start of word in ws_input */ + size_t end; /* End of word in ws_input */ + } segm; + char *word; + } v; +}; + +static const char * +wsnode_flagstr (int flags) +{ + static char retbuf[6]; + char *p = retbuf; + + if (flags & _WSNF_WORD) + *p++ = 'w'; + else if (flags & _WSNF_NULL) + *p++ = 'n'; + else + *p++ = '-'; + if (flags & _WSNF_QUOTE) + *p++ = 'q'; + else + *p++ = '-'; + if (flags & _WSNF_NOEXPAND) + *p++ = 'E'; + else + 
*p++ = '-'; + if (flags & _WSNF_JOIN) + *p++ = 'j'; + else + *p++ = '-'; + if (flags & _WSNF_SEXP) + *p++ = 's'; + else + *p++ = '-'; + *p = 0; + return retbuf; +} + +static const char * +wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return ""; + else if (p->flags & _WSNF_WORD) + return p->v.word; + else + return wsp->ws_input + p->v.segm.beg; +} + +static size_t +wsnode_len (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_NULL) + return 0; + else if (p->flags & _WSNF_WORD) + return strlen (p->v.word); + else + return p->v.segm.end - p->v.segm.beg; +} + +static int +wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode) +{ + struct wordsplit_node *node = calloc (1, sizeof (*node)); + if (!node) + return _wsplt_nomem (wsp); + *pnode = node; + return 0; +} + +static void +wsnode_free (struct wordsplit_node *p) +{ + if (p->flags & _WSNF_WORD) + free (p->v.word); + free (p); +} + +static void +wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node) +{ + node->next = NULL; + node->prev = wsp->ws_tail; + if (wsp->ws_tail) + wsp->ws_tail->next = node; + else + wsp->ws_head = node; + wsp->ws_tail = node; +} + +static void +wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p; + + p = node->prev; + if (p) + { + p->next = node->next; + if (!node->next) + p->flags &= ~_WSNF_JOIN; + } + else + wsp->ws_head = node->next; + + p = node->next; + if (p) + p->prev = node->prev; + else + wsp->ws_tail = node->prev; + + node->next = node->prev = NULL; +} + +static void +wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node, + struct wordsplit_node *anchor, int before) +{ + if (!wsp->ws_head) + { + node->next = node->prev = NULL; + wsp->ws_head = wsp->ws_tail = node; + } + else if (before) + { + if (anchor->prev) + wsnode_insert (wsp, node, anchor->prev, 0); + else + { + node->prev = NULL; + node->next = anchor; + anchor->prev = node; + wsp->ws_head = node; 
+ } + } + else + { + struct wordsplit_node *p; + + p = anchor->next; + if (p) + p->prev = node; + else + wsp->ws_tail = node; + node->next = p; + node->prev = anchor; + anchor->next = node; + } +} + +static int +wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg) +{ + struct wordsplit_node *node; + int rc; + + if (end == beg && !(flg & _WSNF_EMPTYOK)) + return 0; + rc = wsnode_new (wsp, &node); + if (rc) + return rc; + node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK); + node->v.segm.beg = beg; + node->v.segm.end = end; + wsnode_append (wsp, node); + return 0; +} + +static void +wordsplit_free_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + wsnode_free (p); + p = next; + } + wsp->ws_head = wsp->ws_tail = NULL; +} + +static void +wordsplit_dump_nodes (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + int n = 0; + + for (p = wsp->ws_head, n = 0; p; p = p->next, n++) + { + if (p->flags & _WSNF_WORD) + wsp->ws_debug ("%4d: %p: %#04x (%s):%s;", + n, p, p->flags, wsnode_flagstr (p->flags), p->v.word); + else + wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;", + n, p, p->flags, wsnode_flagstr (p->flags), + (int) (p->v.segm.end - p->v.segm.beg), + wsp->ws_input + p->v.segm.beg); + } +} + +static int +coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node) +{ + struct wordsplit_node *p, *end; + size_t len = 0; + char *buf, *cur; + int stop; + + for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next) + { + len += wsnode_len (p); + } + len += wsnode_len (p); + end = p; + + buf = malloc (len + 1); + if (!buf) + return _wsplt_nomem (wsp); + cur = buf; + + p = node; + for (stop = 0; !stop;) + { + struct wordsplit_node *next = p->next; + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + + memcpy (cur, str, slen); + cur += slen; + if (p != node) + { + wsnode_remove (wsp, p); + stop = p == end; + wsnode_free (p); + } + p = next; + 
} + + *cur = 0; + + node->flags &= ~_WSNF_JOIN; + + if (node->flags & _WSNF_WORD) + free (node->v.word); + else + node->flags |= _WSNF_WORD; + node->v.word = buf; + return 0; +} + +static int +wsnode_quoteremoval (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + void (*uqfn) (char *, const char *, size_t) = + (wsp->ws_flags & WRDSF_CESCAPES) ? + wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + int unquote; + + if (wsp->ws_flags & WRDSF_QUOTE) + { + unquote = !(p->flags & _WSNF_NOEXPAND); + } + else + unquote = 0; + + if (unquote) + { + if (!(p->flags & _WSNF_WORD)) + { + char *newstr = malloc (slen + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + p->v.word = newstr; + p->flags |= _WSNF_WORD; + } + + if (wsp->ws_flags & WRDSF_ESCAPE) + wordsplit_general_unquote_copy (p->v.word, str, slen, + wsp->ws_escape); + else + uqfn (p->v.word, str, slen); + } + } + return 0; +} + +static int +wsnode_coalesce (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + if (p->flags & _WSNF_JOIN) + if (coalesce_segment (wsp, p)) + return 1; + } + return 0; +} + +static int +wordsplit_finish (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + size_t n; + + n = 0; + + for (p = wsp->ws_head; p; p = p->next) + n++; + + if (alloc_space (wsp, n + 1)) + return 1; + + for (p = wsp->ws_head; p; p = p->next) + { + const char *str = wsnode_ptr (wsp, p); + size_t slen = wsnode_len (p); + char *newstr = malloc (slen + 1); + + /* Assign newstr first, even if it is NULL. This way + wordsplit_free will work even if we return + nomem later. 
*/ + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr; + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str, slen); + newstr[slen] = 0; + + wsp->ws_wordc++; + + } + wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL; + return 0; +} + + +/* Variable expansion */ +static int +node_split_prefix (struct wordsplit *wsp, + struct wordsplit_node **ptail, + struct wordsplit_node *node, + size_t beg, size_t len, int flg) +{ + struct wordsplit_node *newnode; + + if (len == 0) + return 0; + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + if (node->flags & _WSNF_WORD) + { + const char *str = wsnode_ptr (wsp, node); + char *newstr = malloc (len + 1); + if (!newstr) + return _wsplt_nomem (wsp); + memcpy (newstr, str + beg, len); + newstr[len] = 0; + newnode->flags = _WSNF_WORD; + newnode->v.word = newstr; + } + else + { + newnode->v.segm.beg = node->v.segm.beg + beg; + newnode->v.segm.end = newnode->v.segm.beg + len; + } + newnode->flags |= flg; + *ptail = newnode; + return 0; +} + +static int +find_closing_cbrace (const char *str, size_t i, size_t len, size_t * poff) +{ + enum + { st_init, st_squote, st_dquote } state = st_init; + size_t level = 1; + + for (; i < len; i++) + { + switch (state) + { + case st_init: + switch (str[i]) + { + case '{': + level++; + break; + + case '}': + if (--level == 0) + { + *poff = i; + return 0; + } + break; + + case '"': + state = st_dquote; + break; + + case '\'': + state = st_squote; + break; + } + break; + + case st_squote: + if (str[i] == '\'') + state = st_init; + break; + + case st_dquote: + if (str[i] == '\\') + i++; + else if (str[i] == '"') + state = st_init; + break; + } + } + return 1; +} + +static const char * +wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len) +{ + size_t i; + + if (!(wsp->ws_flags & WRDSF_ENV)) + return NULL; + + if (wsp->ws_flags & WRDSF_ENV_KV) + { + /* A key-value pair environment */ + for (i = 0; wsp->ws_env[i]; i++) + { + size_t 
elen = strlen (wsp->ws_env[i]); + if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0) + return wsp->ws_env[i + 1]; + /* Skip the value. Break the loop if it is NULL. */ + i++; + if (wsp->ws_env[i] == NULL) + break; + } + } + else + { + /* Usual (A=B) environment. */ + for (i = 0; wsp->ws_env[i]; i++) + { + size_t j; + const char *var = wsp->ws_env[i]; + + for (j = 0; j < len; j++) + if (name[j] != var[j]) + break; + if (j == len && var[j] == '=') + return var + j + 1; + } + } + return NULL; +} + +static int +expvar (struct wordsplit *wsp, const char *str, size_t len, + struct wordsplit_node **ptail, const char **pend, int flg) +{ + size_t i = 0; + const char *defstr = NULL; + const char *value; + const char *vptr; + struct wordsplit_node *newnode; + const char *start = str - 1; + + if (ISALPHA (str[0]) || str[0] == '_') + { + for (i = 1; i < len; i++) + if (!(ISALNUM (str[i]) || str[i] == '_')) + break; + *pend = str + i - 1; + } + else if (str[0] == '{') + { + str++; + len--; + for (i = 1; i < len; i++) + if (str[i] == '}' || str[i] == ':') + break; + if (str[i] == ':') + { + size_t j; + + defstr = str + i + 1; + if (find_closing_cbrace (str, i + 1, len, &j)) + { + wsp->ws_errno = WRDSE_CBRACE; + return 1; + } + *pend = str + j; + } + else if (str[i] == '}') + { + defstr = NULL; + *pend = str + i; + } + else + { + wsp->ws_errno = WRDSE_CBRACE; + return 1; + } + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | flg; + newnode->v.word = malloc (3); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + newnode->v.word[0] = '$'; + newnode->v.word[1] = str[0]; + newnode->v.word[2] = 0; + *pend = str; + return 0; + } + + /* Actually expand the variable */ + /* str - start of the variable name + i - its length + defstr - default replacement str */ + + vptr = wordsplit_find_env (wsp, str, i); + if (vptr) + { + value = strdup (vptr); + if (!value) + return 
_wsplt_nomem (wsp); + } + else if (wsp->ws_flags & WRDSF_GETVAR) + value = wsp->ws_getvar (str, i, wsp->ws_closure); + else if (wsp->ws_flags & WRDSF_UNDEF) + { + wsp->ws_errno = WRDSE_UNDEF; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return 1; + } + else + { + if (wsp->ws_flags & WRDSF_WARNUNDEF) + wsp->ws_error (_("warning: undefined variable `%.*s'"), (int) i, str); + if (wsp->ws_flags & WRDSF_KEEPUNDEF) + value = NULL; + else + value = ""; + } + /* FIXME: handle defstr */ + if (value) + { + if (flg & _WSNF_QUOTE) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = strdup (value); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + } + else if (*value == 0) + { + /* Empty string is a special case */ + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + else + { + struct wordsplit ws; + int i; + + ws.ws_delim = wsp->ws_delim; + if (wordsplit (value, &ws, + WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS)) + { + wordsplit_free (&ws); + return 1; + } + for (i = 0; i < ws.ws_wordc; i++) + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | + _WSNF_NOEXPAND | + (i + 1 < ws.ws_wordc ? 
(flg & ~_WSNF_JOIN) : flg); + newnode->v.word = strdup (ws.ws_wordv[i]); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + } + wordsplit_free (&ws); + } + } + else if (wsp->ws_flags & WRDSF_KEEPUNDEF) + { + size_t size = *pend - start + 1; + + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg; + newnode->v.word = malloc (size + 1); + if (!newnode->v.word) + return _wsplt_nomem (wsp); + memcpy (newnode->v.word, start, size); + newnode->v.word[size] = 0; + } + else + { + if (wsnode_new (wsp, &newnode)) + return 1; + wsnode_insert (wsp, newnode, *ptail, 0); + *ptail = newnode; + newnode->flags = _WSNF_NULL; + } + return 0; +} + +static int +node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node) +{ + const char *str = wsnode_ptr (wsp, node); + size_t slen = wsnode_len (node); + const char *end = str + slen; + const char *p; + size_t off = 0; + struct wordsplit_node *tail = node; + + for (p = str; p < end; p++) + { + if (*p == '\\') + { + p++; + continue; + } + if (*p == '$') + { + size_t n = p - str; + + if (tail != node) + tail->flags |= _WSNF_JOIN; + if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN)) + return 1; + p++; + if (expvar (wsp, p, slen - n, &tail, &p, + node->flags & (_WSNF_JOIN | _WSNF_QUOTE))) + return 1; + off += p - str + 1; + str = p + 1; + } + } + if (p > str) + { + if (tail != node) + tail->flags |= _WSNF_JOIN; + if (node_split_prefix (wsp, &tail, node, off, p - str, + node->flags & _WSNF_JOIN)) + return 1; + } + if (tail != node) + { + wsnode_remove (wsp, node); + wsnode_free (node); + } + return 0; +} + +/* Remove NULL lists */ +static void +wsnode_nullelim (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + if (p->flags & _WSNF_NULL) + { + wsnode_remove (wsp, p); + wsnode_free (p); + } + p = next; + } +} + +static int 
+wordsplit_varexp (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p;) + { + struct wordsplit_node *next = p->next; + if (!(p->flags & _WSNF_NOEXPAND)) + if (node_expand_vars (wsp, p)) + return 1; + p = next; + } + + wsnode_nullelim (wsp); + return 0; +} + +/* Strip off any leading and trailing whitespace. This function is called + right after the initial scanning, therefore it assumes that every + node in the list is a text reference node. */ +static void +wordsplit_trimws (struct wordsplit *wsp) +{ + struct wordsplit_node *p; + + for (p = wsp->ws_head; p; p = p->next) + { + size_t n; + + if (p->flags & _WSNF_QUOTE) + continue; + + /* Skip leading whitespace: */ + for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]); + n++) + ; + p->v.segm.beg = n; + /* Trim trailing whitespace */ + for (n = p->v.segm.end; + n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--); + p->v.segm.end = n; + if (p->v.segm.beg == p->v.segm.end) + p->flags |= _WSNF_NULL; + } + + wsnode_nullelim (wsp); +} + +static int +skip_sed_expr (const char *command, size_t i, size_t len) +{ + int state; + + do + { + int delim; + + if (command[i] == ';') + i++; + if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))) + break; + + delim = command[++i]; + state = 1; + for (i++; i < len; i++) + { + if (state == 3) + { + if (command[i] == delim || !ISALNUM (command[i])) + break; + } + else if (command[i] == '\\') + i++; + else if (command[i] == delim) + state++; + } + } + while (state == 3 && i < len && command[i] == ';'); + return i; +} + +static size_t +skip_delim (struct wordsplit *wsp) +{ + size_t start = wsp->ws_endp; + if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS) + { + if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) && + ISDELIM (wsp, wsp->ws_input[start])) + { + int delim = wsp->ws_input[start]; + do + start++; + while (start < wsp->ws_len && delim == wsp->ws_input[start]); + } + else + { + do + start++; + while (start < wsp->ws_len && ISDELIM 
(wsp, wsp->ws_input[start])); + } + start--; + } + + if (!(wsp->ws_flags & WRDSF_RETURN_DELIMS)) + start++; + + return start; +} + +#define _WRDS_EOF 0 +#define _WRDS_OK 1 +#define _WRDS_ERR 2 + +static int +scan_qstring (struct wordsplit *wsp, size_t start, size_t * end) +{ + size_t j; + const char *command = wsp->ws_input; + size_t len = wsp->ws_len; + char q = command[start]; + + for (j = start + 1; j < len && command[j] != q; j++) + if (q == '"' && command[j] == '\\') + j++; + if (j < len && command[j] == q) + { + int flags = _WSNF_QUOTE | _WSNF_EMPTYOK; + if (q == '\'') + flags |= _WSNF_NOEXPAND; + if (wordsplit_add_segm (wsp, start + 1, j, flags)) + return _WRDS_ERR; + *end = j; + } + else + { + wsp->ws_endp = start; + wsp->ws_errno = WRDSE_QUOTE; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return _WRDS_ERR; + } + return 0; +} + +static int +scan_word (struct wordsplit *wsp, size_t start) +{ + size_t len = wsp->ws_len; + const char *command = wsp->ws_input; + const char *comment = wsp->ws_comment; + int join = 0; + int flags = 0; + + size_t i = start; + + if (i >= len) + { + wsp->ws_errno = WRDSE_EOF; + return _WRDS_EOF; + } + + start = i; + + if (wsp->ws_flags & WRDSF_SED_EXPR + && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])) + { + flags = _WSNF_SEXP; + i = skip_sed_expr (command, i, len); + } + else if (!ISDELIM (wsp, command[i])) + { + while (i < len) + { + if (comment && strchr (comment, command[i]) != NULL) + { + size_t j; + for (j = i + 1; j < len && command[j] != '\n'; j++) + ; + if (wordsplit_add_segm (wsp, start, i, 0)) + return _WRDS_ERR; + wsp->ws_endp = j; + return _WRDS_OK; + } + + if (wsp->ws_flags & WRDSF_QUOTE) + { + if (command[i] == '\\') + { + if (++i == len) + break; + i++; + continue; + } + + if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') || + ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"')) + { + if (join && wsp->ws_tail) + wsp->ws_tail->flags |= _WSNF_JOIN; + if (wordsplit_add_segm 
(wsp, start, i, _WSNF_JOIN)) + return _WRDS_ERR; + if (scan_qstring (wsp, i, &i)) + return _WRDS_ERR; + start = i + 1; + join = 1; + } + } + + if (ISDELIM (wsp, command[i])) + break; + else + i++; + } + } + else if (wsp->ws_flags & WRDSF_RETURN_DELIMS) + { + i++; + } + else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)) + flags |= _WSNF_EMPTYOK; + + if (join && i > start && wsp->ws_tail) + wsp->ws_tail->flags |= _WSNF_JOIN; + if (wordsplit_add_segm (wsp, start, i, flags)) + return _WRDS_ERR; + wsp->ws_endp = i; + if (wsp->ws_flags & WRDSF_INCREMENTAL) + return _WRDS_EOF; + return _WRDS_OK; +} + +static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v"; + +int +wordsplit_c_unquote_char (int c) +{ + char *p; + + for (p = quote_transtab; *p; p += 2) + { + if (*p == c) + return p[1]; + } + return c; +} + +int +wordsplit_c_quote_char (int c) +{ + char *p; + + for (p = quote_transtab + sizeof (quote_transtab) - 2; + p > quote_transtab; p -= 2) + { + if (*p == c) + return p[-1]; + } + return -1; +} + +#define to_num(c) \ + (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? 
toupper(c) - 'A' + 10 : 255 )) + +static int +xtonum (int *pval, const char *src, int base, int cnt) +{ + int i, val; + + for (i = 0, val = 0; i < cnt; i++, src++) + { + int n = *(unsigned char *) src; + if (n > 127 || (n = to_num (n)) >= base) + break; + val = val * base + n; + } + *pval = val; + return i; +} + +size_t +wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) +{ + size_t len = 0; + + *quote = 0; + for (; *str; str++) + { + if (strchr (" \"", *str)) + *quote = 1; + + if (*str == ' ') + len++; + else if (*str == '"') + len += 2; + else if (*str != '\t' && *str != '\\' && ISPRINT (*str)) + len++; + else if (quote_hex) + len += 3; + else + { + if (wordsplit_c_quote_char (*str) != -1) + len += 2; + else + len += 4; + } + } + return len; +} + +void +wordsplit_general_unquote_copy (char *dst, const char *src, size_t n, + const char *escapable) +{ + int i; + + for (i = 0; i < n;) + { + if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1])) + i++; + *dst++ = src[i++]; + } + *dst = 0; +} + +void +wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n) +{ + int i; + + for (i = 0; i < n;) + { + if (src[i] == '\\') + i++; + *dst++ = src[i++]; + } + *dst = 0; +} + +void +wordsplit_c_unquote_copy (char *dst, const char *src, size_t n) +{ + int i = 0; + int c; + + while (i < n) + { + if (src[i] == '\\') + { + ++i; + if (src[i] == 'x' || src[i] == 'X') + { + if (n - i < 2) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + int off = xtonum (&c, src + i + 1, + 16, 2); + if (off == 0) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + *dst++ = c; + i += off + 1; + } + } + } + else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i])) + { + if (n - i < 1) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + int off = xtonum (&c, src + i, 8, 3); + if (off == 0) + { + *dst++ = '\\'; + *dst++ = src[i++]; + } + else + { + *dst++ = c; + i += off; + } + } + } + else + *dst++ = wordsplit_c_unquote_char (src[i++]); + } + 
else + *dst++ = src[i++]; + } + *dst = 0; +} + +void +wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex) +{ + for (; *src; src++) + { + if (*src == '"') + { + *dst++ = '\\'; + *dst++ = *src; + } + else if (*src != '\t' && *src != '\\' && ISPRINT (*src)) + *dst++ = *src; + else + { + char tmp[4]; + + if (quote_hex) + { + snprintf (tmp, sizeof tmp, "%%%02X", *(unsigned char *) src); + memcpy (dst, tmp, 3); + dst += 3; + } + else + { + int c = wordsplit_c_quote_char (*src); + *dst++ = '\\'; + if (c != -1) + *dst++ = c; + else + { + snprintf (tmp, sizeof tmp, "%03o", *(unsigned char *) src); + memcpy (dst, tmp, 3); + dst += 3; + } + } + } + } +} + +static int +wordsplit_process_list (struct wordsplit *wsp, size_t start) +{ + if (wsp->ws_flags & WRDSF_NOSPLIT) + { + /* Treat entire input as a quoted argument */ + if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE)) + return wsp->ws_errno; + } + else + { + int rc; + + while ((rc = scan_word (wsp, start)) == _WRDS_OK) + start = skip_delim (wsp); + /* Make sure tail element is not joinable */ + if (wsp->ws_tail) + wsp->ws_tail->flags &= ~_WSNF_JOIN; + if (rc == _WRDS_ERR) + return wsp->ws_errno; + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + wsp->ws_debug ("Initial list:"); + wordsplit_dump_nodes (wsp); + } + + if (wsp->ws_flags & WRDSF_WS) + { + /* Trim leading and trailing whitespace */ + wordsplit_trimws (wsp); + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + wsp->ws_debug ("After WS trimming:"); + wordsplit_dump_nodes (wsp); + } + } + + /* Expand variables (FIXME: & commands) */ + if (!(wsp->ws_flags & WRDSF_NOVAR)) + { + if (wordsplit_varexp (wsp)) + { + wordsplit_free_nodes (wsp); + return wsp->ws_errno; + } + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + wsp->ws_debug ("Expanded list:"); + wordsplit_dump_nodes (wsp); + } + } + + do + { + if (wsnode_quoteremoval (wsp)) + break; + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + wsp->ws_debug ("After quote removal:"); + wordsplit_dump_nodes (wsp); + } + + if 
(wsnode_coalesce (wsp)) + break; + + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + wsp->ws_debug ("Coalesced list:"); + wordsplit_dump_nodes (wsp); + } + } + while (0); + return wsp->ws_errno; +} + +int +wordsplit_len (const char *command, size_t length, struct wordsplit *wsp, + int flags) +{ + int rc; + size_t start; + const char *cmdptr; + size_t cmdlen; + + if (!command) + { + if (!(flags & WRDSF_INCREMENTAL)) + return EINVAL; + + start = skip_delim (wsp); + if (wsp->ws_endp == wsp->ws_len) + { + wsp->ws_errno = WRDSE_NOINPUT; + if (wsp->ws_flags & WRDSF_SHOWERR) + wordsplit_perror (wsp); + return wsp->ws_errno; + } + + cmdptr = wsp->ws_input + wsp->ws_endp; + cmdlen = wsp->ws_len - wsp->ws_endp; + wsp->ws_flags |= WRDSF_REUSE; + wordsplit_init0 (wsp); + } + else + { + cmdptr = command; + cmdlen = length; + start = 0; + rc = wordsplit_init (wsp, cmdptr, cmdlen, flags); + if (rc) + return rc; + } + + if (wsp->ws_flags & WRDSF_SHOWDBG) + wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr); + + rc = wordsplit_process_list (wsp, start); + if (rc == 0 && (flags & WRDSF_INCREMENTAL)) + { + while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len) + { + start = skip_delim (wsp); + if (wsp->ws_flags & WRDSF_SHOWDBG) + { + cmdptr = wsp->ws_input + wsp->ws_endp; + cmdlen = wsp->ws_len - wsp->ws_endp; + wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr); + } + rc = wordsplit_process_list (wsp, start); + if (rc) + break; + } + } + if (rc) + { + wordsplit_free_nodes (wsp); + return rc; + } + wordsplit_finish (wsp); + wordsplit_free_nodes (wsp); + return wsp->ws_errno; +} + +int +wordsplit (const char *command, struct wordsplit *ws, int flags) +{ + return wordsplit_len (command, command ? 
strlen (command) : 0, ws, + flags); +} + +void +wordsplit_free_words (struct wordsplit *ws) +{ + size_t i; + + for (i = 0; i < ws->ws_wordc; i++) + { + char *p = ws->ws_wordv[ws->ws_offs + i]; + if (p) + { + free (p); + ws->ws_wordv[ws->ws_offs + i] = NULL; + } + } + ws->ws_wordc = 0; +} + +void +wordsplit_free (struct wordsplit *ws) +{ + wordsplit_free_words (ws); + free (ws->ws_wordv); + ws->ws_wordv = NULL; +} + +void +wordsplit_perror (struct wordsplit *wsp) +{ + switch (wsp->ws_errno) + { + case WRDSE_EOF: + wsp->ws_error (_("no error")); + break; + + case WRDSE_QUOTE: + wsp->ws_error (_("missing closing %c (start near #%lu)"), + wsp->ws_input[wsp->ws_endp], + (unsigned long) wsp->ws_endp); + break; + + case WRDSE_NOSPACE: + wsp->ws_error (_("memory exhausted")); + break; + + case WRDSE_NOSUPP: + wsp->ws_error (_("command substitution is not yet supported")); + + case WRDSE_USAGE: + wsp->ws_error (_("invalid wordsplit usage")); + break; + + case WRDSE_CBRACE: + wsp->ws_error (_("unbalanced curly brace")); + break; + + case WRDSE_UNDEF: + wsp->ws_error (_("undefined variable")); + break; + + case WRDSE_NOINPUT: + wsp->ws_error (_("input exhausted")); + break; + + default: + wsp->ws_error (_("unknown error")); + } +} + +const char *_wordsplit_errstr[] = { + N_("no error"), + N_("missing closing quote"), + N_("memory exhausted"), + N_("command substitution is not yet supported"), + N_("invalid wordsplit usage"), + N_("unbalanced curly brace"), + N_("undefined variable"), + N_("input exhausted") +}; +int _wordsplit_nerrs = + sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]); + +const char * +wordsplit_strerror (struct wordsplit *ws) +{ + if (ws->ws_errno < _wordsplit_nerrs) + return _wordsplit_errstr[ws->ws_errno]; + return N_("unknown error"); +} diff --git a/lib/wordsplit.h b/lib/wordsplit.h new file mode 100644 index 0000000..b48e3cd --- /dev/null +++ b/lib/wordsplit.h @@ -0,0 +1,162 @@ +/* wordsplit - a word splitter + Copyright (C) 2009-2013 Free 
Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see <http://www.gnu.org/licenses/>. + + Written by Sergey Poznyakoff +*/ + +#ifndef __WORDSPLIT_H +#define __WORDSPLIT_H + +#include <stddef.h> + +struct wordsplit +{ + size_t ws_wordc; + char **ws_wordv; + size_t ws_offs; + size_t ws_wordn; + int ws_flags; + const char *ws_delim; + const char *ws_comment; + const char *ws_escape; + void (*ws_alloc_die) (struct wordsplit * wsp); + void (*ws_error) (const char *, ...) + __attribute__ ((__format__ (__printf__, 1, 2))); + void (*ws_debug) (const char *, ...) + __attribute__ ((__format__ (__printf__, 1, 2))); + + const char **ws_env; + const char *(*ws_getvar) (const char *, size_t, void *); + void *ws_closure; + + const char *ws_input; + size_t ws_len; + size_t ws_endp; + int ws_errno; + struct wordsplit_node *ws_head, *ws_tail; +}; + +/* Wordsplit flags. Only 2 bits of a 32-bit word remain unused. + It is getting crowded... */ +/* Append the words found to the array resulting from a previous + call. */ +#define WRDSF_APPEND 0x00000001 +/* Insert ws_offs initial NULLs in the array ws_wordv. + (These are not counted in the returned ws_wordc.) */ +#define WRDSF_DOOFFS 0x00000002 +/* Don't do command substitution. Reserved for future use. */ +#define WRDSF_NOCMD 0x00000004 +/* The parameter p resulted from a previous call to + wordsplit(), and wordsplit_free() was not called. Reuse the + allocated storage. 
*/ +#define WRDSF_REUSE 0x00000008 +/* Print errors */ +#define WRDSF_SHOWERR 0x00000010 +/* Consider it an error if an undefined shell variable + is expanded. */ +#define WRDSF_UNDEF 0x00000020 + +/* Don't do variable expansion. */ +#define WRDSF_NOVAR 0x00000040 +/* Abort on ENOMEM error */ +#define WRDSF_ENOMEMABRT 0x00000080 +/* Trim off any leading and trailing whitespace */ +#define WRDSF_WS 0x00000100 +/* Handle single quotes */ +#define WRDSF_SQUOTE 0x00000200 +/* Handle double quotes */ +#define WRDSF_DQUOTE 0x00000400 +/* Handle quotes and escape directives */ +#define WRDSF_QUOTE (WRDSF_SQUOTE|WRDSF_DQUOTE) +/* Replace each input sequence of repeated delimiters with a single + delimiter */ +#define WRDSF_SQUEEZE_DELIMS 0x00000800 +/* Return delimiters */ +#define WRDSF_RETURN_DELIMS 0x00001000 +/* Treat sed expressions as words */ +#define WRDSF_SED_EXPR 0x00002000 +/* ws_delim field is initialized */ +#define WRDSF_DELIM 0x00004000 +/* ws_comment field is initialized */ +#define WRDSF_COMMENT 0x00008000 +/* ws_alloc_die field is initialized */ +#define WRDSF_ALLOC_DIE 0x00010000 +/* ws_error field is initialized */ +#define WRDSF_ERROR 0x00020000 +/* ws_debug field is initialized */ +#define WRDSF_DEBUG 0x00040000 +/* ws_env field is initialized */ +#define WRDSF_ENV 0x00080000 +/* ws_getvar field is initialized */ +#define WRDSF_GETVAR 0x00100000 +/* enable debugging */ +#define WRDSF_SHOWDBG 0x00200000 +/* Don't split input into words. Useful for side effects. */ +#define WRDSF_NOSPLIT 0x00400000 +/* Keep undefined variables in place, instead of expanding them to + empty string */ +#define WRDSF_KEEPUNDEF 0x00800000 +/* Warn about undefined variables */ +#define WRDSF_WARNUNDEF 0x01000000 +/* Handle C escapes */ +#define WRDSF_CESCAPES 0x02000000 + +/* ws_closure is set */ +#define WRDSF_CLOSURE 0x04000000 +/* ws_env is a Key/Value environment, i.e. the value of a variable is + stored in the element that follows its name. 
*/ +#define WRDSF_ENV_KV 0x08000000 + +/* ws_escape is set */ +#define WRDSF_ESCAPE 0x10000000 + +/* Incremental mode */ +#define WRDSF_INCREMENTAL 0x20000000 + +#define WRDSF_DEFFLAGS \ + (WRDSF_NOVAR | WRDSF_NOCMD | \ + WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES) + +#define WRDSE_EOF 0 +#define WRDSE_QUOTE 1 +#define WRDSE_NOSPACE 2 +#define WRDSE_NOSUPP 3 +#define WRDSE_USAGE 4 +#define WRDSE_CBRACE 5 +#define WRDSE_UNDEF 6 +#define WRDSE_NOINPUT 7 + +int wordsplit (const char *s, struct wordsplit *p, int flags); +int wordsplit_len (const char *s, size_t len, + struct wordsplit *p, int flags); +void wordsplit_free (struct wordsplit *p); +void wordsplit_free_words (struct wordsplit *ws); + +int wordsplit_c_unquote_char (int c); +int wordsplit_c_quote_char (int c); +size_t wordsplit_c_quoted_length (const char *str, int quote_hex, + int *quote); +void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n, + const char *escapable); +void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n); +void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n); +void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); + +void wordsplit_perror (struct wordsplit *ws); +const char *wordsplit_strerror (struct wordsplit *ws); + + +#endif diff --git a/src/system.c b/src/system.c index 9dfffcf..6adcbf0 100644 --- a/src/system.c +++ b/src/system.c @@ -21,6 +21,20 @@ #include #include #include +#include + +static void +xexec (const char *cmd) +{ + struct wordsplit ws; + + ws.ws_env = (const char **) environ; + if (wordsplit (cmd, &ws, (WRDSF_DEFFLAGS | WRDSF_ENV) & ~WRDSF_NOVAR)) + FATAL_ERROR ((0, 0, _("cannot split string '%s': %s"), + cmd, wordsplit_strerror (&ws))); + execvp (ws.ws_wordv[0], ws.ws_wordv); + exec_fatal (cmd); +} #if MSDOS @@ -192,7 +206,7 @@ sys_spawn_shell (void) if (child == 0) { priv_set_restore_linkdir (); - execlp (shell, "-sh", "-i", (char *) 0); + execlp (shell, "-sh", "-i", NULL); exec_fatal 
(shell); } else @@ -315,7 +329,7 @@ sys_child_open_for_compress (void) int child_pipe[2]; pid_t grandchild_pid; pid_t child_pid; - + xpipe (parent_pipe); child_pid = xfork (); @@ -363,8 +377,7 @@ sys_child_open_for_compress (void) xdup2 (archive, STDOUT_FILENO); } priv_set_restore_linkdir (); - execlp (use_compress_program_option, use_compress_program_option, NULL); - exec_fatal (use_compress_program_option); + xexec (use_compress_program_option); } /* We do need a grandchild tar. */ @@ -381,9 +394,7 @@ sys_child_open_for_compress (void) xdup2 (child_pipe[PWRITE], STDOUT_FILENO); xclose (child_pipe[PREAD]); priv_set_restore_linkdir (); - execlp (use_compress_program_option, use_compress_program_option, - (char *) 0); - exec_fatal (use_compress_program_option); + xexec (use_compress_program_option); } /* The child tar is still here! */ @@ -458,7 +469,12 @@ run_decompress_program (void) { int i; const char *p, *prog = NULL; + struct wordsplit ws; + int wsflags = (WRDSF_DEFFLAGS | WRDSF_ENV | WRDSF_DOOFFS) & ~WRDSF_NOVAR; + ws.ws_env = (const char **) environ; + ws.ws_offs = 1; + for (p = first_decompress_program (&i); p; p = next_decompress_program (&i)) { if (prog) @@ -468,8 +484,16 @@ run_decompress_program (void) WARNOPT (WARN_DECOMPRESS_PROGRAM, (0, 0, _("trying %s"), p)); } - prog = p; - execlp (p, p, "-d", NULL); + if (wordsplit (p, &ws, wsflags)) + FATAL_ERROR ((0, 0, _("cannot split string '%s': %s"), + p, wordsplit_strerror (&ws))); + wsflags |= WRDSF_REUSE; + memmove(ws.ws_wordv, ws.ws_wordv + ws.ws_offs, + sizeof(ws.ws_wordv[0])*ws.ws_wordc); + ws.ws_wordv[ws.ws_wordc] = "-d"; + prog = p; + execvp (ws.ws_wordv[0], ws.ws_wordv); + ws.ws_wordv[ws.ws_wordc] = NULL; } if (!prog) FATAL_ERROR ((0, 0, _("unable to run decompression program"))); @@ -703,7 +727,7 @@ sys_exec_command (char *file_name, int typechar, struct tar_stat_info *st) { int p[2]; char *argv[4]; - + xpipe (p); pipe_handler = signal (SIGPIPE, SIG_IGN); global_pid = xfork (); @@ -720,15 +744,8 @@ 
sys_exec_command (char *file_name, int typechar, struct tar_stat_info *st) stat_to_env (file_name, typechar, st); - argv[0] = (char *) "/bin/sh"; - argv[1] = (char *) "-c"; - argv[2] = to_command_option; - argv[3] = NULL; - priv_set_restore_linkdir (); - execv ("/bin/sh", argv); - - exec_fatal (file_name); + xexec (to_command_option); } void @@ -832,18 +849,11 @@ sys_exec_info_script (const char **archive_name, int volume_number) archive_format_string (current_format == DEFAULT_FORMAT ? archive_format : current_format), 1); setenv ("TAR_FD", STRINGIFY_BIGINT (p[PWRITE], uintbuf), 1); - + xclose (p[PREAD]); - argv[0] = (char *) "/bin/sh"; - argv[1] = (char *) "-c"; - argv[2] = (char *) info_script_option; - argv[3] = NULL; - priv_set_restore_linkdir (); - execv (argv[0], argv); - - exec_fatal (info_script_option); + xexec (info_script_option); } void @@ -854,7 +864,7 @@ sys_exec_checkpoint_script (const char *script_name, pid_t pid; char *argv[4]; char uintbuf[UINTMAX_STRSIZE_BOUND]; - + pid = xfork (); if (pid != 0) @@ -889,9 +899,7 @@ sys_exec_checkpoint_script (const char *script_name, argv[3] = NULL; priv_set_restore_linkdir (); - execv (argv[0], argv); - - exec_fatal (script_name); + xexec (script_name); } #endif /* not MSDOS */ -- 2.45.2