1 /* wordsplit - a word splitter
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Sergey Poznyakoff
35 # define gettext(msgid) msgid
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
40 #include <wordsplit.h>
/* Locale-independent character classification helpers.

   All of them accept a plain char or an int.  The argument may be
   evaluated more than once, so it must not have side effects.  */

/* Blank, horizontal tab or newline.  */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')

/* True if C occurs in the delimiter string of WS.  Because strchr also
   matches the terminating NUL of the string it searches, a NUL byte is
   reported as a delimiter as well.  */
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)

/* ASCII punctuation.  NUL is excluded explicitly: strchr would
   otherwise match the terminator of the literal.  */
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)

#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')

/* Hexadecimal *letters* only (a-f, A-F); decimal digits are tested
   separately with ISDIGIT.  NUL is excluded for the same reason as in
   ISPUNCT, so that it is never converted to a (negative) digit value.  */
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c))!=NULL)

#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))

/* Printable ASCII: space through tilde.  DEL (127) is a control
   character and is not printable.  */
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) < 127)
54 #define ALLOC_INIT 128
55 #define ALLOC_INCR 128
58 _wsplt_alloc_die (struct wordsplit
*wsp
)
60 wsp
->ws_error (_("memory exhausted"));
64 static void __attribute__ ((__format__ (__printf__
, 1, 2)))
65 _wsplt_error (const char *fmt
, ...)
70 vfprintf (stderr
, fmt
, ap
);
75 static void wordsplit_free_nodes (struct wordsplit
*);
78 _wsplt_nomem (struct wordsplit
*wsp
)
81 wsp
->ws_errno
= WRDSE_NOSPACE
;
82 if (wsp
->ws_flags
& WRDSF_ENOMEMABRT
)
83 wsp
->ws_alloc_die (wsp
);
84 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
85 wordsplit_perror (wsp
);
86 if (!(wsp
->ws_flags
& WRDSF_REUSE
))
88 wordsplit_free_nodes (wsp
);
93 wordsplit_init0 (struct wordsplit
*wsp
)
95 if (wsp
->ws_flags
& WRDSF_REUSE
)
97 if (!(wsp
->ws_flags
& WRDSF_APPEND
))
98 wordsplit_free_words (wsp
);
102 wsp
->ws_wordv
= NULL
;
108 wsp
->ws_head
= wsp
->ws_tail
= NULL
;
112 wordsplit_init (struct wordsplit
*wsp
, const char *input
, size_t len
,
115 wsp
->ws_flags
= flags
;
117 if (!(wsp
->ws_flags
& WRDSF_ALLOC_DIE
))
118 wsp
->ws_alloc_die
= _wsplt_alloc_die
;
119 if (!(wsp
->ws_flags
& WRDSF_ERROR
))
120 wsp
->ws_error
= _wsplt_error
;
122 if (!(wsp
->ws_flags
& WRDSF_NOVAR
)
123 && !(wsp
->ws_flags
& (WRDSF_ENV
| WRDSF_GETVAR
)))
126 wsp
->ws_errno
= WRDSE_USAGE
;
127 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
128 wordsplit_perror (wsp
);
129 return wsp
->ws_errno
;
132 if (!(wsp
->ws_flags
& WRDSF_NOCMD
))
135 wsp
->ws_errno
= WRDSE_NOSUPP
;
136 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
137 wordsplit_perror (wsp
);
138 return wsp
->ws_errno
;
141 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
143 if (!(wsp
->ws_flags
& WRDSF_DEBUG
))
145 if (wsp
->ws_flags
& WRDSF_ERROR
)
146 wsp
->ws_debug
= wsp
->ws_error
;
147 else if (wsp
->ws_flags
& WRDSF_SHOWERR
)
148 wsp
->ws_debug
= _wsplt_error
;
150 wsp
->ws_flags
&= ~WRDSF_SHOWDBG
;
154 wsp
->ws_input
= input
;
157 if (!(wsp
->ws_flags
& WRDSF_DOOFFS
))
160 if (!(wsp
->ws_flags
& WRDSF_DELIM
))
161 wsp
->ws_delim
= " \t\n";
163 if (!(wsp
->ws_flags
& WRDSF_COMMENT
))
164 wsp
->ws_comment
= NULL
;
166 if (!(wsp
->ws_flags
& WRDSF_CLOSURE
))
167 wsp
->ws_closure
= NULL
;
171 wordsplit_init0 (wsp
);
177 alloc_space (struct wordsplit
*wsp
, size_t count
)
179 size_t offs
= (wsp
->ws_flags
& WRDSF_DOOFFS
) ? wsp
->ws_offs
: 0;
183 if (wsp
->ws_wordv
== NULL
)
185 newalloc
= offs
+ count
> ALLOC_INIT
? count
: ALLOC_INIT
;
186 ptr
= calloc (newalloc
, sizeof (ptr
[0]));
188 else if (wsp
->ws_wordn
< offs
+ wsp
->ws_wordc
+ count
)
190 newalloc
= offs
+ wsp
->ws_wordc
+
191 (count
> ALLOC_INCR
? count
: ALLOC_INCR
);
192 ptr
= realloc (wsp
->ws_wordv
, newalloc
* sizeof (ptr
[0]));
199 wsp
->ws_wordn
= newalloc
;
203 return _wsplt_nomem (wsp
);
208 /* Node state flags */
209 #define _WSNF_NULL 0x01 /* null node (a noop) */
210 #define _WSNF_WORD 0x02 /* node contains word in v.word */
211 #define _WSNF_QUOTE 0x04 /* text is quoted */
212 #define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
213 #define _WSNF_JOIN 0x10 /* node must be joined with the next node */
214 #define _WSNF_SEXP 0x20 /* is a sed expression */
216 #define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
217 wordsplit_add_segm must add the
218 segment even if it is empty */
220 struct wordsplit_node
222 struct wordsplit_node
*prev
; /* Previous element */
223 struct wordsplit_node
*next
; /* Next element */
224 int flags
; /* Node flags */
229 size_t beg
; /* Start of word in ws_input */
230 size_t end
; /* End of word in ws_input */
237 wsnode_flagstr (int flags
)
239 static char retbuf
[6];
242 if (flags
& _WSNF_WORD
)
244 else if (flags
& _WSNF_NULL
)
248 if (flags
& _WSNF_QUOTE
)
252 if (flags
& _WSNF_NOEXPAND
)
256 if (flags
& _WSNF_JOIN
)
260 if (flags
& _WSNF_SEXP
)
269 wsnode_ptr (struct wordsplit
*wsp
, struct wordsplit_node
*p
)
271 if (p
->flags
& _WSNF_NULL
)
273 else if (p
->flags
& _WSNF_WORD
)
276 return wsp
->ws_input
+ p
->v
.segm
.beg
;
280 wsnode_len (struct wordsplit_node
*p
)
282 if (p
->flags
& _WSNF_NULL
)
284 else if (p
->flags
& _WSNF_WORD
)
285 return strlen (p
->v
.word
);
287 return p
->v
.segm
.end
- p
->v
.segm
.beg
;
291 wsnode_new (struct wordsplit
*wsp
, struct wordsplit_node
**pnode
)
293 struct wordsplit_node
*node
= calloc (1, sizeof (*node
));
295 return _wsplt_nomem (wsp
);
301 wsnode_free (struct wordsplit_node
*p
)
303 if (p
->flags
& _WSNF_WORD
)
309 wsnode_append (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
312 node
->prev
= wsp
->ws_tail
;
314 wsp
->ws_tail
->next
= node
;
321 wsnode_remove (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
323 struct wordsplit_node
*p
;
328 p
->next
= node
->next
;
330 p
->flags
&= ~_WSNF_JOIN
;
333 wsp
->ws_head
= node
->next
;
337 p
->prev
= node
->prev
;
339 wsp
->ws_tail
= node
->prev
;
341 node
->next
= node
->prev
= NULL
;
345 wsnode_insert (struct wordsplit
*wsp
, struct wordsplit_node
*node
,
346 struct wordsplit_node
*anchor
, int before
)
350 node
->next
= node
->prev
= NULL
;
351 wsp
->ws_head
= wsp
->ws_tail
= node
;
356 wsnode_insert (wsp
, node
, anchor
->prev
, 0);
367 struct wordsplit_node
*p
;
381 wordsplit_add_segm (struct wordsplit
*wsp
, size_t beg
, size_t end
, int flg
)
383 struct wordsplit_node
*node
;
386 if (end
== beg
&& !(flg
& _WSNF_EMPTYOK
))
388 rc
= wsnode_new (wsp
, &node
);
391 node
->flags
= flg
& ~(_WSNF_WORD
| _WSNF_EMPTYOK
);
392 node
->v
.segm
.beg
= beg
;
393 node
->v
.segm
.end
= end
;
394 wsnode_append (wsp
, node
);
399 wordsplit_free_nodes (struct wordsplit
*wsp
)
401 struct wordsplit_node
*p
;
403 for (p
= wsp
->ws_head
; p
;)
405 struct wordsplit_node
*next
= p
->next
;
409 wsp
->ws_head
= wsp
->ws_tail
= NULL
;
413 wordsplit_dump_nodes (struct wordsplit
*wsp
)
415 struct wordsplit_node
*p
;
418 for (p
= wsp
->ws_head
, n
= 0; p
; p
= p
->next
, n
++)
420 if (p
->flags
& _WSNF_WORD
)
421 wsp
->ws_debug ("%4d: %p: %#04x (%s):%s;",
422 n
, p
, p
->flags
, wsnode_flagstr (p
->flags
), p
->v
.word
);
424 wsp
->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
425 n
, p
, p
->flags
, wsnode_flagstr (p
->flags
),
426 (int) (p
->v
.segm
.end
- p
->v
.segm
.beg
),
427 wsp
->ws_input
+ p
->v
.segm
.beg
);
432 coalesce_segment (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
434 struct wordsplit_node
*p
, *end
;
439 for (p
= node
; p
&& (p
->flags
& _WSNF_JOIN
); p
= p
->next
)
441 len
+= wsnode_len (p
);
443 len
+= wsnode_len (p
);
446 buf
= malloc (len
+ 1);
448 return _wsplt_nomem (wsp
);
452 for (stop
= 0; !stop
;)
454 struct wordsplit_node
*next
= p
->next
;
455 const char *str
= wsnode_ptr (wsp
, p
);
456 size_t slen
= wsnode_len (p
);
458 memcpy (cur
, str
, slen
);
462 wsnode_remove (wsp
, p
);
471 node
->flags
&= ~_WSNF_JOIN
;
473 if (node
->flags
& _WSNF_WORD
)
476 node
->flags
|= _WSNF_WORD
;
482 wsnode_quoteremoval (struct wordsplit
*wsp
)
484 struct wordsplit_node
*p
;
485 void (*uqfn
) (char *, const char *, size_t) =
486 (wsp
->ws_flags
& WRDSF_CESCAPES
) ?
487 wordsplit_c_unquote_copy
: wordsplit_sh_unquote_copy
;
489 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
491 const char *str
= wsnode_ptr (wsp
, p
);
492 size_t slen
= wsnode_len (p
);
495 if (wsp
->ws_flags
& WRDSF_QUOTE
)
497 unquote
= !(p
->flags
& _WSNF_NOEXPAND
);
504 if (!(p
->flags
& _WSNF_WORD
))
506 char *newstr
= malloc (slen
+ 1);
508 return _wsplt_nomem (wsp
);
509 memcpy (newstr
, str
, slen
);
512 p
->flags
|= _WSNF_WORD
;
515 if (wsp
->ws_flags
& WRDSF_ESCAPE
)
516 wordsplit_general_unquote_copy (p
->v
.word
, str
, slen
,
519 uqfn (p
->v
.word
, str
, slen
);
526 wsnode_coalesce (struct wordsplit
*wsp
)
528 struct wordsplit_node
*p
;
530 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
532 if (p
->flags
& _WSNF_JOIN
)
533 if (coalesce_segment (wsp
, p
))
540 wordsplit_finish (struct wordsplit
*wsp
)
542 struct wordsplit_node
*p
;
547 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
550 if (alloc_space (wsp
, n
+ 1))
553 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
555 const char *str
= wsnode_ptr (wsp
, p
);
556 size_t slen
= wsnode_len (p
);
557 char *newstr
= malloc (slen
+ 1);
559 /* Assign newstr first, even if it is NULL. This way
560 wordsplit_free will work even if we return
562 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = newstr
;
564 return _wsplt_nomem (wsp
);
565 memcpy (newstr
, str
, slen
);
571 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = NULL
;
576 /* Variable expansion */
578 node_split_prefix (struct wordsplit
*wsp
,
579 struct wordsplit_node
**ptail
,
580 struct wordsplit_node
*node
,
581 size_t beg
, size_t len
, int flg
)
583 struct wordsplit_node
*newnode
;
587 if (wsnode_new (wsp
, &newnode
))
589 wsnode_insert (wsp
, newnode
, *ptail
, 0);
590 if (node
->flags
& _WSNF_WORD
)
592 const char *str
= wsnode_ptr (wsp
, node
);
593 char *newstr
= malloc (len
+ 1);
595 return _wsplt_nomem (wsp
);
596 memcpy (newstr
, str
+ beg
, len
);
598 newnode
->flags
= _WSNF_WORD
;
599 newnode
->v
.word
= newstr
;
603 newnode
->v
.segm
.beg
= node
->v
.segm
.beg
+ beg
;
604 newnode
->v
.segm
.end
= newnode
->v
.segm
.beg
+ len
;
606 newnode
->flags
|= flg
;
612 find_closing_cbrace (const char *str
, size_t i
, size_t len
, size_t * poff
)
615 { st_init
, st_squote
, st_dquote
} state
= st_init
;
655 else if (str
[i
] == '"')
664 wordsplit_find_env (struct wordsplit
*wsp
, const char *name
, size_t len
)
668 if (!(wsp
->ws_flags
& WRDSF_ENV
))
671 if (wsp
->ws_flags
& WRDSF_ENV_KV
)
673 /* A key-value pair environment */
674 for (i
= 0; wsp
->ws_env
[i
]; i
++)
676 size_t elen
= strlen (wsp
->ws_env
[i
]);
677 if (elen
== len
&& memcmp (wsp
->ws_env
[i
], name
, elen
) == 0)
678 return wsp
->ws_env
[i
+ 1];
679 /* Skip the value. Break the loop if it is NULL. */
681 if (wsp
->ws_env
[i
] == NULL
)
687 /* Usual (A=B) environment. */
688 for (i
= 0; wsp
->ws_env
[i
]; i
++)
691 const char *var
= wsp
->ws_env
[i
];
693 for (j
= 0; j
< len
; j
++)
694 if (name
[j
] != var
[j
])
696 if (j
== len
&& var
[j
] == '=')
704 expvar (struct wordsplit
*wsp
, const char *str
, size_t len
,
705 struct wordsplit_node
**ptail
, const char **pend
, int flg
)
708 const char *defstr
= NULL
;
711 struct wordsplit_node
*newnode
;
712 const char *start
= str
- 1;
714 if (ISALPHA (str
[0]) || str
[0] == '_')
716 for (i
= 1; i
< len
; i
++)
717 if (!(ISALNUM (str
[i
]) || str
[i
] == '_'))
721 else if (str
[0] == '{')
725 for (i
= 1; i
< len
; i
++)
726 if (str
[i
] == '}' || str
[i
] == ':')
732 defstr
= str
+ i
+ 1;
733 if (find_closing_cbrace (str
, i
+ 1, len
, &j
))
735 wsp
->ws_errno
= WRDSE_CBRACE
;
740 else if (str
[i
] == '}')
747 wsp
->ws_errno
= WRDSE_CBRACE
;
753 if (wsnode_new (wsp
, &newnode
))
755 wsnode_insert (wsp
, newnode
, *ptail
, 0);
757 newnode
->flags
= _WSNF_WORD
| flg
;
758 newnode
->v
.word
= malloc (3);
759 if (!newnode
->v
.word
)
760 return _wsplt_nomem (wsp
);
761 newnode
->v
.word
[0] = '$';
762 newnode
->v
.word
[1] = str
[0];
763 newnode
->v
.word
[2] = 0;
768 /* Actually expand the variable */
769 /* str - start of the variable name
771 defstr - default replacement str */
773 vptr
= wordsplit_find_env (wsp
, str
, i
);
776 value
= strdup (vptr
);
778 return _wsplt_nomem (wsp
);
780 else if (wsp
->ws_flags
& WRDSF_GETVAR
)
781 value
= wsp
->ws_getvar (str
, i
, wsp
->ws_closure
);
782 else if (wsp
->ws_flags
& WRDSF_UNDEF
)
784 wsp
->ws_errno
= WRDSE_UNDEF
;
785 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
786 wordsplit_perror (wsp
);
791 if (wsp
->ws_flags
& WRDSF_WARNUNDEF
)
792 wsp
->ws_error (_("warning: undefined variable `%.*s'"), (int) i
, str
);
793 if (wsp
->ws_flags
& WRDSF_KEEPUNDEF
)
799 /* FIXME: handle defstr */
804 if (flg
& _WSNF_QUOTE
)
806 if (wsnode_new (wsp
, &newnode
))
808 wsnode_insert (wsp
, newnode
, *ptail
, 0);
810 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
811 newnode
->v
.word
= strdup (value
);
812 if (!newnode
->v
.word
)
813 return _wsplt_nomem (wsp
);
815 else if (*value
== 0)
817 /* Empty string is a special case */
818 if (wsnode_new (wsp
, &newnode
))
820 wsnode_insert (wsp
, newnode
, *ptail
, 0);
822 newnode
->flags
= _WSNF_NULL
;
829 ws
.ws_delim
= wsp
->ws_delim
;
830 if (wordsplit (value
, &ws
,
831 WRDSF_NOVAR
| WRDSF_NOCMD
| WRDSF_DELIM
| WRDSF_WS
))
833 wordsplit_free (&ws
);
836 for (i
= 0; i
< ws
.ws_wordc
; i
++)
838 if (wsnode_new (wsp
, &newnode
))
840 wsnode_insert (wsp
, newnode
, *ptail
, 0);
842 newnode
->flags
= _WSNF_WORD
|
844 (i
+ 1 < ws
.ws_wordc
? (flg
& ~_WSNF_JOIN
) : flg
);
845 newnode
->v
.word
= strdup (ws
.ws_wordv
[i
]);
846 if (!newnode
->v
.word
)
847 return _wsplt_nomem (wsp
);
849 wordsplit_free (&ws
);
852 else if (wsp
->ws_flags
& WRDSF_KEEPUNDEF
)
854 size_t size
= *pend
- start
+ 1;
856 if (wsnode_new (wsp
, &newnode
))
858 wsnode_insert (wsp
, newnode
, *ptail
, 0);
860 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
861 newnode
->v
.word
= malloc (size
+ 1);
862 if (!newnode
->v
.word
)
863 return _wsplt_nomem (wsp
);
864 memcpy (newnode
->v
.word
, start
, size
);
865 newnode
->v
.word
[size
] = 0;
869 if (wsnode_new (wsp
, &newnode
))
871 wsnode_insert (wsp
, newnode
, *ptail
, 0);
873 newnode
->flags
= _WSNF_NULL
;
879 node_expand_vars (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
881 const char *str
= wsnode_ptr (wsp
, node
);
882 size_t slen
= wsnode_len (node
);
883 const char *end
= str
+ slen
;
886 struct wordsplit_node
*tail
= node
;
888 for (p
= str
; p
< end
; p
++)
900 tail
->flags
|= _WSNF_JOIN
;
901 if (node_split_prefix (wsp
, &tail
, node
, off
, n
, _WSNF_JOIN
))
904 if (expvar (wsp
, p
, slen
- n
, &tail
, &p
,
905 node
->flags
& (_WSNF_JOIN
| _WSNF_QUOTE
)))
914 tail
->flags
|= _WSNF_JOIN
;
915 if (node_split_prefix (wsp
, &tail
, node
, off
, p
- str
,
916 node
->flags
& _WSNF_JOIN
))
921 wsnode_remove (wsp
, node
);
927 /* Remove NULL lists */
929 wsnode_nullelim (struct wordsplit
*wsp
)
931 struct wordsplit_node
*p
;
933 for (p
= wsp
->ws_head
; p
;)
935 struct wordsplit_node
*next
= p
->next
;
936 if (p
->flags
& _WSNF_NULL
)
938 wsnode_remove (wsp
, p
);
946 wordsplit_varexp (struct wordsplit
*wsp
)
948 struct wordsplit_node
*p
;
950 for (p
= wsp
->ws_head
; p
;)
952 struct wordsplit_node
*next
= p
->next
;
953 if (!(p
->flags
& _WSNF_NOEXPAND
))
954 if (node_expand_vars (wsp
, p
))
959 wsnode_nullelim (wsp
);
963 /* Strip off any leading and trailing whitespace. This function is called
964 right after the initial scanning, therefore it assumes that every
965 node in the list is a text reference node. */
967 wordsplit_trimws (struct wordsplit
*wsp
)
969 struct wordsplit_node
*p
;
971 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
975 if (p
->flags
& _WSNF_QUOTE
)
978 /* Skip leading whitespace: */
979 for (n
= p
->v
.segm
.beg
; n
< p
->v
.segm
.end
&& ISWS (wsp
->ws_input
[n
]);
983 /* Trim trailing whitespace */
984 for (n
= p
->v
.segm
.end
;
985 n
> p
->v
.segm
.beg
&& ISWS (wsp
->ws_input
[n
- 1]); n
--);
987 if (p
->v
.segm
.beg
== p
->v
.segm
.end
)
988 p
->flags
|= _WSNF_NULL
;
991 wsnode_nullelim (wsp
);
995 skip_sed_expr (const char *command
, size_t i
, size_t len
)
1003 if (command
[i
] == ';')
1005 if (!(command
[i
] == 's' && i
+ 3 < len
&& ISPUNCT (command
[i
+ 1])))
1008 delim
= command
[++i
];
1010 for (i
++; i
< len
; i
++)
1014 if (command
[i
] == delim
|| !ISALNUM (command
[i
]))
1017 else if (command
[i
] == '\\')
1019 else if (command
[i
] == delim
)
1023 while (state
== 3 && i
< len
&& command
[i
] == ';');
1028 skip_delim (struct wordsplit
*wsp
)
1030 size_t start
= wsp
->ws_endp
;
1031 if (wsp
->ws_flags
& WRDSF_SQUEEZE_DELIMS
)
1033 if ((wsp
->ws_flags
& WRDSF_RETURN_DELIMS
) &&
1034 ISDELIM (wsp
, wsp
->ws_input
[start
]))
1036 int delim
= wsp
->ws_input
[start
];
1039 while (start
< wsp
->ws_len
&& delim
== wsp
->ws_input
[start
]);
1045 while (start
< wsp
->ws_len
&& ISDELIM (wsp
, wsp
->ws_input
[start
]));
1050 if (!(wsp
->ws_flags
& WRDSF_RETURN_DELIMS
))
1061 scan_qstring (struct wordsplit
*wsp
, size_t start
, size_t * end
)
1064 const char *command
= wsp
->ws_input
;
1065 size_t len
= wsp
->ws_len
;
1066 char q
= command
[start
];
1068 for (j
= start
+ 1; j
< len
&& command
[j
] != q
; j
++)
1069 if (q
== '"' && command
[j
] == '\\')
1071 if (j
< len
&& command
[j
] == q
)
1073 int flags
= _WSNF_QUOTE
| _WSNF_EMPTYOK
;
1075 flags
|= _WSNF_NOEXPAND
;
1076 if (wordsplit_add_segm (wsp
, start
+ 1, j
, flags
))
1082 wsp
->ws_endp
= start
;
1083 wsp
->ws_errno
= WRDSE_QUOTE
;
1084 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
1085 wordsplit_perror (wsp
);
1092 scan_word (struct wordsplit
*wsp
, size_t start
)
1094 size_t len
= wsp
->ws_len
;
1095 const char *command
= wsp
->ws_input
;
1096 const char *comment
= wsp
->ws_comment
;
1104 wsp
->ws_errno
= WRDSE_EOF
;
1110 if (wsp
->ws_flags
& WRDSF_SED_EXPR
1111 && command
[i
] == 's' && i
+ 3 < len
&& ISPUNCT (command
[i
+ 1]))
1114 i
= skip_sed_expr (command
, i
, len
);
1116 else if (!ISDELIM (wsp
, command
[i
]))
1120 if (comment
&& strchr (comment
, command
[i
]) != NULL
)
1123 for (j
= i
+ 1; j
< len
&& command
[j
] != '\n'; j
++)
1125 if (wordsplit_add_segm (wsp
, start
, i
, 0))
1131 if (wsp
->ws_flags
& WRDSF_QUOTE
)
1133 if (command
[i
] == '\\')
1141 if (((wsp
->ws_flags
& WRDSF_SQUOTE
) && command
[i
] == '\'') ||
1142 ((wsp
->ws_flags
& WRDSF_DQUOTE
) && command
[i
] == '"'))
1144 if (join
&& wsp
->ws_tail
)
1145 wsp
->ws_tail
->flags
|= _WSNF_JOIN
;
1146 if (wordsplit_add_segm (wsp
, start
, i
, _WSNF_JOIN
))
1148 if (scan_qstring (wsp
, i
, &i
))
1155 if (ISDELIM (wsp
, command
[i
]))
1161 else if (wsp
->ws_flags
& WRDSF_RETURN_DELIMS
)
1165 else if (!(wsp
->ws_flags
& WRDSF_SQUEEZE_DELIMS
))
1166 flags
|= _WSNF_EMPTYOK
;
1168 if (join
&& i
> start
&& wsp
->ws_tail
)
1169 wsp
->ws_tail
->flags
|= _WSNF_JOIN
;
1170 if (wordsplit_add_segm (wsp
, start
, i
, flags
))
1173 if (wsp
->ws_flags
& WRDSF_INCREMENTAL
)
1178 static char quote_transtab
[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
1181 wordsplit_c_unquote_char (int c
)
1185 for (p
= quote_transtab
; *p
; p
+= 2)
1194 wordsplit_c_quote_char (int c
)
1198 for (p
= quote_transtab
+ sizeof (quote_transtab
) - 2;
1199 p
> quote_transtab
; p
-= 2)
1208 (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
1211 xtonum (int *pval
, const char *src
, int base
, int cnt
)
1215 for (i
= 0, val
= 0; i
< cnt
; i
++, src
++)
1217 int n
= *(unsigned char *) src
;
1218 if (n
> 127 || (n
= to_num (n
)) >= base
)
1220 val
= val
* base
+ n
;
1227 wordsplit_c_quoted_length (const char *str
, int quote_hex
, int *quote
)
1234 if (strchr (" \"", *str
))
1239 else if (*str
== '"')
1241 else if (*str
!= '\t' && *str
!= '\\' && ISPRINT (*str
))
1247 if (wordsplit_c_quote_char (*str
) != -1)
/* Copy the N bytes starting at SRC to DST, removing every backslash
   that is immediately followed, within those N bytes, by a character
   listed in ESCAPABLE.  The escaped character itself is copied
   verbatim.  A backslash followed by any other character, or one that
   is the last byte of the segment, is kept as is.

   DST is always NUL-terminated.  It must have room for N + 1 bytes.  */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
				const char *escapable)
{
  size_t i = 0;

  while (i < n)
    {
      /* Look at src[i + 1] only if it belongs to the segment.  NUL is
	 excluded because strchr would match ESCAPABLE's terminator.  */
      if (src[i] == '\\'
	  && i + 1 < n
	  && src[i + 1] != '\0'
	  && strchr (escapable, src[i + 1]) != NULL)
	i++;
      *dst++ = src[i++];
    }
  *dst = '\0';
}
1272 wordsplit_sh_unquote_copy (char *dst
, const char *src
, size_t n
)
1286 wordsplit_c_unquote_copy (char *dst
, const char *src
, size_t n
)
1296 if (src
[i
] == 'x' || src
[i
] == 'X')
1305 int off
= xtonum (&c
, src
+ i
+ 1,
1319 else if ((unsigned char) src
[i
] < 128 && ISDIGIT (src
[i
]))
1328 int off
= xtonum (&c
, src
+ i
, 8, 3);
1342 *dst
++ = wordsplit_c_unquote_char (src
[i
++]);
1351 wordsplit_c_quote_copy (char *dst
, const char *src
, int quote_hex
)
1360 else if (*src
!= '\t' && *src
!= '\\' && ISPRINT (*src
))
1368 snprintf (tmp
, sizeof tmp
, "%%%02X", *(unsigned char *) src
);
1369 memcpy (dst
, tmp
, 3);
1374 int c
= wordsplit_c_quote_char (*src
);
1380 snprintf (tmp
, sizeof tmp
, "%03o", *(unsigned char *) src
);
1381 memcpy (dst
, tmp
, 3);
1390 wordsplit_process_list (struct wordsplit
*wsp
, size_t start
)
1392 if (wsp
->ws_flags
& WRDSF_NOSPLIT
)
1394 /* Treat entire input as a quoted argument */
1395 if (wordsplit_add_segm (wsp
, start
, wsp
->ws_len
, _WSNF_QUOTE
))
1396 return wsp
->ws_errno
;
1402 while ((rc
= scan_word (wsp
, start
)) == _WRDS_OK
)
1403 start
= skip_delim (wsp
);
1404 /* Make sure tail element is not joinable */
1406 wsp
->ws_tail
->flags
&= ~_WSNF_JOIN
;
1407 if (rc
== _WRDS_ERR
)
1408 return wsp
->ws_errno
;
1411 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1413 wsp
->ws_debug ("Initial list:");
1414 wordsplit_dump_nodes (wsp
);
1417 if (wsp
->ws_flags
& WRDSF_WS
)
1419 /* Trim leading and trailing whitespace */
1420 wordsplit_trimws (wsp
);
1421 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1423 wsp
->ws_debug ("After WS trimming:");
1424 wordsplit_dump_nodes (wsp
);
1428 /* Expand variables (FIXME: & commands) */
1429 if (!(wsp
->ws_flags
& WRDSF_NOVAR
))
1431 if (wordsplit_varexp (wsp
))
1433 wordsplit_free_nodes (wsp
);
1434 return wsp
->ws_errno
;
1436 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1438 wsp
->ws_debug ("Expanded list:");
1439 wordsplit_dump_nodes (wsp
);
1445 if (wsnode_quoteremoval (wsp
))
1447 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1449 wsp
->ws_debug ("After quote removal:");
1450 wordsplit_dump_nodes (wsp
);
1453 if (wsnode_coalesce (wsp
))
1456 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1458 wsp
->ws_debug ("Coalesced list:");
1459 wordsplit_dump_nodes (wsp
);
1463 return wsp
->ws_errno
;
1467 wordsplit_len (const char *command
, size_t length
, struct wordsplit
*wsp
,
1477 if (!(flags
& WRDSF_INCREMENTAL
))
1480 start
= skip_delim (wsp
);
1481 if (wsp
->ws_endp
== wsp
->ws_len
)
1483 wsp
->ws_errno
= WRDSE_NOINPUT
;
1484 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
1485 wordsplit_perror (wsp
);
1486 return wsp
->ws_errno
;
1489 cmdptr
= wsp
->ws_input
+ wsp
->ws_endp
;
1490 cmdlen
= wsp
->ws_len
- wsp
->ws_endp
;
1491 wsp
->ws_flags
|= WRDSF_REUSE
;
1492 wordsplit_init0 (wsp
);
1499 rc
= wordsplit_init (wsp
, cmdptr
, cmdlen
, flags
);
1504 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1505 wsp
->ws_debug ("Input:%.*s;", (int) cmdlen
, cmdptr
);
1507 rc
= wordsplit_process_list (wsp
, start
);
1508 if (rc
== 0 && (flags
& WRDSF_INCREMENTAL
))
1510 while (!wsp
->ws_head
&& wsp
->ws_endp
< wsp
->ws_len
)
1512 start
= skip_delim (wsp
);
1513 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
1515 cmdptr
= wsp
->ws_input
+ wsp
->ws_endp
;
1516 cmdlen
= wsp
->ws_len
- wsp
->ws_endp
;
1517 wsp
->ws_debug ("Restart:%.*s;", (int) cmdlen
, cmdptr
);
1519 rc
= wordsplit_process_list (wsp
, start
);
1526 wordsplit_free_nodes (wsp
);
1529 wordsplit_finish (wsp
);
1530 wordsplit_free_nodes (wsp
);
1531 return wsp
->ws_errno
;
1535 wordsplit (const char *command
, struct wordsplit
*ws
, int flags
)
1537 return wordsplit_len (command
, command
? strlen (command
) : 0, ws
,
1542 wordsplit_free_words (struct wordsplit
*ws
)
1546 for (i
= 0; i
< ws
->ws_wordc
; i
++)
1548 char *p
= ws
->ws_wordv
[ws
->ws_offs
+ i
];
1552 ws
->ws_wordv
[ws
->ws_offs
+ i
] = NULL
;
1559 wordsplit_free (struct wordsplit
*ws
)
1561 wordsplit_free_words (ws
);
1562 free (ws
->ws_wordv
);
1563 ws
->ws_wordv
= NULL
;
1567 wordsplit_perror (struct wordsplit
*wsp
)
1569 switch (wsp
->ws_errno
)
1572 wsp
->ws_error (_("no error"));
1576 wsp
->ws_error (_("missing closing %c (start near #%lu)"),
1577 wsp
->ws_input
[wsp
->ws_endp
],
1578 (unsigned long) wsp
->ws_endp
);
1582 wsp
->ws_error (_("memory exhausted"));
1586 wsp
->ws_error (_("command substitution is not yet supported"));
1589 wsp
->ws_error (_("invalid wordsplit usage"));
1593 wsp
->ws_error (_("unbalanced curly brace"));
1597 wsp
->ws_error (_("undefined variable"));
1601 wsp
->ws_error (_("input exhausted"));
1605 wsp
->ws_error (_("unknown error"));
1609 const char *_wordsplit_errstr
[] = {
1611 N_("missing closing quote"),
1612 N_("memory exhausted"),
1613 N_("command substitution is not yet supported"),
1614 N_("invalid wordsplit usage"),
1615 N_("unbalanced curly brace"),
1616 N_("undefined variable"),
1617 N_("input exhausted")
1619 int _wordsplit_nerrs
=
1620 sizeof (_wordsplit_errstr
) / sizeof (_wordsplit_errstr
[0]);
1623 wordsplit_strerror (struct wordsplit
*ws
)
1625 if (ws
->ws_errno
< _wordsplit_nerrs
)
1626 return _wordsplit_errstr
[ws
->ws_errno
];
1627 return N_("unknown error");