]> Dogcows Code - chaz/tar/blob - src/transform.c
* NEWS: Remove support for mangled names.
[chaz/tar] / src / transform.c
1 /* This file is part of GNU tar.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18 #include <system.h>
19 #include <regex.h>
20 #include "common.h"
21
/* Mode of the currently defined transformation.  */
static enum transform_type
  {
    transform_none = 0,		/* No expression has been set yet */
    transform_first = 1,	/* Stop after the first pass over the name */
    transform_global = 2	/* Keep matching until the name is exhausted
				   (selected by the `g' flag) */
  }
transform_type = transform_none;

/* Ordinal given by a numeric flag; matches counted before it are copied
   through unchanged.  Zero means no numeric flag was given.  */
static unsigned int match_number = 0;

/* Regular expression compiled from the transform expression.  */
static regex_t regex;

/* Obstack in which transformed names are assembled.  */
static struct obstack stk;
32
/* Kinds of element a compiled replacement expression is made of.  */
enum replace_segm_type
  {
    segm_literal = 0,		/* Text copied to the output as is */
    segm_backref = 1,		/* Reference to a matched (sub)expression */
    segm_case_ctl = 2		/* Case conversion switch (GNU extension) */
  };
39
/* Case conversion operations usable in a replacement expression.  */
enum case_ctl_type
  {
    ctl_stop = 0,		/* No conversion / end the running one */
    ctl_upcase_next = 1,	/* Uppercase only the next character */
    ctl_locase_next = 2,	/* Lowercase only the next character */
    ctl_upcase = 3,		/* Uppercase everything until ctl_stop */
    ctl_locase = 4		/* Lowercase everything until ctl_stop */
  };
48
49 struct replace_segm
50 {
51 struct replace_segm *next;
52 enum replace_segm_type type;
53 union
54 {
55 struct
56 {
57 char *ptr;
58 size_t size;
59 } literal; /* type == segm_literal */
60 size_t ref; /* type == segm_backref */
61 enum case_ctl_type ctl; /* type == segm_case_ctl */
62 } v;
63 };
64
/* The compiled replacement expression: a singly linked list of
   segments.  */
static struct replace_segm *repl_head = NULL;	/* First segment */
static struct replace_segm *repl_tail = NULL;	/* Last segment */

/* How many segments the list currently holds.  */
static size_t segm_count = 0;
68
69 static struct replace_segm *
70 add_segment (void)
71 {
72 struct replace_segm *segm = xmalloc (sizeof *segm);
73 segm->next = NULL;
74 if (repl_tail)
75 repl_tail->next = segm;
76 else
77 repl_head = segm;
78 repl_tail = segm;
79 segm_count++;
80 return segm;
81 }
82
83 static void
84 add_literal_segment (char *str, char *end)
85 {
86 size_t len = end - str;
87 if (len)
88 {
89 struct replace_segm *segm = add_segment ();
90 segm->type = segm_literal;
91 segm->v.literal.ptr = xmalloc (len + 1);
92 memcpy (segm->v.literal.ptr, str, len);
93 segm->v.literal.ptr[len] = 0;
94 segm->v.literal.size = len;
95 }
96 }
97
98 static void
99 add_char_segment (int chr)
100 {
101 struct replace_segm *segm = add_segment ();
102 segm->type = segm_literal;
103 segm->v.literal.ptr = xmalloc (2);
104 segm->v.literal.ptr[0] = chr;
105 segm->v.literal.ptr[1] = 0;
106 segm->v.literal.size = 1;
107 }
108
109 static void
110 add_backref_segment (size_t ref)
111 {
112 struct replace_segm *segm = add_segment ();
113 segm->type = segm_backref;
114 segm->v.ref = ref;
115 }
116
117 static void
118 add_case_ctl_segment (enum case_ctl_type ctl)
119 {
120 struct replace_segm *segm = add_segment ();
121 segm->type = segm_case_ctl;
122 segm->v.ctl = ctl;
123 }
124
125 void
126 set_transform_expr (const char *expr)
127 {
128 int delim;
129 int i, j, rc;
130 char *str, *beg, *cur;
131 const char *p;
132 int cflags = 0;
133
134 if (transform_type == transform_none)
135 obstack_init (&stk);
136 else
137 {
138 /* Redefinition of the transform expression */
139 regfree (&regex);
140 }
141
142 if (expr[0] != 's')
143 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
144
145 delim = expr[1];
146
147 /* Scan regular expression */
148 for (i = 2; expr[i] && expr[i] != delim; i++)
149 if (expr[i] == '\\' && expr[i+1])
150 i++;
151
152 if (expr[i] != delim)
153 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
154
155 /* Scan replacement expression */
156 for (j = i + 1; expr[j] && expr[j] != delim; j++)
157 if (expr[j] == '\\' && expr[j+1])
158 j++;
159
160 if (expr[j] != delim)
161 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
162
163 /* Check flags */
164 transform_type = transform_first;
165 for (p = expr + j + 1; *p; p++)
166 switch (*p)
167 {
168 case 'g':
169 transform_type = transform_global;
170 break;
171
172 case 'i':
173 cflags |= REG_ICASE;
174 break;
175
176 case 'x':
177 cflags |= REG_EXTENDED;
178 break;
179
180 case '0': case '1': case '2': case '3': case '4':
181 case '5': case '6': case '7': case '8': case '9':
182 match_number = strtoul (p, (char**) &p, 0);
183 p--;
184 break;
185
186 default:
187 USAGE_ERROR ((0, 0, _("Unknown flag in transform expression")));
188 }
189
190 /* Extract and compile regex */
191 str = xmalloc (i - 1);
192 memcpy (str, expr + 2, i - 2);
193 str[i - 2] = 0;
194
195 rc = regcomp (&regex, str, cflags);
196
197 if (rc)
198 {
199 char errbuf[512];
200 regerror (rc, &regex, errbuf, sizeof (errbuf));
201 USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf));
202 }
203
204 if (str[0] == '^' || str[strlen (str) - 1] == '$')
205 transform_type = transform_first;
206
207 free (str);
208
209 /* Extract and compile replacement expr */
210 i++;
211 str = xmalloc (j - i + 1);
212 memcpy (str, expr + i, j - i);
213 str[j - i] = 0;
214
215 for (cur = beg = str; *cur;)
216 {
217 if (*cur == '\\')
218 {
219 size_t n;
220
221 add_literal_segment (beg, cur);
222 switch (*++cur)
223 {
224 case '0': case '1': case '2': case '3': case '4':
225 case '5': case '6': case '7': case '8': case '9':
226 n = strtoul (cur, &cur, 10);
227 if (n > regex.re_nsub)
228 USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range")));
229 add_backref_segment (n);
230 break;
231
232 case '\\':
233 add_char_segment ('\\');
234 cur++;
235 break;
236
237 case 'a':
238 add_char_segment ('\a');
239 cur++;
240 break;
241
242 case 'b':
243 add_char_segment ('\b');
244 cur++;
245 break;
246
247 case 'f':
248 add_char_segment ('\f');
249 cur++;
250 break;
251
252 case 'n':
253 add_char_segment ('\n');
254 cur++;
255 break;
256
257 case 'r':
258 add_char_segment ('\r');
259 cur++;
260 break;
261
262 case 't':
263 add_char_segment ('\t');
264 cur++;
265 break;
266
267 case 'v':
268 add_char_segment ('\v');
269 cur++;
270 break;
271
272 case '&':
273 add_char_segment ('&');
274 cur++;
275 break;
276
277 case 'L':
278 /* Turn the replacement to lowercase until a `\U' or `\E'
279 is found, */
280 add_case_ctl_segment (ctl_locase);
281 cur++;
282 break;
283
284 case 'l':
285 /* Turn the next character to lowercase, */
286 add_case_ctl_segment (ctl_locase_next);
287 cur++;
288 break;
289
290 case 'U':
291 /* Turn the replacement to uppercase until a `\L' or `\E'
292 is found, */
293 add_case_ctl_segment (ctl_upcase);
294 cur++;
295 break;
296
297 case 'u':
298 /* Turn the next character to uppercase, */
299 add_case_ctl_segment (ctl_upcase_next);
300 cur++;
301 break;
302
303 case 'E':
304 /* Stop case conversion started by `\L' or `\U'. */
305 add_case_ctl_segment (ctl_stop);
306 cur++;
307 break;
308
309 default:
310 /* Try to be nice */
311 {
312 char buf[2];
313 buf[0] = '\\';
314 buf[1] = *cur;
315 add_literal_segment (buf, buf + 2);
316 }
317 cur++;
318 break;
319 }
320 beg = cur;
321 }
322 else if (*cur == '&')
323 {
324 add_literal_segment (beg, cur);
325 add_backref_segment (0);
326 beg = ++cur;
327 }
328 else
329 cur++;
330 }
331 add_literal_segment (beg, cur);
332
333 }
334
335 /* Run case conversion specified by CASE_CTL on array PTR of SIZE
336 characters. Returns pointer to statically allocated storage. */
337 static char *
338 run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size)
339 {
340 static char *case_ctl_buffer;
341 static size_t case_ctl_bufsize;
342 char *p;
343
344 if (case_ctl_bufsize < size)
345 {
346 case_ctl_bufsize = size;
347 case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize);
348 }
349 memcpy (case_ctl_buffer, ptr, size);
350 switch (case_ctl)
351 {
352 case ctl_upcase_next:
353 case_ctl_buffer[0] = toupper (case_ctl_buffer[0]);
354 break;
355
356 case ctl_locase_next:
357 case_ctl_buffer[0] = tolower (case_ctl_buffer[0]);
358 break;
359
360 case ctl_upcase:
361 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
362 *p = toupper (*p);
363 break;
364
365 case ctl_locase:
366 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
367 *p = tolower (*p);
368 break;
369
370 case ctl_stop:
371 break;
372 }
373 return case_ctl_buffer;
374 }
375
376 bool
377 _transform_name_to_obstack (char *input)
378 {
379 regmatch_t *rmp;
380 int rc;
381 size_t nmatches = 0;
382 enum case_ctl_type case_ctl = ctl_stop, /* Current case conversion op */
383 save_ctl = ctl_stop; /* Saved case_ctl for \u and \l */
384
385 /* Reset case conversion after a single-char operation */
386 #define CASE_CTL_RESET() if (case_ctl == ctl_upcase_next \
387 || case_ctl == ctl_locase_next) \
388 { \
389 case_ctl = save_ctl; \
390 save_ctl = ctl_stop; \
391 }
392
393 if (transform_type == transform_none)
394 return false;
395
396 rmp = xmalloc ((regex.re_nsub + 1) * sizeof (*rmp));
397
398 while (*input)
399 {
400 size_t disp;
401 char *ptr;
402
403 rc = regexec (&regex, input, regex.re_nsub + 1, rmp, 0);
404
405 if (rc == 0)
406 {
407 struct replace_segm *segm;
408
409 disp = rmp[0].rm_eo;
410
411 if (rmp[0].rm_so)
412 obstack_grow (&stk, input, rmp[0].rm_so);
413
414 nmatches++;
415 if (match_number && nmatches < match_number)
416 {
417 obstack_grow (&stk, input, disp);
418 input += disp;
419 continue;
420 }
421
422 for (segm = repl_head; segm; segm = segm->next)
423 {
424 switch (segm->type)
425 {
426 case segm_literal: /* Literal segment */
427 if (case_ctl == ctl_stop)
428 ptr = segm->v.literal.ptr;
429 else
430 {
431 ptr = run_case_conv (case_ctl,
432 segm->v.literal.ptr,
433 segm->v.literal.size);
434 CASE_CTL_RESET();
435 }
436 obstack_grow (&stk, ptr, segm->v.literal.size);
437 break;
438
439 case segm_backref: /* Back-reference segment */
440 if (rmp[segm->v.ref].rm_so != -1
441 && rmp[segm->v.ref].rm_eo != -1)
442 {
443 size_t size = rmp[segm->v.ref].rm_eo
444 - rmp[segm->v.ref].rm_so;
445 ptr = input + rmp[segm->v.ref].rm_so;
446 if (case_ctl != ctl_stop)
447 {
448 ptr = run_case_conv (case_ctl, ptr, size);
449 CASE_CTL_RESET();
450 }
451
452 obstack_grow (&stk, ptr, size);
453 }
454 break;
455
456 case segm_case_ctl:
457 switch (segm->v.ctl)
458 {
459 case ctl_upcase_next:
460 case ctl_locase_next:
461 switch (save_ctl)
462 {
463 case ctl_stop:
464 case ctl_upcase:
465 case ctl_locase:
466 save_ctl = case_ctl;
467 default:
468 break;
469 }
470 /*FALL THROUGH*/
471
472 case ctl_upcase:
473 case ctl_locase:
474 case ctl_stop:
475 case_ctl = segm->v.ctl;
476 }
477 }
478 }
479 }
480 else
481 {
482 disp = strlen (input);
483 obstack_grow (&stk, input, disp);
484 }
485
486 input += disp;
487
488 if (transform_type == transform_first)
489 {
490 obstack_grow (&stk, input, strlen (input));
491 break;
492 }
493 }
494
495 obstack_1grow (&stk, 0);
496 free (rmp);
497 return true;
498 }
499
500 bool
501 transform_name_fp (char **pinput, char *(*fun)(char *))
502 {
503 char *str;
504 bool ret = _transform_name_to_obstack (*pinput);
505 if (ret)
506 {
507 str = obstack_finish (&stk);
508 assign_string (pinput, fun ? fun (str) : str);
509 obstack_free (&stk, str);
510 }
511 else if (fun)
512 {
513 str = *pinput;
514 *pinput = NULL;
515 assign_string (pinput, fun (str));
516 free (str);
517 ret = true;
518 }
519 return ret;
520 }
521
/* Replace the string *PINPUT with its transformed version, without
   any additional post-processing.  Returns what transform_name_fp
   returns for a NULL function argument.  */
bool
transform_name (char **pinput)
{
  char *(*no_postprocess) (char *) = NULL;

  return transform_name_fp (pinput, no_postprocess);
}
527
This page took 0.058647 seconds and 4 git commands to generate.