Dogcows Code - chaz/tar/blob - src/transform.c
(_transform_name_to_obstack,set_transform_expr): Implement case conversion operations...
[chaz/tar] / src / transform.c
1 /* This file is part of GNU tar.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18 #include <system.h>
19 #include <regex.h>
20 #include "common.h"
21
/* Mode of the name transformation.  */
enum transform_type
  {
    transform_none,     /* No transform expression has been set */
    transform_first,    /* Process a single match only */
    transform_global    /* Keep matching until the input is exhausted */
  };

/* Current mode.  It stays transform_none until set_transform_expr
   has been called.  */
enum transform_type transform_type = transform_none;
29 static regex_t regex;
30 static struct obstack stk;
31
/* Kinds of segments a compiled replacement expression is made of.  */
enum replace_segm_type
  {
    segm_literal,       /* Text copied to the output as is */
    segm_backref,       /* Reference to a (sub)match of the regex */
    segm_case_ctl       /* Case conversion control (GNU extension) */
  };
38
/* Case conversion operations available in a replacement expression.  */
enum case_ctl_type
  {
    ctl_stop,           /* End case conversion */
    ctl_upcase_next,    /* Uppercase the next character only */
    ctl_locase_next,    /* Lowercase the next character only */
    ctl_upcase,         /* Uppercase everything until ctl_stop */
    ctl_locase          /* Lowercase everything until ctl_stop */
  };
47
48 struct replace_segm
49 {
50 struct replace_segm *next;
51 enum replace_segm_type type;
52 union
53 {
54 struct
55 {
56 char *ptr;
57 size_t size;
58 } literal; /* type == segm_literal */
59 size_t ref; /* type == segm_backref */
60 enum case_ctl_type ctl; /* type == segm_case_ctl */
61 } v;
62 };
63
/* Compiled replacement expression: first and last elements of the
   segment list.  Both are NULL while the list is empty.  */
static struct replace_segm *repl_head, *repl_tail;

/* Number of elements in the above list.  The type is spelled out
   because implicit `int' is not valid since C99.  */
static int segm_count;
67
68 static struct replace_segm *
69 add_segment (void)
70 {
71 struct replace_segm *segm = xmalloc (sizeof *segm);
72 segm->next = NULL;
73 if (repl_tail)
74 repl_tail->next = segm;
75 else
76 repl_head = segm;
77 repl_tail = segm;
78 segm_count++;
79 return segm;
80 }
81
82 static void
83 add_literal_segment (char *str, char *end)
84 {
85 size_t len = end - str;
86 if (len)
87 {
88 struct replace_segm *segm = add_segment ();
89 segm->type = segm_literal;
90 segm->v.literal.ptr = xmalloc (len + 1);
91 memcpy (segm->v.literal.ptr, str, len);
92 segm->v.literal.ptr[len] = 0;
93 segm->v.literal.size = len;
94 }
95 }
96
97 static void
98 add_char_segment (int chr)
99 {
100 struct replace_segm *segm = add_segment ();
101 segm->type = segm_literal;
102 segm->v.literal.ptr = xmalloc (2);
103 segm->v.literal.ptr[0] = chr;
104 segm->v.literal.ptr[1] = 0;
105 segm->v.literal.size = 2;
106 }
107
108 static void
109 add_backref_segment (size_t ref)
110 {
111 struct replace_segm *segm = add_segment ();
112 segm->type = segm_backref;
113 segm->v.ref = ref;
114 }
115
116 static void
117 add_case_ctl_segment (enum case_ctl_type ctl)
118 {
119 struct replace_segm *segm = add_segment ();
120 segm->type = segm_case_ctl;
121 segm->v.ctl = ctl;
122 }
123
124 void
125 set_transform_expr (const char *expr)
126 {
127 int delim;
128 int i, j, rc;
129 char *str, *beg, *cur;
130 const char *p;
131 int cflags = 0;
132
133 if (transform_type == transform_none)
134 obstack_init (&stk);
135 else
136 {
137 /* Redefinition of the transform expression */
138 regfree (&regex);
139 }
140
141 if (expr[0] != 's')
142 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
143
144 delim = expr[1];
145
146 /* Scan regular expression */
147 for (i = 2; expr[i] && expr[i] != delim; i++)
148 if (expr[i] == '\\' && expr[i+1])
149 i++;
150
151 if (expr[i] != delim)
152 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
153
154 /* Scan replacement expression */
155 for (j = i + 1; expr[j] && expr[j] != delim; j++)
156 if (expr[j] == '\\' && expr[j+1])
157 j++;
158
159 if (expr[j] != delim)
160 USAGE_ERROR ((0, 0, _("Invalid transform expression")));
161
162 /* Check flags */
163 transform_type = transform_first;
164 for (p = expr + j + 1; *p; p++)
165 switch (*p)
166 {
167 case 'g':
168 transform_type = transform_global;
169 break;
170
171 case 'i':
172 cflags |= REG_ICASE;
173 break;
174
175 case 'x':
176 cflags |= REG_EXTENDED;
177 break;
178
179 default:
180 USAGE_ERROR ((0, 0, _("Unknown flag in transform expression")));
181 }
182
183 /* Extract and compile regex */
184 str = xmalloc (i - 1);
185 memcpy (str, expr + 2, i - 2);
186 str[i - 2] = 0;
187
188 rc = regcomp (&regex, str, cflags);
189
190 if (rc)
191 {
192 char errbuf[512];
193 regerror (rc, &regex, errbuf, sizeof (errbuf));
194 USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf));
195 }
196
197 if (str[0] == '^' || str[strlen (str) - 1] == '$')
198 transform_type = transform_first;
199
200 free (str);
201
202 /* Extract and compile replacement expr */
203 i++;
204 str = xmalloc (j - i + 1);
205 memcpy (str, expr + i, j - i);
206 str[j - i] = 0;
207
208 for (cur = beg = str; *cur;)
209 {
210 if (*cur == '\\')
211 {
212 size_t n;
213
214 add_literal_segment (beg, cur);
215 switch (*++cur)
216 {
217 case '0': case '1': case '2': case '3': case '4':
218 case '5': case '6': case '7': case '8': case '9':
219 n = strtoul (cur, &cur, 10);
220 if (n > regex.re_nsub)
221 USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range")));
222 add_backref_segment (n);
223 break;
224
225 case '\\':
226 add_char_segment ('\\');
227 cur++;
228 break;
229
230 case 'a':
231 add_char_segment ('\a');
232 cur++;
233 break;
234
235 case 'b':
236 add_char_segment ('\b');
237 cur++;
238 break;
239
240 case 'f':
241 add_char_segment ('\f');
242 cur++;
243 break;
244
245 case 'n':
246 add_char_segment ('\n');
247 cur++;
248 break;
249
250 case 'r':
251 add_char_segment ('\r');
252 cur++;
253 break;
254
255 case 't':
256 add_char_segment ('\t');
257 cur++;
258 break;
259
260 case 'v':
261 add_char_segment ('\v');
262 cur++;
263 break;
264
265 case '&':
266 add_char_segment ('&');
267 cur++;
268 break;
269
270 case 'L':
271 /* Turn the replacement to lowercase until a `\U' or `\E'
272 is found, */
273 add_case_ctl_segment (ctl_locase);
274 cur++;
275 break;
276
277 case 'l':
278 /* Turn the next character to lowercase, */
279 add_case_ctl_segment (ctl_locase_next);
280 cur++;
281 break;
282
283 case 'U':
284 /* Turn the replacement to uppercase until a `\L' or `\E'
285 is found, */
286 add_case_ctl_segment (ctl_upcase);
287 cur++;
288 break;
289
290 case 'u':
291 /* Turn the next character to uppercase, */
292 add_case_ctl_segment (ctl_upcase_next);
293 cur++;
294 break;
295
296 case 'E':
297 /* Stop case conversion started by `\L' or `\U'. */
298 add_case_ctl_segment (ctl_stop);
299 cur++;
300 break;
301
302 default:
303 /* Try to be nice */
304 {
305 char buf[2];
306 buf[0] = '\\';
307 buf[1] = *cur;
308 add_literal_segment (buf, buf + 2);
309 }
310 cur++;
311 break;
312 }
313 beg = cur;
314 }
315 else if (*cur == '&')
316 {
317 add_literal_segment (beg, cur);
318 add_backref_segment (0);
319 beg = ++cur;
320 }
321 else
322 cur++;
323 }
324 add_literal_segment (beg, cur);
325
326 }
327
328 /* Run case conversion specified by CASE_CTL on array PTR of SIZE
329 characters. Returns pointer to statically allocated storage. */
330 static char *
331 run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size)
332 {
333 static char *case_ctl_buffer;
334 static size_t case_ctl_bufsize;
335 char *p;
336
337 if (case_ctl_bufsize < size)
338 {
339 case_ctl_bufsize = size;
340 case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize);
341 }
342 memcpy (case_ctl_buffer, ptr, size);
343 switch (case_ctl)
344 {
345 case ctl_upcase_next:
346 case_ctl_buffer[0] = toupper (case_ctl_buffer[0]);
347 break;
348
349 case ctl_locase_next:
350 case_ctl_buffer[0] = tolower (case_ctl_buffer[0]);
351 break;
352
353 case ctl_upcase:
354 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
355 *p = toupper (*p);
356 break;
357
358 case ctl_locase:
359 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
360 *p = tolower (*p);
361 break;
362
363 case ctl_stop:
364 break;
365 }
366 return case_ctl_buffer;
367 }
368
369 bool
370 _transform_name_to_obstack (char *input)
371 {
372 regmatch_t *rmp;
373 char *p;
374 int rc;
375 enum case_ctl_type case_ctl = ctl_stop, /* Current case conversion op */
376 save_ctl = ctl_stop; /* Saved case_ctl for \u and \l */
377
378 /* Reset case conversion after a single-char operation */
379 #define CASE_CTL_RESET() if (case_ctl == ctl_upcase_next \
380 || case_ctl == ctl_locase_next) \
381 { \
382 case_ctl = save_ctl; \
383 save_ctl = ctl_stop; \
384 }
385
386 if (transform_type == transform_none)
387 return false;
388
389 rmp = xmalloc ((regex.re_nsub + 1) * sizeof (*rmp));
390
391 while (*input)
392 {
393 size_t disp;
394 char *ptr;
395
396 rc = regexec (&regex, input, regex.re_nsub + 1, rmp, 0);
397
398 if (rc == 0)
399 {
400 struct replace_segm *segm;
401
402 disp = rmp[0].rm_eo;
403
404 if (rmp[0].rm_so)
405 obstack_grow (&stk, input, rmp[0].rm_so);
406
407 for (segm = repl_head; segm; segm = segm->next)
408 {
409 switch (segm->type)
410 {
411 case segm_literal: /* Literal segment */
412 if (case_ctl == ctl_stop)
413 ptr = segm->v.literal.ptr;
414 else
415 {
416 ptr = run_case_conv (case_ctl,
417 segm->v.literal.ptr,
418 segm->v.literal.size);
419 CASE_CTL_RESET();
420 }
421 obstack_grow (&stk, ptr, segm->v.literal.size);
422 break;
423
424 case segm_backref: /* Back-reference segment */
425 if (rmp[segm->v.ref].rm_so != -1
426 && rmp[segm->v.ref].rm_eo != -1)
427 {
428 size_t size = rmp[segm->v.ref].rm_eo
429 - rmp[segm->v.ref].rm_so;
430 ptr = input + rmp[segm->v.ref].rm_so;
431 if (case_ctl != ctl_stop)
432 {
433 ptr = run_case_conv (case_ctl, ptr, size);
434 CASE_CTL_RESET();
435 }
436
437 obstack_grow (&stk, ptr, size);
438 }
439 break;
440
441 case segm_case_ctl:
442 switch (segm->v.ctl)
443 {
444 case ctl_upcase_next:
445 case ctl_locase_next:
446 switch (save_ctl)
447 {
448 case ctl_stop:
449 case ctl_upcase:
450 case ctl_locase:
451 save_ctl = case_ctl;
452 default:
453 break;
454 }
455 /*FALL THROUGH*/
456
457 case ctl_upcase:
458 case ctl_locase:
459 case ctl_stop:
460 case_ctl = segm->v.ctl;
461 }
462 }
463 }
464 }
465 else
466 {
467 disp = strlen (input);
468 obstack_grow (&stk, input, disp);
469 }
470
471 input += disp;
472
473 if (transform_type == transform_first)
474 {
475 obstack_grow (&stk, input, strlen (input));
476 break;
477 }
478 }
479
480 obstack_1grow (&stk, 0);
481 free (rmp);
482 return true;
483 }
484
485 bool
486 transform_name_fp (char **pinput, char *(*fun)(char *))
487 {
488 char *str, *p;
489 bool ret = _transform_name_to_obstack (*pinput);
490 if (ret)
491 {
492 str = obstack_finish (&stk);
493 assign_string (pinput, fun ? fun (str) : str);
494 obstack_free (&stk, str);
495 }
496 else if (fun)
497 {
498 str = *pinput;
499 *pinput = NULL;
500 assign_string (pinput, fun (str));
501 free (str);
502 ret = true;
503 }
504 return ret;
505 }
506
/* Apply the current transform expression to the name *PINPUT, with no
   additional processing function.  Returns the value returned by
   transform_name_fp.  */
bool
transform_name (char **pinput)
{
  bool changed = transform_name_fp (pinput, NULL);

  return changed;
}
512
#if 0
/* Debugging aid, disabled: read names from the standard input, one
   per line, and print the result of transforming each of them.
   "=> NAME" is printed if transform_name returned true, "=" otherwise.

   Each line is handed to transform_name in a heap-allocated copy,
   because transform_name reassigns the string it is given.
   NOTE(review): this assumes assign_string releases the old value
   with free -- confirm before enabling this code.  */
void
read_and_transform_loop (void)
{
  char buf[512];
  while (fgets (buf, sizeof buf, stdin))
    {
      size_t len = strlen (buf);
      char *name;

      /* Strip the trailing newline; the length test keeps an empty
         buffer from being indexed at -1.  */
      if (len > 0 && buf[len - 1] == '\n')
        buf[--len] = 0;

      name = xmalloc (len + 1);
      memcpy (name, buf, len + 1);

      if (transform_name (&name))
        printf ("=> %s\n", name);
      else
        printf ("=\n");
      free (name);
    }
}
#endif
This page took 0.059855 seconds and 5 git commands to generate.