]>
Dogcows Code - chaz/p5-CGI-Ex/blob - lib/CGI/Ex/Fill.pm
5 CGI::Ex::Fill - Fast but compliant regex based form filler
9 ###----------------------------------------------------------------###
10 # Copyright 2007 - Paul Seamons #
11 # Distributed under the Perl Artistic License without warranty #
12 ###----------------------------------------------------------------###
24 use base
qw(Exporter);
28 @EXPORT = qw(form_fill);
29 @EXPORT_OK = qw(fill form_fill html_escape get_tagval_by_key swap_tagval_by_key);
32 ### These directives are used to determine whether or not to
33 ### remove html comments and script sections while filling in
34 ### a form. Default is on. This may give some trouble if you
35 ### have a javascript section with form elements that you would
40 $MARKER_SCRIPT = "\0SCRIPT\0";
41 $MARKER_COMMENT = "\0COMMENT\0";
42 $OBJECT_METHOD = "param";
45 ###----------------------------------------------------------------###
47 ### Regex based filler - as opposed to HTML::Parser based HTML::FillInForm
48 ### arguments are positional
49 ### pos1 - text or textref - if textref it is modified in place
50 ### pos2 - hash or cgi obj ref, or array ref of hash and cgi obj refs
51 ### pos3 - target - to be used for choosing a specific form - default undef
52 ### pos4 - boolean fill in password fields - default is true
53 ### pos5 - hashref or arrayref of fields to ignore
56 my $ref = ref($text) ? $text : \
$text;
59 my $fill_password = shift;
60 my $ignore = shift || {};
66 fill_password
=> $fill_password,
67 ignore_fields
=> $ignore,
70 return ref($text) ? 1 : $$ref;
75 my $ref = $args->{'text'};
76 my $form = $args->{'form'};
77 my $target = $args->{'target'};
78 my $ignore = $args->{'ignore_fields'};
79 my $fill_password = $args->{'fill_password'};
81 my $forms = UNIVERSAL
::isa
($form, 'ARRAY') ? $form : [$form];
82 $ignore = {map {$_ => 1} @$ignore} if UNIVERSAL
::isa
($ignore, 'ARRAY');
83 $fill_password = 1 if ! defined $fill_password;
86 ### allow for optionally removing comments and script
89 if (defined($args->{'remove_script'}) ? $args->{'remove_script'} : $REMOVE_SCRIPT) {
90 $$ref =~ s
|(<script
\b.+?</script
>)|push(@script, $1);$MARKER_SCRIPT|egi
;
92 if (defined($args->{'remove_comment'}) ? $args->{'remove_comment'} : $REMOVE_COMMENT) {
93 $$ref =~ s
|(<!--.*?-->)|push(@comment, $1);$MARKER_COMMENT|eg
;
96 ### if there is a target - focus in on it
97 ### possible bug here - name won't be found if
98 ### there is nested html inside the form tag that comes before
99 ### the name field - if no close form tag - don't swap in anything
101 local $_TEMP_TARGET = $target;
102 $$ref =~ s
{(<form
# open form
104 \bname
=([\"\']?) # the name tag
105 $target # with the correct name (allows for regex)
107 .+? # as much as there is
108 (?=</form
>)) # then end
111 local $args->{'text'} = \
$str;
112 local $args->{'remove_script'} = 0;
113 local $args->{'remove_comment'} = 0;
114 local $args->{'target'} = undef;
116 $str; # return of the s///;
119 ### put scripts and comments back and return
120 $$ref =~ s/$MARKER_COMMENT/shift(@comment)/eg if $#comment != -1;
121 $$ref =~ s/$MARKER_SCRIPT/ shift(@script) /eg if $#script != -1;
125 ### build a sub to get a value from the passed forms on a request basis
126 my %indexes = (); # store indexes for multivalued elements
127 my $get_form_value = sub {
129 my $all = $_[0] && $_[0] eq 'all';
130 if (! defined $key || ! length $key) {
131 return $all ? [] : undef;
136 foreach my $form (@$forms) {
138 if (UNIVERSAL
::isa
($form, 'HASH') && defined $form->{$key}) {
139 $val = $form->{$key};
141 } elsif ($meth = UNIVERSAL
::can
($form, $args->{'object_method'} || $OBJECT_METHOD)) {
142 $val = $form->$meth($key);
143 last if defined $val;
144 } elsif (UNIVERSAL
::isa
($form, 'CODE')) {
145 $val = $form->($key, $_TEMP_TARGET);
146 last if defined $val;
149 if (! defined $val) {
150 return $all ? [] : undef;
153 ### fix up the value some
154 if (UNIVERSAL
::isa
($val, 'CODE')) {
155 $val = $val->($key, $_TEMP_TARGET);
157 if (UNIVERSAL
::isa
($val, 'ARRAY')) {
158 $val = [@$val]; # copy the values
160 # die "Value for $key is not an array or a scalar";
161 $val = "$val"; # stringify anything else
164 ### html escape them all
165 html_escape
(\
$_) foreach (ref($val) ? @$val : $val);
167 ### allow for returning all elements
170 return ref($val) ? $val : [$val];
171 } elsif (ref($val)) {
172 $indexes{$key} ||= 0;
173 my $ret = $val->[$indexes{$key}];
174 $ret = '' if ! defined $ret;
175 $indexes{$key} ++; # don't wrap - if we run out of values - we're done
183 ###--------------------------------------------------------------###
186 ### swap <input > form elements if they have a name
188 (<input \s
(?: ([\"\'])(?:|.*?[^\\])\
2 | [^>] )+ >) # nested html ok
190 ### get the type and name - intentionally exlude names with nested "'
192 my $type = uc(get_tagval_by_key
(\
$tag, 'type') || '');
193 my $name = get_tagval_by_key
(\
$tag, 'name');
195 if ($name && ! $ignore->{$name}) {
200 || ($type eq 'PASSWORD' && $fill_password)) {
202 my $value = $get_form_value->($name, 'next');
203 if (defined $value) {
204 swap_tagval_by_key
(\
$tag, 'value', $value);
205 } elsif (! defined get_tagval_by_key
(\
$tag, 'value')) {
206 swap_tagval_by_key
(\
$tag, 'value', '');
209 } elsif ($type eq 'CHECKBOX'
210 || $type eq 'RADIO') {
211 my $values = $get_form_value->($name, 'all');
213 $tag =~ s{\s+\bCHECKED\b(?:=([\"\']?)checked\1)?(?=\s|>|/>)}{}ig;
215 my $fvalue = get_tagval_by_key
(\
$tag, 'value');
216 $fvalue = 'on' if ! defined $fvalue;
217 if (defined $fvalue) {
219 next if $_ ne $fvalue;
220 $tag =~ s
|(\s
*/?>\s
*)$| checked
="checked"$1|;
228 $tag; # return of swap
233 ### swap select boxes (must be done in such a way as to allow no closing tag)
236 push @start, pos($$ref) - length($1) while $$ref =~ m
|(<\s
*select\b)|ig
;
237 push @close, pos($$ref) - length($1) while $$ref =~ m
|(</\s
*select\b)|ig
;
238 for (my $i = 0; $i <= $#start; $i ++) {
239 while (defined($close[$i]) && $close[$i] < $start[$i]) {
240 splice (@close,$i,1,());
243 $close[$i] = length($$ref) if ! defined $close[$i]; # set to end of string if no closing
244 } elsif (! defined($close[$i]) || $close[$i] > $start[$i + 1]) {
245 $close[$i] = $start[$i + 1]; # set to start of next select if no closing or > next select
248 for (my $i = $#start; $i >= 0; $i --) {
249 my $opts = substr($$ref, $start[$i], $close[$i] - $start[$i]);
251 (<select \s
# opening
252 (?: "" | '' | ([\"\']).*?[^\\]\
2 | [^>] )+ # nested html ok
257 my $name = get_tagval_by_key
(\
$tag, 'name');
258 my $values = $ignore->{$name} ? [] : $get_form_value->($name, 'all');
259 if ($#$values != -1) {
261 (<option
[^>]*>) # opening tag - no embedded > allowed
262 (.*?) # the text value
263 (?=<option
|$|</option
>) # the next tag
265 my ($tag2, $opt) = ($1, $2);
266 $tag2 =~ s
%\s
+\bSELECTED
\b(?:=([\"\']?)selected\
1)?(?=\s
|>|/>)%%ig;
268 my $fvalues = get_tagval_by_key
(\
$tag2, 'value', 'all');
269 my $fvalue = @$fvalues ? $fvalues->[0]
270 : $opt =~ /^\s*(.*?)\s*$/ ? $1 : "";
272 next if $_ ne $fvalue;
273 $tag2 =~ s
|(\s
*/?>\s
*)$| selected
="selected"$1|;
276 "$tag2$opt"; # return of the swap
279 substr($$ref, $start[$i], $close[$i] - $start[$i], "$tag$opts");
286 ### swap textareas (must be done in such a way as to allow no closing tag)
289 push @start, pos($$ref) - length($1) while $$ref =~ m
|(<\s
*textarea
\b)|ig
;
290 push @close, pos($$ref) - length($1) while $$ref =~ m
|(</\s
*textarea
\b)|ig
;
291 for (my $i = 0; $i <= $#start; $i ++) {
292 while (defined($close[$i]) && $close[$i] < $start[$i]) {
293 splice (@close,$i,1,()); # get rid of extra closes
296 $close[$i] = length($$ref) if ! defined $close[$i]; # set to end of string if no closing
297 } elsif (! defined($close[$i]) || $close[$i] > $start[$i + 1]) {
298 splice(@close, $i, 0, $start[$i + 1]); # set to start of next select if no closing or > next select
302 for (my $i = 0; $i <= $#start; $i ++) {
303 my $oldval = substr($$ref, $start[$i] + $offset, $close[$i] - $start[$i]);
305 (<textarea \s
# opening
306 (?: "" | '' | ([\"\']).*?[^\\]\
2 | [^>] )+ # nested html ok
310 my $name = get_tagval_by_key
(\
$tag, 'name');
311 if ($name && ! $ignore->{$name}) {
312 my $value = $get_form_value->($name, 'next');
313 next if ! defined $value;
314 substr($$ref, $start[$i] + $offset, $close[$i] - $start[$i], "$tag$value");
315 $offset += length($value) - length($oldval);
319 ### put scripts and comments back and return
320 $$ref =~ s/$MARKER_COMMENT/shift(@comment)/eg if $#comment != -1;
321 $$ref =~ s/$MARKER_SCRIPT/ shift(@script) /eg if $#script != -1;
326 ### yet another html escaper
327 ### allow pass by value or by reference (reference is modified inplace)
330 return $str if ! $str;
331 my $ref = ref($str) ? $str : \
$str;
333 $$ref =~ s/&/&/g;
336 $$ref =~ s/\"/"/g;
338 return ref($str) ? 1 : $$ref;
341 ### get a named value for key="value" pairs
342 ### usage: my $val = get_tagval_by_key(\$tag, $key);
343 ### usage: my $valsref = get_tagval_by_key(\$tag, $key, 'all');
344 sub get_tagval_by_key
{
346 my $ref = ref($tag) ? $tag : \
$tag;
348 my $all = $_[0] && $_[0] eq 'all';
350 pos($$ref) = 0; # fix for regex below not resetting and forcing order on key value pairs
352 ### loop looking for tag pairs
354 (?<![\w\
.\
-]) # 0 - not proceded by letter or .
355 ([\w\
.\
-]+) # 1 - the key
357 (?: \s
*([\"\'])(|.*?[^\\])\
2 # 2 - a quote, 3 - the quoted
358 | ([^\s
/]*? (?=\s|>|/>)) # 4 - a non-quoted string
361 next if lc($1) ne $key;
362 my ($val,$quot) = ($2) ? ($3,$2) : ($4,undef);
363 $val =~ s/\\$quot/$quot/ if $quot;
364 return $val if ! $all;
367 return undef if ! $all;
371 ### swap out values for key="value" pairs
372 ### usage: my $count = &swap_tagval_by_key(\$tag, $key, $val);
373 ### usage: my $newtag = &swap_tagval_by_key($tag, $key, $val);
374 sub swap_tagval_by_key
{
376 my $ref = ref($tag) ? $tag : \
$tag;
381 ### swap a key/val pair at time
382 $$ref =~ s
{(^\s
*<\s
*\w
+\s
+ | \G\s
+) # 1 - open tag or previous position
383 ( ([\w\
-\
.]+) # 2 - group, 3 - the key
385 (?: \s
* ([\"\']) (?:|.*?[^\\]) \
5 # 5 - the quote mark, the quoted
386 | [^\s
/]*? (?=\s|>|/>) # a non-quoted string (may be zero length)
388 | ([^\s
/]+?) (?=\s|>|/>) # 6 - a non keyvalue chunk (CHECKED)
391 if (defined($3) && lc($3) eq $key) { # has matching key value pair
392 if (! $n ++) { # only put value back on first match
393 "$1$3$4\"$val\""; # always double quote
397 } elsif (defined($6) && lc($6) eq $key) { # has matching key
398 if (! $n ++) { # only put value back on first match
408 ### append value on if none were swapped
410 $$ref =~ s
|(\s
*/?>\s
*)$| value
="$val"$1|;
414 return ref($tag) ? $n : $$ref;
421 ###----------------------------------------------------------------###
425 use CGI::Ex::Fill qw(form_fill fill);
427 my $text = my_own_template_from_somewhere();
431 # my $form = {key => 'value'}
433 # my $form = [CGI->new, CGI->new, {key1 => 'val1'}, CGI->new];
436 form_fill(\$text, $form); # modifies $text
439 # my $copy = form_fill($text, $form); # copies $text
450 my $formname = 'formname'; # form to parse (undef = anytable)
451 my $fp = 0; # fill_passwords ? default is true
452 my $ignore = ['key1', 'key2']; # OR {key1 => 1, key2 => 1};
454 form_fill(\$text, $form, $formname, $fp, $ignore);
460 target => 'my_formname',
461 fill_password => $fp,
462 ignore_fields => $ignore,
467 ### delay getting the value until we find an element that needs it
468 my $form = {key => sub {my $key = shift; # get and return value}};
473 form_fill is directly comparable to HTML::FillInForm. It will pass
474 the same suite of tests (actually - it is a little bit kinder on the
475 parse as it won't change case, reorder your attributes, or alter
476 miscellaneous spaces and it won't require the HTML to be well formed).
478 HTML::FillInForm is based upon HTML::Parser while CGI::Ex::Fill is
479 purely regex driven. The performance of CGI::Ex::Fill will be better
480 on HTML with many markup tags because HTML::Parser will parse each tag
481 while CGI::Ex::Fill will search only for those tags it knows how to
482 handle. And CGI::Ex::Fill generally won't break on malformed html.
484 On tiny forms (< 1 k) form_fill was ~ 13% slower than FillInForm. If
485 the html document incorporated very many entities at all, the
486 performance of FillInForm goes down (adding 360 <br> tags pushed
487 form_fill to ~ 350% faster). However, if you are only filling in one
488 form every so often, then it shouldn't matter which you use - but
489 form_fill will be nicer on the tags and won't balk at ugly html and
490 will decrease performance only at a slow rate as the size of the html
491 increases. See the benchmarks in the t/samples/bench_cgix_hfif.pl
492 file for more information (ALL BENCHMARKS SHOULD BE TAKEN WITH A GRAIN
495 There are two functions, fill and form_fill. The function fill takes
496 a hashref of named arguments. The function form_fill takes a list
497 of positional parameters.
499 =head1 ARGUMENTS TO form_fill
501 The following are the arguments to the main function C<fill>.
507 A reference to an html string that includes one or more forms.
511 A form hash, CGI object, or an array of hashrefs and objects.
515 The name of the form to swap. Default is undef which means
516 to swap all form entities in all forms.
520 Default true. If set to false, fields of type password will
525 Hashref of fields to be ignored from swapping.
529 Defaults to the package global $REMOVE_SCRIPT which defaults to true.
530 Removes anything in <script></script> tags which often cause problems for
535 Defaults to the package global $REMOVE_COMMENT which defaults to true.
536 Removes anything in <!-- --> tags which can sometimes cause problems for
541 The method to call on objects passed to the form argument. Default value
542 is the package global $OBJECT_METHOD which defaults to 'param'. If a
543 CGI object is passed, it would call param on that object passing
544 the desired keyname as an argument.
548 =head1 ARGUMENTS TO form_fill
550 The following are the arguments to the legacy function C<form_fill>.
556 A reference to an html string that includes one or more forms or form
561 A form hash, or CGI query object, or an arrayref of multiple hash refs
562 and/or CGI query objects that will supply values for the form.
566 The name of the form to fill in values for. The default is undef
567 which indicates that all forms are to be filled in.
571 Default true. Indicates that C<<lt>input type="password"<gt>> fields
572 are to be swapped as well. Set to false to disable this behavior.
574 =item C<\%IGNORE_FIELDS> OR C<\@IGNORE_FIELDS>
576 A hash ref of key names or an array ref of key names that will be
577 ignored during the fill in of the form.
583 fill and form_fill will attempt to DWYM when filling in values. The following behaviors
584 are used on the following types of form elements.
588 =item C<E<lt>input type="text"E<gt>>
590 The following rules are used when matching this type:
592 1) Get the value from the form that matches the input's "name".
593 2) If the value is defined - it adds or replaces the existing value.
594 3) If the value is not defined and the existing value is not defined,
595 a value of "" is added.
599 my $form = {foo => "FOO", bar => "BAR", baz => "BAZ"};
602 <input type=text name=foo>
603 <input type=text name=foo>
604 <input type=text name=bar value="">
605 <input type=text name=baz value="Something else">
606 <input type=text name=hem value="Another thing">
607 <input type=text name=haw>
610 form_fill(\$html, $form);
613 <input type=text name=foo value="FOO">
614 <input type=text name=foo value="FOO">
615 <input type=text name=bar value="BAR">
616 <input type=text name=baz value="BAZ">
617 <input type=text name=hem value="Another thing">
618 <input type=text name=haw value="">
622 If the value returned from the form is an array ref, the values of the array ref
623 will be sequentially used for each input found by that name until the values
624 run out. If the value is not an array ref - it will be used to fill in any values
625 by that name. For example:
627 $form = {foo => ['aaaa', 'bbbb', 'cccc']};
630 <input type=text name=foo>
631 <input type=text name=foo>
632 <input type=text name=foo>
633 <input type=text name=foo>
634 <input type=text name=foo>
637 form_fill(\$html, $form);
640 <input type=text name=foo value="aaaa">
641 <input type=text name=foo value="bbbb">
642 <input type=text name=foo value="cccc">
643 <input type=text name=foo value="">
644 <input type=text name=foo value="">
647 =item C<E<lt>input type="hidden"E<gt>>
649 Same as C<E<lt>input type="text"E<gt>>.
651 =item C<E<lt>input type="password"E<gt>>
653 Same as C<E<lt>input type="text"E<gt>>.
655 =item C<E<lt>input type="file"E<gt>>
657 Same as C<E<lt>input type="text"E<gt>>. (Note - this is subject
658 to browser support for pre-population)
660 =item C<E<lt>input type="checkbox"E<gt>>
662 As each checkbox is found the following rules are applied:
664 1) Get the values from the form (do nothing if no values found)
665 2) Remove any existing "checked=checked" or "checked" markup from the tag.
666 3) Compare the "value" field to the values and mark with checked="checked"
669 If no "value" field is found in the html, a default value of "on" will be used (which is
670 what most browsers will send as the default value for checked boxes without
673 $form = {foo => 'FOO', bar => ['aaaa', 'bbbb', 'cccc'], baz => 'on'};
676 <input type=checkbox name=foo value="123">
677 <input type=checkbox name=foo value="FOO">
678 <input type=checkbox name=bar value="aaaa">
679 <input type=checkbox name=bar value="cccc">
680 <input type=checkbox name=bar value="dddd" checked="checked">
681 <input type=checkbox name=baz>
684 form_fill(\$html, $form);
687 <input type=checkbox name=foo value="123">
688 <input type=checkbox name=foo value="FOO" checked="checked">
689 <input type=checkbox name=bar value="aaaa" checked="checked">
690 <input type=checkbox name=bar value="cccc" checked="checked">
691 <input type=checkbox name=bar value="dddd">
692 <input type=checkbox name=baz checked="checked">
696 =item C<E<lt>input type="radio"E<gt>>
698 Same as C<E<lt>input type="checkbox"E<gt>>.
700 =item C<E<lt>selectE<gt>>
702 As each select box is found the following rules are applied (these rules are
703 applied regardless of if the box is a select-one or a select-multi - if multiple
704 values are selected on a select-one it is up to the browser to choose which one
707 1) Get the values from the form (do nothing if no values found)
708 2) Remove any existing "selected=selected" or "selected" markup from the tag.
709 3) Compare the "value" field to the values and mark with selected="selected"
711 4) If there is no "value" field - use the text in between the "option" tags.
713 (Note: There does not need to be a closing "select" tag or closing "option" tag)
716 $form = {foo => 'FOO', bar => ['aaaa', 'bbbb', 'cccc']};
719 <select name=foo><option>FOO<option>123<br>
722 <option>aaaa</option>
723 <option value="cccc">cccc</option>
724 <option value="dddd" selected="selected">dddd</option>
728 form_fill(\$html, $form);
732 <select name=foo><option selected="selected">FOO<option>123<br>
735 <option selected="selected">aaaa</option>
736 <option value="cccc" selected="selected">cccc</option>
737 <option value="dddd">dddd</option>
739 ', "Perldoc example 4 passed");
742 =item C<E<lt>textareaE<gt>>
744 The rules for swapping textarea are as follows:
746 1) Get the value from the form that matches the textarea's "name".
747 2) If the value is defined - it adds or replaces the existing value.
748 3) If the value is not defined, the text area is left alone.
750 (Note - there does not need to be a closing textarea tag. In the case of
751 a missing close textarea tag, the contents of the text area will be
752 assumed to be the start of the next textarea of the end of the document -
753 which ever comes sooner)
755 If the form returned an array ref of values, then these values will be
756 used sequentially each time a textarea by that name is found. If a single value
757 (not array ref) is found, that value will be used for each textarea by that name.
761 $form = {foo => 'FOO', bar => ['aaaa', 'bbbb']};
764 <textarea name=foo></textarea>
765 <textarea name=foo></textarea>
768 <textarea name=bar></textarea><br>
769 <textarea name=bar>dddd</textarea><br>
770 <textarea name=bar><br><br>
773 form_fill(\$html, $form);
776 <textarea name=foo>FOO</textarea>
777 <textarea name=foo>FOO</textarea>
779 <textarea name=bar>aaaa<textarea name=bar>bbbb</textarea><br>
780 <textarea name=bar></textarea><br>
781 <textarea name=bar>';
783 =item C<E<lt>input type="submit"E<gt>>
785 Does nothing. The value for submit should typically be set by the
786 templating system or application system.
788 =item C<E<lt>input type="button"E<gt>>
794 =head1 HTML COMMENT / JAVASCRIPT
796 Because there are too many problems that could occur with html
797 comments and javascript, form_fill temporarily removes them during the
798 fill. You may disable this behavior by setting $REMOVE_COMMENT and
799 $REMOVE_SCRIPT to 0 before calling form_fill. The main reason for
800 doing this would be if you wanted to have form elements inside the
801 javascript and comments get filled. Disabling the removal only
802 results in a speed increase of 5%. The function uses \0COMMENT\0 and
803 \0SCRIPT\0 as placeholders so it would be good to avoid these in your
804 text (Actually they may be reset to whatever you'd like via
805 $MARKER_COMMENT and $MARKER_SCRIPT).
807 =head1 UTILITY FUNCTIONS
813 Very minimal entity escaper for filled in values.
815 my $escaped = html_escape($unescaped);
817 html_escape(\$text_to_escape);
819 =item C<get_tagval_by_key>
821 Get a named value for from an html tag (key="value" pairs).
823 my $val = get_tagval_by_key(\$tag, $key);
824 my $valsref = get_tagval_by_key(\$tag, $key, 'all'); # get all values
826 =item C<swap_tagval_by_key>
828 Swap out values in an html tag (key="value" pairs).
830 my $count = swap_tagval_by_key(\$tag, $key, $val); # modify ref
831 my $newtag = swap_tagval_by_key($tag, $key, $val); # copies tag
837 This module may distributed under the same terms as Perl itself.
This page took 0.09122 seconds and 5 git commands to generate.