File Coverage

blib/lib/Text/Fragment.pm
Criterion Covered Total %
statement 182 187 97.3
branch 85 98 86.7
condition 16 22 72.7
subroutine 21 21 100.0
pod 5 5 100.0
total 309 333 92.7


line stmt bran cond sub pod time code
1             package Text::Fragment;
2              
3             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
4             our $DATE = '2021-03-25'; # DATE
5             our $DIST = 'Text-Fragment'; # DIST
6             our $VERSION = '0.110'; # VERSION
7              
8 5     5   21085 use 5.010001;
  5         54  
9 5     5   25 use strict;
  5         10  
  5         127  
10 5     5   33 use warnings;
  5         7  
  5         156  
11 5     5   9259 use Log::ger;
  5         466  
  5         31  
12              
13 5     5   3768 use Data::Clone;
  5         7344  
  5         9280  
14              
15             require Exporter;
16             our @ISA = qw(Exporter);
17             our @EXPORT_OK = qw(
18             list_fragments
19             get_fragment
20             set_fragment_attrs
21             insert_fragment
22             delete_fragment
23             );
24              
25             our $re_id = qr/\A[A-Za-z0-9_.,:-]+\z/;
26              
27             our %SPEC;
28              
29             sub _format_quoted {
30 3     3   8 my $unquoted = shift;
31 3         7 my $res = "";
32 3         7 my $i = -1;
33 3         13 while (++$i < length($unquoted)) {
34 6         20 my $c = substr($unquoted, $i, 1);
35 6 50 33     45 if ($c eq '\\' or $c eq '"') {
    50          
36 0         0 $res .= "\\$c";
37             } elsif ($c !~ /[\x20-\x7F]/) {
38             # strip non-printables
39             } else {
40 6         18 $res .= $c;
41             }
42             }
43 3         14 qq("$res");
44             }
45              
46             sub _parse_quoted {
47 1     1   3 my $quoted = shift;
48 1         5 $quoted =~ s/\A"//; $quoted =~ s/"\z//;
  1         5  
49 1         2 my $res = "";
50 1         2 my $i = -1;
51 1         4 while (++$i < length($quoted)) {
52 2         6 my $c = substr($quoted, $i, 1);
53 2 50       5 if ($c eq '\\') {
54 0         0 $res .= substr($quoted, ++$i, 1);
55             } else {
56 2         6 $res .= $c;
57             }
58             }
59 1         5 $res;
60             }
61              
62             sub _format_attr_value {
63 8     8   15 my $val = shift;
64 8 100       59 $val =~ /\s|"|[^\x20-\x7f]/ ? _format_quoted($val) : $val;
65             }
66              
67             sub _label {
68 33     33   206 my %args = @_;
69 33   100     102 my $id = $args{id} // "";
70 33         63 my $label = $args{label}; # str
71 33         58 my $comment_style = $args{comment_style};
72 33         58 my $attrs = $args{attrs};
73              
74 33         126 my $quoted_re = qr/"(?:[^\n\r"\\]|\\[^\n\r])*"/;
75              
76 33         65 my $a_re; # regex to match attributes
77             my $ai_re; # also match attributes, but attribute id must be present
78 33 100       89 if (length $id) {
79 27         1688 $ai_re = qr/(?:\w+=\S*[ \t]+)*id=(?\Q$id\E)(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
80             } else {
81 6         131 $ai_re = qr/(?:\w+=\S*[ \t]+)*id=(?\S*)(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
82             }
83 33         441 $a_re = qr/(?:\w+=\S*)?(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
84              
85 33         90 my ($ts, $te); # tag start and end
86 33 100       128 if ($comment_style eq 'shell') {
    100          
    100          
    100          
    50          
87 22         47 $ts = "#";
88 22         35 $te = "";
89             } elsif ($comment_style eq 'c') {
90 3         9 $ts = "/*";
91 3         7 $te = "*/";
92             } elsif ($comment_style eq 'cpp') {
93 3         7 $ts = "//";
94 3         5 $te = "";
95             } elsif ($comment_style eq 'html') {
96 2         4 $ts = "";
98             } elsif ($comment_style eq 'ini') {
99 3         9 $ts = ";";
100 3         6 $te = "";
101             }
102             # regex to detect fragment
103 33         1745 my $ore = qr!^(?.*?)[ \t]*\Q$ts\E[ \t]*
104             \Q$label\E[ \t]+
105             (?$ai_re)[ \t]*
106             \Q$te\E[ \t]*(?\R|\z)!mx;
107              
108 33         2786 my $mre = qr!^\Q$ts\E[ \t]*
109             BEGIN[ \t]+\Q$label\E[ \t]+
110             (?$ai_re)[ \t]*
111             \Q$te\E[ \t]*(?\R)
112             (?:
113             (?.*)
114             ^\Q$ts\E[ \t]*END[ \t]+\Q$label\E[ \t]+
115             (?:\w+=\S*[ \t]+)*id=\g{id}(?:[ \t]+\w+=\S+)*
116             [ \t]*\Q$te\E |
117             (?.*?) # without any ID at the ending comment
118             ^\Q$ts\E[ \t]*END[ \t]+\Q$label\E(?:[ \t]+$a_re)?[ \t]*
119             \Q$te\E
120             )
121             [ \t]*(?\R|\z)!msx;
122              
123             my $parse_attrs = sub {
124 18   50 18   71 my $s = shift // "";
125 18         30 my %a;
126 18         270 while ($s =~ /(\w+)=(?:($quoted_re)|(\S+))(?:\s+|\z)/g) {
127 28 100       211 $a{$1} = $2 ? _parse_quoted($2) : $3;
128             }
129 18         224 \%a;
130 33         222 };
131              
132             return {
133             one_line_pattern => $ore,
134             multi_line_pattern => $mre,
135             parse_attrs => $parse_attrs,
136             format_fragment => sub {
137 12     12   55 my %f = @_;
138              
139             # formatted attrs as string
140 12         24 my $as = "";
141 12 100       46 if (ref($f{attrs})) {
142 10         17 for (sort keys %{ $f{attrs} }) {
  10         70  
143 6         27 $as .= " " . "$_="._format_attr_value($f{attrs}{$_});
144             }
145             } else {
146 2         8 my $a = $parse_attrs->($f{attrs});
147 2         9 $as = join("", map {" $_="._format_attr_value($a->{$_})}
148 2         17 grep {$_ ne 'id'}
  4         13  
149             sort keys %$a);
150             }
151              
152 12         30 my $pl = $f{payload};
153              
154             # to keep things simple here, regardless of whether the replaced
155             # pattern contains ending newline (enl), we still format with ending
156             # newline. then we'll just need to strip ending newline if it's not
157             # needed.
158              
159 12 100 66     78 if ($f{is_multi} || $pl =~ /\R/) {
160 2 100       13 $pl .= "\n" unless $pl =~ /\R\z/;
161 2 50       33 "$ts BEGIN $label id=$id$as" . ($te ? " $te":"") . "\n" .
    50          
162             $pl .
163             "$ts END $label id=$id" . ($te ? " $te":"") . "\n";
164             } else {
165 10 100       124 "$pl $ts $label id=$id$as" . ($te ? " $te":"") . "\n";
166             }
167             },
168 33         359 };
169             }
170              
171             sub _doit {
172 35     35   152 my ($which, %args) = @_;
173              
174 35 50       287 die "BUG: invalid which"
175             unless $which =~ /\A(?:list|get|insert|delete|set_attrs)\z/;
176 35         72 my ($label_str, $label_sub);
177 35 50       124 if (ref($args{label}) eq 'CODE') {
178 0         0 $label_str = "FRAGMENT";
179 0         0 $label_sub = $args{label};
180             } else {
181 35   100     171 $label_str = $args{label} || "FRAGMENT";
182 35         92 $label_sub = \&_label;
183             }
184              
185 35         83 my $text = $args{text};
186 35 50       96 defined($text) or return [400, "Please specify text"];
187 35         62 my $id = $args{id};
188 35 100       156 if ($which =~ /\A(?:get|insert|set_attrs|delete)\z/) {
189 29 50       85 defined($id) or return [400, "Please specify id"];
190             }
191 35 100       86 if (defined $id) {
192 29 100       241 $id =~ $re_id or return [400, "Invalid ID syntax '$id', please use ".
193             "letters/numbers/dots/dashes only"];
194             }
195 34   100     146 my $attrs = $args{attrs} // {};
196 34         125 for (keys %$attrs) {
197 12 100       59 /\A\w+\z/ or return [400, "Invalid attribute name '$_', please use ".
198             "letters/numbers only"];
199 11 100       135 if (!defined($attrs->{$_})) {
200 2 50       10 if ($which eq 'set_attrs') {
201 2         8 next;
202             } else {
203 0         0 return [400, "Undefined value for attribute name '$_'"];
204             }
205             }
206             }
207              
208 33         72 my $good_pattern = $args{good_pattern};
209 33         65 my $replace_pattern = $args{replace_pattern};
210 33         59 my $top_style = $args{top_style};
211 33   100     108 my $comment_style = $args{comment_style} // "shell";
212 33 50       152 $comment_style =~ /\A(cpp|c|shell|html|ini)\z/ or return [
213             400, "Invalid comment_style '$comment_style', ".
214             "please use cpp/c/shell/html/ini"];
215 33         100 my $res = $label_sub->(id=>$id, label=>$label_str,
216             comment_style=>$comment_style);
217 33         73 my $one_line_pattern = $res->{one_line_pattern};
218 33         57 my $multi_line_pattern = $res->{multi_line_pattern};
219 33         53 my $parse_attrs = $res->{parse_attrs};
220 33         53 my $format_fragment = $res->{format_fragment};
221 33         65 my $payload = $args{payload};
222 33 100       89 if ($which eq 'insert') {
223 12 50       38 defined($payload) or return [400, "Please specify payload"];
224             }
225              
226 33 100       155 if ($which eq 'list') {
    100          
    100          
    100          
227              
228 6         8 my @ff;
229 6         724 while ($text =~ /($one_line_pattern|$multi_line_pattern)/xg) {
230             push @ff, {
231             raw => $1,
232 5     5   2898 id => $+{id},
  5         2125  
  5         8551  
233             payload => $+{payload},
234 12         115 attrs => $parse_attrs->($+{attrs}),
235             };
236             }
237 6         191 return [200, "OK", \@ff];
238              
239             } elsif ($which eq 'get') {
240              
241 3 100       276 if ($text =~ /($one_line_pattern|$multi_line_pattern)/x) {
242             return [200, "OK", {
243             raw => $1,
244             id => $+{id},
245             payload => $+{payload},
246 2         26 attrs => $parse_attrs->($+{attrs}),
247             }];
248             } else {
249 1         32 return [404, "Fragment with ID '$id' not found"];
250             }
251              
252             } elsif ($which eq 'set_attrs') {
253              
254 3         4 my $orig_attrs;
255             my $sub = sub {
256 2     2   25 my %f = @_;
257 2         10 $orig_attrs = $parse_attrs->($f{attrs});
258 2         11 my %a = %$orig_attrs; delete $a{id};
  2         7  
259 2         7 for my $k (keys %$attrs) {
260 6         10 my $v = $attrs->{$k};
261 6 100       12 if (defined $v) {
262 4         9 $a{$k} = $v;
263             } else {
264 2         5 delete $a{$k};
265             }
266             }
267 2         6 $f{attrs} = \%a;
268 2         8 $format_fragment->(%f);
269 3         16 };
270 3 100       309 if ($text =~ s{$one_line_pattern | $multi_line_pattern}
  2         42  
271 2         79 {$sub->(%+)}egx) {
272             return [200, "OK", {text=>$text, orig_attrs=>$orig_attrs}];
273 1         41 } else {
274             return [404, "Fragment with ID '$id' not found"];
275             }
276              
277             } elsif ($which eq 'delete') {
278 9         17  
279             my %f;
280 8     8   82 my $sub = sub {
281 8 100       88 %f = @_;
282 9         23 $f{enl} ? $f{bnl} : "";
283 9 100       746 };
284 8         110 if ($text =~ s{(?\R?)
285             (?$one_line_pattern | $multi_line_pattern)}
286             {$sub->(%+)}egx) {
287 7         171 return [200, "OK", {text=>$text,
288             orig_fragment=>$f{fragment},
289 2         61 orig_payload=>$f{payload}}];
290             } else {
291             return [304, "Fragment with ID '$id' already does not exist"];
292             }
293              
294 12         26 } else { # insert
295              
296             my $replaced;
297 3     3   37 my %f;
298 3 100       21 my $sub = sub {
299 2         7 %f = @_;
300 2         9 return $f{fragment} if $payload eq $f{payload};
301 2         6 $replaced++;
302 2         4 $f{orig_fragment} = $f{fragment};
303 2         9 $f{orig_payload} = $f{payload};
304 12         56 $f{payload} = $payload;
305 12 100 66     54 $format_fragment->(%f);
306 1         31 };
307             if ($good_pattern && $text =~ /$good_pattern/) {
308             return [304, "Text contains good pattern"];
309 11 100       1451 }
  3         64  
310 3 100       12  
311             if ($text =~ s{(?(?:$one_line_pattern | $multi_line_pattern))}
312             {$sub->(%+)}ex) {
313 2         68 if ($replaced) {
314             return [200, "Payload replaced", {
315 1         42 text=>$text, orig_fragment=>$f{orig_fragment},
316             orig_payload=>$f{orig_payload}}];
317             } else {
318             return [304, "Fragment with ID '$id' already exist with ".
319             "same content"];
320 8         60 }
321 8 100 66     51 }
322 1         5  
323 1         8 my $fragment = $format_fragment->(payload=>$payload, attrs=>$attrs);
324 1         34 if ($replace_pattern && $text =~ /($replace_pattern)/) {
325             my $orig_fragment = $1;
326             $text =~ s/$replace_pattern/$fragment/;
327             return [200, "Replace pattern replaced", {
328 7 100       30 text=>$text, orig_fragment=>$orig_fragment}];
    100          
329 1         3 }
330              
331 5         23 if ($top_style) {
332 5 100       23 $text = $fragment . $text;
333 5 100       22 } elsif (length($text)) {
334             my $enl = $text =~ /\R\z/; # text ends with newline
335             $fragment =~ s/\R\z// unless $enl;
336 1         4 $text .= ($enl ? "" : "\n") . $fragment;
337             } else {
338 7 100       313 # insert at bottom of empty string
339             $text = $fragment;
340             }
341             return [200, "Fragment inserted at the ".
342             ($top_style ? "top" : "bottom"), {text=>$text}];
343             }
344              
345             }
346              
347             $SPEC{':package'} = {
348             v => 1.1,
349             summary => 'Manipulate fragments in text',
350             description => <<'_',
351              
352             A fragment is a single line or a group of lines (called payload) with a metadata
353             encoded in the comment that is put adjacent to it (for a single line fragment)
354             or enclosing it (for a multiline fragment). Fragments are usually used in
355             configuration files or code. Here is the structure of a single-line fragment:
356              
357             #
358              
359             Here is the structure of a multi-line fragment:
360              
361             # BEGIN
362            
363             # END
364              
365             Label is by default `FRAGMENT` but can be other string. Attributes are a
366             sequence of `name=val` separated by whitespace, where name must be alphanums
367             only and val is zero or more non-whitespace characters. There must at least be
368             an attribute with name `id`, it is used to identify fragment and allow the
369             fragment to be easily replaced/modified/deleted from text. Attributes are
370             optional in the ending comment.
371              
372             Comment character used is by default `#` (`shell`-style comment), but other
373             comment styles are supported (see below).
374              
375             Examples of single-line fragments (the second example uses `c`-style comment and
376             the third uses `cpp`-style comment):
377              
378             RSYNC_ENABLE=1 # FRAGMENT id=enable
379             some text /* FRAGMENT id=id2 */
380             some text // FRAGMENT id=id3 foo=1 bar=2
381              
382             An example of multi-line fragment (using `html`-style comment instead of
383             `shell`):
384              
385            
386             some
387             lines
388             of
389             text
390            
391              
392             Another example (using `ini`-style comment):
393              
394             ; BEGIN FRAGMENT id=default-settings
395             register_globals=On
396             extension=mysql.so
397             extension=gd.so
398             memory_limit=256M
399             post_max_size=64M
400             upload_max_filesize=64M
401             browscap=/c/share/php/browscap.ini
402             allow_url_fopen=0
403             ; END FRAGMENT
404              
405             _
406             };
407              
408             my $arg_comment_style = {
409             summary => 'Comment style',
410             schema => ['str' => {
411             default => 'shell',
412             in => [qw/c cpp html shell ini/],
413             }],
414             };
415             my $arg_label = {
416             schema => [str => {default=>'FRAGMENT'}],
417             summary => 'Comment label',
418             };
419              
420             my $arg_id = {
421             summary => 'Fragment ID',
422             schema => ['str*' => { match => qr/\A[\w-]+\z/ }],
423             req => 1,
424             };
425              
426             my $arg_payload = {
427             summary => 'Fragment content',
428             schema => 'str*',
429             req => 1,
430             };
431              
432             $SPEC{list_fragments} = {
433             v => 1.1,
434             summary => 'List fragments in text',
435             args => {
436             text => {
437             summary => 'The text which contain fragments',
438             schema => 'str*',
439             req => 1,
440             pos => 0,
441             },
442             comment_style => $arg_comment_style,
443             label => $arg_label,
444             },
445             result => {
446             summary => 'List of fragments',
447             schema => 'array*',
448             description => <<'_',
449              
450             Will return status 200 if operation is successful. Result will be an array of
451             fragments, where each fragment is a hash containing these keys: `raw` (string),
452             `payload` (string), `attrs` (hash), `id` (string, can also be found in
453             attributes).
454              
455             _
456 6     6 1 18077 },
457             };
458             sub list_fragments {
459             _doit('list', @_);
460             }
461              
462             $SPEC{get_fragment} = {
463             v => 1.1,
464             summary => 'Get fragment with a certain ID in text',
465             description => <<'_',
466              
467             If there are multiple occurences of the fragment with the same ID ,
468              
469             _
470             args => {
471             text => {
472             summary => 'The text which contain fragments',
473             schema => 'str*',
474             req => 1,
475             pos => 0,
476             },
477             comment_style => $arg_comment_style,
478             label => $arg_label,
479             id => $arg_id,
480             },
481             result => {
482             summary => 'Fragment',
483             schema => 'array*',
484             description => <<'_',
485              
486             Will return status 200 if fragment is found. Result will be a hash with the
487             following keys: `raw` (string), `payload` (string), `attrs` (hash), `id`
488             (string, can also be found in attributes).
489              
490             Return 404 if fragment is not found.
491              
492             _
493 3     3 1 6791 },
494             };
495             sub get_fragment {
496             _doit('get', @_);
497             }
498              
499             $SPEC{set_fragment_attrs} = {
500             v => 1.1,
501             summary => 'Set/unset attributes of a fragment',
502             description => <<'_',
503              
504             If there are multiple occurences of the fragment with the same ID ,
505              
506             _
507             args => {
508             text => {
509             summary => 'The text which contain fragments',
510             schema => 'str*',
511             req => 1,
512             pos => 0,
513             },
514             comment_style => $arg_comment_style,
515             label => $arg_label,
516             id => $arg_id,
517             attrs => {
518             schema => 'hash*',
519             description => <<'_',
520              
521             To delete an attribute in the fragment, you can set the value to undef.
522              
523             _
524             req => 1,
525             },
526             },
527             result => {
528             summary => 'New text and other data',
529             schema => 'array*',
530             description => <<'_',
531              
532             Will return status 200 if fragment is found. Result will be a hash containing
533             these keys: `text` (string, the modified text), `orig_attrs` (hash, the old
534             attributes before being modified).
535              
536             Return 404 if fragment is not found.
537              
538             _
539 4     4 1 10843 },
540             };
541             sub set_fragment_attrs {
542             _doit('set_attrs', @_);
543             }
544              
545             $SPEC{insert_fragment} = {
546             v => 1.1,
547             summary => 'Insert or replace a fragment in text',
548             description => <<'_',
549              
550             Newline insertion behaviour: if fragment is inserted at the bottom and text does
551             not end with newline (which is considered bad style), the inserted fragment will
552             also not end with newline. Except when original text is an empty string, in
553             which case an ending newline will still be added.
554              
555             _
556             args => {
557             text => {
558             summary => 'The text to insert fragment into',
559             schema => 'str*',
560             req => 1,
561             pos => 0,
562             },
563             id => $arg_id,
564             payload => $arg_payload,
565             top_style => {
566             summary => 'Whether to append fragment at beginning of file '.
567             'instead of at the end',
568             schema => [bool => { default=>0 }],
569             description => <<'_',
570              
571             Default is false, which means to append at the end of file.
572              
573             Note that this only has effect if `replace_pattern` is not defined or replace
574             pattern is not found in file. Otherwise, fragment will be inserted to replace
575             the pattern.
576              
577             _
578             },
579             replace_pattern => {
580             summary => 'Regex pattern which if found will be used for '.
581             'placement of fragment',
582             schema => 'str',
583             description => <<'_',
584              
585             If fragment needs to be inserted into file, then if `replace_pattern` is defined
586             then it will be searched. If found, fragment will be placed to replace the
587             pattern. Otherwise, fragment will be inserted at the end (or beginning, see
588             `top_style`) of file.
589              
590             _
591             },
592             good_pattern => {
593             summary => 'Regex pattern which if found means fragment '.
594             'need not be inserted',
595             schema => 'str',
596             },
597             comment_style => $arg_comment_style,
598             label => $arg_label,
599             attrs => {
600             schema => [hash => {default=>{}}],
601             },
602             },
603             result => {
604             summary => 'A hash of result',
605             schema => 'hash*',
606             description => <<'_',
607              
608             Will return status 200 if operation is successful and text is changed. The
609             result is a hash with the following keys: `text` will contain the new text,
610             `orig_payload` will contain the original payload before being removed/replaced,
611             `orig_fragment` will contain the original fragment (or the text that matches
612             `replace_pattern`).
613              
614              
615             Will return status 304 if nothing is changed (i.e. if fragment with the
616             same payload that needs to be inserted already exists in the text).
617              
618             _
619 13     13 1 42763 },
620             };
621             sub insert_fragment {
622             _doit('insert', @_);
623             }
624              
625             $SPEC{delete_fragment} = {
626             v => 1.1,
627             summary => 'Delete fragment in text',
628             description => <<'_',
629              
630             If there are multiple occurences of fragment (which is considered an abnormal
631             condition), all occurences will be deleted.
632              
633             Newline deletion behaviour: if fragment at the bottom of text does not end with
634             newline (which is considered bad style), the text after the fragment is deleted
635             will also not end with newline.
636              
637             _
638             args => {
639             text => {
640             summary => 'The text to delete fragment from',
641             schema => 'str*',
642             req => 1,
643             pos => 0,
644             },
645             id => {
646             summary => 'Fragment ID',
647             schema => ['str*' => { match => qr/\A[\w-]+\z/ }],
648             req => 1,
649             pos => 1,
650             },
651             comment_style => $arg_comment_style,
652             label => {
653             schema => ['any' => {
654             of => ['str*', 'code*'],
655             default => 'FRAGMENT',
656             }],
657             summary => 'Comment label',
658             },
659             },
660             result => {
661             summary => 'A hash of result',
662             schema => 'hash*',
663             description => <<'_',
664              
665             Will return status 200 if operation is successful and text is deleted. The
666             result is a hash with the following keys: `text` will contain the new text,
667             `orig_payload` will contain the original fragment payload before being deleted,
668             `orig_fragment` will contain the original fragment. If there are multiple
669             occurences (which is considered an abnormal condition), only the last deleted
670             fragment will be returned in `orig_payload` and `orig_fragment`.
671              
672             Will return status 304 if nothing is changed (i.e. when the fragment that needs
673             to be deleted already does not exist in the text).
674              
675             _
676 9     9 1 32548 },
677             };
678             sub delete_fragment {
679             _doit('delete', @_);
680             }
681              
682             1;
683             # ABSTRACT: Manipulate fragments in text
684              
685             __END__