File Coverage

blib/lib/Text/Fragment.pm
Criterion Covered Total %
statement 182 187 97.3
branch 85 98 86.7
condition 16 22 72.7
subroutine 21 21 100.0
pod 5 5 100.0
total 309 333 92.7


line stmt bran cond sub pod time code
1             package Text::Fragment;
2              
3             our $DATE = '2017-07-10'; # DATE
4             our $VERSION = '0.10'; # VERSION
5              
6 5     5   17524 use 5.010001;
  5         25  
7 5     5   29 use strict;
  5         13  
  5         108  
8 5     5   28 use warnings;
  5         16  
  5         204  
9 5     5   18565 use Log::ger;
  5         629  
  5         38  
10              
11 5     5   8864 use Data::Clone;
  5         6263  
  5         8173  
12              
13             require Exporter;
14             our @ISA = qw(Exporter);
15             our @EXPORT_OK = qw(
16             list_fragments
17             get_fragment
18             set_fragment_attrs
19             insert_fragment
20             delete_fragment
21             );
22              
23             our $re_id = qr/\A[A-Za-z0-9_.,:-]+\z/;
24              
25             our %SPEC;
26              
27             sub _format_quoted {
28 3     3   9 my $unquoted = shift;
29 3         8 my $res = "";
30 3         8 my $i = -1;
31 3         15 while (++$i < length($unquoted)) {
32 6         19 my $c = substr($unquoted, $i, 1);
33 6 50 33     56 if ($c eq '\\' or $c eq '"') {
    50          
34 0         0 $res .= "\\$c";
35             } elsif ($c !~ /[\x20-\x7F]/) {
36             # strip non-printables
37             } else {
38 6         25 $res .= $c;
39             }
40             }
41 3         16 qq("$res");
42             }
43              
44             sub _parse_quoted {
45 1     1   2 my $quoted = shift;
46 1         4 $quoted =~ s/\A"//; $quoted =~ s/"\z//;
  1         4  
47 1         3 my $res = "";
48 1         1 my $i = -1;
49 1         4 while (++$i < length($quoted)) {
50 2         4 my $c = substr($quoted, $i, 1);
51 2 50       6 if ($c eq '\\') {
52 0         0 $res .= substr($quoted, ++$i, 1);
53             } else {
54 2         7 $res .= $c;
55             }
56             }
57 1         6 $res;
58             }
59              
60             sub _format_attr_value {
61 8     8   20 my $val = shift;
62 8 100       61 $val =~ /\s|"|[^\x20-\x7f]/ ? _format_quoted($val) : $val;
63             }
64              
65             sub _label {
66 33     33   143 my %args = @_;
67 33   100     161 my $id = $args{id} // "";
68 33         86 my $label = $args{label}; # str
69 33         82 my $comment_style = $args{comment_style};
70 33         101 my $attrs = $args{attrs};
71              
72 33         146 my $quoted_re = qr/"(?:[^\n\r"\\]|\\[^\n\r])*"/;
73              
74 33         95 my $a_re; # regex to match attributes
75             my $ai_re; # also match attributes, but attribute id must be present
76 33 100       98 if (length $id) {
77 27         1050 $ai_re = qr/(?:\w+=\S*[ \t]+)*id=(?\Q$id\E)(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
78             } else {
79 6         141 $ai_re = qr/(?:\w+=\S*[ \t]+)*id=(?\S*)(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
80             }
81 33         420 $a_re = qr/(?:\w+=\S*)?(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
82              
83 33         93 my ($ts, $te); # tag start and end
84 33 100       146 if ($comment_style eq 'shell') {
    100          
    100          
    100          
    50          
85 22         52 $ts = "#";
86 22         48 $te = "";
87             } elsif ($comment_style eq 'c') {
88 3         9 $ts = "/*";
89 3         9 $te = "*/";
90             } elsif ($comment_style eq 'cpp') {
91 3         10 $ts = "//";
92 3         7 $te = "";
93             } elsif ($comment_style eq 'html') {
94 2         4 $ts = "";
96             } elsif ($comment_style eq 'ini') {
97 3         8 $ts = ";";
98 3         7 $te = "";
99             }
100             # regex to detect fragment
101 33         1676 my $ore = qr!^(?.*?)[ \t]*\Q$ts\E[ \t]*
102             \Q$label\E[ \t]+
103             (?$ai_re)[ \t]*
104             \Q$te\E[ \t]*(?\R|\z)!mx;
105              
106 33         2719 my $mre = qr!^\Q$ts\E[ \t]*
107             BEGIN[ \t]+\Q$label\E[ \t]+
108             (?$ai_re)[ \t]*
109             \Q$te\E[ \t]*(?\R)
110             (?:
111             (?.*)
112             ^\Q$ts\E[ \t]*END[ \t]+\Q$label\E[ \t]+
113             (?:\w+=\S*[ \t]+)*id=\g{id}(?:[ \t]+\w+=\S+)*
114             [ \t]*\Q$te\E |
115             (?.*?) # without any ID at the ending comment
116             ^\Q$ts\E[ \t]*END[ \t]+\Q$label\E(?:[ \t]+$a_re)?[ \t]*
117             \Q$te\E
118             )
119             [ \t]*(?\R|\z)!msx;
120              
121             my $parse_attrs = sub {
122 18   50 18   94 my $s = shift // "";
123 18         40 my %a;
124 18         284 while ($s =~ /(\w+)=(?:($quoted_re)|(\S+))(?:\s+|\z)/g) {
125 28 100       256 $a{$1} = $2 ? _parse_quoted($2) : $3;
126             }
127 18         254 \%a;
128 33         224 };
129              
130             return {
131             one_line_pattern => $ore,
132             multi_line_pattern => $mre,
133             parse_attrs => $parse_attrs,
134             format_fragment => sub {
135 12     12   46 my %f = @_;
136              
137             # formatted attrs as string
138 12         23 my $as = "";
139 12 100       35 if (ref($f{attrs})) {
140 10         22 for (sort keys %{ $f{attrs} }) {
  10         47  
141 6         30 $as .= " " . "$_="._format_attr_value($f{attrs}{$_});
142             }
143             } else {
144 2         5 my $a = $parse_attrs->($f{attrs});
145 2         9 $as = join("", map {" $_="._format_attr_value($a->{$_})}
146 2         9 grep {$_ ne 'id'}
  4         11  
147             sort keys %$a);
148             }
149              
150 12         32 my $pl = $f{payload};
151              
152             # to keep things simple here, regardless of whether the replaced
153             # pattern contains ending newline (enl), we still format with ending
154             # newline. then we'll just need to strip ending newline if it's not
155             # needed.
156              
157 12 100 66     65 if ($f{is_multi} || $pl =~ /\R/) {
158 2 100       13 $pl .= "\n" unless $pl =~ /\R\z/;
159 2 50       32 "$ts BEGIN $label id=$id$as" . ($te ? " $te":"") . "\n" .
    50          
160             $pl .
161             "$ts END $label id=$id" . ($te ? " $te":"") . "\n";
162             } else {
163 10 100       115 "$pl $ts $label id=$id$as" . ($te ? " $te":"") . "\n";
164             }
165             },
166 33         371 };
167             }
168              
169             sub _doit {
170 35     35   190 my ($which, %args) = @_;
171              
172 35 50       274 die "BUG: invalid which"
173             unless $which =~ /\A(?:list|get|insert|delete|set_attrs)\z/;
174 35         85 my ($label_str, $label_sub);
175 35 50       140 if (ref($args{label}) eq 'CODE') {
176 0         0 $label_str = "FRAGMENT";
177 0         0 $label_sub = $args{label};
178             } else {
179 35   100     212 $label_str = $args{label} || "FRAGMENT";
180 35         107 $label_sub = \&_label;
181             }
182              
183 35         91 my $text = $args{text};
184 35 50       119 defined($text) or return [400, "Please specify text"];
185 35         82 my $id = $args{id};
186 35 100       191 if ($which =~ /\A(?:get|insert|set_attrs|delete)\z/) {
187 29 50       107 defined($id) or return [400, "Please specify id"];
188             }
189 35 100       102 if (defined $id) {
190 29 100       221 $id =~ $re_id or return [400, "Invalid ID syntax '$id', please use ".
191             "letters/numbers/dots/dashes only"];
192             }
193 34   100     186 my $attrs = $args{attrs} // {};
194 34         141 for (keys %$attrs) {
195 12 100       67 /\A\w+\z/ or return [400, "Invalid attribute name '$_', please use ".
196             "letters/numbers only"];
197 11 100       41 if (!defined($attrs->{$_})) {
198 2 50       10 if ($which eq 'set_attrs') {
199 2         7 next;
200             } else {
201 0         0 return [400, "Undefined value for attribute name '$_'"];
202             }
203             }
204             }
205              
206 33         87 my $good_pattern = $args{good_pattern};
207 33         64 my $replace_pattern = $args{replace_pattern};
208 33         75 my $top_style = $args{top_style};
209 33   100     144 my $comment_style = $args{comment_style} // "shell";
210 33 50       177 $comment_style =~ /\A(cpp|c|shell|html|ini)\z/ or return [
211             400, "Invalid comment_style '$comment_style', ".
212             "please use cpp/c/shell/html/ini"];
213 33         114 my $res = $label_sub->(id=>$id, label=>$label_str,
214             comment_style=>$comment_style);
215 33         104 my $one_line_pattern = $res->{one_line_pattern};
216 33         74 my $multi_line_pattern = $res->{multi_line_pattern};
217 33         65 my $parse_attrs = $res->{parse_attrs};
218 33         64 my $format_fragment = $res->{format_fragment};
219 33         72 my $payload = $args{payload};
220 33 100       116 if ($which eq 'insert') {
221 12 50       31 defined($payload) or return [400, "Please specify payload"];
222             }
223              
224 33 100       167 if ($which eq 'list') {
    100          
    100          
    100          
225              
226 6         15 my @ff;
227 6         898 while ($text =~ /($one_line_pattern|$multi_line_pattern)/xg) {
228             push @ff, {
229             raw => $1,
230 5     5   2585 id => $+{id},
  5         1759  
  5         7486  
231             payload => $+{payload},
232 12         151 attrs => $parse_attrs->($+{attrs}),
233             };
234             }
235 6         237 return [200, "OK", \@ff];
236              
237             } elsif ($which eq 'get') {
238              
239 3 100       224 if ($text =~ /($one_line_pattern|$multi_line_pattern)/x) {
240             return [200, "OK", {
241             raw => $1,
242             id => $+{id},
243             payload => $+{payload},
244 2         23 attrs => $parse_attrs->($+{attrs}),
245             }];
246             } else {
247 1         26 return [404, "Fragment with ID '$id' not found"];
248             }
249              
250             } elsif ($which eq 'set_attrs') {
251              
252 3         8 my $orig_attrs;
253             my $sub = sub {
254 2     2   30 my %f = @_;
255 2         10 $orig_attrs = $parse_attrs->($f{attrs});
256 2         15 my %a = %$orig_attrs; delete $a{id};
  2         8  
257 2         8 for my $k (keys %$attrs) {
258 6         16 my $v = $attrs->{$k};
259 6 100       19 if (defined $v) {
260 4         13 $a{$k} = $v;
261             } else {
262 2         8 delete $a{$k};
263             }
264             }
265 2         7 $f{attrs} = \%a;
266 2         10 $format_fragment->(%f);
267 3         21 };
268 3 100       425 if ($text =~ s{$one_line_pattern | $multi_line_pattern}
269 2         43 {$sub->(%+)}egx) {
270 2         88 return [200, "OK", {text=>$text, orig_attrs=>$orig_attrs}];
271             } else {
272 1         48 return [404, "Fragment with ID '$id' not found"];
273             }
274              
275             } elsif ($which eq 'delete') {
276              
277 9         20 my %f;
278             my $sub = sub {
279 8     8   119 %f = @_;
280 8 100       94 $f{enl} ? $f{bnl} : "";
281 9         41 };
282 9 100       1013 if ($text =~ s{(?\R?)
283             (?$one_line_pattern | $multi_line_pattern)}
284 8         142 {$sub->(%+)}egx) {
285             return [200, "OK", {text=>$text,
286             orig_fragment=>$f{fragment},
287 7         253 orig_payload=>$f{payload}}];
288             } else {
289 2         89 return [304, "Fragment with ID '$id' already does not exist"];
290             }
291              
292             } else { # insert
293              
294 12         20 my $replaced;
295             my %f;
296             my $sub = sub {
297 3     3   30 %f = @_;
298 3 100       16 return $f{fragment} if $payload eq $f{payload};
299 2         4 $replaced++;
300 2         4 $f{orig_fragment} = $f{fragment};
301 2         5 $f{orig_payload} = $f{payload};
302 2         4 $f{payload} = $payload;
303 2         8 $format_fragment->(%f);
304 12         44 };
305 12 100 66     40 if ($good_pattern && $text =~ /$good_pattern/) {
306 1         17 return [304, "Text contains good pattern"];
307             }
308              
309 11 100       1076 if ($text =~ s{(?(?:$one_line_pattern | $multi_line_pattern))}
310 3         44 {$sub->(%+)}ex) {
311 3 100       8 if ($replaced) {
312             return [200, "Payload replaced", {
313             text=>$text, orig_fragment=>$f{orig_fragment},
314 2         50 orig_payload=>$f{orig_payload}}];
315             } else {
316 1         31 return [304, "Fragment with ID '$id' already exist with ".
317             "same content"];
318             }
319             }
320              
321 8         39 my $fragment = $format_fragment->(payload=>$payload, attrs=>$attrs);
322 8 100 66     38 if ($replace_pattern && $text =~ /($replace_pattern)/) {
323 1         4 my $orig_fragment = $1;
324 1         7 $text =~ s/$replace_pattern/$fragment/;
325 1         21 return [200, "Replace pattern replaced", {
326             text=>$text, orig_fragment=>$orig_fragment}];
327             }
328              
329 7 100       20 if ($top_style) {
    100          
330 1         2 $text = $fragment . $text;
331             } elsif (length($text)) {
332 5         18 my $enl = $text =~ /\R\z/; # text ends with newline
333 5 100       19 $fragment =~ s/\R\z// unless $enl;
334 5 100       17 $text .= ($enl ? "" : "\n") . $fragment;
335             } else {
336             # insert at bottom of empty string
337 1         3 $text = $fragment;
338             }
339 7 100       148 return [200, "Fragment inserted at the ".
340             ($top_style ? "top" : "bottom"), {text=>$text}];
341             }
342              
343             }
344              
345             $SPEC{':package'} = {
346             v => 1.1,
347             summary => 'Manipulate fragments in text',
348             description => <<'_',
349              
350             A fragment is a single line or a group of lines (called payload) with a metadata
351             encoded in the comment that is put adjacent to it (for a single line fragment)
352             or enclosing it (for a multiline fragment). Fragments are usually used in
353             configuration files or code. Here is the structure of a single-line fragment:
354              
355             #
356              
357             Here is the structure of a multi-line fragment:
358              
359             # BEGIN
360            
361             # END
362              
363             Label is by default `FRAGMENT` but can be other string. Attributes are a
364             sequence of `name=val` separated by whitespace, where name must be alphanums
365             only and val is zero or more non-whitespace characters. There must at least be
366             an attribute with name `id`, it is used to identify fragment and allow the
367             fragment to be easily replaced/modified/deleted from text. Attributes are
368             optional in the ending comment.
369              
370             Comment character used is by default `#` (`shell`-style comment), but other
371             comment styles are supported (see below).
372              
373             Examples of single-line fragments (the second example uses `c`-style comment and
374             the third uses `cpp`-style comment):
375              
376             RSYNC_ENABLE=1 # FRAGMENT id=enable
377             some text /* FRAGMENT id=id2 */
378             some text // FRAGMENT id=id3 foo=1 bar=2
379              
380             An example of multi-line fragment (using `html`-style comment instead of
381             `shell`):
382              
383            
384             some
385             lines
386             of
387             text
388            
389              
390             Another example (using `ini`-style comment):
391              
392             ; BEGIN FRAGMENT id=default-settings
393             register_globals=On
394             extension=mysql.so
395             extension=gd.so
396             memory_limit=256M
397             post_max_size=64M
398             upload_max_filesize=64M
399             browscap=/c/share/php/browscap.ini
400             allow_url_fopen=0
401             ; END FRAGMENT
402              
403             _
404             };
405              
406             my $arg_comment_style = {
407             summary => 'Comment style',
408             schema => ['str' => {
409             default => 'shell',
410             in => [qw/c cpp html shell ini/],
411             }],
412             };
413             my $arg_label = {
414             schema => [str => {default=>'FRAGMENT'}],
415             summary => 'Comment label',
416             };
417              
418             my $arg_id = {
419             summary => 'Fragment ID',
420             schema => ['str*' => { match => qr/\A[\w-]+\z/ }],
421             req => 1,
422             };
423              
424             my $arg_payload = {
425             summary => 'Fragment content',
426             schema => 'str*',
427             req => 1,
428             };
429              
430             $SPEC{list_fragments} = {
431             v => 1.1,
432             summary => 'List fragments in text',
433             args => {
434             text => {
435             summary => 'The text which contain fragments',
436             schema => 'str*',
437             req => 1,
438             pos => 0,
439             },
440             comment_style => $arg_comment_style,
441             label => $arg_label,
442             },
443             result => {
444             summary => 'List of fragments',
445             schema => 'array*',
446             description => <<'_',
447              
448             Will return status 200 if operation is successful. Result will be an array of
449             fragments, where each fragment is a hash containing these keys: `raw` (string),
450             `payload` (string), `attrs` (hash), `id` (string, can also be found in
451             attributes).
452              
453             _
454             },
455             };
456             sub list_fragments {
457 6     6 1 23351 _doit('list', @_);
458             }
459              
460             $SPEC{get_fragment} = {
461             v => 1.1,
462             summary => 'Get fragment with a certain ID in text',
463             description => <<'_',
464              
465             If there are multiple occurences of the fragment with the same ID ,
466              
467             _
468             args => {
469             text => {
470             summary => 'The text which contain fragments',
471             schema => 'str*',
472             req => 1,
473             pos => 0,
474             },
475             comment_style => $arg_comment_style,
476             label => $arg_label,
477             id => $arg_id,
478             },
479             result => {
480             summary => 'Fragment',
481             schema => 'array*',
482             description => <<'_',
483              
484             Will return status 200 if fragment is found. Result will be a hash with the
485             following keys: `raw` (string), `payload` (string), `attrs` (hash), `id`
486             (string, can also be found in attributes).
487              
488             Return 404 if fragment is not found.
489              
490             _
491             },
492             };
493             sub get_fragment {
494 3     3 1 5355 _doit('get', @_);
495             }
496              
497             $SPEC{set_fragment_attrs} = {
498             v => 1.1,
499             summary => 'Set/unset attributes of a fragment',
500             description => <<'_',
501              
502             If there are multiple occurences of the fragment with the same ID ,
503              
504             _
505             args => {
506             text => {
507             summary => 'The text which contain fragments',
508             schema => 'str*',
509             req => 1,
510             pos => 0,
511             },
512             comment_style => $arg_comment_style,
513             label => $arg_label,
514             id => $arg_id,
515             attrs => {
516             schema => 'hash*',
517             description => <<'_',
518              
519             To delete an attribute in the fragment, you can set the value to undef.
520              
521             _
522             req => 1,
523             },
524             },
525             result => {
526             summary => 'New text and other data',
527             schema => 'array*',
528             description => <<'_',
529              
530             Will return status 200 if fragment is found. Result will be a hash containing
531             these keys: `text` (string, the modified text), `orig_attrs` (hash, the old
532             attributes before being modified).
533              
534             Return 404 if fragment is not found.
535              
536             _
537             },
538             };
539             sub set_fragment_attrs {
540 4     4 1 14649 _doit('set_attrs', @_);
541             }
542              
543             $SPEC{insert_fragment} = {
544             v => 1.1,
545             summary => 'Insert or replace a fragment in text',
546             description => <<'_',
547              
548             Newline insertion behaviour: if fragment is inserted at the bottom and text does
549             not end with newline (which is considered bad style), the inserted fragment will
550             also not end with newline. Except when original text is an empty string, in
551             which case an ending newline will still be added.
552              
553             _
554             args => {
555             text => {
556             summary => 'The text to insert fragment into',
557             schema => 'str*',
558             req => 1,
559             pos => 0,
560             },
561             id => $arg_id,
562             payload => $arg_payload,
563             top_style => {
564             summary => 'Whether to append fragment at beginning of file '.
565             'instead of at the end',
566             schema => [bool => { default=>0 }],
567             description => <<'_',
568              
569             Default is false, which means to append at the end of file.
570              
571             Note that this only has effect if `replace_pattern` is not defined or replace
572             pattern is not found in file. Otherwise, fragment will be inserted to replace
573             the pattern.
574              
575             _
576             },
577             replace_pattern => {
578             summary => 'Regex pattern which if found will be used for '.
579             'placement of fragment',
580             schema => 'str',
581             description => <<'_',
582              
583             If fragment needs to be inserted into file, then if `replace_pattern` is defined
584             then it will be searched. If found, fragment will be placed to replace the
585             pattern. Otherwise, fragment will be inserted at the end (or beginning, see
586             `top_style`) of file.
587              
588             _
589             },
590             good_pattern => {
591             summary => 'Regex pattern which if found means fragment '.
592             'need not be inserted',
593             schema => 'str',
594             },
595             comment_style => $arg_comment_style,
596             label => $arg_label,
597             attrs => {
598             schema => [hash => {default=>{}}],
599             },
600             },
601             result => {
602             summary => 'A hash of result',
603             schema => 'hash*',
604             description => <<'_',
605              
606             Will return status 200 if operation is successful and text is changed. The
607             result is a hash with the following keys: `text` will contain the new text,
608             `orig_payload` will contain the original payload before being removed/replaced,
609             `orig_fragment` will contain the original fragment (or the text that matches
610             `replace_pattern`).
611              
612              
613             Will return status 304 if nothing is changed (i.e. if fragment with the
614             same payload that needs to be inserted already exists in the text).
615              
616             _
617             },
618             };
619             sub insert_fragment {
620 13     13 1 34737 _doit('insert', @_);
621             }
622              
623             $SPEC{delete_fragment} = {
624             v => 1.1,
625             summary => 'Delete fragment in text',
626             description => <<'_',
627              
628             If there are multiple occurences of fragment (which is considered an abnormal
629             condition), all occurences will be deleted.
630              
631             Newline deletion behaviour: if fragment at the bottom of text does not end with
632             newline (which is considered bad style), the text after the fragment is deleted
633             will also not end with newline.
634              
635             _
636             args => {
637             text => {
638             summary => 'The text to delete fragment from',
639             schema => 'str*',
640             req => 1,
641             pos => 0,
642             },
643             id => {
644             summary => 'Fragment ID',
645             schema => ['str*' => { match => qr/\A[\w-]+\z/ }],
646             req => 1,
647             pos => 1,
648             },
649             comment_style => $arg_comment_style,
650             label => {
651             schema => ['any' => {
652             of => ['str*', 'code*'],
653             default => 'FRAGMENT',
654             }],
655             summary => 'Comment label',
656             },
657             },
658             result => {
659             summary => 'A hash of result',
660             schema => 'hash*',
661             description => <<'_',
662              
663             Will return status 200 if operation is successful and text is deleted. The
664             result is a hash with the following keys: `text` will contain the new text,
665             `orig_payload` will contain the original fragment payload before being deleted,
666             `orig_fragment` will contain the original fragment. If there are multiple
667             occurences (which is considered an abnormal condition), only the last deleted
668             fragment will be returned in `orig_payload` and `orig_fragment`.
669              
670             Will return status 304 if nothing is changed (i.e. when the fragment that needs
671             to be deleted already does not exist in the text).
672              
673             _
674             },
675             };
676             sub delete_fragment {
677 9     9 1 36738 _doit('delete', @_);
678             }
679              
680             1;
681             # ABSTRACT: Manipulate fragments in text
682              
683             __END__