File Coverage

blib/lib/Regexp/Common/delimited.pm
Criterion Covered Total %
statement 34 37 91.8
branch 10 10 100.0
condition 7 9 77.7
subroutine 7 9 77.7
pod 0 2 0.0
total 58 67 86.5


line stmt bran cond sub pod time code
1             package Regexp::Common::delimited;
2              
3 72     72   586 use 5.10.0;
  72         160  
4              
5 72     72   238 use strict;
  72         80  
  72         1193  
6 72     72   193 use warnings;
  72         157  
  72         1709  
7 72     72   212 no warnings 'syntax';
  72         167  
  72         2072  
8              
9 72     72   222 use Regexp::Common qw /pattern clean no_defaults/;
  72         73  
  72         367  
10              
11 72     72   32256 use charnames ':full';
  72         1766070  
  72         1147  
12              
13             our $VERSION = '2017040401';
14              
15             sub gen_delimited {
16              
17 1880     1880 0 2318 my ($dels, $escs, $cdels) = @_;
18             # return '(?:\S*)' unless $dels =~ /\S/;
19 1880 100 66     6862 if (defined $escs && length $escs) {
20 1874         5383 $escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
21             }
22 1880 100 100     5602 if (defined $cdels && length $cdels) {
23 1748         3947 $cdels .= substr ($cdels, -1) x (length ($dels) - length ($cdels));
24             }
25             else {
26 132         119 $cdels = $dels;
27             }
28              
29 1880         1915 my @pat = ();
30 1880         3195 for (my $i = 0; $i < length $dels; $i ++) {
31 146374         140034 my $del = quotemeta substr ($dels, $i, 1);
32 146374         110861 my $cdel = quotemeta substr ($cdels, $i, 1);
33 146374 100 66     343837 my $esc = defined $escs && length ($escs)
34             ? quotemeta substr ($escs, $i, 1) : "";
35 146374 100       173583 if ($cdel eq $esc) {
    100          
36 9         35 push @pat =>
37             "(?k:$del)(?k:[^$cdel]*(?:(?:$cdel$cdel)[^$cdel]*)*)(?k:$cdel)";
38             }
39             elsif (length $esc) {
40 146359         509514 push @pat =>
41             "(?k:$del)(?k:[^$esc$cdel]*(?:$esc.[^$esc$cdel]*)*)(?k:$cdel)";
42             }
43             else {
44 6         20 push @pat => "(?k:$del)(?k:[^$cdel]*)(?k:$cdel)";
45             }
46             }
47 1880         13619 my $pat = join '|', @pat;
48 1880         16224 return "(?k:(?|$pat))";
49             }
50              
51             sub _croak {
52 0     0     require Carp;
53 0           goto &Carp::croak;
54             }
55              
56             pattern name => [qw( delimited -delim= -esc=\\ -cdelim= )],
57             create => sub {my $flags = $_[1];
58             _croak 'Must specify delimiter in $RE{delimited}'
59             unless length $flags->{-delim};
60             return gen_delimited (@{$flags}{-delim, -esc, -cdelim});
61             },
62             ;
63              
64             pattern name => [qw( quoted -esc=\\ )],
65             create => sub {my $flags = $_[1];
66             return gen_delimited (q{"'`}, $flags -> {-esc});
67             },
68             ;
69              
70              
71             my @bracket_pairs;
72             if ($] >= 5.014) {
73             #
74             # List from http://xahlee.info/comp/unicode_matching_brackets.html
75             #
76             @bracket_pairs =
77             map {ref $_ ? $_ :
78             /!/ ? [(do {my $x = $_; $x =~ s/!/TOP/; $x},
79             do {my $x = $_; $x =~ s/!/BOTTOM/; $x})]
80             : [(do {my $x = $_; $x =~ s/\?/LEFT/; $x},
81             do {my $x = $_; $x =~ s/\?/RIGHT/; $x})]}
82             "? PARENTHESIS",
83             "? SQUARE BRACKET",
84             "? CURLY BRACKET",
85             "? DOUBLE QUOTATION MARK",
86             "? SINGLE QUOTATION MARK",
87             "SINGLE ?-POINTING ANGLE QUOTATION MARK",
88             "?-POINTING DOUBLE ANGLE QUOTATION MARK",
89             "FULLWIDTH ? PARENTHESIS",
90             "FULLWIDTH ? SQUARE BRACKET",
91             "FULLWIDTH ? CURLY BRACKET",
92             "FULLWIDTH ? WHITE PARENTHESIS",
93             "? WHITE PARENTHESIS",
94             "? WHITE SQUARE BRACKET",
95             "? WHITE CURLY BRACKET",
96             "? CORNER BRACKET",
97             "? ANGLE BRACKET",
98             "? DOUBLE ANGLE BRACKET",
99             "? BLACK LENTICULAR BRACKET",
100             "? TORTOISE SHELL BRACKET",
101             "? BLACK TORTOISE SHELL BRACKET",
102             "? WHITE CORNER BRACKET",
103             "? WHITE LENTICULAR BRACKET",
104             "? WHITE TORTOISE SHELL BRACKET",
105             "HALFWIDTH ? CORNER BRACKET",
106             "MATHEMATICAL ? WHITE SQUARE BRACKET",
107             "MATHEMATICAL ? ANGLE BRACKET",
108             "MATHEMATICAL ? DOUBLE ANGLE BRACKET",
109             "MATHEMATICAL ? FLATTENED PARENTHESIS",
110             "MATHEMATICAL ? WHITE TORTOISE SHELL BRACKET",
111             "? CEILING",
112             "? FLOOR",
113             "Z NOTATION ? IMAGE BRACKET",
114             "Z NOTATION ? BINDING BRACKET",
115             [ "HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT",
116             "HEAVY SINGLE " . "COMMA QUOTATION MARK ORNAMENT", ],
117             [ "HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT",
118             "HEAVY DOUBLE " . "COMMA QUOTATION MARK ORNAMENT", ],
119             "MEDIUM ? PARENTHESIS ORNAMENT",
120             "MEDIUM FLATTENED ? PARENTHESIS ORNAMENT",
121             "MEDIUM ? CURLY BRACKET ORNAMENT",
122             "MEDIUM ?-POINTING ANGLE BRACKET ORNAMENT",
123             "HEAVY ?-POINTING ANGLE QUOTATION MARK ORNAMENT",
124             "HEAVY ?-POINTING ANGLE BRACKET ORNAMENT",
125             "LIGHT ? TORTOISE SHELL BRACKET ORNAMENT",
126             "ORNATE ? PARENTHESIS",
127             "! PARENTHESIS",
128             "! SQUARE BRACKET",
129             "! CURLY BRACKET",
130             "! TORTOISE SHELL BRACKET",
131             "PRESENTATION FORM FOR VERTICAL ? CORNER BRACKET",
132             "PRESENTATION FORM FOR VERTICAL ? WHITE CORNER BRACKET",
133             "PRESENTATION FORM FOR VERTICAL ? TORTOISE SHELL BRACKET",
134             "PRESENTATION FORM FOR VERTICAL ? BLACK LENTICULAR BRACKET",
135             "PRESENTATION FORM FOR VERTICAL ? WHITE LENTICULAR BRACKET",
136             "PRESENTATION FORM FOR VERTICAL ? ANGLE BRACKET",
137             "PRESENTATION FORM FOR VERTICAL ? DOUBLE ANGLE BRACKET",
138             "PRESENTATION FORM FOR VERTICAL ? SQUARE BRACKET",
139             "PRESENTATION FORM FOR VERTICAL ? CURLY BRACKET",
140             "?-POINTING ANGLE BRACKET",
141             "? ANGLE BRACKET WITH DOT",
142             "?-POINTING CURVED ANGLE BRACKET",
143             "SMALL ? PARENTHESIS",
144             "SMALL ? CURLY BRACKET",
145             "SMALL ? TORTOISE SHELL BRACKET",
146             "SUPERSCRIPT ? PARENTHESIS",
147             "SUBSCRIPT ? PARENTHESIS",
148             "? SQUARE BRACKET WITH UNDERBAR",
149             [ "LEFT SQUARE BRACKET WITH TICK IN TOP CORNER",
150             "RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER", ],
151             [ "LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER",
152             "RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER", ],
153             "? SQUARE BRACKET WITH QUILL",
154             "TOP ? HALF BRACKET",
155             "BOTTOM ? HALF BRACKET",
156             "? S-SHAPED BAG DELIMITER",
157             [ "LEFT ARC LESS-THAN BRACKET",
158             "RIGHT ARC GREATER-THAN BRACKET", ],
159             [ "DOUBLE LEFT ARC GREATER-THAN BRACKET",
160             "DOUBLE RIGHT ARC LESS-THAN BRACKET", ],
161             "? SIDEWAYS U BRACKET",
162             "? DOUBLE PARENTHESIS",
163             "? WIGGLY FENCE",
164             "? DOUBLE WIGGLY FENCE",
165             "? LOW PARAPHRASE BRACKET",
166             "? RAISED OMISSION BRACKET",
167             "? SUBSTITUTION BRACKET",
168             "? DOTTED SUBSTITUTION BRACKET",
169             "? TRANSPOSITION BRACKET",
170             [ "OGHAM FEATHER MARK",
171             "OGHAM REVERSED FEATHER MARK", ],
172             [ "TIBETAN MARK GUG RTAGS GYON",
173             "TIBETAN MARK GUG RTAGS GYAS", ],
174             [ "TIBETAN MARK ANG KHANG GYON",
175             "TIBETAN MARK ANG KHANG GYAS", ],
176             ;
177              
178             #
179             # Filter out unknown characters; this may run on an older version
180             # of Perl with an old version of Unicode.
181             #
182             @bracket_pairs = grep {defined charnames::string_vianame ($$_ [0]) &&
183             defined charnames::string_vianame ($$_ [1])}
184             @bracket_pairs;
185              
186             if (@bracket_pairs) {
187             my $delims = join "" => map {charnames::string_vianame ($$_ [0])}
188             @bracket_pairs;
189             my $cdelims = join "" => map {charnames::string_vianame ($$_ [1])}
190             @bracket_pairs;
191              
192             pattern name => [qw (bquoted -esc=\\)],
193             create => sub {my $flags = $_ [1];
194             return gen_delimited ($delims, $flags -> {-esc},
195             $cdelims);
196             },
197             version => 5.014,
198             ;
199             }
200             }
201              
202              
203             #
204             # Return the Unicode names of the pairs of matching delimiters.
205             #
206 0     0 0   sub bracket_pairs {@bracket_pairs}
207              
208             1;
209              
210             __END__