line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# [[[ HEADER ]]] |
2
|
|
|
|
|
|
|
package RPerl::Operation::Expression::Operator::RegularExpression; |
3
|
4
|
|
|
4
|
|
22
|
use strict; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
98
|
|
4
|
4
|
|
|
4
|
|
18
|
use warnings; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
78
|
|
5
|
4
|
|
|
4
|
|
20
|
use RPerl::AfterSubclass; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
476
|
|
6
|
|
|
|
|
|
|
our $VERSION = 0.013_000; |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# [[[ OO INHERITANCE ]]] |
9
|
4
|
|
|
4
|
|
25
|
use parent qw(RPerl::Operation::Expression::Operator); |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
20
|
|
10
|
4
|
|
|
4
|
|
204
|
use RPerl::Operation::Expression::Operator; |
|
4
|
|
|
|
|
438
|
|
|
4
|
|
|
|
|
5012
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# [[[ CRITICS ]]] |
13
|
|
|
|
|
|
|
## no critic qw(ProhibitUselessNoCritic ProhibitMagicNumbers RequireCheckedSyscalls) # USER DEFAULT 1: allow numeric values & print operator |
14
|
|
|
|
|
|
|
## no critic qw(RequireInterpolationOfMetachars) # USER DEFAULT 2: allow single-quoted control characters & sigils |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# [[[ OO PROPERTIES ]]] |
17
|
|
|
|
|
|
|
our hashref $properties = {}; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# COPYRIGHT NOTICE: modifier descriptions copied from JPCRE2 docs under BSD license https://github.com/jpcre2/jpcre2 |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# supported and compliant! :-) |
22
|
|
|
|
|
|
|
# Compile-time regex modifiers accepted by the C++ generator.
# Each key is a single-character modifier flag; each value is its human-readable description,
# which is only used in diagnostic output.
# Keys are quoted explicitly because several of them (m, s, x) are also names of Perl quote-like operators.
our string_hashref $modifiers_compile = {
    'i' => 'Case-insensitive. Equivalent to PCRE2_CASELESS option.',
    'm' => 'Multi-line regex. Equivalent to PCRE2_MULTILINE option.',
    's' => 'If this modifier is set, a dot meta-character in the pattern matches all characters, including newlines. Equivalent to PCRE2_DOTALL option.',
    'u' => 'Enable UTF support.Treat pattern and subjects as UTF strings. It is equivalent to PCRE2_UTF option.',
    'x' => 'Whitespace data characters in the pattern are totally ignored except when escaped or inside a character class, enables commentary in pattern. Equivalent to PCRE2_EXTENDED option.',
};
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=begin DISABLED_UNSUPPORTED_OR_NONCOMPLIANT |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# DEV NOTE: there are other unsupported modifiers as well, see the Perl docs https://perldoc.perl.org/perlre.html#Modifiers |
33
|
|
|
|
|
|
|
our string_hashref $modifiers_compile_unsupported = { |
34
|
|
|
|
|
|
|
xx => 'Whitespace data characters in the pattern are totally ignored except when escaped, EVEN WHEN INSIDE A CHARACTER CLASS. Requires Perl v5.26 or newer.', |
35
|
|
|
|
|
|
|
}; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# DEV NOTE: some of these noncompliant modifiers may be related to Perl regex assertions, such as 'A' https://perldoc.perl.org/perlre.html |
38
|
|
|
|
|
|
|
our string_hashref $modifiers_compile_noncompliant = { |
39
|
|
|
|
|
|
|
e => 'Unset back-references in the pattern will match to empty strings. Equivalent to PCRE2_MATCH_UNSET_BACKREF.', |
40
|
|
|
|
|
|
|
j => '\u \U \x and unset back-references will act as JavaScript standard. Equivalent to PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF. |
41
|
|
|
|
|
|
|
\U matches an upper case "U" character (by default it causes a compile error if this option is not set). |
42
|
|
|
|
|
|
|
\u matches a lower case "u" character unless it is followed by four hexadecimal digits, in which case the hexadecimal number defines the code point to match (by default it causes a compile error if this option is not set). |
43
|
|
|
|
|
|
|
\x matches a lower case "x" character unless it is followed by two hexadecimal digits, in which case the hexadecimal number defines the code point to match (By default, as in Perl, a hexadecimal number is always expected after \x, but it may have zero, one, or two digits (so, for example, \xz matches a binary zero character followed by z) ). |
44
|
|
|
|
|
|
|
Unset back-references in the pattern will match to empty strings.', |
45
|
|
|
|
|
|
|
n => 'Enable Unicode support for \w \d etc... in pattern. Equivalent to PCRE2_UTF | PCRE2_UCP.', |
46
|
|
|
|
|
|
|
A => 'Match only at the first position. It is equivalent to PCRE2_ANCHORED option.', |
47
|
|
|
|
|
|
|
D => 'A dollar meta-character in the pattern matches only at the end of the subject string. Without this modifier, a dollar also matches immediately before the final character if it is a newline (but not before any other newlines). This modifier is ignored if m modifier is set. Equivalent to PCRE2_DOLLAR_ENDONLY option.', |
48
|
|
|
|
|
|
|
J => 'Allow duplicate names for sub-patterns. Equivalent to PCRE2_DUPNAMES option.', |
49
|
|
|
|
|
|
|
S => 'When a pattern is going to be used several times, it is worth spending more time analyzing it in order to speed up the time taken for matching/replacing. It may also be beneficial for a very long subject string or pattern. Equivalent to an extra compilation with JIT_COMPILER with the option PCRE2_JIT_COMPLETE.', |
50
|
|
|
|
|
|
|
U => 'This modifier inverts the "greediness" of the quantifiers so that they are not greedy by default, but become greedy if followed by ?. Equivalent to PCRE2_UNGREEDY option.', |
51
|
|
|
|
|
|
|
}; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Match-time regex modifiers accepted by the C++ generator for 'm' (match) patterns.
# Each key is a single-character modifier flag; each value is its description, used only in diagnostic output.
our string_hashref $modifiers_match = {
    'g' => 'Global. Will perform global matching or replacement if passed. Equivalent to jpcre2::FIND_ALL for match and PCRE2_SUBSTITUTE_GLOBAL for replace.',
};
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=begin DISABLED_UNSUPPORTED_OR_NONCOMPLIANT |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
our string_hashref $modifiers_match_noncompliant = { |
64
|
|
|
|
|
|
|
A => 'Match at start. Equivalent to PCRE2_ANCHORED. Can be used in match operation. Setting this option only at match time (i.e regex was not compiled with this option) will disable optimization during match time.', |
65
|
|
|
|
|
|
|
}; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=cut |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Substitute-time regex modifiers accepted by the C++ generator for 's' (substitute) patterns.
# Each key is a single-character modifier flag; each value is its description, used only in diagnostic output.
our string_hashref $modifiers_substitute = {
    'g' => 'Global. Will perform global matching or replacement if passed. Equivalent to jpcre2::FIND_ALL for match and PCRE2_SUBSTITUTE_GLOBAL for replace.',
};
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=begin DISABLED_UNSUPPORTED_OR_NONCOMPLIANT |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
our string_hashref $modifiers_substitute_noncompliant = { |
78
|
|
|
|
|
|
|
e => 'Replaces unset group with empty string. Equivalent to PCRE2_SUBSTITUTE_UNSET_EMPTY.', |
79
|
|
|
|
|
|
|
E => 'Extension of e modifier. Sets even unknown groups to empty string. Equivalent to PCRE2_SUBSTITUTE_UNSET_EMPTY | PCRE2_SUBSTITUTE_UNKNOWN_UNSET', |
80
|
|
|
|
|
|
|
x => 'Extended replacement operation. Equivalent to PCRE2_SUBSTITUTE_EXTENDED. It enables some Bash like features: |
81
|
|
|
|
|
|
|
${<n>:-<string>} |
82
|
|
|
|
|
|
|
${<n>:+<string1>:<string2>} |
83
|
|
|
|
|
|
|
<n> may be a group number or a name. The first form specifies a default value. If group <n> is set, its value is inserted; if not, <string> is expanded and the result is inserted. The second form specifies strings that are expanded and inserted when group <n> is set or unset, respectively. The first form is just a convenient shorthand for ${<n>:+${<n>}:<string>}.', |
84
|
|
|
|
|
|
|
}; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# [[[ SUBROUTINES & OO METHODS ]]] |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Regenerate RPerl source code for a regular expression operation.
# Arguments, unpacked from @ARG:
#   $self   the AST node; must be blessed into class Operator_104, with children
#           [ SubExpression object, regex bind operator string, regex pattern string ]
#   $modes  passed through unchanged to the SubExpression's own ast_to_rperl__generate()
# Returns a source group hashref; the bind operator and pattern are appended to its PMC entry,
# each preceded by a single space.
# Dies if $self is not an Operator_104 node.
sub ast_to_rperl__generate {
    { my string_hashref::method $RETURN_TYPE };
    ( my object $self, my string_hashref $modes ) = @ARG;

#    RPerl::diag( 'in Operator::RegularExpression->ast_to_rperl__generate(), received $self = ' . "\n" . RPerl::Parser::rperl_ast__dump($self) . "\n" );

    # only grammar rule accepted here:  Operator -> SubExpression OP06_REGEX_BIND OP06_REGEX_PATTERN
    my string $rule_name = ref $self;
    if ( $rule_name ne 'Operator_104' ) {
        die RPerl::Parser::rperl_rule__replace( 'ERROR ECOGEASRP00, CODE GENERATOR, ABSTRACT SYNTAX TO RPERL: Grammar rule ' . $rule_name . ' found where Operator_104 expected, dying' ) . "\n";
    }

    # the subject subexpression, to the left of the bind operator, generates its own source code first
    my string_hashref $generated = { PMC => q{} };
    RPerl::Generator::source_group_append( $generated, $self->{children}->[0]->ast_to_rperl__generate($modes) );

    # bind operator and pattern are plain strings, emitted verbatim
    my ( $bind_operator, $regex_pattern ) = @{ $self->{children} }[ 1, 2 ];
    $generated->{PMC} .= join q{ }, q{}, $bind_operator, $regex_pattern;

    return $generated;
}
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Placeholder C++ generator for CPPOPS_PERLTYPES mode.
# Arguments, unpacked from @ARG, are accepted but not used:
#   $self   the AST node
#   $modes  the generator modes
# Returns a source group hashref whose CPP entry is a one-line dummy C++ comment; no real code is generated.
sub ast_to_cpp__generate__CPPOPS_PERLTYPES {
    { my string_hashref::method $RETURN_TYPE };
    ( my object $self, my string_hashref $modes ) = @ARG;

    # marker text only, so the missing implementation is visible in any generated output
    my string $dummy_source = q{// <<< RP::O::E::O::RE __DUMMY_SOURCE_CODE CPPOPS_PERLTYPES >>>} . "\n";

    my string_hashref $placeholder_group = {};
    $placeholder_group->{CPP} = $dummy_source;

    #...
    return $placeholder_group;
}
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Generate C++ source code, in CPPOPS_CPPTYPES mode, for a regular expression match or substitution.
# Arguments, unpacked from @ARG:
#   $self   the AST node; must be blessed into class Operator_104, with children
#           [ SubExpression object, regex bind operator string, regex pattern string ]
#   $modes  passed through unchanged to the SubExpression's own ast_to_cpp__generate__CPPOPS_CPPTYPES()
# Returns a source group hashref whose CPP entry holds the generated C++ expression:
#   match:       (boolean) regex("PATTERN", "MODS_COMP").match(SUBJECT, "MODS_MATCH")
#   substitute:  regex("FIND", "MODS_COMP").preplace(&SUBJECT, "REPLACE_WITH", "MODS_SUBST")
# Dies if:
#   $self is not an Operator_104 node;
#   the bind operator is anything other than '=~';
#   the pattern, minus its type character and modifiers, is not wrapped in forward slashes;
#   a modifier is not listed in $modifiers_compile or in $modifiers_match / $modifiers_substitute;
#   the pattern type character is neither 'm' nor 's';
#   a substitute pattern body does not contain exactly one forward slash.
sub ast_to_cpp__generate__CPPOPS_CPPTYPES {
    { my string_hashref::method $RETURN_TYPE };
    ( my object $self, my string_hashref $modes ) = @ARG;
    my string_hashref $cpp_source_group = { CPP => q{} };

#    RPerl::diag( 'in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), received $self = ' . "\n" . RPerl::Parser::rperl_ast__dump($self) . "\n" );

    # only grammar rule accepted here:  Operator -> SubExpression OP06_REGEX_BIND OP06_REGEX_PATTERN
    my string $self_class = ref $self;
    if ( $self_class ne 'Operator_104' ) {
        # NOTE(review): message text says 'TO RPERL' although this is a C++ generator; left unchanged, confirm against project error message conventions
        die RPerl::Parser::rperl_rule__replace( 'ERROR ECOGEASCP00, CODE GENERATOR, ABSTRACT SYNTAX TO RPERL: Grammar rule ' . $self_class . ' found where Operator_104 expected, dying' ) . "\n";
    }

    # generate subexpression, to left of regex bind operator
    my string_hashref $cpp_source_subgroup = $self->{children}->[0]->ast_to_cpp__generate__CPPOPS_CPPTYPES($modes);
    RPerl::Generator::source_group_append( $cpp_source_group, $cpp_source_subgroup );

    # get bind and pattern strings
    my string $bind    = $self->{children}->[1];
    my string $pattern = $self->{children}->[2];
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $bind = '} . $bind . q{'} . "\n" );
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $pattern = '} . $pattern . q{'} . "\n" );

    # NEED FIX: DIE ON !~ BINDING OPERATOR, should actually be logic & code generation to implement !~ binding operator
    if ( $bind ne '=~' ) {
        die q{ERROR ECOGEASCPxx: Regular expression binding operator '} . $bind . q{' not yet supported, dying};
    }

    # separate pattern into match/substitute flag, bare pattern, and modifiers;
    # modifiers are everything after the final forward slash, or the entire pattern if it contains no forward slash
    # (rindex returns -1 in that case, so the modifiers start at offset 0)
    my character $match_or_substitute = substr $pattern, 0, 1;
    my integer $modifiers_start       = rindex( $pattern, q{/} ) + 1;
    my string $modifiers              = substr $pattern, $modifiers_start;
    my string $pattern_bare           = substr $pattern, 1, ( ( length $pattern ) - ( ( length $modifiers ) + 1 ) );
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $match_or_substitute = '} . $match_or_substitute . q{'} . "\n" );
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $pattern_bare = '} . $pattern_bare . q{'} . "\n" );
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers = '} . $modifiers . q{'} . "\n" );

    # test for and remove book-end forward slash characters
    if ( ( substr $pattern_bare, 0, 1 ) ne q{/} ) {
        die q{ERROR ECOGEASCP81: Regular expression pattern '} . $pattern_bare . q{' does not begin with forward slash '/' character, dying};
    }
    if ( ( substr $pattern_bare, -1, 1 ) ne q{/} ) {
        die q{ERROR ECOGEASCP82: Regular expression pattern '} . $pattern_bare . q{' does not end with forward slash '/' character, dying};
    }
    $pattern_bare = substr $pattern_bare, 1, ( ( length $pattern_bare ) - 2 );

    # must backslash-escape backslashes within bare pattern, character class \w must appear as \\w etc, convert all single backslashes into double backslashes
    # NOTE(review): double-quote characters in the pattern are not escaped, although the pattern is emitted inside a C++ double-quoted string literal below; confirm whether the grammar can let them through
    $pattern_bare =~ s/\\/\\\\/gxms;
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have forward-slash-cleaned and backslash-escaped $pattern_bare = '} . $pattern_bare . q{'} . "\n" );

    my string $modifiers_compile_enabled    = q{};
    my string $modifiers_match_enabled      = q{};
    my string $modifiers_substitute_enabled = q{};
    my string $modifiers_compile_extra      = q{};

    # include S compile modifier for optimization on long (presumably more complex) patterns;
    # the length is measured after backslash doubling, and for substitutions it includes the replacement text
    if ( ( length $pattern_bare ) > 20 ) {
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), setting S modifier to attempt PCRE2 optimization of pattern over length of 20 characters} . "\n" );
        $modifiers_compile_extra = 'S';
    }

    # match
    if ( $match_or_substitute eq 'm' ) {
        # EXAMPLE C++ CODE
        # // check if string matches the pattern, return true or false
        # regex("(\\d)|(\\w)").match("I am the subject")
        # // match all and get the match count using the action modifier 'g', return count
        # regex("(\\d)|(\\w)","m").match("I am the subject","g")

        # union of compile and match modifier flags, computed once for both the diagnostic and the error message
        my string $modifiers_valid = join ', ', ( sort keys %{ { %{$modifiers_compile}, %{$modifiers_match} } } );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have all valid match modifiers = '} . $modifiers_valid . q{'} . "\n" );

        # validate & sort modifiers; compile modifiers take precedence when a flag appears in both tables
        foreach my character $modifier ( split //, $modifiers ) {
            if ( exists $modifiers_compile->{$modifier} ) {
                RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid compile modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_compile->{$modifier} . "\n" );
                $modifiers_compile_enabled .= $modifier;
            }
            elsif ( exists $modifiers_match->{$modifier} ) {
                RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid match modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_match->{$modifier} . "\n" );
                $modifiers_match_enabled .= $modifier;
            }
            else {
                die q{ERROR ECOGEASCP83: Non-compliant, unsupported, or unrecognized regular expression modifier '} . $modifier . q{' found, must be one of (} . $modifiers_valid . q{), dying};
            }
        }
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_compile_enabled = '} . $modifiers_compile_enabled . q{'} . "\n" );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_match_enabled = '} . $modifiers_match_enabled . q{'} . "\n" );

        # compose final C++ code for modifiers; an empty modifier set emits no argument at all
        my $modifiers_compile_CPP = q{};
        if ( ( $modifiers_compile_enabled . $modifiers_compile_extra ) ne q{} ) {
            $modifiers_compile_CPP = q{, "} . ( $modifiers_compile_enabled . $modifiers_compile_extra ) . q{"};
        }
        my $modifiers_match_CPP = q{};
        if ( $modifiers_match_enabled ne q{} ) {
            $modifiers_match_CPP = q{, "} . $modifiers_match_enabled . q{"};
        }
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_compile_CPP = '} . $modifiers_compile_CPP . q{'} . "\n" );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_match_CPP = '} . $modifiers_match_CPP . q{'} . "\n" );

        # DEV NOTE: $cpp_source_group->{CPP} already contains the generated subexpression to be used as the subject of the regex
        # DEV NOTE: Perl vs JPCRE2 inconsistency, must explicitly cast return value change count as boolean true/false value
        $cpp_source_group->{CPP} = '(boolean) regex("' . $pattern_bare . '"' . $modifiers_compile_CPP . ').match(' . $cpp_source_group->{CPP} . $modifiers_match_CPP . ')';
    }

    # substitute
    elsif ( $match_or_substitute eq 's' ) {
        # EXAMPLE C++ CODE
        # // replace first occurrence of a digit with @
        # string foo = (const string) "I am the subject string 44";
        # regex("\\d").preplace(&foo, "@")
        # // replace all occurrences of a digit with @
        # string foo = (const string) "I am the subject string 44";
        # regex("\\d").preplace(&foo, "@", "g")
        # // swap two parts of a string
        # string foo = (const string) "I am the subject\tTo be swapped according to tab";
        # regex("^([^\t]+)\t([^\t]+)$").preplace(&foo, "$2 $1")

        # union of compile and substitute modifier flags, computed once for both the diagnostic and the error message
        my string $modifiers_valid = join ', ', ( sort keys %{ { %{$modifiers_compile}, %{$modifiers_substitute} } } );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have all valid substitute modifiers = '} . $modifiers_valid . q{'} . "\n" );

        # validate & sort modifiers; compile modifiers take precedence when a flag appears in both tables
        foreach my character $modifier ( split //, $modifiers ) {
            if ( exists $modifiers_compile->{$modifier} ) {
                RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid compile modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_compile->{$modifier} . "\n" );
                $modifiers_compile_enabled .= $modifier;
            }
            elsif ( exists $modifiers_substitute->{$modifier} ) {
                RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid substitute modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_substitute->{$modifier} . "\n" );
                $modifiers_substitute_enabled .= $modifier;
            }
            else {
                die q{ERROR ECOGEASCP84: Non-compliant, unsupported, or unrecognized regular expression modifier '} . $modifier . q{' found, must be one of (} . $modifiers_valid . q{), dying};
            }
        }

        # compose final C++ code for modifiers; an empty modifier set emits no argument at all
        my $modifiers_compile_CPP = q{};
        if ( ( $modifiers_compile_enabled . $modifiers_compile_extra ) ne q{} ) {
            $modifiers_compile_CPP = q{, "} . ( $modifiers_compile_enabled . $modifiers_compile_extra ) . q{"};
        }
        my $modifiers_substitute_CPP = q{};
        if ( $modifiers_substitute_enabled ne q{} ) {
            $modifiers_substitute_CPP = q{, "} . $modifiers_substitute_enabled . q{"};
        }

        # validate substitute pattern;
        # tr/// with an empty replacement list counts matching characters without modifying the string,
        # whereas a m//g match in scalar context only reports true or false and so can not detect extra slashes
        my integer $pattern_forward_slash_count = ( $pattern_bare =~ tr{/}{} );
        if ( $pattern_forward_slash_count != 1 ) {
            die q{ERROR ECOGEASCP85: Substitution regular expression pattern '} . $pattern_bare . q{' does not contain exactly one forward slash '/' character, dying};
        }

        # split find/replace portions of substitute pattern around the single forward slash validated above
        my integer $slash_index    = index $pattern_bare, q{/};
        my string $pattern_find    = substr $pattern_bare, 0, $slash_index;
        my string $pattern_replace = substr $pattern_bare, ( $slash_index + 1 );

        # NEED ADD ERROR CHECK OR GRAMMAR CHANGE: regex substitution's LHS subexpression can only be a variable, because we must assign the return value back to the variable to emulate PERLOPS_PERLTYPES behavior
        # NEED ADD SUPPORT: non-destructive regex substitution using Perl's /r modifier, and NOT setting the original variable to the return value in C++
        # NEED ADD LOGIC: bind not !~ instead of only bind =~, disable die on !~ above !!!

        # DEV NOTE: $cpp_source_group->{CPP} already contains the generated subexpression to be used as the subject of the regex

        # EXAMPLE C++ CODE:  regex("FIND", "MODS_COMP").preplace(&foo, "REPLACE_WITH", "MODS_SUBST")
        $cpp_source_group->{CPP} = 'regex("' . $pattern_find . '"' . $modifiers_compile_CPP . ').preplace(&' . $cpp_source_group->{CPP} . ', "' . $pattern_replace . '"' . $modifiers_substitute_CPP . ')';
    }
    else {
        die q{ERROR ECOGEASCP80: Unrecognized regular expression type '} . $match_or_substitute . q{' found, must be 'm' for match or 's' for substitute, dying};
    }

    RPerl::diag( 'in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), about to return $cpp_source_group = ' . "\n" . RPerl::Parser::rperl_ast__dump($cpp_source_group) . "\n" );
    return $cpp_source_group;
}
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
1; # end of class |