| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::LinkParser::MatchPath::Lex; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
15
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
31
|
|
|
4
|
1
|
|
|
1
|
|
1334
|
use Exporter::Lite; |
|
|
1
|
|
|
|
|
867
|
|
|
|
1
|
|
|
|
|
6
|
|
|
5
|
1
|
|
|
1
|
|
8919
|
use Lex; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our @tokens = ( |
|
8
|
|
|
|
|
|
|
# word class |
|
9
|
|
|
|
|
|
|
POS => '_[pavding]_', |
|
10
|
|
|
|
|
|
|
WORD_REGEXP => '(?:/.+?/)[i]?', |
|
11
|
|
|
|
|
|
|
WORD => '\w+', |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# label class |
|
14
|
|
|
|
|
|
|
LABEL_REGEXP => '<\/.+?\/>', |
|
15
|
|
|
|
|
|
|
LABEL => '<.+?>', |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
POUND_SIGN => '#(?=[<(])', |
|
18
|
|
|
|
|
|
|
EXCLM_SIGN => '!(?=[<\w\/(])', |
|
19
|
|
|
|
|
|
|
AT_SIGN => '@(?=[<(])', |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
NEWLINE => '\n', |
|
22
|
|
|
|
|
|
|
EOR => ';', |
|
23
|
|
|
|
|
|
|
LPAREN => '[(]', |
|
24
|
|
|
|
|
|
|
RPAREN => '[)]', |
|
25
|
|
|
|
|
|
|
# QM => '[?]', |
|
26
|
|
|
|
|
|
|
# AND => '[&,]', |
|
27
|
|
|
|
|
|
|
OR => '[|]', |
|
28
|
|
|
|
|
|
|
PERCENT => '[%]', |
|
29
|
|
|
|
|
|
|
COMMENT => '^\s*#.+?$', |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
ERROR => '.+', |
|
32
|
|
|
|
|
|
|
); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub new { |
|
35
|
|
|
|
|
|
|
my $class = shift; |
|
36
|
|
|
|
|
|
|
my %opt = @_; |
|
37
|
|
|
|
|
|
|
bless { |
|
38
|
|
|
|
|
|
|
lexer => Lex->new(@tokens), |
|
39
|
|
|
|
|
|
|
debug => $opt{debug}, |
|
40
|
|
|
|
|
|
|
}, $class; |
|
41
|
|
|
|
|
|
|
} |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub _get_tokens { |
|
44
|
|
|
|
|
|
|
my $self = shift; |
|
45
|
|
|
|
|
|
|
my $token; |
|
46
|
|
|
|
|
|
|
my ($name, $content); |
|
47
|
|
|
|
|
|
|
while($token = $self->{lexer}->nextToken){ |
|
48
|
|
|
|
|
|
|
($name, $content) = ($token->name(), $token->get); |
|
49
|
|
|
|
|
|
|
$name =~ s/.+:://; |
|
50
|
|
|
|
|
|
|
$content =~ s/\n$// if $name =~ /EOR$/; |
|
51
|
|
|
|
|
|
|
die "Error occurred during tokenizing text: ( $content )" if $name =~ /ERROR/; |
|
52
|
|
|
|
|
|
|
last unless $token->name =~ /(?:NEWLINE|COMMENT)$/; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
if (not $self->{lexer}->eof) { |
|
55
|
|
|
|
|
|
|
[ $name, $content ]; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# post-processing |
|
60
|
|
|
|
|
|
|
sub _pp_tokens { |
|
61
|
|
|
|
|
|
|
my $self = shift; |
|
62
|
|
|
|
|
|
|
my $token = $self->{token}; |
|
63
|
|
|
|
|
|
|
my @token; |
|
64
|
|
|
|
|
|
|
for ( my $i = 0; $i<@$token; ){ |
|
65
|
|
|
|
|
|
|
# one-step matching |
|
66
|
|
|
|
|
|
|
if( |
|
67
|
|
|
|
|
|
|
$token->[$i][0] =~ /^(?:POUND|EXCML|AT)_SIGN$/o && |
|
68
|
|
|
|
|
|
|
$token->[$i+1][0] =~ /^LABEL/o |
|
69
|
|
|
|
|
|
|
){ |
|
70
|
|
|
|
|
|
|
push |
|
71
|
|
|
|
|
|
|
@token, |
|
72
|
|
|
|
|
|
|
$token->[$i], |
|
73
|
|
|
|
|
|
|
[ 'LPAREN' => '(' ], |
|
74
|
|
|
|
|
|
|
$token->[$i+1], |
|
75
|
|
|
|
|
|
|
$token->[$i+2], |
|
76
|
|
|
|
|
|
|
[ 'RPAREN' => ')' ]; |
|
77
|
|
|
|
|
|
|
$i+=3; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
|
|
|
|
|
|
# append '@' if there is none before '(' |
|
80
|
|
|
|
|
|
|
elsif( |
|
81
|
|
|
|
|
|
|
$token->[$i][0] eq 'LPAREN' && |
|
82
|
|
|
|
|
|
|
$token->[$i-1][0] =~ /^(?:WORD|POS)/ |
|
83
|
|
|
|
|
|
|
){ |
|
84
|
|
|
|
|
|
|
push |
|
85
|
|
|
|
|
|
|
@token, |
|
86
|
|
|
|
|
|
|
[ 'AT_SIGN' => '@' ], |
|
87
|
|
|
|
|
|
|
$token->[$i]; |
|
88
|
|
|
|
|
|
|
$i+=1; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
else { |
|
92
|
|
|
|
|
|
|
push @token, $token->[$i]; |
|
93
|
|
|
|
|
|
|
$i++; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
$self->{token} = \@token; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub load { |
|
100
|
|
|
|
|
|
|
my $self = shift; |
|
101
|
|
|
|
|
|
|
$self->{lexer}->from(shift); |
|
102
|
|
|
|
|
|
|
while( my $t = $self->_get_tokens() ){ |
|
103
|
|
|
|
|
|
|
push @{$self->{token}}, $t; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
$self->_pp_tokens; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub lex { |
|
110
|
|
|
|
|
|
|
my $self = shift; |
|
111
|
|
|
|
|
|
|
my $t = shift @{$self->{token}}; |
|
112
|
|
|
|
|
|
|
if( $t->[0] ){ |
|
113
|
|
|
|
|
|
|
printf (" - %-15s ==> %s\n", @{$t}[1,0]) if $self->{debug}; |
|
114
|
|
|
|
|
|
|
return @$t; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
('', undef); |
|
117
|
|
|
|
|
|
|
} |