| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Pistachio::Tokenizer; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: provides iterator(), which turns source code text into a Pistachio::Token iterator |
|
3
|
|
|
|
|
|
|
|
|
4
|
3
|
|
|
3
|
|
4026
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
114
|
|
|
5
|
3
|
|
|
3
|
|
15
|
use warnings; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
129
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.10'; # VERSION |
|
7
|
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
5659
|
use Module::Load; |
|
|
3
|
|
|
|
|
3574
|
|
|
|
3
|
|
|
|
|
17
|
|
|
9
|
3
|
|
|
3
|
|
169
|
use Carp 'croak'; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
242
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use constant { |
|
12
|
3
|
|
|
|
|
3549
|
LNG => 0, |
|
13
|
|
|
|
|
|
|
IDX => 1, |
|
14
|
|
|
|
|
|
|
GOT => 2, |
|
15
|
|
|
|
|
|
|
MAX => 3, |
|
16
|
|
|
|
|
|
|
TOK => 4 |
|
17
|
3
|
|
|
3
|
|
15
|
}; |
|
|
3
|
|
|
|
|
5
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# @param string $type Object type. |
|
20
|
|
|
|
|
|
|
# @param Pistachio::Language $lang Language object. |
|
21
|
|
|
|
|
|
|
# @return Pistachio::Tokenizer |
|
22
|
|
|
|
|
|
|
sub new { |
|
23
|
4
|
|
|
4
|
0
|
14
|
my $type = shift; |
|
24
|
4
|
50
|
33
|
|
|
44
|
my $lang = ref $_[0] eq 'Pistachio::Language' && $_[0] |
|
25
|
|
|
|
|
|
|
or croak 'A Pistachio::Language is required'; |
|
26
|
4
|
|
|
|
|
29
|
bless [$lang], $type; |
|
27
|
|
|
|
|
|
|
} |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
30
|
|
|
|
|
|
|
# @param scalarref $text reference to text |
|
31
|
|
|
|
|
|
|
# @return coderef Pistachio::Token iterator |
|
32
|
|
|
|
|
|
|
sub iterator { |
|
33
|
4
|
|
|
4
|
0
|
1613
|
my ($this, $text) = @_; |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# initialize iterator data |
|
36
|
4
|
|
|
|
|
25
|
$this->[TOK] = $this->[LNG]->tokens($text); |
|
37
|
4
|
|
|
|
|
150
|
$this->[MAX] = scalar @{$this->[TOK]}; |
|
|
4
|
|
|
|
|
12
|
|
|
38
|
4
|
|
|
|
|
46
|
$this->[IDX] = 0; |
|
39
|
4
|
|
|
|
|
10
|
$this->[GOT] = 0; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# iterator closure |
|
42
|
|
|
|
|
|
|
sub { |
|
43
|
24
|
100
|
|
24
|
|
116
|
return undef if $this->_finished; |
|
44
|
23
|
|
|
|
|
53
|
my $token = $this->_transform($this->_curr); |
|
45
|
23
|
|
|
|
|
40
|
$this->[GOT]++; |
|
46
|
23
|
|
|
|
|
55
|
$this->_next; |
|
47
|
23
|
|
|
|
|
61
|
$token; |
|
48
|
4
|
|
|
|
|
26
|
}; |
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
52
|
|
|
|
|
|
|
# @return int 1 if we're finished iterating, or 0 |
|
53
|
|
|
|
|
|
|
sub _finished { |
|
54
|
24
|
|
|
24
|
|
34
|
my $this = shift; |
|
55
|
24
|
100
|
|
|
|
106
|
$this->[MAX] - $this->[GOT] < 1 ? 1 : 0; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
59
|
|
|
|
|
|
|
# @return Pistachio::Token |
|
60
|
|
|
|
|
|
|
sub _curr { |
|
61
|
86
|
|
|
86
|
|
106
|
my $this = shift; |
|
62
|
86
|
|
|
|
|
263
|
$this->[TOK]->[$this->[IDX]]; |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer |
|
66
|
|
|
|
|
|
|
# @return int 1 if there is a previous element, or 0 |
|
67
|
60
|
100
|
|
60
|
|
219
|
sub _has_prev { shift->[IDX] > 0 ? 1 : 0 } |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
70
|
|
|
|
|
|
|
# @return int 1 if there is a next element, or 0 |
|
71
|
|
|
|
|
|
|
sub _has_next { |
|
72
|
41
|
|
|
41
|
|
52
|
my $this = shift; |
|
73
|
41
|
50
|
|
|
|
157
|
$this->[MAX] - $this->[IDX] > 0 ? 1 : 0; |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
77
|
|
|
|
|
|
|
# @return Pistachio::Token, or undef |
|
78
|
|
|
|
|
|
|
sub _prev { |
|
79
|
28
|
|
|
28
|
|
43
|
my $this = shift; |
|
80
|
28
|
50
|
|
|
|
45
|
return undef unless $this->_has_prev; |
|
81
|
28
|
|
|
|
|
42
|
$this->[IDX]--; |
|
82
|
28
|
|
|
|
|
59
|
$this->_curr; |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
86
|
|
|
|
|
|
|
# @return Pistachio::Token, or undef |
|
87
|
|
|
|
|
|
|
sub _next { |
|
88
|
35
|
|
|
35
|
|
49
|
my $this = shift; |
|
89
|
35
|
50
|
|
|
|
68
|
return undef unless $this->_has_next; |
|
90
|
35
|
|
|
|
|
52
|
$this->[IDX]++; |
|
91
|
35
|
|
|
|
|
71
|
$this->_curr; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
95
|
|
|
|
|
|
|
# @param string $meth '_prev' or '_next' |
|
96
|
|
|
|
|
|
|
# @return Pistachio::Token, or undef |
|
97
|
|
|
|
|
|
|
sub _skip_whitespace { |
|
98
|
20
|
|
|
20
|
|
29
|
my ($this, $meth) = @_; |
|
99
|
20
|
100
|
|
|
|
43
|
while ($_ = $this->$meth) { return $_ if !$_->whitespace } |
|
|
40
|
|
|
|
|
117
|
|
|
100
|
0
|
|
|
|
|
0
|
undef; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
104
|
|
|
|
|
|
|
# @param Pistachio::Token $token |
|
105
|
|
|
|
|
|
|
# @return Pistachio::Token |
|
106
|
|
|
|
|
|
|
sub _transform { |
|
107
|
23
|
|
|
23
|
|
35
|
my ($this, $token) = @_; |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Some token types will get transformed into |
|
110
|
|
|
|
|
|
|
# more specific types by transformation rules. |
|
111
|
|
|
|
|
|
|
|
|
112
|
23
|
|
|
|
|
27
|
my $into; |
|
113
|
23
|
|
|
|
|
28
|
for my $rule (@{$this->[LNG]->transform_rules}) { |
|
|
23
|
|
|
|
|
76
|
|
|
114
|
391
|
100
|
|
|
|
1027
|
$token->match($rule->type, $rule->value) or next; |
|
115
|
|
|
|
|
|
|
|
|
116
|
39
|
100
|
|
|
|
117
|
$rule->prec and do { |
|
117
|
32
|
100
|
|
|
|
104
|
$this->_has_prev or next; |
|
118
|
12
|
100
|
|
|
|
35
|
$this->_juxtaposed($rule->prec, '_prev') or next; |
|
119
|
|
|
|
|
|
|
}; |
|
120
|
|
|
|
|
|
|
|
|
121
|
9
|
100
|
|
|
|
27
|
$rule->succ and do { |
|
122
|
6
|
50
|
|
|
|
14
|
$this->_has_next or next; |
|
123
|
6
|
100
|
|
|
|
18
|
$this->_juxtaposed($rule->succ, '_next') or next; |
|
124
|
|
|
|
|
|
|
}; |
|
125
|
|
|
|
|
|
|
|
|
126
|
5
|
|
|
|
|
16
|
$into = $rule->into; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
23
|
100
|
|
|
|
530
|
$token->type($into) if $into; |
|
129
|
|
|
|
|
|
|
|
|
130
|
23
|
|
|
|
|
49
|
$token; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
|
134
|
|
|
|
|
|
|
# @param arrayref $neighbors (type, val) pairs that might either |
|
135
|
|
|
|
|
|
|
# precede or succeed the current |
|
136
|
|
|
|
|
|
|
# Pistachio::Token, depending on $meth |
|
137
|
|
|
|
|
|
|
# @param string $meth '_prev' or '_next' |
|
138
|
|
|
|
|
|
|
# @return int 1 if the current pair is juxtaposed |
|
139
|
|
|
|
|
|
|
# with the pairs from $neighbors, or 0 |
|
140
|
|
|
|
|
|
|
sub _juxtaposed { |
|
141
|
18
|
|
|
18
|
|
32
|
my ($this, $neighbors, $meth) = @_; |
|
142
|
|
|
|
|
|
|
|
|
143
|
18
|
|
|
|
|
29
|
my ($match, $idx) = (1, $this->[IDX]); |
|
144
|
|
|
|
|
|
|
|
|
145
|
18
|
|
|
|
|
32
|
for my $n (@$neighbors) { |
|
146
|
20
|
|
|
|
|
44
|
my $token = $this->_skip_whitespace($meth); |
|
147
|
20
|
|
|
8
|
|
84
|
my ($type, $val) = ($n->[0], sub {shift eq $n->[1]}); |
|
|
8
|
|
|
|
|
51
|
|
|
148
|
20
|
|
66
|
|
|
88
|
$match = $token && $token->match($type, $val); |
|
149
|
20
|
|
|
|
|
96
|
$this->[IDX] = $idx; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
18
|
|
|
|
|
72
|
$match; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
1; |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
__END__ |