line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RTF::Lexer; |
2
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
55
|
|
3
|
1
|
|
|
1
|
|
7
|
use Exporter; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
48
|
|
4
|
1
|
|
|
1
|
|
6
|
use DynaLoader; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
68
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our @ISA = qw(DynaLoader Exporter); |
7
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use constant CWORD => 256; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
95
|
|
10
|
1
|
|
|
1
|
|
6
|
use constant CSYMB => 257; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
51
|
|
11
|
1
|
|
|
1
|
|
5
|
use constant CUNDF => 258; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
53
|
|
12
|
1
|
|
|
1
|
|
6
|
use constant CPARM => 259; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
56
|
|
13
|
1
|
|
|
1
|
|
6
|
use constant CNOPR => 260; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
14
|
1
|
|
|
1
|
|
6
|
use constant PTEXT => 261; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
47
|
|
15
|
1
|
|
|
1
|
|
5
|
use constant ENTER => 262; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
53
|
|
16
|
1
|
|
|
1
|
|
5
|
use constant LEAVE => 263; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
44
|
|
17
|
1
|
|
|
1
|
|
5
|
use constant DESTN => 264; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
50
|
|
18
|
1
|
|
|
1
|
|
5
|
use constant ENHEX => 265; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
49
|
|
19
|
1
|
|
|
1
|
|
13
|
use constant ENBIN => 266; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
42
|
|
20
|
1
|
|
|
1
|
|
5
|
use constant WRHEX => 268; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
50
|
|
21
|
1
|
|
|
1
|
|
6
|
use constant OKEOF => 300; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
54
|
|
22
|
1
|
|
|
1
|
|
5
|
use constant UNEOF => 301; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
51
|
|
23
|
1
|
|
|
1
|
|
6
|
use constant UNBRC => 302; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
874
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @EXPORT_OK; |
26
|
|
|
|
|
|
|
our %EXPORT_TAGS; |
27
|
|
|
|
|
|
|
{ |
28
|
|
|
|
|
|
|
my $package = __PACKAGE__; |
29
|
|
|
|
|
|
|
@EXPORT_OK = (grep { $_ } map { /^$package\::(\w+)$/ } keys %constant::declared); |
30
|
|
|
|
|
|
|
%EXPORT_TAGS = (all => [@EXPORT_OK]); |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub new { |
34
|
1
|
|
|
1
|
1
|
2
|
my $proto = shift; |
35
|
1
|
|
33
|
|
|
8
|
my $self = bless({}, ref($proto) || $proto); |
36
|
1
|
|
|
|
|
4
|
$self->init(@_); |
37
|
1
|
|
|
|
|
3
|
return $self; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub init{ |
41
|
1
|
|
|
1
|
0
|
3
|
my ($self, %opts) = @_; |
42
|
1
|
|
|
|
|
9
|
$self->{RTF_Lexer_backpack} = []; |
43
|
1
|
|
|
|
|
2
|
$self->{_RTF_Lexer_backpack} = undef; |
44
|
1
|
50
|
|
|
|
5
|
if($opts{in}){ |
45
|
1
|
50
|
|
|
|
4
|
unless(ref $opts{in}){ |
46
|
0
|
0
|
|
|
|
0
|
open my $fh, "< $opts{in}" or die "Can't open '$opts{in}': $!!\n"; |
47
|
0
|
|
|
|
|
0
|
$opts{in} = $fh; |
48
|
|
|
|
|
|
|
} |
49
|
1
|
|
|
|
|
68
|
$self->_set_source($opts{in}); |
50
|
1
|
|
|
|
|
3
|
$self->{_RTF_Lexer_IN} = $opts{in}; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
bootstrap RTF::Lexer $VERSION; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub get_token { |
57
|
1115
|
|
|
1115
|
1
|
995
|
my $self = shift; |
58
|
1115
|
|
|
|
|
1097
|
my $token; |
59
|
1115
|
50
|
|
|
|
990
|
if(@{$self->{RTF_Lexer_backpack}}){ |
|
1115
|
|
|
|
|
1920
|
|
60
|
0
|
|
|
|
|
0
|
$token = shift @{$self->{RTF_Lexer_backpack}}; |
|
0
|
|
|
|
|
0
|
|
61
|
|
|
|
|
|
|
}else{ |
62
|
1115
|
50
|
|
|
|
1505
|
if($self->{_RTF_Lexer_backpack}){ |
63
|
0
|
|
|
|
|
0
|
$token = $self->{_RTF_Lexer_backpack}; |
64
|
0
|
|
|
|
|
0
|
$self->{_RTF_Lexer_backpack} = undef; |
65
|
|
|
|
|
|
|
}else{ |
66
|
1115
|
|
|
|
|
1738
|
$token = [0, '']; |
67
|
1115
|
|
|
|
|
3065
|
$token->[0] = $self->_get_token($token->[1]); |
68
|
|
|
|
|
|
|
} |
69
|
1115
|
100
|
|
|
|
1990
|
if($token->[0] == CWORD){ |
70
|
641
|
|
|
|
|
552
|
my ($type, $text); |
71
|
641
|
|
|
|
|
1198
|
$type = $self->_get_token($text); |
72
|
641
|
100
|
|
|
|
1193
|
if($type == CPARM){ # Parameter that follows |
|
|
50
|
|
|
|
|
|
73
|
371
|
|
|
|
|
572
|
$token->[2] = $text; |
74
|
371
|
|
|
|
|
690
|
$type = $self->_get_token($text); # Space delimiter if any |
75
|
371
|
50
|
|
|
|
724
|
$self->{_RTF_Lexer_backpack} = [$type, $text] unless $type == CNOPR; |
76
|
|
|
|
|
|
|
}elsif($type != CNOPR){ # Not a space delimiter |
77
|
0
|
|
|
|
|
0
|
$self->{_RTF_Lexer_backpack} = [$type, $text]; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
} |
81
|
1115
|
50
|
|
|
|
3655
|
return $token->[0] ? $token : undef; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
0
|
|
|
0
|
1
|
0
|
sub unget_token { push @{$_[0]->{RTF_Lexer_backpack}}, $_[1] } |
|
0
|
|
|
|
|
0
|
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
{ |
87
|
|
|
|
|
|
|
my %stop_tokens = map { $_ => 1 } (OKEOF, UNBRC, UNEOF); |
88
|
1115
|
50
|
|
1115
|
0
|
5843
|
sub is_stop_token { $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} } |
89
|
0
|
0
|
|
0
|
0
|
|
sub add_stop_token { $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} = 1 } |
90
|
0
|
0
|
|
0
|
0
|
|
sub del_stop_token { delete $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} } |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
1; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 NAME |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
RTF::Lexer - Rich Text Format (RTF) lexical analyzer. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 SYNOPSIS |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
use RTF::Lexer qw(:all); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
my $parser = RTF::Lexer->new(in => 'text.rtf'); |
105
|
|
|
|
|
|
|
my $token; |
106
|
|
|
|
|
|
|
do { |
107
|
|
|
|
|
|
|
$token = $parser->get_token(); |
108
|
|
|
|
|
|
|
} until $parser->is_stop_token($token); |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 DESCRIPTION |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
RTF::Lexer is a low-level RTF format lexical analyzer. It splits the |
114
|
|
|
|
|
|
|
input stream into separate tokens, which can be handled by other |
115
|
|
|
|
|
|
|
high-level modules. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 METHODS |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=over 4 |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item new |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
The constructor. Accepts the only argument C which must be an input |
124
|
|
|
|
|
|
|
file handle or a file name. In the latter case if there is a failure |
125
|
|
|
|
|
|
|
while opening the file method C throws an exception. By default |
126
|
|
|
|
|
|
|
the input is read from C. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item get_token |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Returns the next token from the input stream. The token is a reference to |
131
|
|
|
|
|
|
|
an array those first element is a numeric id of the token type. The second |
132
|
|
|
|
|
|
|
element is a string representation of the token. The third element may |
133
|
|
|
|
|
|
|
exists only if the token is a control word and represents the numerical |
134
|
|
|
|
|
|
|
parameter of this control word. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
The following token types are recognized by RTF::Lexer, that are declared |
137
|
|
|
|
|
|
|
as constants in this module: |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=over 8 |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item CWORD |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Control word (eg. C<\rtf1>, C<\trowd>). |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item CSYMB |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Control symbol, mentioned in RTF Specification version 1.7. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item CUNDF |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Unknown control symbol (i.e. not mentioned in RTF Specification). |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item PTEXT |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Plain text. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item ENTER |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Start of group (C<{>). |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item LEAVE |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
End of group (C<}>). |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=item DESTN |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
End of destination group (C<}> that turns off destination mode). |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=item ENHEX |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Data in hexadecimal format that follows C<\'> control symbol. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item ENBIN |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
End of binary data block (started by C<\bin> control word). |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item WRHEX |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Symbol which is not a hexadecimal digit found where ENHEX token expected. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item OKEOF |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Normal end of input stream. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=item UNEOF |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Unexpected end of input stream. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item UNBRC |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
End of group that does not match any start of group. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
These constants are not exported by default. Any of them may be exported by request. |
196
|
|
|
|
|
|
|
All of them may be exported by the use of C<:all> export tag. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item unget_token($token) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Pushes back token C<$token> so the next call to C will return it. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item set_destination |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Turns on the destination mode, i.e. all tokens will be ignored until the end of |
205
|
|
|
|
|
|
|
current group. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=back |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 SEE ALSO |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
RTF::Tokenizer, Rich Text Format (RTF) Specification. |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=head1 BUGS |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
It is impossible to have more then one RTF::Lexer objects in a single process. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head1 AUTHOR |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Vadim O. Ustiansky |
220
|
|
|
|
|
|
|
|