| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package RTF::Parser; |
|
2
|
|
|
|
|
|
|
$RTF::Parser::VERSION = '1.12'; |
|
3
|
14
|
|
|
14
|
|
135526
|
use strict; |
|
|
14
|
|
|
|
|
27
|
|
|
|
14
|
|
|
|
|
470
|
|
|
4
|
14
|
|
|
14
|
|
72
|
use warnings; |
|
|
14
|
|
|
|
|
20
|
|
|
|
14
|
|
|
|
|
853
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
RTF::Parser - A DEPRECATED event-driven RTF Parser |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
version 1.12 |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
A DEPRECATED event-driven RTF Parser |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 PUBLIC SERVICE ANNOUNCEMENT |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
B
|
|
21
|
|
|
|
|
|
|
Need rtf2*? Google for pandoc.> |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
A very short history lesson... |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
C<1.07> of this module was released in 1999 by the original author, |
|
26
|
|
|
|
|
|
|
Philippe Verdret. I took over the module around 2004 with high intentions. I |
|
27
|
|
|
|
|
|
|
added almost all of the POD, all of the tests, and most of the comments, and |
|
28
|
|
|
|
|
|
|
rejigged the whole thing to use L for tokenizing the incoming |
|
29
|
|
|
|
|
|
|
RTF, which fixed a whole class of problems. |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
The big problem is really that the whole module is an API which happens to have |
|
32
|
|
|
|
|
|
|
C and C stuck on top of it. Any serious changes involve |
|
33
|
|
|
|
|
|
|
breaking the API, and that seems the greater sin than telling people to go and |
|
34
|
|
|
|
|
|
|
get themselves a better RTF convertor suite. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
I had high hopes of overhauling the whole thing, but it didn't happen. I handed |
|
37
|
|
|
|
|
|
|
over maintainership some years later, but no new version was forthcoming, and |
|
38
|
|
|
|
|
|
|
the module has languished since then. There are many open bugs on rt.cpan.org |
|
39
|
|
|
|
|
|
|
and in the reviews. |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
In a moment of weakness, I've picked up the module again with the aim of |
|
42
|
|
|
|
|
|
|
adding this message, fixing one or two very minor bugs, and putting a version |
|
43
|
|
|
|
|
|
|
that doesn't have B in big red letters on the CPAN. |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
I doubt I'll ever tackle the bigger bugs (Unicode support), but I will accept |
|
46
|
|
|
|
|
|
|
patches I can understand. |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 IMPORTANT HINTS |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
RTF parsing is non-trivial. The inner workings of these modules are somewhat |
|
51
|
|
|
|
|
|
|
scary. You should go and read the 'Introduction' document included with this |
|
52
|
|
|
|
|
|
|
distribution before going any further - it explains how this distribution fits |
|
53
|
|
|
|
|
|
|
together, and is B reading. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
If you just want to convert RTF to HTML or text, from inside your own script, |
|
56
|
|
|
|
|
|
|
jump straight to the docs for L or L |
|
57
|
|
|
|
|
|
|
respectively. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 SUBCLASSING RTF::PARSER |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
When you subclass RTF::Parser, you'll want to do two things. You'll firstly |
|
62
|
|
|
|
|
|
|
want to overwrite the methods below described as the API. This describes what |
|
63
|
|
|
|
|
|
|
we do when we have tokens that aren't control words (except 'symbols' - see below). |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Then you'll want to create a hash that maps control words to code references |
|
66
|
|
|
|
|
|
|
that you want executed. They'll get passed a copy of the RTF::Parser object, |
|
67
|
|
|
|
|
|
|
the name of the control word (say, 'b'), any arguments passed with the control |
|
68
|
|
|
|
|
|
|
word, and then 'start'. |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head2 An example... |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
The following code removes bold tags from RTF documents, and then spits back |
|
73
|
|
|
|
|
|
|
out RTF. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
{ |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Create our subclass |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
package UnboldRTF; |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# We'll be doing lots of printing without newlines, so don't buffer output |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
$|++; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Subclassing magic... |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
use RTF::Parser; |
|
88
|
|
|
|
|
|
|
@UnboldRTF::ISA = ( 'RTF::Parser' ); |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Redefine the API nicely |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub parse_start { print STDERR "Starting...\n"; } |
|
93
|
|
|
|
|
|
|
sub group_start { print '{' } |
|
94
|
|
|
|
|
|
|
sub group_end { print '}' } |
|
95
|
|
|
|
|
|
|
sub text { print "\n" . $_[1] } |
|
96
|
|
|
|
|
|
|
sub char { print "\\\'$_[1]" } |
|
97
|
|
|
|
|
|
|
sub symbol { print "\\$_[1]" } |
|
98
|
|
|
|
|
|
|
sub parse_end { print STDERR "All done...\n"; } |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
my %do_on_control = ( |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# What to do when we see any control we don't have |
|
105
|
|
|
|
|
|
|
# a specific action for... In this case, we print it. |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
'__DEFAULT__' => sub { |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my ( $self, $type, $arg ) = @_; |
|
110
|
|
|
|
|
|
|
$arg = "\n" unless defined $arg; |
|
111
|
|
|
|
|
|
|
print "\\$type$arg"; |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
}, |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# When we come across a bold tag, we just ignore it. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
'b' => sub {}, |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
); |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Grab STDIN... |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
my $data = join '', (<>); |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Create an instance of the class we created above |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
my $parser = UnboldRTF->new(); |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Prime the object with our control handlers... |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
$parser->control_definition( \%do_on_control ); |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Don't skip undefined destinations... |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
$parser->dont_skip_destinations(1); |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# Start the parsing! |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
$parser->parse_string( $data ); |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head1 METHODS |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
|
|
146
|
14
|
|
|
14
|
|
68
|
use vars qw($VERSION); |
|
|
14
|
|
|
|
|
34
|
|
|
|
14
|
|
|
|
|
835
|
|
|
147
|
|
|
|
|
|
|
|
|
148
|
14
|
|
|
14
|
|
75
|
use Carp; |
|
|
14
|
|
|
|
|
22
|
|
|
|
14
|
|
|
|
|
1449
|
|
|
149
|
14
|
|
|
14
|
|
28035
|
use RTF::Tokenizer 1.01; |
|
|
14
|
|
|
|
|
281456
|
|
|
|
14
|
|
|
|
|
456
|
|
|
150
|
14
|
|
|
14
|
|
16585
|
use RTF::Config; |
|
|
14
|
|
|
|
|
36
|
|
|
|
14
|
|
|
|
|
1836
|
|
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
my $DEBUG = 0; |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Debugging stuff I'm leaving in in case someone is using it.., |
|
155
|
14
|
|
|
14
|
|
79
|
use constant PARSER_TRACE => 0; |
|
|
14
|
|
|
|
|
26
|
|
|
|
14
|
|
|
|
|
8365
|
|
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub backtrace { |
|
158
|
0
|
|
|
0
|
0
|
0
|
Carp::confess; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
$SIG{'INT'} = \&backtrace if PARSER_TRACE; |
|
162
|
|
|
|
|
|
|
$SIG{__DIE__} = \&backtrace if PARSER_TRACE; |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 new |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Creates a new RTF::Parser object. Doesn't accept any arguments. |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub new { |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# Get the real class name |
|
173
|
17
|
|
|
17
|
1
|
7688
|
my $proto = shift; |
|
174
|
17
|
|
33
|
|
|
113
|
my $class = ref($proto) || $proto; |
|
175
|
|
|
|
|
|
|
|
|
176
|
17
|
|
|
|
|
42
|
my $self = {}; |
|
177
|
|
|
|
|
|
|
|
|
178
|
17
|
100
|
|
|
|
104
|
$self->{_RTF_CONTROL_USED}++ if $INC{'RTF/Control.pm'}; |
|
179
|
|
|
|
|
|
|
|
|
180
|
17
|
|
|
|
|
44
|
$self->{_DONT_SKIP_DESTINATIONS} = 0; |
|
181
|
|
|
|
|
|
|
|
|
182
|
17
|
|
|
|
|
46
|
bless $self, $class; |
|
183
|
|
|
|
|
|
|
|
|
184
|
17
|
|
|
|
|
52
|
return $self; |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# For backwards compatability, we import RTF::Control's %do_on_control |
|
189
|
|
|
|
|
|
|
# if we've loaded RTF::Control (which would suggest we're being subclassed |
|
190
|
|
|
|
|
|
|
# by RTF::Control). This isn't nice or pretty, but it doesn't break things. |
|
191
|
|
|
|
|
|
|
# I'd do this in new() but there's no guarentee it'll be set by then... |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub _install_do_on_control { |
|
194
|
|
|
|
|
|
|
|
|
195
|
14
|
|
|
14
|
|
22
|
my $self = shift; |
|
196
|
|
|
|
|
|
|
|
|
197
|
14
|
100
|
|
|
|
68
|
return if $self->{_DO_ON_CONTROL}; |
|
198
|
|
|
|
|
|
|
|
|
199
|
8
|
100
|
|
|
|
28
|
if ( $self->{_RTF_CONTROL_USED} ) { |
|
200
|
|
|
|
|
|
|
|
|
201
|
4
|
|
|
|
|
20
|
$self->{_DO_ON_CONTROL} = \%RTF::Control::do_on_control; |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
} else { |
|
204
|
|
|
|
|
|
|
|
|
205
|
4
|
|
|
|
|
13
|
$self->{_DO_ON_CONTROL} = {}; |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 parse_stream( \*FH ) |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
This function used to accept a second parameter - a function specifying how |
|
214
|
|
|
|
|
|
|
the filehandle should be read. This is deprecated, because I could find no |
|
215
|
|
|
|
|
|
|
examples of people using it, nor could I see why people might want to use it. |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Pass this function a reference to a filehandle (or, now, a filename! yay) to |
|
218
|
|
|
|
|
|
|
begin reading and processing. |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=cut |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub parse_stream { |
|
223
|
|
|
|
|
|
|
|
|
224
|
1
|
|
|
1
|
1
|
6
|
my $self = shift; |
|
225
|
1
|
|
|
|
|
2
|
my $stream = shift; |
|
226
|
1
|
|
|
|
|
1
|
my $reader = shift; |
|
227
|
|
|
|
|
|
|
|
|
228
|
1
|
|
|
|
|
5
|
$self->_install_do_on_control(); |
|
229
|
|
|
|
|
|
|
|
|
230
|
1
|
50
|
|
|
|
3
|
die("parse_stream no longer accepts a reader") if $reader; |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Put an appropriately primed RTF::Tokenizer object into our object |
|
233
|
1
|
|
|
|
|
10
|
$self->{_TOKENIZER} = RTF::Tokenizer->new( file => $stream ); |
|
234
|
|
|
|
|
|
|
|
|
235
|
1
|
|
|
|
|
110
|
$self->_parse(); |
|
236
|
|
|
|
|
|
|
|
|
237
|
1
|
|
|
|
|
2
|
return $self; |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head2 parse_string( $string ) |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Pass this function a string to begin reading and processing. |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub parse_string { |
|
248
|
|
|
|
|
|
|
|
|
249
|
13
|
|
|
13
|
1
|
478
|
my $self = shift; |
|
250
|
13
|
|
|
|
|
24
|
my $string = shift; |
|
251
|
|
|
|
|
|
|
|
|
252
|
13
|
|
|
|
|
91
|
$self->_install_do_on_control(); |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Put an appropriately primed RTF::Tokenizer object into our object |
|
255
|
13
|
|
|
|
|
79
|
$self->{_TOKENIZER} = RTF::Tokenizer->new( string => $string ); |
|
256
|
|
|
|
|
|
|
|
|
257
|
13
|
|
|
|
|
549
|
$self->_parse(); |
|
258
|
|
|
|
|
|
|
|
|
259
|
13
|
|
|
|
|
44
|
return $self; |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
} |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head2 control_definition |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
The code that's executed when we trigger a control event is kept |
|
266
|
|
|
|
|
|
|
in a hash. We're holding this somewhere in our object. Earlier |
|
267
|
|
|
|
|
|
|
versions would make the assumption we're being subclassed by |
|
268
|
|
|
|
|
|
|
RTF::Control, which isn't something I want to assume. If you are |
|
269
|
|
|
|
|
|
|
using RTF::Control, you don't need to worry about this, because |
|
270
|
|
|
|
|
|
|
we're grabbing %RTF::Control::do_on_control, and using that. |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Otherwise, you pass this method a reference to a hash where the keys |
|
273
|
|
|
|
|
|
|
are control words, and the values are coderefs that you want executed. |
|
274
|
|
|
|
|
|
|
This sets all the callbacks... The arguments passed to your coderefs |
|
275
|
|
|
|
|
|
|
are: $self, control word itself (like, say, 'par'), any parameter the |
|
276
|
|
|
|
|
|
|
control word had, and then 'start'. |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
If you don't pass it a reference, you get back the reference of the |
|
279
|
|
|
|
|
|
|
current control hash we're holding. |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=cut |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub control_definition { |
|
284
|
|
|
|
|
|
|
|
|
285
|
4
|
|
|
4
|
1
|
31
|
my $self = shift; |
|
286
|
|
|
|
|
|
|
|
|
287
|
4
|
100
|
|
|
|
14
|
if (@_) { |
|
288
|
|
|
|
|
|
|
|
|
289
|
2
|
50
|
|
|
|
9
|
if ( ref $_[0] eq 'HASH' ) { |
|
290
|
|
|
|
|
|
|
|
|
291
|
2
|
|
|
|
|
8
|
$self->{_DO_ON_CONTROL} = shift; |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
} else { |
|
294
|
|
|
|
|
|
|
|
|
295
|
0
|
|
|
|
|
0
|
die "argument of control_definition() method must be an HASHREF"; |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
} else { |
|
300
|
|
|
|
|
|
|
|
|
301
|
2
|
|
|
|
|
15
|
return $self->{_DO_ON_CONTROL}; |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head2 rtf_control_emulation |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
If you pass it a boolean argument, it'll set whether or not it thinks RTF::Control |
|
310
|
|
|
|
|
|
|
has been loaded. If you don't pass it an argument, it'll return what it thinks... |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=cut |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub rtf_control_emulation { |
|
315
|
|
|
|
|
|
|
|
|
316
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
317
|
0
|
|
|
|
|
0
|
my $bool = shift; |
|
318
|
|
|
|
|
|
|
|
|
319
|
0
|
0
|
|
|
|
0
|
if ( defined $bool ) { |
|
320
|
|
|
|
|
|
|
|
|
321
|
0
|
|
|
|
|
0
|
$self->{_RTF_CONTROL_USED} = $bool; |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
} else { |
|
324
|
|
|
|
|
|
|
|
|
325
|
0
|
|
|
|
|
0
|
return $self->{_RTF_CONTROL_USED}; |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 dont_skip_destinations |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
The RTF spec says that we skip any destinations that we don't have an explicit |
|
334
|
|
|
|
|
|
|
handler for. You could well not want this. Accepts a boolean argument, true |
|
335
|
|
|
|
|
|
|
to process destinations, 0 to skip the ones we don't understand. |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub dont_skip_destinations { |
|
340
|
|
|
|
|
|
|
|
|
341
|
2
|
|
|
2
|
1
|
24
|
my $self = shift; |
|
342
|
2
|
|
|
|
|
4
|
my $bool = shift; |
|
343
|
|
|
|
|
|
|
|
|
344
|
2
|
|
|
|
|
6
|
$self->{_DONT_SKIP_DESTINATIONS} = $bool; |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# This is how he decided to call control actions. Leaving |
|
349
|
|
|
|
|
|
|
# it to do the right thing at the moment... Users of the |
|
350
|
|
|
|
|
|
|
# module don't need to know our dirty little secret... |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
{ |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
package RTF::Action; |
|
355
|
|
|
|
|
|
|
$RTF::Action::VERSION = '1.12'; |
|
356
|
14
|
|
|
14
|
|
83
|
use RTF::Config; |
|
|
14
|
|
|
|
|
18
|
|
|
|
14
|
|
|
|
|
1450
|
|
|
357
|
|
|
|
|
|
|
|
|
358
|
14
|
|
|
14
|
|
68
|
use vars qw($AUTOLOAD); |
|
|
14
|
|
|
|
|
24
|
|
|
|
14
|
|
|
|
|
1919
|
|
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
my $default; |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# The original RTF::Parser allowed $LOGFILE to be set |
|
363
|
|
|
|
|
|
|
# that made RTF::Config do fun things. We're allowing it |
|
364
|
|
|
|
|
|
|
# to, but wrapping it up a bit more carefully... |
|
365
|
|
|
|
|
|
|
if ($LOG_FILE) { |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
$default = sub { $RTF::Control::not_processed{ $_[1] }++ } |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
my $sub; |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
sub AUTOLOAD { |
|
374
|
|
|
|
|
|
|
|
|
375
|
61
|
|
|
61
|
|
90
|
my $self = $_[0]; |
|
376
|
|
|
|
|
|
|
|
|
377
|
61
|
|
|
|
|
238
|
$AUTOLOAD =~ s/^.*:://; |
|
378
|
|
|
|
|
|
|
|
|
379
|
14
|
|
|
14
|
|
83
|
no strict 'refs'; |
|
|
14
|
|
|
|
|
31
|
|
|
|
14
|
|
|
|
|
12758
|
|
|
380
|
|
|
|
|
|
|
|
|
381
|
61
|
100
|
|
|
|
196
|
if ( defined( $sub = $self->{_DO_ON_CONTROL}->{$AUTOLOAD} ) ) { |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# Yuck, empty if. But we're just going to leave it for a while |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
} else { |
|
386
|
|
|
|
|
|
|
|
|
387
|
36
|
50
|
|
|
|
97
|
if ($default) { |
|
|
|
100
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
|
|
389
|
0
|
|
|
|
|
0
|
$sub = $default |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
} elsif ( $self->{_DO_ON_CONTROL}->{'__DEFAULT__'} ) { |
|
392
|
|
|
|
|
|
|
|
|
393
|
10
|
|
|
|
|
17
|
$sub = $self->{_DO_ON_CONTROL}->{'__DEFAULT__'}; |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
} else { |
|
396
|
|
|
|
|
|
|
|
|
397
|
26
|
|
|
27
|
|
148
|
$sub = sub { }; |
|
|
27
|
|
|
|
|
64
|
|
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
} |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
} |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# I don't understand why he's using goto here... |
|
404
|
61
|
|
|
|
|
157
|
*$AUTOLOAD = $sub; |
|
405
|
61
|
|
|
|
|
194
|
goto &$sub; |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
} |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
} |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=head1 API |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
These are some methods that you're going to want to over-ride if you |
|
414
|
|
|
|
|
|
|
subclass this modules. In general though, people seem to want to subclass |
|
415
|
|
|
|
|
|
|
RTF::Control, which subclasses this module. |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 parse_start |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Called before we start parsing... |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head2 parse_end |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Called when we're finished parsing |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 group_start |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Called when we encounter an opening { |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head2 group_end |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Called when we encounter a closing } |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head2 text |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Called when we encounter plain-text. Is given the text as its |
|
436
|
|
|
|
|
|
|
first argument |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 char |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Called when we encounter a hex-escaped character. The hex characters |
|
441
|
|
|
|
|
|
|
are passed as the first argument. |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=head2 symbol |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
Called when we come across a control character. This is interesting, because, |
|
446
|
|
|
|
|
|
|
I'd have treated these as control words, so, I'm using Philippe's list as control |
|
447
|
|
|
|
|
|
|
words that'll trigger this for you. These are C<-_~:|{}*'\>. This needs to be |
|
448
|
|
|
|
|
|
|
tested. |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 bitmap |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Called when we come across a command that's talking about a linked bitmap |
|
453
|
|
|
|
|
|
|
file. You're given the file name. |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 binary |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Called when we have binary data. You get passed it. |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
|
460
|
|
|
|
|
|
|
|
|
461
|
4
|
|
|
4
|
1
|
8
|
sub parse_start { } |
|
462
|
4
|
|
|
4
|
1
|
5
|
sub parse_end { } |
|
463
|
4
|
|
|
4
|
1
|
7
|
sub group_start { } |
|
464
|
4
|
|
|
4
|
1
|
6
|
sub group_end { } |
|
465
|
0
|
|
|
0
|
1
|
0
|
sub text { } |
|
466
|
0
|
|
|
0
|
1
|
0
|
sub char { } |
|
467
|
0
|
|
|
0
|
1
|
0
|
sub symbol { } # -_~:|{}*'\ |
|
468
|
0
|
|
|
0
|
1
|
0
|
sub bitmap { } # \{bm(?:[clr]|cwd) |
|
469
|
0
|
|
|
0
|
1
|
0
|
sub binary { } |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
# This is the big, bad parse routine that isn't called directly. |
|
472
|
|
|
|
|
|
|
# We loop around RTF::Tokenizer, making event calls when we need to. |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
sub _parse { |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# Read in our object |
|
477
|
14
|
|
|
14
|
|
26
|
my $self = shift; |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# Execute any pre-parse subroutines |
|
480
|
14
|
|
|
|
|
75
|
$self->parse_start(); |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
# Loop until we find the EOF |
|
483
|
14
|
|
|
|
|
25
|
while (1) { |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
# Read in our initial token |
|
486
|
174
|
|
|
|
|
711
|
my ( $token_type, $token_argument, $token_parameter ) = |
|
487
|
|
|
|
|
|
|
$self->{_TOKENIZER}->get_token(); |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# Control words |
|
490
|
174
|
100
|
|
|
|
3642
|
if ( $token_type eq 'control' ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# We have a special handler for control words |
|
493
|
79
|
|
|
|
|
317
|
$self->_control( $token_argument, $token_parameter ); |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
# Plain text |
|
496
|
|
|
|
|
|
|
} elsif ( $token_type eq 'text' ) { |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# Send it to the text() routine |
|
499
|
34
|
|
|
|
|
258
|
$self->text($token_argument); |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
# Groups |
|
502
|
|
|
|
|
|
|
} elsif ( $token_type eq 'group' ) { |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# Call the appropriate handler |
|
505
|
47
|
100
|
|
|
|
195
|
$token_argument ? $self->group_start : |
|
506
|
|
|
|
|
|
|
$self->group_end; |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
# EOF |
|
509
|
|
|
|
|
|
|
} else { |
|
510
|
|
|
|
|
|
|
|
|
511
|
14
|
|
|
|
|
28
|
last; |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
} |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# All done |
|
518
|
14
|
|
|
|
|
63
|
$self->parse_end(); |
|
519
|
14
|
|
|
|
|
25
|
$self; |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
} |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Control word handler (yeuch) |
|
524
|
|
|
|
|
|
|
# purl, be RTF barbie is Control words are *HARD*! |
|
525
|
|
|
|
|
|
|
sub _control { |
|
526
|
|
|
|
|
|
|
|
|
527
|
79
|
|
|
79
|
|
107
|
my $self = shift; |
|
528
|
79
|
|
|
|
|
95
|
my $type = shift; |
|
529
|
79
|
|
|
|
|
90
|
my $arg = shift; |
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# standard, control_symbols, hex |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# Funky destination |
|
534
|
79
|
100
|
|
|
|
313
|
if ( $type eq '*' ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# We might actually want to process it... |
|
537
|
2
|
100
|
|
|
|
5
|
if ( $self->{_DONT_SKIP_DESTINATIONS} ) { |
|
538
|
|
|
|
|
|
|
|
|
539
|
1
|
|
|
|
|
3
|
$self->_control_execute('*'); |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
} else { |
|
542
|
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
# Grab the next token |
|
544
|
1
|
|
|
|
|
3
|
my ( $token_type, $token_argument, $token_parameter ) = |
|
545
|
|
|
|
|
|
|
$self->{_TOKENIZER}->get_token(); |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
# Basic sanity check |
|
548
|
1
|
50
|
|
|
|
20
|
croak('Malformed RTF - \* not followed by a control...') |
|
549
|
|
|
|
|
|
|
unless $token_type eq 'control'; |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
# Do we have a handler for it? |
|
552
|
1
|
50
|
|
|
|
3
|
if ( defined $self->{_DO_ON_CONTROL}->{$token_argument} ) { |
|
553
|
0
|
|
|
|
|
0
|
$self->_control_execute( $token_argument, $token_parameter ); |
|
554
|
|
|
|
|
|
|
} else { |
|
555
|
1
|
|
|
|
|
4
|
$self->_skip_group(); |
|
556
|
1
|
|
|
|
|
3
|
$self->group_end(); |
|
557
|
|
|
|
|
|
|
} |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# Binary data |
|
561
|
|
|
|
|
|
|
} elsif ( $type eq 'bin' ) { |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
# Grab the next token |
|
564
|
0
|
|
|
|
|
0
|
my ( $token_type, $token_argument, $token_parameter ) = |
|
565
|
|
|
|
|
|
|
$self->{_TOKENIZER}->get_token(); |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# Basic sanity check |
|
568
|
0
|
0
|
|
|
|
0
|
croak('Malformed RTF - \bin not followed by text...') |
|
569
|
|
|
|
|
|
|
unless $token_type eq 'text'; |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# Send it to the handler |
|
572
|
0
|
|
|
|
|
0
|
$self->binary($token_argument); |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# Implement a bitmap handler here |
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
# Control symbols |
|
577
|
|
|
|
|
|
|
} elsif ( $type =~ m/[-_~:|{}*\\]/ ) { |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
# Send it to the handler |
|
580
|
1
|
|
|
|
|
4
|
$self->symbol($type); |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# Entity |
|
583
|
|
|
|
|
|
|
} elsif ( $type eq "'" ) { |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
# Entity handler |
|
586
|
3
|
|
|
|
|
12
|
$self->char($arg); |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Some other control type - give it to the control executer |
|
589
|
|
|
|
|
|
|
} else { |
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
# Pass it to our default executer |
|
592
|
73
|
|
|
|
|
206
|
$self->_control_execute( $type, $arg ) |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
} |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
} |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
# Control word executer (this is nasty) |
|
599
|
|
|
|
|
|
|
sub _control_execute { |
|
600
|
|
|
|
|
|
|
|
|
601
|
74
|
|
|
74
|
|
80
|
my $self = shift; |
|
602
|
74
|
|
|
|
|
141
|
my $type = shift; |
|
603
|
74
|
|
|
|
|
83
|
my $arg = shift; |
|
604
|
|
|
|
|
|
|
|
|
605
|
14
|
|
|
14
|
|
86
|
no strict 'refs'; |
|
|
14
|
|
|
|
|
26
|
|
|
|
14
|
|
|
|
|
2488
|
|
|
606
|
74
|
|
|
|
|
77
|
&{"RTF::Action::$type"}( $self, $type, $arg, 'start' ); |
|
|
74
|
|
|
|
|
612
|
|
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
} |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
# Skip a group |
|
611
|
|
|
|
|
|
|
sub _skip_group { |
|
612
|
|
|
|
|
|
|
|
|
613
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
|
614
|
|
|
|
|
|
|
|
|
615
|
1
|
|
|
|
|
1
|
my $level_counter = 1; |
|
616
|
|
|
|
|
|
|
|
|
617
|
1
|
|
|
|
|
3
|
while ($level_counter) { |
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
# Get a token |
|
620
|
2
|
|
|
|
|
5
|
my ( $token_type, $token_argument, $token_parameter ) = |
|
621
|
|
|
|
|
|
|
$self->{_TOKENIZER}->get_token(); |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
# Make sure we can't loop forever |
|
624
|
2
|
50
|
|
|
|
31
|
last if $token_type eq 'eof'; |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
# We're in business if it's a group |
|
627
|
2
|
100
|
|
|
|
6
|
if ( $token_type eq 'group' ) { |
|
628
|
|
|
|
|
|
|
|
|
629
|
1
|
50
|
|
|
|
6
|
$token_argument ? $level_counter++ : |
|
630
|
|
|
|
|
|
|
$level_counter--; |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
} |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
} |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
1; |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=head1 AUTHOR |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
Peter Sergeant C, originally by Philippe Verdret |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
Copyright 2004 B. |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
|
649
|
|
|
|
|
|
|
the same terms as Perl itself. |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=head1 CREDITS |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
This work was carried out under a grant generously provided by The Perl Foundation - |
|
654
|
|
|
|
|
|
|
give them money! |