File Coverage

blib/lib/RTF/TEXT/Converter.pm
Criterion Covered Total %
statement 34 38 89.4
branch 0 2 0.0
condition n/a
subroutine 11 13 84.6
pod 2 2 100.0
total 47 55 85.4


line stmt bran cond sub pod time code
1             package RTF::TEXT::Converter;
2             $RTF::TEXT::Converter::VERSION = '1.12';
3 4     4   2836 use strict;
  4         8  
  4         125  
4 4     4   20 use warnings;
  4         7  
  4         95  
5              
6 4     4   2907 use RTF::Control;
  4         10  
  4         750  
7 4     4   2744 use RTF::TEXT::Converter::ansi;
  4         12  
  4         154  
8 4     4   2181 use RTF::TEXT::Converter::charmap;
  4         16  
  4         174  
9              
10             @RTF::TEXT::Converter::ISA = qw(RTF::Control);
11              
12 4     4   23 use constant TRACE => 0;
  4         6  
  4         210  
13 4     4   18 use constant LIST_TRACE => 0;
  4         7  
  4         188  
14 4     4   17 use constant SHOW_RTF_LINE_NUMBER => 0;
  4         22  
  4         1658  
15              
16             =head1 NAME
17              
18             RTF::TEXT::Converter - Perl extension for converting RTF into text
19              
20             =head1 VERSION
21              
22             version 1.12
23              
24             =head1 DESCRIPTION
25              
26             Perl extension for converting RTF into text
27              
28             =head1 SYNOPSIS
29              
30             use strict;
31             use RTF::TEXT::Converter;
32              
33             my $object = RTF::TEXT::Converter->new(
34              
35             output => \*STDOUT
36              
37             );
38              
39             $object->parse_stream( \*RTF_FILE );
40              
41             OR
42              
43             use strict;
44             use RTF::TEXT::Converter;
45              
46             my $object = RTF::TEXT::Converter->new(
47              
48             output => \$string
49              
50             );
51              
52             $object->parse_string( $rtf_data );
53              
54             =head1 METHODS
55              
56             =head2 new()
57              
58             Constructor method. Currently takes one named parameter, C<output>,
59             which can either be a reference to a filehandle, or a reference to
60             a string. This is where our text output will end up.
61              
62             =head2 parse_stream()
63              
64             Read RTF in from a filehandle, and start processing it. Pass me
65             a reference to a filehandle.
66              
67             =head2 parse_string()
68              
69             Read RTF in from a string, and start processing it. Pass me a string.
70              
71             =head1 JUST SO YOU KNOW
72              
73             You can mix-and-match your output and input methods - nothing to stop
74             you outputting to a string when you've read from a filehandle...
75              
76             =head1 AUTHOR
77              
78             Peter Sergeant C, originally by Philippe Verdret
79              
80             =head1 COPYRIGHT
81              
82             Copyright 2004 B.
83              
84             This program is free software; you can redistribute it and/or modify it under
85             the same terms as Perl itself.
86              
87             =head1 CREDITS
88              
89             This work was carried out under a grant generously provided by The Perl Foundation -
90             give them money!
91              
92              
93             =cut
94              
95             # Symbols exported by the RTF::Output module:
96             # %info: information from the {\info ...} group
97             # %par_props: paragraph properties
98             # $style: name of the current style or pseudo-style
99             # $event: start and end on the 'document' event
100             # $text: text associated to the current style
101             # %symbol: symbol translations
102             # %do_on_control: routines associated to RTF controls
103             # %do_on_event: routines associated to events
104             # output(): a stack oriented output routine (don't use print())
105              
106             ###########################################################################
107             my $N = "\n"; # Pretty-printing
108              
109             my %charmap_defaults = map( { sprintf( "%02x", $_ ) => chr($_) } ( 0 .. 255 ) );
110              
111             # you can split on sentences here if you want!!!
112             # some output parameters
113             %do_on_event = (
114             'document' => sub { # Special action
115             },
116             # Table processing
117             'table' => sub { # end of table
118             if ( $event eq 'end' ) {
119             } else {
120             }
121             },
122             'row' => sub { # end of row
123             if ( $event eq 'end' ) {
124             output "$text$N";
125             } else {
126             # not defined
127             }
128             },
129             'cell' => sub { # end of cell
130             if ( $event eq 'end' ) {
131             output "$text$N";
132             } else {
133             # not defined
134             }
135             },
136             'par' => sub { # Default rule: if no entry for a paragraph style
137             # Paragraph styles
138             #return output($text) unless $text =~ /\S/;
139             output "$text$N";
140             }, );
141              
142             ###############################################################################
143             # If you have an &<entity>; in your RTF document and if
144             # <entity> is a character entity, you'll see "&<entity>;" in the RTF document
145             # and the corresponding glyph in the HTML document
146             # How to give a new definition to a control registered in %do_on_control:
147             # - method redefinition (could be the purist's solution)
148             # - $Control::do_on_control{control_word} = sub {};
149             # - when %do_on_control is exported write:
150              
151             # OK, so a little rewrite has gone on here. I don't like opening 'ansi'
152             # and 'char_map' files, so I've wrapped them in RTF::TEXT::Converter::ansi, and
153             # so on. This makes it an awful lot cleaner, but falls back as
154             # appropriate
155              
156             $do_on_control{'ansi'} = # callback redefinition
157             sub {
158              
159 2     2   17 my @charmap_data = $_[SELF]->charmap_reader( $_[CONTROL] );
160              
161             # Create the charset hash...
162 70         82 my %charset = (
163              
164             # Defaults...
165             %charmap_defaults,
166              
167             # Specifics from our charset file...
168 2         115 map( { s/^\s+//;
169 70         450 split /\s+/
170             } @charmap_data )
171              
172             );
173              
174             # Over-ride &char to return our character mapping
175             {
176 4     4   24 no warnings 'redefine';
  4         7  
  4         991  
  2         25  
177             *char = sub {
178 1     1   4 output $charset{ $_[1] };
179             }
180 2         19 }
181              
182             };
183              
184             # symbol processing
185             # RTF: \~
186             # named chars
187             # RTF: \ldblquote, \rdblquote
188             $symbol{'~'} = ' ';
189             $symbol{'tab'} = "\t";
190             $symbol{'ldblquote'} = '"';
191             $symbol{'rdblquote'} = '"';
192             $symbol{'line'} = "\n";
193             $symbol{'_'} = '-';
194              
195             # If we get called from a non-ansi document, then we've not redefined
196             # char() to something sensible, so we put a nice definition here...
197             sub char {
198              
199 0     0 1 0 output $charmap_defaults{ $_[1] }
200              
201             }
202              
203             sub symbol {
204 0 0   0 1 0 if ( defined( my $sym = $symbol{ $_[1] } ) ) {
205 0         0 output $sym;
206             } else {
207 0         0 output $_[1]; # as is
208             }
209             }
210              
211             1;
212             __END__