File Coverage

blib/lib/OpenOffice/PerlPoint.pm
Criterion Covered Total %
statement 31 33 93.9
branch n/a
condition n/a
subroutine 11 11 100.0
pod n/a
total 42 44 95.4


line stmt bran cond sub pod time code
1              
2             # = HISTORY SECTION =====================================================================
3              
4             # ---------------------------------------------------------------------------------------
5             # version | date | author | changes
6             # ---------------------------------------------------------------------------------------
7             # 0.03 |05.07.2005| JSTENZEL | the meta data prolog is template driven now, thus
8             # | | | allowing individual results;
9             # | | JSTENZEL | reactivated $docDate as now with the template approach
10             # | | | it can be excluded from tests;
11             # | | JSTENZEL | new constructor option to suppress meta data handling
12             # | | | completely;
13             # | | JSTENZEL | user defined document data keys now configured via
14             # | | | constructor option;
15             # |10.07.2005| JSTENZEL | oo2pp() starts result with two newlines now, to
16             # | | | guarantee correct detection of the first paragraph;
17             # 0.02 |29.06.2005| JSTENZEL | new constructor option: imagebufferdir allows to set
18             # | | | up a user defined name for the image buffer directory,
19             # | | | which can be relative or absolute;
20             # | | JSTENZEL | added handling of external images;
21             # |02.07.2005| JSTENZEL | bugfix: $docDate value was title, not date;
22             # | | JSTENZEL | checking if an image URL host is available, via Net::Ping;
23             # | | JSTENZEL | new user defined field support: authormail;
24             # |03.07.2005| JSTENZEL | deactivated $docDate setting as it is difficult to handle
25             # | | | with changing locales, suspended till all the variables
26             # | | | can be set by options;
27             # 0.01 |19.06.2005| JSTENZEL | First version, parts derived or inspired from/by an
28             # | | | oo2pod example in OpenOffice::OODoc 1.309 and modules
29             # | | | in the OpenOffice::OODoc 2.00 distribution.
30             # ---------------------------------------------------------------------------------------
31              
32             # = POD SECTION =========================================================================
33              
34             =head1 NAME
35              
36             B - an Open Office / Open Document to PerlPoint converter class
37              
38             =head1 VERSION
39              
40             This manual describes version B<0.03>.
41              
42             =head1 SYNOPSIS
43              
44             # load the module
45             use OpenOffice::PerlPoint;
46              
47             # build an object
48             my $oo2pp=new OpenOffice::PerlPoint(file=>$ooFile);
49              
50             # convert document
51             my $perlpoint=$oo2pp->oo2pp;
52              
53              
54             =head1 DESCRIPTION
55              
56             C is a translator class to transform Open Office 1.0 and 2.0 (and
57             generally OASIS Open Document) documents into PerlPoint sources. It is based on
58             C.
59              
60             Once you have transformed an Open Office or Open Document document into PerlPoint, it may
61             be furtherly processed using the PerlPoint utilities.
62              
63             If you prefer, you do not need to perform an explicit transformation. Beginning with
64             release 0.40, C can process Open Office / Open Document sources directly.
65             Please see C for details, or the documentation that comes with PerlPoint.
66              
67             B of the source format features.>
68             Please see the I sections below.
69              
70             =head1 METHODS
71              
72             =cut
73              
74              
75             # declare package
76             package OpenOffice::PerlPoint;
77              
78             # declare version
79             $VERSION=0.03;
80              
81             # pragmata
82 1     1   26844 use strict;
  1         2  
  1         33  
83              
84             # load modules
85 1     1   7 use Carp;
  1         2  
  1         58  
86 1     1   18163 use Safe;
  1         55449  
  1         73  
87 1     1   3315 use Storable;
  1         5299  
  1         85  
88 1     1   4203 use Net::Ping;
  1         89935  
  1         242  
89 1     1   1524 use Text::Wrapper;
  1         1971  
  1         44  
90 1     1   9 use File::Basename;
  1         2  
  1         158  
91 1     1   13437 use LWP::UserAgent;
  1         73317  
  1         41  
92 1     1   1510 use Text::Template;
  1         5138  
  1         67  
93 1     1   9 use POSIX qw(strftime);
  1         3  
  1         12  
94 1     1   2021 use OpenOffice::OODoc 2.00;
  0            
  0            
95              
96             # declare attributes
97             use fields qw(
98             file
99             archive
100              
101             metadata
102             docContent
103             docStyles
104             content
105             notes
106              
107             metaData
108              
109             userAgent
110             ping
111              
112             skipmetadata
113             imagebufferdir
114             metadataTemplate
115             userdefinedDocdata
116             );
117              
118              
119             # define data: delimiter handling
120             my %delimiters=(
121             'text:footnote-citation' => {
122             begin => '[',
123             end => ']',
124             },
125             'text:footnote-body' => {
126             begin => '{NOTE: ',
127             end => '}',
128             },
129             'text:span' => {
130             begin => '<<',
131             end => '>>',
132             },
133             'text:list-item' => {
134             begin => '',
135             end => '',
136             },
137             );
138              
139              
140             # define data: style extraction directives for traversal (see traverseElement() below)
141             my @styles=(
142             ['B', 'properties', 'fo:font-weight', 'bold', 0, '\B<', '>'],
143             ['I', 'properties', 'fo:font-style', 'italic', 0, '\I<', '>'],
144             ['U', 'properties', 'style:text-underline', 'single', 0, '\U<', '>'], # OO 1.0
145             ['U', 'properties', 'style:text-underline-style', 'solid', 0, '\U<', '>'], # OD (missing the previous part -bug?!)
146             ['F', 'properties', 'fo:color', qr/^(\#[\da-fA-F]{6})$/, 0, '\F{color="$1"}<', '>'], # first backslash is for highlightning
147             ['C', 'properties', 'style:font-name', qr/^(Courier New)$/, 0, '\C<', '>'],
148             ['BLOCK', 'references', 'style:parent-style-name', qr/^(Code)$/, 0, ' ' x 3, ''],
149             );
150              
151              
152              
153             # init wrappers
154             my ($paragraphWrapper, $listWrapper);
155             $paragraphWrapper=Text::Wrapper->new(
156             columns => 76,
157             par_start => '',
158             body_start => ''
159             );
160            
161             $listWrapper=Text::Wrapper->new(
162             columns => 76,
163             par_start => ' ',
164             body_start => ' '
165             );
166              
167             =pod
168              
169             =head2 new()
170              
171             The constructor.
172              
173             B
174              
175             All parameters except the first are named.
176              
177             =over 4
178              
179             =item class
180              
181             The target class name. This parameter is set automatically if you use the usual Perl syntax
182             to call a constructor.
183              
184             =item file
185              
186             The (absolute or relative) path to the Office document that should be converted.
187              
188             =item imagebufferdir
189              
190             OO document images refer to images stored within the document or located externally at
191             a location that is specified by an URL. Both image sources cannot be accessed by PerlPoint,
192             so the converter makes copies from those sources and refers to I. The C
193             option specifies where these intermediate copies should be stored. The directory is made
194             unless it already exists.
195              
196             A I path will result in a directory relative to the document. An I path
197             is suitable if images from various documents should be collected in one place, or if the
198             resulting PerlPoint document should be written to a special path.
199              
200              
201             =item metadataTemplate
202              
203             A template to include document meta data to the transformation result. The template is
204             expected to be in C format, in a safe compartment.
205              
206             These data are available:
207              
208             =over 4
209              
210             =item %metadata
211              
212             A hash of all document meta data. The keys of this hash are the following, while the
213             values hold the document data assigned to that keys.
214              
215             =over 4
216            
217             =item title
218              
219             document title
220              
221             =item subject
222              
223             document subject
224              
225             =item description
226              
227             document description
228              
229             =item creator
230              
231             document author
232              
233             =item date
234              
235             last modification
236              
237             =item keywords
238              
239             keywords describing the document
240              
241             =item User defined fields
242              
243             All names defined by C.
244              
245             =back
246              
247             =item %tools
248              
249             Keys: C holds the name of the program that wrote the OO document.
250             C holds the name of the converter, usually the name of this module.
251              
252             =item $source
253              
254             Source name, usually the name set by option C.
255              
256             =back
257              
258             This option has no effect if C is set.
259              
260              
261             =item skipmetadata
262              
263             If set to a true value meta data processing is bypassed.
264              
265              
266             =item userdefinedDocdata
267              
268             Each OO document can be described by various predefined data, which are set automatically
269             (like the modification date) or set up by the document author in a dialog (like the
270             documents title). Additionally, OO allows to define up to four user informations. Called
271             C to I by default, they can be named individually if required.
272              
273             This option expects a reference to an array of names for those user defined document data entries.
274             The names can be used in templates passed in via option C to access the data
275             stored in the related document fields.
276              
277             =back
278              
279             B the new object.
280              
281             B
282              
283             # build an object
284             my $oo2pp=new OpenOffice::PerlPoint(file=>$ooFile);
285              
286             =cut
287             sub new
288             {
289             # get parameters
290             my ($class, @pars)=@_;
291              
292             # build parameter hash
293             confess "[BUG] The number of parameters should be even - use named parameters, please.\n" if @pars%2;
294             my %pars=@pars;
295              
296             # check parameters
297             confess "[BUG] Missing class name.\n" unless $class;
298             confess "[BUG] Missing file parameter.\n" unless exists $pars{file};
299             confess "[BUG] Missing image buffer directory parameter.\n" unless exists $pars{imagebufferdir};
300              
301             # build object
302             my __PACKAGE__ $me=fields::new($class);
303              
304             # store configuration
305             $me->{$_}=$pars{$_} for qw(
306             file
307             imagebufferdir
308             skipmetadata
309             metadataTemplate
310             userdefinedDocdata
311             );
312              
313             # aggregate a user agent object
314             $me->{userAgent}=new LWP::UserAgent;
315             $me->{userAgent}->timeout(1);
316             $me->{userAgent}->env_proxy;
317              
318             # and a Net::Ping object
319             $me->{ping}=new Net::Ping;
320              
321             # build archive object
322             $me->{archive}=ooFile($pars{file});
323             confess "[Error] $pars{file} is no regular OpenOffice.org file.\n" unless $me->{archive};
324              
325             # extract metadata
326             $me->{metadata}=ooMeta(archive => $me->{archive});
327             carp "[Warn] $pars{file} has not standard OOO properties, it looks strange.\n" unless $me->{metadata};
328              
329             # extract document (in content and style parts)
330             $me->{docContent}=ooDocument(
331             archive => $me->{archive},
332             member => 'content',
333             delimiters => \%delimiters,
334             );
335             confess "[Error] No standard OOO content found in $pars{file}!\n" unless $me->{docContent};
336              
337             $me->{docStyles}=ooDocument(
338             archive => $me->{archive},
339             member => 'styles',
340             delimiters => \%delimiters,
341             );
342             confess "[Error] No standard OOO styles found in $pars{file}!\n" unless $me->{docContent};
343              
344             # the strange next lines prevent the getText() method of
345             # OpenOffice::OODoc::Text (see the corresponding man page) from using
346             # its default tags for spans and footnotes
347             delete $me->{docContent}{delimiters}{'text:span'};
348             delete $me->{docContent}{delimiters}{'text:footnote-body'};
349              
350             # here we select the tab as field separator for table field output
351             # (the default is ";" as for CSV output)
352             $me->{docContent}{field_separator}="\t";
353              
354             # in the next sequence, we will extract all the footnotes, store them for
355             # later processing and remove them from the content
356             $me->{notes}=[$me->{docContent}->getFootnoteList];
357             $me->{docContent}->removeElement($_) for @{$me->{notes}};
358              
359             # get the full list of text objects (without the previously removed footnotes)
360             $me->{content}=[$me->{docContent}->getTextElementList];
361              
362             # reply the new object
363             $me;
364             }
365              
366             # TODO: make document variable names configurable
367             sub convertMetadata
368             {
369             # get and check parameters
370             ((my __PACKAGE__ $me), (my ($item, $guard)))=@_;
371             confess "[BUG] Missing object parameter.\n" unless $me;
372             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
373              
374             # variables
375             my ($perlpoint, $title, $subject, $description, $author, $date, $version, $generator, $copyright, $authormail);
376              
377             # anything to do?
378             if ($me->{metadata})
379             {
380             # predefined meta data: title, subject, description, author
381             $me->{metaData}{$_}=$me->{metadata}->$_ || 'unknown' for qw(
382             creator
383             date
384             description
385             keywords
386             subject
387             title
388             );
389              
390             # get user defined metadata, as set up by caller
391             my %userDefinedMetadata=$me->{metadata}->user_defined;
392             $me->{metaData}{$_}=$userDefinedMetadata{$_} || 'unknown' for @{$me->{userdefinedDocdata}};
393              
394             # get generator
395             $generator=$me->{metadata}->generator;
396             $generator='unknown program' unless $generator;
397             }
398              
399             # process meta data, if configured
400             if (defined $me->{metadataTemplate})
401             {
402             # build safe environment
403             my $safe=new Safe;
404              
405             # clone meta data into a transfer variable
406             my %transfer=(
407             # meta data
408             metaData => $me->{metaData} ? Storable::dclone($me->{metaData}) : {},
409              
410             # generator and converter
411             tools => {
412             generator => $generator,
413             converter => __PACKAGE__,
414             },
415              
416             # more data
417             source => $me->{file},
418             );
419              
420             # build a template object, process the template and add the result
421             # (template bugs stop the program immediately - as they are considered bugs, not (user) errors)
422             my $template=new Text::Template(TYPE => 'STRING', SOURCE => $me->{metadataTemplate})
423             or die "[BUG] Couldn't construct template: $Text::Template::ERROR\n";
424             $perlpoint.=$template->fill_in(SAFE => $safe, HASH => \%transfer)
425             or die "[BUG] Couldn't process template: $Text::Template::ERROR\n";
426             }
427              
428             # supply result
429             $perlpoint;
430             }
431              
432              
433             #-----------------------------------------------------------------------------
434              
435             # convert completely
436             =pod
437              
438             =head2 oo2pp()
439              
440             Perform conversion of the document specified in the constructor call.
441              
442             B
443              
444             =over 4
445              
446             =item object
447              
448             A object as supplied by C.
449              
450             =back
451              
452             B the PerlPoint string.
453              
454             B
455              
456             # convert document
457             my $perlpoint=$oo2pp->oo2pp;
458              
459             =cut
460             sub oo2pp
461             {
462             # get and check parameters
463             ((my __PACKAGE__ $me), (my ($item, $guard)))=@_;
464             confess "[BUG] Missing object parameter.\n" unless $me;
465             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
466              
467             # variables
468             my ($perlpoint);
469              
470             # meta data, unless suppressed
471             $perlpoint.=$me->convertMetadata unless $me->{skipmetadata};
472              
473             # content
474             $perlpoint.=$me->convertContent;
475              
476             # supply result
477             $perlpoint;
478             }
479              
480             #-----------------------------------------------------------------------------
481              
482             # build a headline
483             sub buildHeadline
484             {
485             # get and check parameters
486             my ($me, $element)=@_;
487             confess "[BUG] Missing object parameter.\n" unless $me;
488             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
489             confess "[BUG] Missing element parameter.\n" unless defined $element;
490             confess "[BUG] Element parameter is no ", 'XML::Twig::Elt', " object.\n" unless ref $element and $element->isa('XML::Twig::Elt');
491              
492             # build headline and supply result
493             $me->constructHeadline($me->{docContent}->getLevel($element), $me->{docContent}->getText($element) || 'EMPTY HEADLINE FOUND - CHECK SOURCE DOCUMENT FORMATTING, PLEASE ~ EMPTY HEADLINE');
494             }
495              
496             #-----------------------------------------------------------------------------
497              
498             # a low level routine to build a headline
499             sub constructHeadline
500             {
501             # get and check parameters
502             ((my __PACKAGE__ $me), (my ($level, $text, $short)))=@_;
503             confess "[BUG] Missing object parameter.\n" unless $me;
504             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
505             confess "[BUG] Missing level parameter.\n" unless defined $level;
506             confess "[BUG] Missing text parameter.\n" unless $text;
507              
508             # build headline and supply result
509             join('', '=' x $level, $text, (defined $short ? " ~ $short" : ()), "\n\n");
510             }
511              
512             #-----------------------------------------------------------------------------
513              
514             # a low level routine to build a comment
515             sub constructComment
516             {
517             # get and check parameters
518             ((my __PACKAGE__ $me), (my ($text)))=@_;
519             confess "[BUG] Missing object parameter.\n" unless $me;
520             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
521             confess "[BUG] Missing text parameter.\n" unless $text;
522              
523             # build comment and supply result
524             join('', '// ', $text, "\n");
525             }
526              
527             #-----------------------------------------------------------------------------
528              
529             # build a separate note section (at the end of the document)
530             sub buildNoteBlock
531             {
532             # get and check parameters
533             my ($me)=@_;
534             confess "[BUG] Missing object parameter.\n" unless $me;
535             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
536              
537             }
538              
539             #-----------------------------------------------------------------------------
540              
541             # build content from element
542             sub buildContent
543             {
544             # get and check parameters
545             my ($me, $element)=@_;
546             confess "[BUG] Missing object parameter.\n" unless $me;
547             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
548             confess "[BUG] Missing element parameter.\n" unless defined $element;
549             confess "[BUG] Element parameter is no ", 'XML::Twig::Elt', " object.\n" unless ref $element and $element->isa('XML::Twig::Elt');
550              
551             # variables
552             my ($perlpoint)=('');
553              
554             # get element text
555             my $text=$me->{docContent}->getText($element);
556              
557             # choose an output format according to the type
558             if ($element->isItemList)
559             {
560             # try to find out whether this list is ordered or not
561             my $prefix=$element->isOrderedList ? '#' : '*';
562              
563             # handle all list elements
564             foreach my $item ($me->{docContent}->getItemElementList($element))
565             {
566             # transform text
567             my $transformed=$listWrapper->wrap($me->traverseElement($item, \&guardSpecials));
568              
569             # write it, if necessary
570             $perlpoint.=$transformed ? ("$prefix $transformed\n") : ();
571             }
572             }
573             elsif ($element->isTable)
574             {
575             # a table: get a table handle, table dimensions and the table text
576             my $table=$me->{docContent}->getTable($element);
577             my ($rowNr, $colNr)=$me->{docContent}->getTableSize($table);
578             my $tableText=$me->{docContent}->getTableText($table);
579              
580             # set column separator
581             my $columnSeparator='|';
582             {
583             my ($i, $cs)=(1, $columnSeparator);
584             ++$i, $cs=quotemeta($columnSeparator='|' x $i) while $tableText=~/$cs/g;
585             }
586            
587             # start table (TODO: switch to nested tables)
588             $perlpoint.="\@$columnSeparator\n";
589              
590             # handle all rows
591             foreach my $row ($me->{docContent}->getTableRows($table))
592             {
593             # cell value collector
594             my (@cellValues);
595              
596             # handle all cells
597             foreach my $cellNr (0..$colNr-1)
598             {
599             # get cell handle
600             my $cell=$me->{docContent}->getCell($row, $cellNr);
601              
602             # get content
603             push(@cellValues, $me->{docContent}->getCellValue($cell));
604             }
605              
606             # add row
607             $perlpoint=join('', $perlpoint, join(" $columnSeparator ", map {(defined) ? $_ : ''} @cellValues), "\n");
608             }
609              
610             # complete table
611             $perlpoint.="\n";
612             }
613             else
614             {
615             # scopies
616             my ($empty, $prefix, $suffix)=('');
617              
618             # get paragraph style and its attributes
619             my $styleName=$me->{docContent}->getStyle($element);
620             my $styleObject=$me->{docContent}->getStyleElement($styleName) || $me->{docStyles}->getStyleElement($styleName);
621             my %attributes=$me->{docContent}->getStyleAttributes($styleObject);
622              
623             # use Data::Dumper; warn Dumper \%attributes;
624              
625             # set paragraph prefix
626             if (
627             exists $attributes{references}{'style:family'}
628             and $attributes{references}{'style:family'} eq 'paragraph'
629             and exists $attributes{references}{'style:parent-style-name'}
630             and $attributes{references}{'style:parent-style-name'}=~/^(Code)$/
631             )
632             {
633             # a code block
634             $prefix=' ' x 3;
635             $suffix='';
636             $empty="\n";
637             }
638             else
639             {
640             # default to a text paragraph
641             $prefix='.';
642             $suffix="\n";
643             }
644              
645             # transform text
646             my $transformed=$paragraphWrapper->wrap($me->traverseElement($element, \&guardSpecials));
647              
648             # write it, if necessary
649             $perlpoint.=$transformed ? ("$prefix$transformed$suffix") : $empty;
650             }
651              
652             # supply result
653             $perlpoint;
654             }
655              
656             #-----------------------------------------------------------------------------
657              
658             # convert content
659             sub convertContent
660             {
661             # get and check parameters
662             my ($me)=@_;
663             confess "[BUG] Missing object parameter.\n" unless $me;
664             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
665              
666             # variables
667             my ($perlpoint)=("\n\n");
668              
669             # handle all elements
670             foreach my $element (@{$me->{content}})
671             {
672             # get element level
673             if ($element->isHeader)
674             {$perlpoint.=$me->buildHeadline($element);}
675             else
676             {$perlpoint.=$me->buildContent($element);}
677             }
678              
679             # supply result
680             $perlpoint;
681             }
682              
683             #-----------------------------------------------------------------------------
684              
685              
686             # Traverse an element to find style based formattings and produce appropriate markup.
687             # This is a proof of concept and needs cleanup (constants for indices etc.).
688             # The base traversal idea was taken from getText().
689             sub traverseElement
690             {
691             # get and check parameters
692             ((my __PACKAGE__ $me), (my ($item, $guard)))=@_;
693             confess "[BUG] Missing object parameter.\n" unless $me;
694             confess "[BUG] Object parameter is no ", __PACKAGE__, " object.\n" unless ref $me and ref $me eq __PACKAGE__;
695             confess "[BUG] Guard parameter should be a code reference.\n" if $guard and ref($guard) ne 'CODE';
696              
697             # declare vars
698             my ($result)=('');
699              
700              
701             # node?
702             if ($item->isElementNode)
703             {
704             # scopy
705             my @matches;
706              
707             # get item name
708             my $itemName=$item->getName();
709              
710              
711             # special cases first: whitespace
712             if ($itemName eq 'text:s')
713             {
714             # this stands for a number of whitespaces (TODO: take care of text:c)
715             return ' ';
716             }
717             elsif ($itemName eq 'text:line-break')
718             {
719             # a tab stop character (TODO: could be translated into a whitespace sequence)
720             return ' ';
721             }
722             elsif ($itemName eq 'text:tab-stop')
723             {
724             # a tab stop character (TODO: could be translated into a whitespace sequence)
725             return "\t";
726             }
727             elsif ($itemName=~/^text:table-of-content$/)
728             {
729             # ignore TOC's (TODO: make this configurable)
730             return '';
731             }
732              
733             # get paragraph style and its attributes
734             my $styleName=$me->{docContent}->getStyle($item);
735             my $styleObject=$me->{docContent}->getStyleElement($styleName) || $me->{docStyles}->getStyleElement($styleName);
736             my %attributes=(defined $me->{docContent}->getStyle($item)) ? $me->{docContent}->getStyleAttributes($styleObject) : ();
737              
738             # use Data::Dumper; warn Dumper \%attributes;
739              
740             # special case: image
741             if ($item->isImage)
742             {
743             # get image element
744             $item=$me->{docContent}->getImageElement($item);
745              
746             # extract image data
747             my (
748             $imageName,
749             $imageLink,
750             $imageDescription
751             )=(
752             $me->{docContent}->imageName($item),
753             $me->{docContent}->imageLink($item),
754             # $me->{docContent}->imageDescription($item),
755             );
756            
757              
758             # make a buffer directory, if necessary (TODO: make path configurable)
759             mkdir($me->{imagebufferdir}) unless -d $me->{imagebufferdir};
760              
761             # export internal graphics
762             if ($imageLink=~m{^(\#)?Pictures/})
763             {
764             # export image and adapt source link
765             $me->{docContent}->exportImage($item, $imageLink=join('/', $me->{imagebufferdir}, basename($imageLink)));
766             }
767              
768             # import external graphics
769             elsif ($imageLink=~m{^https?://([^/]+)})
770             {
771             # try to get the image file
772             warn "[Info] Trying to fetch $imageLink from $1.\n";
773              
774             # buffer host name
775             my ($imageHost)=$1;
776            
777             # host reachable?
778             if ($me->{ping}->ping($imageHost, 1))
779             {
780             my $file=$me->{userAgent}->get($imageLink);
781             warn "[Info] Fetched (success: ", $file->is_success, ").\n";
782              
783             # success?
784             if ($file->is_success)
785             {
786             # try to store the file locally
787             if (open(my $copy, join('', '>', $imageLink=join('/', $me->{imagebufferdir}, basename($imageLink)))))
788             {print $copy $file->content;}
789             else
790             {return qq(.\\I Image $imageName not importable: could not open local buffer file $imageLink ($!).);}
791             }
792             else
793             {return qq(.\\I Image $imageName not importable from $imageHost.);}
794             }
795             else
796             {return qq(.\\I Image host $imageHost not reachable to get $imageName.);}
797             }
798              
799             # build a tag and return it (TODDO: generalize)
800             return qq(\\IMAGE{alt="$imageName" src="$imageLink"});
801             }
802              
803             # we need buffers
804             my ($prefixPart, $contentPart, $suffixPart);
805              
806             # open new tags
807             foreach my $style (@styles)
808             {
809             # translate formatting into tags if possible
810             if (
811             exists $attributes{$style->[1]}{$style->[2]}
812             and (
813             # string match?
814             (not ref($style->[3]) and $attributes{$style->[1]}{$style->[2]} eq $style->[3])
815             # pattern match?
816             or (ref($style->[3]) and (@matches=$attributes{$style->[1]}{$style->[2]}=~/$style->[3]/))
817             )
818             )
819             {
820             # anything to do?
821             next unless ++$style->[4]==1;
822              
823             # get prefix string
824             my $prefix=$style->[5];
825              
826             # replace placeholders, if necessary
827             if (ref($style->[3]))
828             {
829             # replace last match results
830             $prefix=~s/\$(\d+)/$matches[$1-1]/g;
831             }
832              
833             # add prefix
834             $prefixPart=join('', defined $prefixPart ? $prefixPart : '', $prefix);
835             }
836             }
837              
838             # recursive call
839             $contentPart.=$me->traverseElement($_, $guard) foreach $item->getChildNodes;
840              
841             # close tags that were opened on this level
842             foreach my $style (reverse @styles)
843             {
844             # translate formatting into tags if possible
845             if (
846             exists $attributes{$style->[1]}{$style->[2]}
847             and (
848             # string match?
849             (not ref($style->[3]) and $attributes{$style->[1]}{$style->[2]} eq $style->[3])
850             # pattern match?
851             or (ref($style->[3]) and $attributes{$style->[1]}{$style->[2]}=~/$style->[3]/)
852             )
853             )
854             {
855             # anything to do?
856             next if --$style->[4];
857              
858             # get suffix string
859             my $suffix=$style->[6];
860              
861             # replace placeholders, if necessary
862             if (ref($style->[3]))
863             {
864             # save last match results
865             $suffix=~s/\$(\d+)/$matches[$1-1]/g;
866             }
867              
868             # add prefix
869             $suffixPart=join('', defined $suffixPart ? $suffixPart : '', $suffix);
870             }
871             }
872              
873             # now combine the parts as necessary
874             $result.=join('',
875             defined $prefixPart ? $prefixPart : '',
876             $contentPart,
877             defined $suffixPart ? $suffixPart : '',
878             ) if defined $contentPart and $contentPart;
879             }
880             else
881             {
882             # get text, guard specials, and add the result to the functions result string
883             my $text=$me->{docContent}->outputTextConversion($item->getValue() || '');
884             $text=$guard->($text) if $text and $guard;
885             $result.=$text;
886             }
887              
888             # supply result
889             $result;
890             }
891              
892              
893             # class method: a translator for characters that are special in the target language
894             sub guardSpecials
895             {
896             # get and check parameters
897             my ($text)=@_;
898              
899             # translate
900             $text=~s/([\$>\\])/\\$1/g;
901              
902             # supply modified text
903             $text;
904             }
905              
906              
907              
908             # flag successfull load
909             1;
910              
911              
912              
913              
914             # = POD TRAILER SECTION =================================================================
915              
916             =pod
917              
918             =head1 NOTES
919              
920             First of all, please note the I of this software. C
921             does support just a subset of the very rich features potentially occuring in an Open Office document.
922             Some features should be added later, other features have no expression in PerlPoint
923             and therefore are ignored.
924              
925             =head2 Supported features
926              
927             =over 4
928              
929             =item Meta data
930              
931             Selected meta data are transformed into PerlPoint variables. The names of these variables
932             are fix in the current version of C, but shall become configurable
933             in later versions.
934              
935             =over 4
936              
937             =item Title
938              
939             Stored in C<$docTitle.>
940              
941             =item Subject
942              
943             Stored in C<$docSubtitle.>
944              
945             =item Description
946              
947             Stored in C<$docDescription.>
948              
949             =item Author
950              
951             Stored in C<$docAuthor.>
952              
953             =item Date
954              
955             Stored in C<$docDate.>
956              
957             =item Version
958              
959             If the user defines a data named C, it is stored in C<$docVersion>.
960              
961             =item Copyright
962              
963             If the user defines a data named C, it is stored in C<$docCopyright>.
964              
965             =back
966              
967              
968             =item Headlines
969              
970             Headlines are supported. Make sure to use the predefined headline formats in the Office document,
971             and avoid gaps in headline hierarchies (as they will cause PerlPoint translation errors later).
972              
973              
974             =item Text formatting
975              
976             Bold, italic, underlined, colorized text portions within a paragraph are translated into
977             the related PerlPoint tags C<\B>, C<\I>, C<\U> and C<\F>.
978              
979             =item Text marked as code
980              
981             In Perlpoint, text within a paragraph can be marked as "code" by the C<\C> tag. As there
982             is no comparable feature in OpenOffice (I know of), all text assigned to the font I
983             is treated as such code.
984              
985             The font is fix for now, but shall be configurable in a future version.
986              
987             =item Blocks
988              
989             In PerlPoint, examples can be written into "code blocks", which are paragraphs marked by
990             indentation. Open Office as an office suite is not focussed on code, so again there is a
991             convention. All paragraphs assigned to a style "Code" are treated as blocks.
992              
993             The style name is fixed for now, but shall be configurable in a future version.
994              
995              
996             =item Lists
997              
998             Lists are basically supported.
999              
1000             Unfortunately, it is difficult to distinguish between ordered and bullet lists in OASIS OpenDocument.
1001             That's why ordered lists are transformed into bullet lists if an OpenDocument source is translated.
1002             For OpenOffice 1.0 documents ordered lists are handled correctly.
1003              
1004              
1005             =item Tables
1006              
1007             Tables are supported as long as they are not nested.
1008              
1009              
1010             =item Images
1011              
1012             Images I are fully supported.
1013              
1014              
1015             =item Comments
1016              
1017             Comments are supported.
1018              
1019              
1020             =back
1021              
1022              
1023             =head2 Limitations
1024              
1025             =over 4
1026              
1027             =item Limited to text documents
1028              
1029             The current version can handle text documents I. Spreadsheets, presentations etc. cannot
1030             be transformed at this time.
1031              
1032              
1033             =item Footnote support
1034              
1035             ... is invalid yet. Current results will not pass a PerlPoint converter.
1036              
1037              
1038             =item OASIS OpenDocument
1039              
1040             is not fully supported at the moment due to the beta status of both Open Office 2.0 and
1041             C. As both tools are well supported this is just a matter of time.
1042              
1043             =back
1044              
1045              
1046             =head2 TOCs
1047              
1048             Office document tables of contents cannot easily be transformed into PerlPoint TOCs. That's why
1049             they are ignored.
1050              
1051              
1052             =head2 POD
1053              
1054             It should be possible to adapt this library for POD output. This might be done in the future.
1055             Both versions could use a common base library.
1056              
1057              
1058             =head1 Credits
1059              
1060             This module is based on C. Thanks to its author
1061             Jean-Marie Gouarné for the module and his helpful support with many questions.
1062              
1063              
1064             =head1 TODO
1065              
1066             =over 4
1067              
1068             =item *
1069              
1070             Support nested tables.
1071              
1072             =item *
1073              
1074             TOC ignoration could become configurable.
1075              
1076             =item *
1077              
1078             Optionally image file copies stored in a buffer directory should be named generically.
1079              
1080             =item *
1081              
1082             Support other formats like spreadsheets and presentations.
1083              
1084             =back
1085              
1086              
1087             =head1 SEE ALSO
1088              
1089             =over 4
1090              
1091             =item B
1092              
1093             The module that made it possible to write C relatively quickly.
1094              
1095             =item B
1096              
1097             A bundle of packages to deal with PerlPoint documents.
1098              
1099             =item B
1100              
1101             A OpenOffice / OpenDocument format to PerlPoint translator, distributed and installed with this module.
1102              
1103              
1104             =back
1105              
1106              
1107             =head1 SUPPORT
1108              
1109             A PerlPoint mailing list is set up to discuss usage, ideas,
1110             bugs, suggestions and translator development. To subscribe,
1111             please send an empty message to perlpoint-subscribe@perl.org.
1112              
1113             If you prefer, you can contact me via perl@jochen-stenzel.de
1114             as well.
1115              
1116              
1117             =head1 AUTHOR
1118              
1119             Copyright (c) Jochen Stenzel (perl@jochen-stenzel.de), 2005.
1120             All rights reserved.
1121              
1122             Parts of the module are derived from an C example script
1123             that came with C 1.309.
1124              
1125             This module is free software, you can redistribute it and/or modify it
1126             under the terms of the Artistic License distributed with Perl version
1127             5.003 or (at your option) any later version. Please refer to the
1128             Artistic License that came with your Perl distribution for more
1129             details.
1130              
1131             The Artistic License should have been included in your distribution of
1132             Perl. It resides in the file named "Artistic" at the top-level of the
1133             Perl source tree (where Perl was downloaded/unpacked - ask your
1134             system administrator if you dont know where this is). Alternatively,
1135             the current version of the Artistic License distributed with Perl can
1136             be viewed on-line on the World-Wide Web (WWW) from the following URL:
1137             http://www.perl.com/perl/misc/Artistic.html
1138              
1139              
1140             =head1 DISCLAIMER
1141              
1142             This software is distributed in the hope that it will be useful, but
1143             is provided "AS IS" WITHOUT WARRANTY OF ANY KIND, either expressed or
1144             implied, INCLUDING, without limitation, the implied warranties of
1145             MERCHANTABILITY and FITNESS FOR A PARTICULAR PURPOSE.
1146              
1147             The ENTIRE RISK as to the quality and performance of the software
1148             IS WITH YOU (the holder of the software). Should the software prove
1149             defective, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
1150             CORRECTION.
1151              
1152             IN NO EVENT WILL ANY COPYRIGHT HOLDER OR ANY OTHER PARTY WHO MAY CREATE,
1153             MODIFY, OR DISTRIBUTE THE SOFTWARE BE LIABLE OR RESPONSIBLE TO YOU OR TO
1154             ANY OTHER ENTITY FOR ANY KIND OF DAMAGES (no matter how awful - not even
1155             if they arise from known or unknown flaws in the software).
1156              
1157             Please refer to the Artistic License that came with your Perl
1158             distribution for more details.
1159