File Coverage

blib/lib/File/Spec/DatedPage.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             #============================== DatedPage.pm =================================
2             # Filename: DatedPage.pm
3             # Description: Object to parse dated filenames with page numbers.
4             # Programmed by: Dale Amon
5             # Revised by: $Author: amon $
6             # Date: $Date: 2008-08-31 23:41:58 $
7             # Version: $Revision: 1.4 $
8             # License: LGPL 2.1, Perl Artistic or BSD
9             #
10             #==============================================================================
11 1     1   591 use strict;
  1         2  
  1         31  
12 1     1   5 use Fault::DebugPrinter;
  1         2  
  1         17  
13 1     1   1717 use DMA::FSM;
  0            
  0            
14             use Document::PageId;
15             use File::Spec::Dated;
16              
17             package File::Spec::DatedPage;
18             use vars qw{@ISA};
19             @ISA = qw( File::Spec::Dated );
20              
21             #==============================================================================
22             # Object Methods
23             #==============================================================================
24             # Local Lexical Analyzers for splitpath method.
25              
26             sub _getDocTitleAndStartPage {
27             my ($lexeme, $bb) = @_;
28             my $lpar;
29              
30             # Is it a page number? Flag is true to require leading p.
31             ($lpar,$lexeme) = Document::PageId->parse($lexeme,1);
32              
33             if ($lpar) {
34             @$bb{'startpage','del','tail'} = ($lpar,"","startpage");
35             return (1,$lexeme);
36             }
37              
38             # Swallow title lexemes
39             $bb->{'document_title_section'} .= $bb->{'del'} . $lexeme;
40             @$bb{'del','tail'} = ("-","document_title_section");
41             return (0,undef);
42             }
43              
44             sub _getSecondPage {
45             my ($lexeme, $bb) = @_;
46             my $lpar;
47              
48             # Check for ending page. Flag is false to require NO leading p.
49             ($lpar,$lexeme) = Document::PageId->parse($lexeme, 0);
50             if ($lpar) {
51             @$bb{'endpage','tail'} = ($lpar,"endpage");
52             return (1,$lexeme);
53             }
54             return (0,$lexeme);
55             }
56              
57             sub _getPageTitle {
58             my ($lexeme, $bb) = @_;
59             $bb->{'page_title_section'} .= $bb->{'del'} . $lexeme;
60             @$bb{'del','tail'} = ("-","page_title_section");
61             return (1,undef);
62             }
63              
64             sub _noop {my ($lexeme, $bb) = @_; return (1,$lexeme);}
65              
66             #------------------------------------------------------------------------------
67              
68             sub splitpath {
69             my ($self,$file) = (shift, shift);
70              
71             my $fst =
72             {
73             'S0' => ["D0","SAME", \&_getDocTitleAndStartPage,"S1","TSTL","S0","NEXT"],
74             'S1' => ["D0","SAME", \&_getSecondPage, "S2","NEXT","S2","SAME"],
75             'S2' => ["D0","SAME", \&_getPageTitle, "S2","NEXT","",""],
76             'D0' => ["D0","DONE", \&_noop, "","","",""],
77             };
78              
79             $self->SUPER::splitpath ($file);
80             Fault::DebugPrinter->dbg (4, "Beginning parse for File::Spec::DatedPage");
81              
82             $self->{'del'} = "";
83             $self->{'name_body'} = $self->_append_extensions_to_tail;
84              
85             # There really should be nothing remaining; I only capture this so I'll
86             # remember it exists in case I do find a future need to look at it.
87             #
88             my @remaining = DMA::FSM::FSM ( $fst, $self,
89             split (/-/, $self->{'name_body'}));
90             delete $self->{'del'};
91             delete $self->{'state'};
92              
93             # If page or document_title sections were the tailpart, see if it has
94             # trailing extensions. If the tailpart were pages, there could not have
95             # been anything leftover.
96             #
97             {$_ = $self->{'tail'};
98             if (/page_title_section/ ||
99             /document_title_section/) {
100             my $lpar = $self->_parse_extensions_from_tail;
101             $self->{$_} = ($lpar) ? $lpar : undef;
102             $self->reset_name_body;
103             $self->reset_name;
104             }
105             }
106              
107             return (@$self{'volume','basepath','directory',
108             'startdate','enddate',
109             'document_title_section',
110             'startpage','endpage',
111             'page_title_section'},
112             @{$self->{'extensions'}});
113             }
114              
115             #------------------------------------------------------------------------------
116             # Set parts of a name_body
117              
118             sub set_startpage {my $s=shift; @$s{'startpage', '_dirty'}=(shift,1);
119             return $s;}
120              
121             sub set_endpage {my $s=shift; @$s{'endpage', '_dirty'}=(shift,1);
122             return $s;}
123              
124             sub set_document_title_section
125             {my $s=shift; @$s{'document_title_section',
126             '_dirty'}=(shift,1);
127             return $s;}
128             sub set_page_title_section
129             {my $s=shift; @$s{'page_title_section',
130             '_dirty'}=(shift,1);
131             return $s;}
132              
133             #------------------------------------------------------------------------------
134              
135             sub reset_name_body {
136             my $self = shift;
137             my ($namebody,$del) = ("","");
138             my (@list) = (@$self{'document_title_section',
139             'startpage','endpage',
140             'page_title_section'});
141             $list[1] = (defined $list[1]) ? "p$list[1]" : undef;
142              
143             foreach (@list) {
144             $_ || next;
145             $namebody .= "$del$_"; $del = "-";
146             }
147             return $self->{'name_body'} = ($namebody) ? $namebody : undef;
148             }
149              
150             #------------------------------------------------------------------------------
151              
152             sub startpage {return shift->{'startpage'};}
153             sub endpage {return shift->{'endpage'};}
154             sub document_title_section {return shift->{'document_title_section'};}
155             sub page_title_section {return shift->{'page_title_section'};}
156             sub ambiguous {return shift->{'ambiguous'};}
157              
158             #------------------------------------------------------------------------------
159              
160             sub pages {
161             my $self = shift;
162             my ($beg,$end) = ($self->{'startpage'}, $self->{'endpage'});
163             defined $beg || (return undef);
164             defined $end || (return $beg);
165             return $beg . "-" . $end;
166             }
167              
168             #==============================================================================
169             # INTERNAL: Object Methods
170             #==============================================================================
171              
172             sub _init {
173             my $self = shift;
174             $self->SUPER::_init;
175             @$self{'document_title_section',
176             'startpage','endpage',
177             'page_title_section',
178             'ambiguous'} =
179             (undef,undef,undef,undef,0 );
180             }
181            
182             #==============================================================================
183             # Pod Documentation
184             #==============================================================================
185             # You may extract and format the documention section with the 'perldoc' cmd.
186              
187             =head1 NAME
188              
189             File::Spec::DatedPage - Parse a dated and page numbered file name in a system independant way.
190              
191             =head1 SYNOPSIS
192              
193             use File::Spec::DatedPage;
194              
195             ($volume, $basepath, $directory,
196             $startdate, $enddate, $document_title_section,
197             $startpage, $endpage, $page_title_section,
198             @extensions) = $obj->splitpath ($filepath);
199              
200             $document_title_section = $obj->document_title_section;
201              
202             $startpage = $obj->startpage;
203             $endpage = $obj->endpage;
204             $pages = $obj->pages;
205              
206             $page_title_section = $obj->page_title_section;
207              
208             $flag = $obj->ambiguous;
209             $obj = $obj->set_document_title_section ($document_title_section);
210             $obj = $obj->set_startpage ($startpage);
211             $obj = $obj->set_endpage ($endpage);
212             $obj = $obj->set_page_title_section ($page_title_section);
213              
214             $name_body = $obj->reset_name_body;
215              
216             =head1 Inheritance
217              
218             UNIVERSAL
219             File::Spec::Unix
220             File::Spec::BaseParse
221             File::Spec::Dated
222             File::Spec::DatedPage
223              
224             =head1 Description
225              
226             Further splits a pathname string from what it's parent classes have already
227             done. Using the example name_body string, XMAS-Title-Subtitle-note, it will
228             be broken down further as:
229              
230             document_title_section: XMAS-Title-Subtitle-note
231             startpage:
232             endpage
233             page_title_section:
234              
235             The 'ambiguous' flag is not implemented yet: it is only talked about.. It
236             should mark cases where a file extension might really be part of a filename
237             as shown in File::Spec::Dated. This is the first semantic level at which it
238             seems to matter. I have now made sure it does not exist outside this module
239             so as to make it easy to purge if I decide it is an unworkable idea.
240              
241             =head1 Examples
242              
243             use File::Spec::DatedPage;
244             my $baz = File::Spec::DatedPage->new;
245             my @list = $baz->splitpath
246             ("/my/base/Cards/19901225-XMAS-Title-Subtitle-note.tar.gz");
247              
248             my $foo = File::Spec::DatedPage->new
249             ("/my/base/Cards/19901225-XMAS-Title-Subtitle-note.tar.gz");
250              
251             my $startpage = $foo->startpage;
252             my $endpage = $foo->endpage;
253             my $pages = $foo->pages;
254             my $dtitle = $foo->document_title_section;
255             my $ptitle = $foo->page_title_section;
256              
257             $foo->set_startpage ("100");
258             $foo->set_endpage ("101");
259             $foo->set_document_title_section
260             ("JournalOfIrreproduceableResults-QuantumBubbling" );
261             $foo->set_page_title_section ("ThePintEffect-allTheTimeInTheworldInAGuinness");
262             my $name_body = $foo->reset_name_body;
263             my $name = $foo->reset_name;
264             my $filename = $foo->reset_filename;
265             my $filepath = $foo->reset_pathname;
266             my @parts = $foo->reparse;
267              
268             =head1 Class Variables
269              
270             None.
271              
272             =head1 Instance Variables
273              
274             document_title_section Title of the whole document.
275             startpage Starting page string.
276             endpage Ending page string.
277             page_title_section Title specifically associated with the page.
278             ambiguous Set if it is ambiguous whether the leftmost file
279             extension really is a file extension.
280              
281             =head1 Class Methods
282              
283             None.
284              
285             =head1 Instance Methods
286              
287             =over 4
288              
289             =item B<$flag = $obj-Eambiguous>
290              
291             Return true if it is ambiguous whether the rightmost file extension is really
292             a file extension.
293              
294             =item B<$document_title_section = $obj-Edocument_title_section>
295              
296             Return the document title section string.
297              
298             =item B<$endpage = $obj-Eendpage>
299              
300             Return the ending pageid object or undef if there is none.
301              
302             =item B<$pages = $obj-Epages>
303              
304             Return a $pages string suitable for use in an index or table of contents,
305             eg "100", "100-101" or "42.1-42.2".
306              
307             Return undef if there is no page information associated with this filespec.
308              
309             =item B<$page_title_section = $obj-Epage_title_section>
310              
311             Return the title string.
312              
313             =item B<$obj = $obj-Eset_endpage ($endpage)>
314              
315             Unconditionally set the end page.
316              
317             =item B<$obj = $obj-Eset_document_title_section ($document_title_section)>
318              
319             Unconditionally set the document_title_section string.
320              
321             =item B<$obj = $obj-Eset_startpage ($startpage)>
322              
323             Unconditionally set the start page.
324              
325             =item B<$obj = $obj-Eset_page_title_section ($page_title_section)>
326              
327             Unconditionally set the page_title_section.
328              
329             =item B<($volume, $basepath, $directory, $startdate, $enddate, $document_title_section, $startpage, $endpage, $page_title_section, @extensions) = $obj-Esplitpath ($filepath)>
330              
331             Parses the filename into:
332              
333             {firstdate{-lastdate}}{-title}{-startpage{-endpage}}{-subtitle}{.extensions}
334              
335             and returns all the elements of the pathname and filename as a list.
336             Completely reinitializes the object for the name $filepath.
337              
338             =item B<$startpage = $obj-Estartpage>
339              
340             Return the starting pageid object.
341              
342             =item B<$name_body = $obj-Ereset_name_body >
343              
344             Rebuild the name_body ivar from parts:
345              
346             document_title_section + startpage + endpage + page_title_section -> name_body
347              
348             =back 4
349              
350             =head1 Private Class Methods
351              
352             None.
353              
354             =head1 Private Instance Methods
355              
356             =over 4
357              
358             =item B<$obj = $obj-E_init>
359              
360             Internal initializer.
361              
362             This method is for the subclass initializer chaining and should not be used
363             otherwise.
364              
365             =back 4
366              
367             =head1 KNOWN BUGS
368              
369             See TODO.
370              
371             =head1 SEE ALSO
372              
373             Fault::DebugPrinter, DMA::FSM, Document::PageId, File::Spec::Dated
374              
375             =head1 AUTHOR
376              
377             Dale Amon
378              
379             =cut
380            
381             #=============================================================================
382             # CVS HISTORY
383             #=============================================================================
384             # $Log: DatedPage.pm,v $
385             # Revision 1.4 2008-08-31 23:41:58 amon
386             # Fixed doc title problem, one colon where two were needed by CPAN.
387             #
388             # Revision 1.3 2008-08-28 23:32:45 amon
389             # perldoc section regularization.
390             #
391             # Revision 1.2 2008-08-16 17:49:06 amon
392             # Update source format, documentation; switch to Fault package
393             #
394             # Revision 1.1.1.1 2004-09-02 12:37:47 amon
395             # File Spec extensions for doc name formats.
396             #
397             # 20040821 Dale Amon
398             # Switched to Finite State Machine for parsing.
399             #
400             # 20040820 Dale Amon
401             # Split it up. Much has gone to File::Spece:PublicationPage.
402             # This class now just parses out:
403             # --<pages>-<subtitle> </td> </tr> <tr> <td class="h" > <a name="404">404</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # Where title and subtitle now have a different meaning </td> </tr> <tr> <td class="h" > <a name="405">405</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # than they used to: before I used 'title' to mean the </td> </tr> <tr> <td class="h" > <a name="406">406</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # first dash delimited text item after a publication name; </td> </tr> <tr> <td class="h" > <a name="407">407</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # the rest of the set were the subtitles. I was not really </td> </tr> <tr> <td class="h" > <a name="408">408</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # dealing with a set of article titles after the pageid's. </td> </tr> <tr> <td class="h" > <a name="409">409</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # Now I have dubbed all of the first set of -text- items </td> </tr> <tr> <td class="h" > <a name="410">410</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # as title and the second set as subtitle. So I need new </td> </tr> <tr> <td class="h" > <a name="411">411</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # names for the 'first and rest' for both sets now. </td> </tr> <tr> <td class="h" > <a name="412">412</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # </td> </tr> <tr> <td class="h" > <a name="413">413</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # 20040815 Dale Amon <amon@islandone.org> </td> </tr> <tr> <td class="h" > <a name="414">414</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # Changed from Archivist::PublicationSpec to </td> </tr> <tr> <td class="h" > <a name="415">415</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # File::Spec::DatedPage </td> </tr> <tr> <td class="h" > <a name="416">416</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # </td> </tr> <tr> <td class="h" > <a name="417">417</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # 20021208 Dale Amon <amon@vnl.com> </td> </tr> <tr> <td class="h" > <a name="418">418</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # Hacked it apart into a Class hierarchy. </td> </tr> <tr> <td class="h" > <a name="419">419</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # </td> </tr> <tr> <td class="h" > <a name="420">420</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # 20021121 Dale Amon <amon@vnl.com> </td> </tr> <tr> <td class="h" > <a name="421">421</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # Created. </td> </tr> <tr> <td class="h" > <a name="422">422</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # </td> </tr> <tr> <td class="h" > <a name="423">423</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> 1; </td> </tr> </table> </body> </html>