File Coverage

blib/lib/Blog/BlogML/Reader.pm
Criterion Covered Total %
statement 12 12 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 16 16 100.0


line stmt bran cond sub pod time code
1             package Blog::BlogML::Reader;
2             # $Id: Reader.pm,v 1.6 2006/08/07 21:43:07 michael Exp $
3              
4             our $VERSION = 1.03;
5              
6 10     10   258640 use 5.008006;
  10         48  
  10         594  
7 10     10   75 use strict;
  10         22  
  10         614  
8 10     10   61 use warnings;
  10         25  
  10         384  
9              
10 10     10   55 use base 'XML::Parser::Expat';
  10         18  
  10         44912  
11             use HTTP::Date;
12             use Carp;
13              
14             sub new {
15             my $class = shift;
16            
17             my $source = shift or carp q(new(): Missing required argument: $source.);
18            
19             my %filter = @_;
20             $filter{after} &&= ($filter{after} =~ /\D/)? str2time($filter{after}):$filter{after};
21             $filter{before} &&= ($filter{before} =~ /\D/)? str2time($filter{before}):$filter{before};
22            
23             my $self = new XML::Parser::Expat(
24             Namespaces => 1,
25             NoExpand => 1,
26             ParseParamEnt => 0,
27             ErrorContext => 2,
28             );
29             $self->setHandlers(
30             Start => \&_on_start,
31             Char => \&_on_char,
32             End => \&_on_end,
33             );
34            
35             $self->{blog} = {
36             source => $source,
37             meta => {},
38             cats => {},
39             posts => [],
40             filter => \%filter,
41             };
42             $self->{current_context} = undef;
43             $self->{this_post} = undef;
44             $self->{this_cat} = undef;
45            
46             $self->{count} = 0;
47             $self->{from} = (defined $filter{from})? $filter{from}:0;
48             $self->{to} = (defined $filter{to})? $filter{to}:undef;
49            
50             eval{ $self->parsefile($self->{blog}{source}) };
51             carp $@ if $@;
52            
53             bless $self, $class;
54             }
55              
56             my %context = (
57             blog_root => '/blog',
58             blog_title => '/blog/title',
59             blog_subtitle => '/blog/sub-title',
60             blog_author => '/blog/author',
61              
62             cat => '/blog/categories/category',
63             cat_title => '/blog/categories/category/title',
64              
65             post => '/blog/posts/post',
66             post_title => '/blog/posts/post/title',
67             post_content => '/blog/posts/post/content',
68             post_cat => '/blog/posts/post/categories/category',
69             );
70              
71             sub _on_start {
72             my ($self, $element, %att) = @_;
73             $self->{current_context} = '/'.join('/', $self->context, $element);
74            
75             if ($self->{current_context} eq $context{post}
76             and $att{approved} eq 'true') {
77            
78             $self->{count}++;
79             if ($self->{count} < $self->{from}) {
80             return;
81             }
82            
83             if (defined $self->{to} and $self->{count} > $self->{to}) {
84             $self->finish();
85             return;
86             }
87            
88             if ($self->{blog}{filter}{post}
89             and $att{id} ne $self->{blog}{filter}{post}) {
90             return;
91             }
92             $att{'date-created'} = str2time($att{'date-created'});
93             if ($self->{blog}{filter}{before}
94             and $att{'date-created'} > $self->{blog}{filter}{before}) {
95             return;
96             }
97             if ($self->{blog}{filter}{after}
98             and $att{'date-created'} <= $self->{blog}{filter}{after}) {
99             $self->finish();
100             return;
101             }
102            
103             $self->{this_post} = {
104             id => $att{id},
105             url => $att{'post-url'},
106             time => $att{'date-created'},
107             title => '',
108             content => '',
109             cats => [],
110             };
111             }
112             elsif ($self->{current_context} eq $context{cat}
113             and $att{approved} eq 'true') {
114            
115             $self->{this_cat} = {
116             id => $att{id},
117             parent => $att{parentref},
118             title => '',
119             };
120             }
121             elsif ($self->{current_context} eq $context{blog_author}) {
122             $self->{blog}{meta}{author} = $att{name};
123             $self->{blog}{meta}{email} = $att{email};
124             }
125             elsif ($self->{current_context} eq $context{blog_root}) {
126             $self->{blog}{meta}{url} = $att{'root-url'};
127             $self->{blog}{meta}{time} = str2time($att{'date-created'});
128             }
129             elsif ($self->{current_context} eq $context{post_cat}
130             and $self->{this_post}) {
131             push @{$self->{this_post}{cats}}, $att{ref};
132             }
133             }
134              
135             sub _on_char {
136             my ($self, $char) = @_;
137            
138             _trim($char);
139            
140             if ($self->{current_context} eq $context{post_title}
141             and $self->{this_post}) {
142             $self->{this_post}{title} .= (($self->{this_post}{title} and $char)? ' ':'').$char;
143             }
144             elsif ($self->{current_context} eq $context{post_content}
145             and $self->{this_post}) {
146             $self->{this_post}{content} .= (($self->{this_post}{content} and $char)? "\n":'').$char;
147             }
148             elsif ($self->{current_context} eq $context{cat_title}
149             and $self->{this_cat}) {
150             $self->{this_cat}{title} .= (($self->{this_cat}{title} and $char)? ' ':'').$char;
151             }
152             elsif ($self->{current_context} eq $context{blog_title}) {
153             $self->{blog}{meta}{title} .= (($self->{blog}{meta}{title} and $char)? ' ':'').$char;
154             }
155             elsif ($self->{current_context} eq $context{blog_subtitle}) {
156             $self->{blog}{meta}{subtitle} .= (($self->{blog}{meta}{subtitle} and $char)? ' ':'').$char;
157             }
158             }
159              
160             sub _on_end {
161             my ($self, $element) = @_;
162             $self->{current_context} = '/'.join('/', $self->context, $element);
163            
164             if ($self->{current_context} eq $context{post}
165             and $self->{this_post}) {
166             if (defined $self->{blog}{filter}{cat}
167             and !grep /$self->{blog}{filter}{cat}/, @{$self->{this_post}{cats}}) {
168             return;
169             }
170             push @{$self->{blog}{posts}}, $self->{this_post};
171            
172             undef $self->{this_post};
173             }
174             elsif ($self->{current_context} eq $context{cat}
175             and $self->{this_cat}) {
176             $self->{blog}{cats}{$self->{this_cat}->{id}} = $self->{this_cat};
177            
178             undef $self->{this_cat};
179             }
180             }
181              
182             sub posts {
183             my ($self) = @_;
184             return $self->{blog}{posts};
185             }
186              
187             sub cats {
188             my ($self) = @_;
189             return $self->{blog}{cats};
190             }
191              
192             sub meta {
193             my ($self) = @_;
194             return $self->{blog}{meta};
195             }
196              
197             sub _trim {
198             $_[0] =~ s/(^\s+|\s+$)//g;
199             }
200              
201             1;
202              
203             =pod
204              
205             =head1 NAME
206              
207             Blog::BlogML::Reader - read data from a BlogML formatted document
208              
209             =head1 SYNOPSIS
210              
211             use Blog::BlogML::Reader;
212            
213             my $reader = new Blog::BlogML::Reader('some/file/blogml.xml');
214             my @posts = @{$reader->posts()};
215              
216             =head1 DEPENDENCIES
217              
218             =over
219              
220             =item * XML::Parser::Expat
221              
222             This module uses C to parse the XML in the BlogML source file.
223              
224             =item * HTTP::Date
225              
226             This module uses C to transform date strings into sortable timestamps.
227              
228             =back
229              
230             =head1 EXPORT
231              
232             None.
233              
234             =head1 INTERFACE
235              
236             =head2 filters
237              
238             When creating a new reader, the default bahaviour is to parse and return every post in the entire BlogML file. This can be inefficient if, for example, you have ten-thousand posts and only want the first one. For this reason it is recommended that you give the parser some limits. This is done by adding "filters" to the constructor call. Note that once a reader is constructed it's filters cannot be modified; you must create a new reader if you wish to apply new filters.
239              
240             =over 3
241              
242             =item * to=>I
243              
244             Limits the parser to only the first I posts (starting from the top of the file and working down) in the BlogML file; that is the parser stops working after I posts. Note that the count does not apply to posts that have an "approved" attribute of false: unapproved posts are always invisible to the parser.
245              
246             $reader = new Blog::BlogML::Reader('blogml.xml', to=>3);
247              
248             =item * from=>I
249              
250             The parser will only start working at the I item in the BlogML file. Note that this can optionally be used in conjunction with the C filter to limit the parser to a range of posts.
251              
252             $reader = new Blog::BlogML::Reader('blogml.xml', from=>11, to=>20);
253              
254             =item * before=>I
255              
256             Limits the parser to posts with a creation-date before (older than) the given I. The date format can either be a string that complies with the HTTP date protocol or a number representing the Unix time.
257              
258             $reader = new Blog::BlogML::Reader('blogml.xml', before=>"2006-05-01T00:00:00");
259              
260             =item * after=>I
261              
262             Limits the parser to posts with a creation-date on or after (younger than) the given I. Can optionally be used in conjunction with the C filter to limit the parser to a range of dates. The date format can either be a string that complies with the HTTP date protocol or a number representing the Unix time.
263              
264             $reader = new Blog::BlogML::Reader('blogml.xml', after=>1154979460);
265              
266             =item * id=>I
267              
268             If you know the exact post you want, why force the parser to work on the entire file?
269              
270             $reader = new Blog::BlogML::Reader('blogml.xml', id=>123);
271              
272             =item * cat=>I
273              
274             Limits the parser to only the posts that belong to the category with the given id.
275              
276             $reader = new Blog::BlogML::Reader('blogml.xml', cat=>'123');
277              
278             =back
279              
280             =head2 methods
281              
282             =over 3
283              
284             =item * meta()
285              
286             Returns a HASHREF of meta information about the blog.
287              
288             my $meta = $reader->meta();
289             print $meta->{title};
290             print $meta->{author}, $meta->{email};
291              
292             =item * posts()
293              
294             Returns an ARRAYREF of blog posts (in the same order as they are in the file). The number of posts returned will be limited by any filters applied when the reader was constructed.
295              
296             my $posts = $reader->posts();
297             print $posts->[0]{title};
298              
299             =item * cats()
300              
301             Returns a HASHREF of blog categories (keys are the category id).
302              
303             my $cats = $reader->cats();
304             print $cats->{'123'}{title};
305              
306             =back
307              
308             =head1 EXAMPLE
309              
310             use Blog::BlogML::Reader;
311             use Date::Format;
312              
313             # parse all posts in the month of April
314             my $reader = new Blog::BlogML::Reader('t/example.xml',
315             after => "2006-04-01T00:00:00",
316             before => "2006-05-01T00:00:00",
317             );
318              
319             my $posts = $reader->posts();
320             my $meta = $reader->meta();
321             my $cats = $reader->cats();
322              
323             print "

", $meta->{title}, "

";
324             print $meta->{author};
325              
326             foreach my $post (@$posts) {
327             print "

", $post->{title}, "

";
328              
329             # post dates are returned in Unix time, so format as desired
330             print "posted:", time2str("%o of %B %Y", $post->{time});
331              
332             print " categories:",
333             join(", ", map{$cats->{$_}{title}} @{$post->{cats}});
334              
335             print " link:", $post->{url};
336              
337             print $post->{content}, "
";
338             }
339              
340             =head1 SEE ALSO
341              
342             The website L has the latest documentation on the BlogML standard. Note that the reference document "t/example.xml" included with this distribution illustrates the expected format of BlogML documents used by this module.
343              
344             =head1 AUTHOR
345              
346             Michael Mathews, Emmathews@cpan.orgE
347              
348             =head1 COPYRIGHT AND LICENSE
349              
350             Copyright (C) 2006 by Michael Mathews
351              
352             This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.6 or, at your option, any later version of Perl 5 you may have available.
353              
354             =cut
355              
356             __END__