File Coverage

blib/lib/Blog/BlogML/Reader.pm

Criterion	Covered	Total	%
statement	12	12	100.0
branch			n/a
condition			n/a
subroutine	4	4	100.0
pod			n/a
total	16	16	100.0

line	stmt	sub	time	code
1				package Blog::BlogML::Reader;
2				# $Id: Reader.pm,v 1.6 2006/08/07 21:43:07 michael Exp $
3
4				our $VERSION = 1.03;
5
6	10	10	258640	use 5.008006;
	10		48
	10		594
7	10	10	75	use strict;
	10		22
	10		614
8	10	10	61	use warnings;
	10		25
	10		384
9
10	10	10	55	use base 'XML::Parser::Expat';
	10		18
	10		44912
11				use HTTP::Date;
12				use Carp;
13
14				sub new {
15				my $class = shift;
16
17				my $source = shift or carp q(new(): Missing required argument: $source.);
18
19				my %filter = @_;
20				$filter{after} &&= ($filter{after} =~ /\D/)? str2time($filter{after}):$filter{after};
21				$filter{before} &&= ($filter{before} =~ /\D/)? str2time($filter{before}):$filter{before};
22
23				my $self = new XML::Parser::Expat(
24				Namespaces => 1,
25				NoExpand => 1,
26				ParseParamEnt => 0,
27				ErrorContext => 2,
28				);
29				$self->setHandlers(
30				Start => \&_on_start,
31				Char => \&_on_char,
32				End => \&_on_end,
33				);
34
35				$self->{blog} = {
36				source => $source,
37				meta => {},
38				cats => {},
39				posts => [],
40				filter => \%filter,
41				};
42				$self->{current_context} = undef;
43				$self->{this_post} = undef;
44				$self->{this_cat} = undef;
45
46				$self->{count} = 0;
47				$self->{from} = (defined $filter{from})? $filter{from}:0;
48				$self->{to} = (defined $filter{to})? $filter{to}:undef;
49
50				eval{ $self->parsefile($self->{blog}{source}) };
51				carp $@ if $@;
52
53				bless $self, $class;
54				}
55
56				my %context = (
57				blog_root => '/blog',
58				blog_title => '/blog/title',
59				blog_subtitle => '/blog/sub-title',
60				blog_author => '/blog/author',
61
62				cat => '/blog/categories/category',
63				cat_title => '/blog/categories/category/title',
64
65				post => '/blog/posts/post',
66				post_title => '/blog/posts/post/title',
67				post_content => '/blog/posts/post/content',
68				post_cat => '/blog/posts/post/categories/category',
69				);
70
71				sub _on_start {
72				my ($self, $element, %att) = @_;
73				$self->{current_context} = '/'.join('/', $self->context, $element);
74
75				if ($self->{current_context} eq $context{post}
76				and $att{approved} eq 'true') {
77
78				$self->{count}++;
79				if ($self->{count} < $self->{from}) {
80				return;
81				}
82
83				if (defined $self->{to} and $self->{count} > $self->{to}) {
84				$self->finish();
85				return;
86				}
87
88				if ($self->{blog}{filter}{post}
89				and $att{id} ne $self->{blog}{filter}{post}) {
90				return;
91				}
92				$att{'date-created'} = str2time($att{'date-created'});
93				if ($self->{blog}{filter}{before}
94				and $att{'date-created'} > $self->{blog}{filter}{before}) {
95				return;
96				}
97				if ($self->{blog}{filter}{after}
98				and $att{'date-created'} <= $self->{blog}{filter}{after}) {
99				$self->finish();
100				return;
101				}
102
103				$self->{this_post} = {
104				id => $att{id},
105				url => $att{'post-url'},
106				time => $att{'date-created'},
107				title => '',
108				content => '',
109				cats => [],
110				};
111				}
112				elsif ($self->{current_context} eq $context{cat}
113				and $att{approved} eq 'true') {
114
115				$self->{this_cat} = {
116				id => $att{id},
117				parent => $att{parentref},
118				title => '',
119				};
120				}
121				elsif ($self->{current_context} eq $context{blog_author}) {
122				$self->{blog}{meta}{author} = $att{name};
123				$self->{blog}{meta}{email} = $att{email};
124				}
125				elsif ($self->{current_context} eq $context{blog_root}) {
126				$self->{blog}{meta}{url} = $att{'root-url'};
127				$self->{blog}{meta}{time} = str2time($att{'date-created'});
128				}
129				elsif ($self->{current_context} eq $context{post_cat}
130				and $self->{this_post}) {
131				push @{$self->{this_post}{cats}}, $att{ref};
132				}
133				}
134
135				sub _on_char {
136				my ($self, $char) = @_;
137
138				_trim($char);
139
140				if ($self->{current_context} eq $context{post_title}
141				and $self->{this_post}) {
142				$self->{this_post}{title} .= (($self->{this_post}{title} and $char)? ' ':'').$char;
143				}
144				elsif ($self->{current_context} eq $context{post_content}
145				and $self->{this_post}) {
146				$self->{this_post}{content} .= (($self->{this_post}{content} and $char)? "\n":'').$char;
147				}
148				elsif ($self->{current_context} eq $context{cat_title}
149				and $self->{this_cat}) {
150				$self->{this_cat}{title} .= (($self->{this_cat}{title} and $char)? ' ':'').$char;
151				}
152				elsif ($self->{current_context} eq $context{blog_title}) {
153				$self->{blog}{meta}{title} .= (($self->{blog}{meta}{title} and $char)? ' ':'').$char;
154				}
155				elsif ($self->{current_context} eq $context{blog_subtitle}) {
156				$self->{blog}{meta}{subtitle} .= (($self->{blog}{meta}{subtitle} and $char)? ' ':'').$char;
157				}
158				}
159
160				sub _on_end {
161				my ($self, $element) = @_;
162				$self->{current_context} = '/'.join('/', $self->context, $element);
163
164				if ($self->{current_context} eq $context{post}
165				and $self->{this_post}) {
166				if (defined $self->{blog}{filter}{cat}
167				and !grep /$self->{blog}{filter}{cat}/, @{$self->{this_post}{cats}}) {
168				return;
169				}
170				push @{$self->{blog}{posts}}, $self->{this_post};
171
172				undef $self->{this_post};
173				}
174				elsif ($self->{current_context} eq $context{cat}
175				and $self->{this_cat}) {
176				$self->{blog}{cats}{$self->{this_cat}->{id}} = $self->{this_cat};
177
178				undef $self->{this_cat};
179				}
180				}
181
182				sub posts {
183				my ($self) = @_;
184				return $self->{blog}{posts};
185				}
186
187				sub cats {
188				my ($self) = @_;
189				return $self->{blog}{cats};
190				}
191
192				sub meta {
193				my ($self) = @_;
194				return $self->{blog}{meta};
195				}
196
197				sub _trim {
198				$_[0] =~ s/(^\s+\|\s+$)//g;
199				}
200
201				1;
202
203				=pod
204
205				=head1 NAME
206
207				Blog::BlogML::Reader - read data from a BlogML formatted document
208
209				=head1 SYNOPSIS
210
211				use Blog::BlogML::Reader;
212
213				my $reader = new Blog::BlogML::Reader('some/file/blogml.xml');
214				my @posts = @{$reader->posts()};
215
216				=head1 DEPENDENCIES
217
218				=over
219
220				=item * XML::Parser::Expat
221
222				This module uses C to parse the XML in the BlogML source file.
223
224				=item * HTTP::Date
225
226				This module uses C to transform date strings into sortable timestamps.
227
228				=back
229
230				=head1 EXPORT
231
232				None.
233
234				=head1 INTERFACE
235
236				=head2 filters
237
238				When creating a new reader, the default bahaviour is to parse and return every post in the entire BlogML file. This can be inefficient if, for example, you have ten-thousand posts and only want the first one. For this reason it is recommended that you give the parser some limits. This is done by adding "filters" to the constructor call. Note that once a reader is constructed it's filters cannot be modified; you must create a new reader if you wish to apply new filters.
239
240				=over 3
241
242				=item * to=>I
243
244				Limits the parser to only the first I posts (starting from the top of the file and working down) in the BlogML file; that is the parser stops working after I posts. Note that the count does not apply to posts that have an "approved" attribute of false: unapproved posts are always invisible to the parser.
245
246				$reader = new Blog::BlogML::Reader('blogml.xml', to=>3);
247
248				=item * from=>I
249
250				The parser will only start working at the I item in the BlogML file. Note that this can optionally be used in conjunction with the C filter to limit the parser to a range of posts.
251
252				$reader = new Blog::BlogML::Reader('blogml.xml', from=>11, to=>20);
253
254				=item * before=>I
255
256				Limits the parser to posts with a creation-date before (older than) the given I. The date format can either be a string that complies with the HTTP date protocol or a number representing the Unix time.
257
258				$reader = new Blog::BlogML::Reader('blogml.xml', before=>"2006-05-01T00:00:00");
259
260				=item * after=>I
261
262				Limits the parser to posts with a creation-date on or after (younger than) the given I. Can optionally be used in conjunction with the C filter to limit the parser to a range of dates. The date format can either be a string that complies with the HTTP date protocol or a number representing the Unix time.
263
264				$reader = new Blog::BlogML::Reader('blogml.xml', after=>1154979460);
265
266				=item * id=>I
267
268				If you know the exact post you want, why force the parser to work on the entire file?
269
270				$reader = new Blog::BlogML::Reader('blogml.xml', id=>123);
271
272				=item * cat=>I
273
274				Limits the parser to only the posts that belong to the category with the given id.
275
276				$reader = new Blog::BlogML::Reader('blogml.xml', cat=>'123');
277
278				=back
279
280				=head2 methods
281
282				=over 3
283
284				=item * meta()
285
286				Returns a HASHREF of meta information about the blog.
287
288				my $meta = $reader->meta();
289				print $meta->{title};
290				print $meta->{author}, $meta->{email};
291
292				=item * posts()
293
294				Returns an ARRAYREF of blog posts (in the same order as they are in the file). The number of posts returned will be limited by any filters applied when the reader was constructed.
295
296				my $posts = $reader->posts();
297				print $posts->[0]{title};
298
299				=item * cats()
300
301				Returns a HASHREF of blog categories (keys are the category id).
302
303				my $cats = $reader->cats();
304				print $cats->{'123'}{title};
305
306				=back
307
308				=head1 EXAMPLE
309
310				use Blog::BlogML::Reader;
311				use Date::Format;
312
313				# parse all posts in the month of April
314				my $reader = new Blog::BlogML::Reader('t/example.xml',
315				after => "2006-04-01T00:00:00",
316				before => "2006-05-01T00:00:00",
317				);
318
319				my $posts = $reader->posts();
320				my $meta = $reader->meta();
321				my $cats = $reader->cats();
322
323				print " ", $meta->{title}, " ";
324				print $meta->{author};
325
326				foreach my $post (@$posts) {
327				print " ", $post->{title}, " ";
328
329				# post dates are returned in Unix time, so format as desired
330				print "posted:", time2str("%o of %B %Y", $post->{time});
331
332				print " categories:",
333				join(", ", map{$cats->{$_}{title}} @{$post->{cats}});
334
335				print " link:", $post->{url};
336
337				print $post->{content}, " ";
338				}
339
340				=head1 SEE ALSO
341
342				The website L has the latest documentation on the BlogML standard. Note that the reference document "t/example.xml" included with this distribution illustrates the expected format of BlogML documents used by this module.
343
344				=head1 AUTHOR
345
346				Michael Mathews, Emmathews@cpan.orgE
347
348				=head1 COPYRIGHT AND LICENSE
349
350				Copyright (C) 2006 by Michael Mathews
351
352				This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.6 or, at your option, any later version of Perl 5 you may have available.
353
354				=cut
355
356				__END__

File Coverage

", $meta->{title}, "

", $post->{title}, "