File Coverage

blib/lib/Data/Transform/SAXBuilder.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Data::Transform::SAXBuilder;
2 2     2   60048 use strict;
  2         6  
  2         80  
3 2     2   12 use warnings;
  2         4  
  2         103  
4              
5             our $VERSION = '0.05';
6 2     2   12 use base qw(Data::Transform);
  2         4  
  2         2002  
7              
8 2     2   19934 use XML::LibXML;
  0            
  0            
9             use XML::SAX::IncrementalBuilder::LibXML;
10              
11             sub BUFFER () { 0 }   # slot index: array reference holding pending input
12             sub PARSER () { 1 }   # slot index: the XML::LibXML parser instance
13             sub HANDLER () { 2 }  # slot index: the SAX handler object
14              
15             =pod
16              
17             =head1 NAME
18              
19             Data::Transform::SAXBuilder - A filter for parsing XML with L<XML::LibXML>
20              
21             =head1 SYNOPSIS
22              
23             use Data::Transform::SAXBuilder;
24             my $filter = Data::Transform::SAXBuilder->new();
25              
26             my $wheel = POE::Wheel::ReadWrite->new(
27             Filter => $filter,
28             InputEvent => 'input_event',
29             );
30              
31             =head1 DESCRIPTION
32              
33             L<Data::Transform::SAXBuilder> is used to turn an XML file or stream into
34             a (series of) DOM tree (fragments). It uses the L<XML::LibXML> modules to do
35             the parsing and for the building of the DOM tree. This gives you very good
36             support for most (all?) XML features, and allows you to use a host of extra
37             modules available for use with L<XML::LibXML>.
38              
39             To make the potentially time-consuming parsing process not interfere with
40             event-driven environments (like L<POE>), SAXBuilder will return a series
41             of document fragments instead of the entire DOM tree.
42              
43             There are two modes:
44              
45             =over 2
46              
47             =item
48              
49             The first builds the entire DOM tree, and just gives you pointers into the
50             tree at various points. This is useful if you know the XML document you are
51             parsing is not too big, and you want to be able to run XPath queries on the
52             entire tree.
53              
54             =item
55              
56             The second mode splits up the DOM tree into document fragments and returns
57             each separately. You could still build a complete DOM tree from these
58             fragments. Sometimes that isn't possible, because you're receiving a possibly
59             endless tree (for example when processing an XMPP stream).
60              
61             =back
62              
63             You can control how often you get events by specifying how deep into
64             the tree you want to receive notifications. This also controls the size of
65             the document fragments you'll receive when you're using the second,
66             'detached' mode.
67              
68             =head1 PUBLIC METHODS
69              
70             Data::Transform::SAXBuilder follows the L<Data::Transform> API.
71             This documentation only covers things that are special to
72             Data::Transform::SAXBuilder.
73              
74             =cut
75              
76             =head2 new
77              
78             The constructor accepts two arguments which are both optional:
79              
80             =over 4
81              
82             =item buffer
83              
84             A string that is XML waiting to be parsed
85              
86             =item handler
87              
88             A SAX Handler that builds your data structures from SAX events. The
89             default is L<XML::SAX::IncrementalBuilder::LibXML>, which creates DOM tree
90             fragments. But you could create any sort of object/structure you like.
91              
92             =back
93              
94             =cut
95              
# Constructor.
#
# Optional named arguments:
#   buffer  - a string of XML waiting to be parsed; a false or absent value
#             leaves the pending buffer empty.
#   handler - a SAX handler object; when undefined, a new
#             XML::SAX::IncrementalBuilder::LibXML instance is created.
#
# Returns an array-based object blessed into $class, laid out as
# [ pending-buffer array reference, XML::LibXML parser, handler ].
sub new {
    my ($class, %args) = @_;

    my @pending;
    push @pending, $args{buffer} if $args{buffer};

    my $handler = $args{handler};
    $handler = XML::SAX::IncrementalBuilder::LibXML->new()
        unless defined $handler;

    return bless [
        \@pending,                               # BUFFER
        XML::LibXML->new(Handler => $handler),   # PARSER
        $handler,                                # HANDLER
    ], $class;
}
116              
# Builds a new filter of the same class as the invocant. The new object gets
# an empty pending buffer, a clone of this filter's handler, and a fresh
# XML::LibXML parser attached to that cloned handler.
sub clone {
    my ($self) = @_;

    my $handler_copy = $self->[HANDLER]->clone;
    my @slots = (
        [],                                           # BUFFER
        XML::LibXML->new(Handler => $handler_copy),   # PARSER
        $handler_copy,                                # HANDLER
    );

    return bless \@slots, ref $self;
}
129              
# Returns a shallow copy (new array reference) of the pending buffer, or
# undef when the buffer holds nothing.
sub get_pending {
    my ($self) = @_;

    my $pending = $self->[BUFFER];
    return undef unless @$pending > 0;
    return [ @$pending ];
}
136              
# Destructor: drops the references held in the object's slots.
#
# The slots are cleared by assigning undef rather than by calling delete()
# on array elements, which perlfunc strongly discourages. The release order
# (buffer, parser, handler) is unchanged.
sub DESTROY {
    my ($self) = @_;

    $self->[BUFFER]  = undef;
    $self->[PARSER]  = undef;
    $self->[HANDLER] = undef;

    return;
}
144              
145             =head2 reset_parser
146              
147             Resets the filter so it is ready to parse a new document from the beginning.
148              
149             =cut
150              
# Prepares the filter for a new document: empties the pending buffer, calls
# reset() on the handler, and replaces the parser with a fresh XML::LibXML
# instance bound to the same handler.
#
# The final assignment is deliberately the last statement so that the value
# it yields (the new parser) remains the sub's implicit return value.
sub reset_parser {
    my ($self) = @_;

    $self->[BUFFER] = [];
    $self->[HANDLER]->reset;

    # Terminated with a semicolon; the original ended this statement with a
    # stray comma.
    $self->[PARSER] = XML::LibXML->new(Handler => $self->[HANDLER]);
}
158              
# Feeds a chunk of raw data (if any) to the parser and hands back at most
# one finished node from the handler.
#
# Parameters:
#   $newdata - chunk of XML text to parse; when undefined no parsing is done
#              and only already-finished nodes are looked at.
#
# Returns:
#   - a Data::Transform::Meta::Error object when parsing the chunk fails,
#   - the next node from the handler when it reports finished nodes,
#   - nothing (bare return) otherwise.
sub _handle_get_data {
    my ($self, $newdata) = @_;

    if (defined $newdata) {
        # Detect failure through the eval's own return value instead of
        # testing $@ afterwards: $@ can be reset before it is inspected
        # (e.g. by a destructor running during unwinding), which would make
        # a parse error go unnoticed.
        my $parsed_ok = eval {
            $self->[PARSER]->parse_chunk($newdata);
            1;
        };
        if (!$parsed_ok) {
            my $error = $@ || 'Unknown error while parsing XML chunk';
            return Data::Transform::Meta::Error->new($error);
        }

        # The handler flags the end of a document via its EOD entry:
        # terminate the chunked parse, reset for the next document, and
        # clear the flag.
        if (defined $self->[HANDLER]->{'EOD'}) {
            $self->[PARSER]->parse_chunk("", 1);
            $self->reset_parser;
            delete $self->[HANDLER]->{'EOD'};
        }
    }

    if ($self->[HANDLER]->finished_nodes) {
        # Assign first so get_node is always called in scalar context.
        my $node = $self->[HANDLER]->get_node;
        return $node;
    }

    return;
}
181              
# Serialises one outgoing item.
#
# Parameters:
#   $node - either a reference (on which toString is called) or a plain
#           scalar that is passed through untouched.
#
# Returns the string form of $node.
sub _handle_put_data {
    my ($self, $node) = @_;

    # Non-references are already in their final form.
    return $node unless ref $node;

    # scalar() keeps toString in scalar context, as a plain assignment would.
    return scalar $node->toString;
}
194              
195             1;
196              
197             __END__