File Coverage

blib/lib/XML/Filter/Reindent.pm
Criterion Covered Total %
statement 71 83 85.5
branch 18 34 52.9
condition 1 2 50.0
subroutine 13 13 100.0
pod 0 10 0.0
total 103 142 72.5


line stmt bran cond sub pod time code
1             package XML::Filter::Reindent;
2 1     1   9018 use strict;
  1         3  
  1         39  
3 1     1   862 use XML::Filter::DetectWS;
  1         7917  
  1         45  
4              
5 1     1   11 use vars qw{ $VERSION @ISA };
  1         7  
  1         1250  
6             $VERSION = '0.03';
7             @ISA = qw{ XML::Filter::DetectWS };
8              
9 4     4 0 15 sub MAYBE (%) { 2 }
10              
11             sub new
12             {
13 2     2 0 697 my $class = shift;
14 2         17 my $self = $class->SUPER::new (@_);
15              
16             # Use one space per indent level (by default)
17 2 50       586 $self->{Tab} = " " unless defined $self->{Tab};
18              
19             # Note that this is a PerlSAX filter so we use the XML newline ("\x0A"),
20             # not the Perl output newline ("\n"), by default.
21 2 50       7 $self->{Newline} = "\x0A" unless defined $self->{Newline};
22              
23 2         4 $self;
24             }
25              
26             # Indent the element if its parent element says so
27             sub indent_element
28             {
29 2     2 0 4 my ($self, $event, $parent_says_indent) = @_;
30 2         6 return $parent_says_indent;
31             }
32              
33             # Always indent children unless element (or its ancestor) has
34             # xml:space="preserve" attribute
35             sub indent_children
36             {
37 2     2 0 4 my ($self, $event) = @_;
38 2 50       8 return $event->{PreserveWS} ? 0 : MAYBE;
39             }
40              
41             sub start_element
42             {
43 2     2 0 15 my ($self, $event) = @_;
44              
45 2         6 my $parent = $self->{ParentStack}->[-1];
46 2         6 my $level = $self->{Level}++;
47 2         10 $self->SUPER::start_element ($event);
48              
49 2 100       11 my $parent_says_indent = $parent->{IndentChildren} ? 1 : 0;
50             # init with 1 if parent says MAYBE
51 2 100       6 $event->{Indent} = $self->indent_element ($event, $parent_says_indent) ?
52             $level : undef;
53              
54 2         6 $event->{IndentChildren} = $self->indent_children ($event);
55             }
56              
57             sub end_element
58             {
59 2     2 0 15 my ($self, $event) = @_;
60 2         4 my $start_element = $self->{ParentStack}->[-1];
61              
62 2 50       6 if ($start_element->{IndentChildren} == MAYBE)
63             {
64 2         3 my $q = $self->{EventQ};
65 2         2 my $prev = $q->[-1];
66              
67 2 100       8 if ($prev == $start_element)
    50          
68             {
69             # End tag follows start tag: compress tag
70 1         4 $start_element->{Compress} = 1;
71 1         2 $event->{Compress} = 1;
72             #?? could detect if it contains only ignorable_ws
73             }
74             elsif ($prev->{EventType} eq 'characters')
75             {
76 0 0       0 if ($q->[-2] == $start_element)
77             {
78             # Element has only one child, a text node.
79             # Print element as: text here
80 0         0 delete $prev->{Indent};
81 0         0 $start_element->{IndentChildren} = 0;
82             }
83             }
84             }
85              
86 2         3 my $level = --$self->{Level};
87 2 50       6 $event->{Indent} = $start_element->{IndentChildren} ? $level : undef;
88              
89 2         3 my $compress = $start_element->{Compress};
90 2 100       7 if ($compress)
91             {
92 1         1 $event->{Compress} = $compress;
93 1         17 delete $event->{Indent};
94             }
95              
96 2         10 $self->SUPER::end_element ($event);
97             }
98              
99             sub end_document
100             {
101 3     3 0 20 my ($self, $event) = @_;
102              
103 3   50     10 $self->push_event ('end_document', $event || {});
104 3         5 $self->flush (0); # send remaining events
105             }
106              
107             sub push_event
108             {
109 10     10 0 1247 my ($self, $type, $event) = @_;
110              
111 10         17 $event->{EventType} = $type;
112 10 50       33 if ($type =~ /^(characters|comment|processing_instruction|entity_reference|cdata)$/)
113             {
114 0 0       0 my $indent_kids = $self->{ParentStack}->[-1]->{IndentChildren} ? 1 : 0;
115 0 0       0 $event->{Indent} = $indent_kids ? $self->{Level} : undef;
116             }
117              
118 10         12 my $q = $self->{EventQ};
119 10         10 push @$q, $event;
120              
121 10         23 $self->flush (4); # keep 4 events on the stack (maybe 3 is enough)
122             }
123              
124             sub flush
125             {
126 13     13 0 14 my ($self, $keep) = @_;
127 13         14 my $q = $self->{EventQ};
128              
129 13         13 my $result;
130              
131 13         27 while (@$q > $keep)
132             {
133 10         10 my $head = $q->[0];
134             # print "head=" . $head->{EventType} . " indent=" . $head->{Indent} . "\n";
135              
136 10 50       40 if ($head->{EventType} =~ /ws|ignorable/)
137             {
138 0         0 my $next = $q->[1];
139 0         0 my $indent = $next->{Indent};
140              
141 0 0       0 if (defined $indent) # fix existing indent
142             {
143 0         0 $head->{Data} = $self->{Newline} . ($self->{Tab} x $indent);
144 0         0 $result = $self->send (2);
145             }
146             else # remove existing indent
147             {
148 0         0 shift @$q;
149 0         0 $result = $self->send (1);
150             }
151             #?? remove keys: Indent, ...
152             }
153             else
154             {
155 10         13 my $indent = $head->{Indent};
156              
157 10 100       14 if (defined $indent) # insert indent
158             {
159 2         10 unshift @$q, { EventType => 'ws',
160             Data => $self->{Newline} . ($self->{Tab} x $indent) };
161 2         5 $result = $self->send (2);
162             }
163             else # no indent - leave as is
164             {
165 8         19 $result = $self->send (1);
166             }
167             }
168             }
169 13         31 return $result;
170             }
171              
172             sub send
173             {
174 10     10 0 15 my ($self, $i) = @_;
175            
176 10         11 my $q = $self->{EventQ};
177              
178 10         6 my $result;
179 10         19 while ($i--)
180             {
181 12         24 my $event = shift @$q;
182 12         18 my $type = $event->{EventType};
183 12         14 delete $event->{EventType};
184              
185             #print "TYPE=$type " . join(",", map { "$_=" . $event->{$_} } keys %$event) . "\n";
186 12         318 $result = $self->{Callback}->{$type}->($event);
187             }
188 10         91 return $result;
189             }
190              
191             1; # package return code
192              
193             =head1 NAME
194              
195             XML::Filter::Reindent - Reformats whitespace for pretty printing XML
196              
197             =head1 SYNOPSIS
198              
199             use XML::Handler::Composer;
200             use XML::Filter::Reindent;
201              
202             my $composer = new XML::Handler::Composer (%OPTIONS);
203             my $indent = new XML::Filter::Reindent (Handler => $composer, %OPTIONS);
204              
205             =head1 DESCRIPTION
206              
207             XML::Filter::Reindent is a sub class of L.
208              
209             XML::Filter::Reindent can be used as a PerlSAX filter to reformat an
210             XML document before sending it to a PerlSAX handler that prints it
211             (like L.)
212              
213             Like L, it detects ignorable whitespace and blocks of
214             whitespace characters in certain places. It uses this information and
215             information supplied by the user to determine where whitespace may be
216             modified, deleted or inserted.
217             Based on the indent settings, it then modifies, inserts and deletes characters
218             and ignorable_whitespace events accordingly.
219              
220             This is just a first stab at the implementation.
221             It may be buggy and may change completely!
222              
223             =head1 Constructor Options
224              
225             =over 4
226              
227             =item * Handler
228              
229             The PerlSAX handler (or filter) that will receive the PerlSAX events from this
230             filter.
231              
232             =item * Tab (Default: one space)
233              
234             The number of spaces per indent level for elements etc. in document content.
235              
236             =item * Newline (Default: "\x0A")
237              
238             The newline to use when re-indenting.
239             The default is the internal newline used by L, L etc.,
240             and should be fine when used in combination with L.
241              
242             =back
243              
244             =head1 $self->indent_children ($start_element_event)
245              
246             This method determines whether children of a certain element
247             may be reformatted.
248             The default implementation checks the PreserveWS parameter of the specified
249             start_element event and returns 0 if it is set or MAYBE otherwise.
250             The value MAYBE (2) indicates that further investigation is needed, e.g.
251             by examining the element contents. A value of 1 means yes, indent the
252             child nodes, no further investigation is needed.
253              
254             NOTE: the PreserveWS parameter is set by the parent class,
255             L, when the element or one of its ancestors has
256             the attribute xml:space="preserve".
257              
258             Override this method to tweak the behavior of this class.
259              
260             =head1 $self->indent_element ($start_element_event, $parent_says_indent)
261              
262             This method determines whether a certain element may be re-indented.
263             The default implementation returns the value of the $parent_says_indent
264             parameter, which was set to the value returned by indent_children for the
265             parent element. In other words, the element will be re-indented if the
266             parent element allows it.
267              
268             Override this method to tweak the behavior of this class.
269             I'm not sure how useful this hook is. Please provide feedback!
270              
271             =head1 Current Implementation
272              
273             The current implementation puts all incoming Perl SAX events in a queue for
274             further processing. When determining which nodes should be re-indented,
275             it sometimes needs information from previous events, hence the use of the
276             queue.
277              
278             The parameter (Compress => 1) is added to
279             matching start_element and end_element events with no events in between
280             This indicates to an XML printer that a compressed notation can be used,
281             e.g .
282              
283             If an element allows reformatting of its contents (xml:space="preserve" was
284             not active and indent_children returned MAYBE), the element
285             contents will be reformatted unless it only has one child node and that
286             child is a regular text node (characters event.)
287             In that case, the element will be printed as text contents.
288              
289             If you want element nodes with just one text child to be reindented as well,
290             simply override indent_children to return 1 instead of MAYBE (2.)
291              
292             This behavior may be changed or extended in the future.
293              
294             =head1 CAVEATS
295              
296             This code is highly experimental!
297             It has not been tested well and the API may change.
298              
299             The code that detects blocks of whitespace at potential indent positions
300             may need some work.
301              
302             =head1 AUTHOR
303              
304             Original Author is Enno Derksen.
305              
306             Send bug reports, hints, tips, suggestions to T.J. Mather at
307             >.
308              
309             =cut