File Coverage

blib/lib/XML/Beautify.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package XML::Beautify;
2             #require 5.6.0;
3             require 5.005;
4             $XML::Beautify::VERSION = 0.05;
5              
6 1     1   684 use strict;
  1         1  
  1         33  
7             #use warnings;
8 1     1   938 use Log::AndError;
  1         2424  
  1         26  
9 1     1   6 use Log::AndError::Constants qw(:all);
  1         4  
  1         102  
10 1     1   1535 use XML::Parser::Expat;
  0            
  0            
11              
12             ##############################################################################
13             ## Variables
14             ##############################################################################
               # Option values exported on request; _char() compares rem_ws() against FULL.
15             use constant YES => 1;
16             use constant NO => 0;
17             use constant FULL => 2;
18              
19             @XML::Beautify::EXPORT_OK = qw(YES NO FULL);
20             %XML::Beautify::EXPORT_TAGS = (
21             'const' => [ @XML::Beautify::EXPORT_OK ],
22             );
23              
               # File-scoped state shared by beautify() and the Expat handler subs:
               #   $ref_self    - the object whose beautify() call is running
               #   $cleanXMLstr - output accumulated by append_str()
               #   $level       - nesting counter used to repeat the indent string
               #   $last_handle - name of the handler sub that ran most recently
24             my($ref_self, $cleanXMLstr, $level, $last_handle);
               # Defaults copied into every object by new() and used as fallbacks by the accessors.
               # ORIG_INDENT is -1 because _start() increments $level before indenting,
               # which leaves the outermost element at level 0.
25             my %Deflt = (
26             'INDENT_STR' => "\t",
27             'ORIG_INDENT' => -1,
28             'REM_CR' => NO,
29             'REM_WS' => NO,
30             );
31              
32             ##############################################################################
33             ## Documentation
34             ##############################################################################
35              
36             =head1 NAME
37              
38             XML::Beautify - Beautifies XML output from XML::Writer (soon to do any XML).
39              
40             =head1 SYNOPSIS
41              
42             B<WARNING:> This is Alpha Software. The interface is still subject to change. Oh, and keep backups, Doh!!!
43             B<NOTE:> This does not do well with already indented and formatted XML. I am still working on that. I am currently just reprinting the original string with C<< $parser->original_string >>. I need to fix this by actually rewriting the XML from the element level on down using something like XML::Writer but adding in the whitespace. See the warning above, Doh!
44              
45             use XML::Beautify qw(:const); # imports three constants (YES,NO,FULL)
46             $obj_ref = XML::Beautify->new();
47             $cleanXML = $obj_ref->beautify(\$XMLstr);
48            
49             B<NOTE:> This uses Log::AndError for error handling and logging functions.
50             Log::AndError is this class's parent (via @ISA), so anything you can do there you can do here.
51              
52             =head1 DESCRIPTION
53              
54             Beautifies (converts to tree format) XML output from XML::Writer (soon to do any XML) to facilitate debugging XML or XML::Writer output.
55              
56             =head1 METHODS
57              
58             =cut
59              
60             DESTROY {
               # Destructor: takes the object off @_ and does nothing else.
61             my $self = shift;
62             }
63              
64             # NO EXPORTS NEEDED
65             # We're a good little module.
               # Log::AndError is the parent class. The logger/error/error_code/error_msg/
               # service_name/debug_level methods called in this file are not defined in the
               # visible part of it and presumably come from this parent - confirm.
66             @XML::Beautify::ISA = qw(Log::AndError);
67             ##############################################################################
68             ## constructor
69             ##############################################################################
               # new(%options): builds an XML::Beautify object.
               # May be called on a class name or on an existing object (ref($proto) || $proto).
               # Every %Deflt entry is copied in first, then each key/value pair passed by the
               # caller is stored on top, so caller-supplied keys win. Keys are not validated.
               # Returns the blessed hash reference.
70             sub new {
71             my $proto = shift;
72             my $class = ref($proto) || $proto;
73             my $self = {};
74             bless($self, $class);
75              
76             # This loads $self up with all of the default options.
77             foreach my $nomen (keys(%Deflt)){
78             $self->{$nomen} = $Deflt{$nomen};
79             }
80             # This overwrites any default values in $self with stuff passed in.
81             my %Cfg = @_;
82             @{$self}{keys(%Cfg)} = values(%Cfg);
83             $self->service_name('XML-Beautify');
               # 'DEBUG' has no entry in %Deflt, so this passes undef unless the caller supplied DEBUG.
84             $self->debug_level($self->{'DEBUG'});
85             return($self);
86             }
87              
88              
89             ##############################################################################
90             # Application subroutines
91             ##############################################################################
92              
93             #################################################################################
94             sub beautify {
95             =pod
96              
97             =head2 beautify()
98              
99             C<beautify()>
100              
101             =over 2
102              
103             =item Usage:
104              
105             $cleanXML = $obj_ref->beautify(\$XML);
106              
107             =item Purpose:
108              
109             Parses the XML string referenced by the argument with XML::Parser::Expat and rebuilds it one parser event at a time, indenting start and end tags by nesting level.
110              
111             =item Returns:
112              
113             In list context ($obj_ref->error(), $cleanXML); in scalar context just $cleanXML, the rebuilt XML string. A parse failure is re-thrown after the parser has been released.
114              
115             =back
116              
117             =cut
118             my $self = shift;
119             $self->logger(DEBUG4, 'beautify('.join(',',@_).')');
120             my ($ref_XMLstr) = ($_[0]); # reference to the XML string; dereferenced when parsing
121             $self->error(-1, 'OK');
122              
123             my $expat = XML::Parser::Expat->new(
124             ParseParamEnt => undef,
125             NoExpand => 1,
126             );
127             $expat->setHandlers( # one handler sub per Expat event; each appends to $cleanXMLstr
128             'Doctype' => \&_doctype,
129             'XMLDecl' => \&_decl,
130             'Start' => \&_start,
131             'End' => \&_end,
132             'Char' => \&_char,
133             'Proc' => \&_proc,
134             'Comment' => \&_comment,
135             'CdataStart' => \&_cDataStart,
136             'CdataEnd' => \&_cDataEnd,
137             'Default' => \&_default,
138             'Unparsed' => \&_unparsed,
139             'Notation' => \&_notation,
140             'ExternEnt' => \&_extEnt,
141             'Entity' => \&_ent,
142             'Element' => \&_element,
143             'Attlist' => \&_attriblist,
144             );
145              
146             ###HERE Need to find a better way to handle this and not use static globals.
147             ($ref_self, $cleanXMLstr, $level, $last_handle) = ($self, undef, $self->orig_indent, undef); # handlers get no invocant, so they reach the object and output buffer through these file-scoped variables
148             ###HERE Using these perhaps???
149             #context
150             #Returns a list of element names that represent open elements, with the last one being the innermost. Inside start and end tag handlers, this will be the tag of the parent element.
151              
152             #current_element
153             #Returns the name of the innermost currently opened element. Inside start or end handlers, returns the parent of the element associated with those tags.
154              
155             #in_element(NAME)
156             #Returns true if NAME is equal to the name of the innermost currently opened element. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the generate_ns_name method to create the NAME argument.
157              
158              
159             eval { $expat->parse($$ref_XMLstr); 1 } or do { my $err = $@; $expat->release(); die $err; }; # a failed parse must still release the parser (it holds circular references); the error is then re-thrown unchanged
160              
161             ###HERE Need to find a better way to handle this and not use static globals.
162             # Reset the values
163             ($ref_self, $level, $last_handle) = ($self, $self->orig_indent, undef); # $cleanXMLstr is deliberately left alone: it is the return value below
164             $expat->release();
165             $self->logger(DEBUG4, 'RETURN[beautify()]: '.$self->error_code().'/'.$self->error_msg());
166             wantarray ? return($self->error(), $cleanXMLstr) : return($cleanXMLstr);
167             }
168              
169             ##############################################################################
170             sub indent_str {
171             =pod
172              
173             =head2 indent_str()
174              
175             C<indent_str()>
176              
177             =over 2
178              
179             =item Usage:
180              
181             $indent_str = $obj_ref->indent_str(); # To retrieve the current value
182             or
183             $obj_ref->indent_str("\t"); # To set a new value
184              
185             =item Purpose:
186              
187             Sets or gets the string that is repeated once per nesting level to indent a tag. An empty string is allowed and turns indentation off.
188              
189             =item Returns:
190              
191             ($indent_str), the value in effect after the call.
192              
193             =back
194              
195             =cut
196             my $self = shift;
197             $self->logger(DEBUG4, 'indent_str('.join(',',@_).')');
198             $self->error(-1, 'OK');
199             my $key = 'INDENT_STR';
200             if(!defined($self->{$key})){ # only a missing value falls back to the default; '' and '0' are legitimate settings and are kept
201             $self->{$key} = $Deflt{$key};
202             }
203             if(@_){ # any argument, even a false one, is stored as the new value
204             $self->{$key} = $_[0];
205             }
206              
207             $self->logger(DEBUG4, 'RETURN[indent_str()]: '.$self->error_code().'/'.$self->error_msg());
208             return($self->{$key});
209             }
210              
211              
212             ##############################################################################
213             sub rem_cr {
214             =pod
215              
216             =head2 rem_cr()
217              
218             C<rem_cr()>
219              
220             =over 2
221              
222             =item Usage:
223              
224             $rem_cr = $obj_ref->rem_cr(); # To retrieve the current value
225             or
226             $obj_ref->rem_cr(1); # To set a new value
227              
228             =item Purpose:
229              
230             Sets or gets the option to clean CR from XML before re-formatting.
231              
232             =item Returns:
233              
234             (true) if set.
235              
236             =back
237              
238             =cut
239             my $self = shift;
240             $self->logger(DEBUG4, 'rem_cr('.join(',',@_).')');
241             $self->error(-1, 'OK');
242             my $key = 'REM_CR';
243             if(!defined($self->{$key})){ # fall back to the default only when nothing is stored; same test as the other accessors
244             $self->{$key} = $Deflt{$key};
245             }
246             if(@_){ # any argument, even a false one, is stored as the new value
247             $self->{$key} = $_[0];
248             }
249              
250             $self->logger(DEBUG4, 'RETURN[rem_cr()]: '.$self->error_code().'/'.$self->error_msg());
251             return($self->{$key});
252             }
253              
254             ##############################################################################
255             sub rem_ws {
256             =pod
257              
258             =head2 rem_ws()
259              
260             C<rem_ws()>
261              
262             =over 2
263              
264             =item Usage:
265              
266             $rem_ws = $obj_ref->rem_ws(); # To retrieve the current value
267             or
268             $obj_ref->rem_ws(1); # To set a new value
269              
270             =item Purpose:
271              
272             Sets or gets the option to clean whitespace (Outside of char and cData) from XML before re-formatting. B<NOTE:> This will remove blank whitespace from char sections if that is all that the char data contains.
273              
274             =item Returns:
275              
276             (true) if set.
277              
278             =back
279              
280             =cut
281             my $self = shift;
282             $self->logger(DEBUG4, 'rem_ws('.join(',',@_).')');
283             $self->error(-1, 'OK');
284             my $key = 'REM_WS';
285             if(!defined($self->{$key})){ # fall back to the default only when nothing is stored; same test as the other accessors
286             $self->{$key} = $Deflt{$key};
287             }
288             if(@_){ # any argument, even a false one, is stored as the new value
289             $self->{$key} = $_[0];
290             }
291              
292             $self->logger(DEBUG4, 'RETURN[rem_ws()]: '.$self->error_code().'/'.$self->error_msg());
293             return($self->{$key});
294             }
295              
296              
297             ##############################################################################
298             sub orig_indent {
299             =pod
300              
301             =head2 orig_indent()
302              
303             C<orig_indent()>
304              
305             =over 2
306              
307             =item Usage:
308              
309             $orig_indent = $obj_ref->orig_indent(); # To retrieve the current value
310             or
311             $obj_ref->orig_indent(0); # To set a new value
312              
313             =item Purpose:
314              
315             Sets or gets the value the nesting-level counter starts from in beautify(). The default is -1, which puts the outermost element at level 0 because the counter is incremented before each start tag is indented. B<WARNING:> Beware of setting this.
316              
317             =item Returns:
318              
319             ($orig_indent), the value in effect after the call.
320              
321             =back
322              
323             =cut
324             my $self = shift;
325             $self->logger(DEBUG4, 'orig_indent('.join(',',@_).')');
326             $self->error(-1, 'OK');
327             my $key = 'ORIG_INDENT';
328             if(!defined($self->{$key})){ # 0 is a valid starting level, so only a missing value falls back to the default
329             $self->{$key} = $Deflt{$key};
330             }
331             if(@_){ # any argument, even 0, is stored as the new value
332             $self->{$key} = $_[0];
333             }
334              
335             $self->logger(DEBUG4, 'RETURN[orig_indent()]: '.$self->error_code().'/'.$self->error_msg());
336             return($self->{$key});
337             }
338              
339              
340             #################################################################################
341             ## Private Methods
342             #################################################################################
                # append_str($line): appends $line to the file-scoped output buffer $cleanXMLstr,
                # which beautify() returns. Calls error(-1, 'OK') first and logs entry and exit.
                # There is no explicit return; the value of the final logger call is what comes back.
343             sub append_str {
344             my $self = shift;
345             $self->logger(DEBUG4, 'append_str('.join(',',@_).')');
346             $self->error(-1, 'OK');
347             my($line) = ($_[0]);
348              
349             $cleanXMLstr .= $line;
350              
351             $self->logger(DEBUG4, 'RETURN[append_str()]: '.$self->error_code().'/'.$self->error_msg());
352             }
353              
354             #################################################################################
355             sub _doctype{ # Expat 'Doctype' handler registered in beautify(); works on the file-scoped $ref_self because Expat passes the parser, not the object
356             #Doctype (Parser, Name, Sysid, Pubid, Internal)
357             #This handler is called for DOCTYPE declarations. Name is the document type name. Sysid is the system id of the document type, if it was provided, otherwise it's undefined. Pubid is the public id of the document type, which will be undefined if no public id was given. Internal is the internal subset, given as a string. If there was no internal subset, it will be undefined. Internal will contain all whitespace, comments, processing instructions, and declarations seen in the internal subset. The declarations will be there whether or not they have been processed by another handler (except for unparsed entities processed by the Unparsed handler). However, comments and processing instructions will not appear if they've been processed by their respective handlers.
358             $ref_self->logger(DEBUG4, '_doctype('.join(',',@_).')'); # label now matches the sub name, like every other handler
359             $ref_self->error(-1, 'OK');
360             my($parser, $name, $sysid, $pubid, $internal) = @_; # only $parser is used below
361             $ref_self->logger(DEBUG3, '_doctype[origstr('.$parser->original_string().')]');
362              
363             # $ref_self->append_str($parser->original_string."\n");
364             $ref_self->append_str($ref_self->clean($parser->original_string)."\n"); # the declaration goes on a line of its own; clean() is not defined in the visible part of this file
365              
366             $last_handle = '_doctype';
367             $ref_self->logger(DEBUG4, 'RETURN[_doctype()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
368             }
369              
370             #################################################################################
                # Expat 'XMLDecl' handler registered in beautify().
                # Works on the file-scoped $ref_self: logs the call, calls error(-1, 'OK'),
                # appends clean() of the parser's original_string plus a newline to the output,
                # and records '_decl' in $last_handle. $ver, $encoding and $standalone are unused.
                # NOTE(review): clean() is not defined in the visible part of this file.
371             sub _decl{
372             #XMLDecl (Parser, Version, Encoding, Standalone)
373             #This handler is called for xml declarations. Version is a string containg the version. Encoding is either undefined or contains an encoding string. Standalone will be either true, false, or undefined if the standalone attribute is yes, no, or not made respectively.
374             $ref_self->logger(DEBUG4, '_decl('.join(',',@_).')');
375             $ref_self->error(-1, 'OK');
376             my($parser, $ver, $encoding, $standalone) = @_;
377             $ref_self->logger(DEBUG3, '_decl[origstr('.$parser->original_string().')]');
378              
379             # $ref_self->append_str($parser->original_string."\n");
380             $ref_self->append_str($ref_self->clean($parser->original_string)."\n");
381              
382             $last_handle = '_decl';
383             $ref_self->logger(DEBUG4, 'RETURN[_decl()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
384             }
385              
386             #################################################################################
                # Expat 'Start' handler registered in beautify().
                # If the previous event was also a start tag, a newline is appended first so the
                # nested tag begins its own line. $level is then incremented and the output gets
                # indent_str repeated $level times followed by clean() of the original start tag,
                # with no trailing newline. Records '_start' in $last_handle. $element is unused.
                # NOTE(review): clean() is not defined in the visible part of this file.
387             sub _start{
388             #Start (Parser, Element [, Attr, Val [,...]])
389             #This event is generated when an XML start tag is recognized. Parser is an XML::Parser::Expat instance. Element is the name of the XML element that is opened with the start tag. The Attr & Val pairs are generated for each attribute in the start tag.
390             $ref_self->logger(DEBUG4, '_start('.join(',',@_).')');
391             $ref_self->error(-1, 'OK');
392             my($parser, $element) = ($_[0], $_[1]);
393             $ref_self->logger(DEBUG3, '_start[origstr('.$parser->original_string().')]');
394              
395             ###HERE originally cleaned
                # $last_handle is undef for the first event of a parse (beautify() resets it).
396             $ref_self->append_str("\n") if($last_handle eq '_start');
397             $level++; #increment the level counter
398             my $indent = $ref_self->indent_str x $level;
399             ###HERE Try putting all data on the line with the Start Tag and then not
400             # $ref_self->append_str($indent.$parser->original_string."\n");
401             # $ref_self->append_str($indent.$line);
402             $ref_self->append_str($indent.$ref_self->clean($parser->original_string));#."\n"
403              
404             $last_handle = '_start';
405             $ref_self->logger(DEBUG4, 'RETURN[_start()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
406             }
407              
408             #################################################################################
                # Expat 'End' handler registered in beautify().
                # Appends clean() of the original end tag plus a newline, then decrements $level.
                # The tag is indented with indent_str repeated $level times, except when the
                # previous event was '_char' or '_cDataEnd': then no indent is added, so the end
                # tag follows the content directly. Records '_end' in $last_handle.
                # NOTE(review): clean() is not defined in the visible part of this file.
409             sub _end{
410             #End (Parser, Element)
411             #This event is generated when an XML end tag is recognized. Note that an XML empty tag () generates both a start and an end event.
412             #There is always a lower level start and end handler installed that wrap the corresponding callbacks. This is to handle the context mechanism. A consequence of this is that the default handler (see below) will not see a start tag or end tag unless the default_current method is called.
413             $ref_self->logger(DEBUG4, '_end('.join(',',@_).')');
414             $ref_self->error(-1, 'OK');
415             my($parser, $element) = ($_[0], $_[1]);
416             $ref_self->logger(DEBUG3, '_end[origstr('.$parser->original_string().')]');
417              
418             ###HERE originally cleaned
419             my $indent = '';
420             unless( ($last_handle eq '_char') || ($last_handle eq '_cDataEnd')){
421             $indent = $ref_self->indent_str x $level;
422             }
423             $ref_self->append_str($indent.$ref_self->clean($parser->original_string)."\n");
424             # $ref_self->append_str($indent.$line."\n");
425             $level--; # decrement the level counter
426              
427             $last_handle = '_end';
428             $ref_self->logger(DEBUG4, 'RETURN[_end()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
429             }
430              
431             #################################################################################
                # Expat 'Char' handler registered in beautify().
                # Appends the parser's original_string for the character data: passed through
                # clean() when rem_ws() equals FULL, otherwise unchanged. The decoded $string
                # argument is not used. Records '_char' in $last_handle, which _end() checks
                # to decide whether to indent the closing tag.
                # NOTE(review): clean() is not defined in the visible part of this file.
432             sub _char{
433             #Char (Parser, String)
434             #This event is generated when non-markup is recognized. The non-markup sequence of characters is in String. A single non-markup sequence of characters may generate multiple calls to this handler. Whatever the encoding of the string in the original document, this is given to the handler in UTF-8.
435             $ref_self->logger(DEBUG4, '_char('.join(',',@_).')');
436             $ref_self->error(-1, 'OK');
437             my($parser, $string) = ($_[0], $_[1]);
438             $ref_self->logger(DEBUG3, '_char[origstr('.$parser->original_string().')]');
439              
440             # $ref_self->append_str($parser->original_string);#."\n"
441             if($ref_self->rem_ws() == FULL){
442             $ref_self->append_str($ref_self->clean($parser->original_string));
443             }
444             else{
445             $ref_self->append_str($parser->original_string);
446             }
447              
448             $last_handle = '_char';
449             $ref_self->logger(DEBUG4, 'RETURN[_char()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
450             }
451              
452             #################################################################################
                # Expat 'Proc' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output (no indent, no newline) and records '_proc'
                # in $last_handle.
                # NOTE(review): $string receives the second argument, which the signature note
                # below names Target; it is not used. clean() is not defined in the visible
                # part of this file.
453             sub _proc{
454             #Proc (Parser, Target, Data)
455             #This event is generated when a processing instruction is recognized.
456             $ref_self->logger(DEBUG4, '_proc('.join(',',@_).')');
457             $ref_self->error(-1, 'OK');
458             my($parser, $string) = ($_[0], $_[1]);
459             $ref_self->logger(DEBUG3, '_proc[origstr('.$parser->original_string().')]');
460              
461             # $ref_self->append_str($parser->original_string);
462             $ref_self->append_str($ref_self->clean($parser->original_string));
463              
464             $last_handle = '_proc';
465             $ref_self->logger(DEBUG4, 'RETURN[_proc()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
466             }
467              
468             #################################################################################
                # Expat 'Comment' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output (no indent, no newline) and records '_comment'
                # in $last_handle. $string is unused.
                # NOTE(review): clean() is not defined in the visible part of this file.
469             sub _comment{
470             #Comment (Parser, String)
471             #This event is generated when a comment is recognized.
472             $ref_self->logger(DEBUG4, '_comment('.join(',',@_).')');
473             $ref_self->error(-1, 'OK');
474             my($parser, $string) = ($_[0], $_[1]);
475             $ref_self->logger(DEBUG3, '_comment[origstr('.$parser->original_string().')]');
476              
477             ###HERE originally cleaned
478             # my $line = $parser->original_string();
479             # $line =~ s/^\w//gio;
480             # $line =~ s/\w$//gio;
481             #my $indent = $ref_self->indent_str x $level;
482             $ref_self->append_str($ref_self->clean($parser->original_string));
483              
484             $last_handle = '_comment';
485             $ref_self->logger(DEBUG4, 'RETURN[_comment()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
486             }
487              
488             #################################################################################
                # Expat 'CdataStart' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_cDataStart' in $last_handle.
                # NOTE(review): clean() is not defined in the visible part of this file.
489             sub _cDataStart{
490             #CdataStart (Parser)
491             #This is called at the start of a CDATA section.
492             $ref_self->logger(DEBUG4, '_cDataStart('.join(',',@_).')');
493             $ref_self->error(-1, 'OK');
494             my($parser) = ($_[0]);
495             $ref_self->logger(DEBUG3, '_cDataStart[origstr('.$parser->original_string().')]');
496              
497             ###HERE originally cleaned
498             $ref_self->append_str($ref_self->clean($parser->original_string));
499              
500             $last_handle = '_cDataStart';
501             $ref_self->logger(DEBUG4, 'RETURN[_cDataStart()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
502             }
503              
504             #################################################################################
                # Expat 'CdataEnd' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_cDataEnd' in $last_handle;
                # _end() checks for that value and then skips the indent before the closing tag.
                # NOTE(review): clean() is not defined in the visible part of this file.
505             sub _cDataEnd{
506             #CdataEnd (Parser)
507             #This is called at the end of a CDATA section.
508             $ref_self->logger(DEBUG4, '_cDataEnd('.join(',',@_).')');
509             $ref_self->error(-1, 'OK');
510             my($parser) = ($_[0]);
511             $ref_self->logger(DEBUG3, '_cDataEnd[origstr('.$parser->original_string().')]');
512              
513             # $ref_self->append_str($parser->original_string);#."\n"
514             $ref_self->append_str($ref_self->clean($parser->original_string));
515              
516             $last_handle = '_cDataEnd';
517             $ref_self->logger(DEBUG4, 'RETURN[_cDataEnd()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
518             }
519              
520             #################################################################################
                # Expat 'Default' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output (no newline) and records '_default' in
                # $last_handle. The $str argument is unused.
                # NOTE(review): clean() is not defined in the visible part of this file.
521             sub _default{
522             #Default (Parser, String)
523             #This is called for any characters that don't have a registered handler. This includes both characters that are part of markup for which no events are generated (markup declarations) and characters that could generate events, but for which no handler has been registered.
524             #Whatever the encoding in the original document, the string is returned to the handler in UTF-8.
525             $ref_self->logger(DEBUG4, '_default('.join(',',@_).')');
526             $ref_self->error(-1, 'OK');
527             my($parser, $str) = ($_[0], $_[1]);
528             $ref_self->logger(DEBUG3, '_default[origstr('.$parser->original_string().')]');
529              
530             # $ref_self->append_str($parser->original_string."\n");
531             $ref_self->append_str($ref_self->clean($parser->original_string));#."\n"
532              
533             $last_handle = '_default';
534             $ref_self->logger(DEBUG4, 'RETURN[_default()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
535             }
536              
537             #################################################################################
                # Expat 'Unparsed' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_unparsed' in $last_handle.
                # Only the parser argument is read; the entity details are ignored.
                # NOTE(review): clean() is not defined in the visible part of this file.
538             sub _unparsed{
539             #Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation)
540             #This is called for a declaration of an unparsed entity. Entity is the name of the entity. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Notation is the notation name. Base and Pubid may be undefined.
541             $ref_self->logger(DEBUG4, '_unparsed('.join(',',@_).')');
542             $ref_self->error(-1, 'OK');
543             my($parser) = ($_[0]);
544             $ref_self->logger(DEBUG3, '_unparsed[origstr('.$parser->original_string().')]');
545              
546             # $ref_self->append_str($parser->original_string);
547             $ref_self->append_str($ref_self->clean($parser->original_string));
548              
549             $last_handle = '_unparsed';
550             $ref_self->logger(DEBUG4, 'RETURN[_unparsed()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
551             }
552              
553             #################################################################################
                # Expat 'Notation' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_notation' in $last_handle.
                # Only the parser argument is read; the notation details are ignored.
                # NOTE(review): clean() is not defined in the visible part of this file.
554             sub _notation{
555             #Notation (Parser, Notation, Base, Sysid, Pubid)
556             #This is called for a declaration of notation. Notation is the notation name. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
557             $ref_self->logger(DEBUG4, '_notation('.join(',',@_).')');
558             $ref_self->error(-1, 'OK');
559             my($parser) = ($_[0]);
560             $ref_self->logger(DEBUG3, '_notation[origstr('.$parser->original_string().')]');
561              
562             # $ref_self->append_str($parser->original_string);
563             $ref_self->append_str($ref_self->clean($parser->original_string));
564              
565             $last_handle = '_notation';
566             $ref_self->logger(DEBUG4, 'RETURN[_notation()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
567             }
568              
569             #################################################################################
                # Expat 'ExternEnt' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_extEnt' in $last_handle.
                # It does not open or read the external entity.
                # NOTE(review): there is no explicit return, so the value of the final logger
                # call is what goes back to Expat; the ExternEnt notes inside the sub say the
                # return value is taken as the entity's content - confirm this is intended.
                # NOTE(review): clean() is not defined in the visible part of this file.
570             sub _extEnt{
571             #ExternEnt (Parser, Base, Sysid, Pubid)
572             #This is called when an external entity is referenced. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Base, and Pubid may be undefined.
573             #This handler should either return a string, which represents the contents of the external entity, or return an open filehandle that can be read to obtain the contents of the external entity, or return undef, which indicates the external entity couldn't be found and will generate a parse error.
574             #If an open filehandle is returned, it must be returned as either a glob (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). The parser will close the filehandle after using it.
575             $ref_self->logger(DEBUG4, '_extEnt('.join(',',@_).')');
576             $ref_self->error(-1, 'OK');
577             my($parser) = ($_[0]);
578             $ref_self->logger(DEBUG3, '_extEnt[origstr('.$parser->original_string().')]');
579              
580             # $ref_self->append_str($parser->original_string);
581             $ref_self->append_str($ref_self->clean($parser->original_string));
582              
583             $last_handle = '_extEnt';
584             $ref_self->logger(DEBUG4, 'RETURN[_extEnt()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
585             }
586              
587             #################################################################################
                # Expat 'Entity' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_ent' in $last_handle.
                # Only the parser argument is read; the entity details are ignored.
                # NOTE(review): clean() is not defined in the visible part of this file.
588             sub _ent{
589             #Entity (Parser, Name, Val, Sysid, Pubid, Ndata)
590             #This is called when an entity is declared. For internal entities, the Val parameter will contain the value and the remaining three parameters will be undefined. For external entities, the Val parameter will be undefined, the Sysid parameter will have the system id, the Pubid parameter will have the public id if it was provided (it will be undefined otherwise), the Ndata parameter will contain the notation for unparsed entities. If this is a parameter entity declaration, then a '%' will be prefixed to the name.
591             #Note that this handler and the Unparsed handler above overlap. If both are set, then this handler will not be called for unparsed entities.
592             $ref_self->logger(DEBUG4, '_ent('.join(',',@_).')');
593             $ref_self->error(-1, 'OK');
594             my($parser) = ($_[0]);
595             $ref_self->logger(DEBUG3, '_ent[origstr('.$parser->original_string().')]');
596              
597             # $ref_self->append_str($parser->original_string);
598             $ref_self->append_str($ref_self->clean($parser->original_string));
599              
600             $last_handle = '_ent';
601             $ref_self->logger(DEBUG4, 'RETURN[_ent()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
602             }
603              
604             #################################################################################
                # Expat 'Element' handler registered in beautify().
                # Logs the call, calls error(-1, 'OK'), appends clean() of the parser's
                # original_string to the output and records '_element' in $last_handle.
                # Only the parser argument is read; the name and content model are ignored.
                # NOTE(review): clean() is not defined in the visible part of this file.
605             sub _element{
606             #Element (Parser, Name, Model)
607             #The element handler is called when an element declaration is found. Name is the element name, and Model is the content model as a string.
608             $ref_self->logger(DEBUG4, '_element('.join(',',@_).')');
609             $ref_self->error(-1, 'OK');
610             my($parser) = ($_[0]);
611             $ref_self->logger(DEBUG3, '_element[origstr('.$parser->original_string().')]');
612              
613             # $ref_self->append_str($parser->original_string);
614             $ref_self->append_str($ref_self->clean($parser->original_string));
615              
616             $last_handle = '_element';
617             $ref_self->logger(DEBUG4, 'RETURN[_element()]: '.$ref_self->error_code().'/'.$ref_self->error_msg());
618             }
619              
620             #################################################################################
sub _attriblist{
    # Expat "Attlist" handler:
    #   Attlist (Parser, Elname, Attname, Type, Default, Fixed)
    #
    # Fired once per attribute in an ATTLIST declaration, so a declaration
    # with several attributes produces several calls.
    #   Elname  - element the attribute is associated with
    #   Attname - attribute name
    #   Type    - attribute type, as a string
    #   Default - "#REQUIRED", "#IMPLIED" or a quoted string (the string
    #             begins and ends with a quote character)
    #   Fixed   - true when this is a fixed attribute
    #
    # The raw declaration text is taken from the parser, passed through
    # clean() and appended to the output string.
    my($expat) = @_;

    $ref_self->logger(
        DEBUG4,
        '_attriblist(' . join(',', @_) . ')'
    );
    $ref_self->error(-1, 'OK');
    $ref_self->logger(
        DEBUG3,
        '_attriblist[origstr(' . $expat->original_string() . ')]'
    );

    # Previously appended $expat->original_string unfiltered.
    my $declaration = $ref_self->clean($expat->original_string());
    $ref_self->append_str($declaration);

    # Remember which handler ran last.
    $last_handle = '_attriblist';

    return $ref_self->logger(
        DEBUG4,
        'RETURN[_attriblist()]: ' . $ref_self->error_code() . '/' . $ref_self->error_msg()
    );
}
635              
636             #################################################################################
637             # WIP subs
638             #################################################################################
639              
640             #################################################################################
sub clean {
    # clean($XMLstr)
    #
    # Returns a copy of $XMLstr filtered according to the object's settings:
    #   * rem_cr() true - every line-break character (LF and CR) is removed.
    #   * rem_ws() true - leading and trailing whitespace is trimmed.
    # With both settings false the string is returned unchanged. The
    # object's error state is reset to (-1, 'OK') on entry.
    my $self = shift;
    $self->logger(DEBUG4, 'clean('.join(',',@_).')');
    $self->error(-1, 'OK');
    my($XMLstr) = @_;

    if($self->rem_cr()){
        # Strip CR as well as LF so CRLF input does not leave stray "\r"
        # characters behind.
        $XMLstr =~ s/[\015\012]+//g;
    }
    if($self->rem_ws()){
        # Trim whitespace (\s). The earlier pattern used \w, which deleted
        # the first and last word character of the text instead.
        $XMLstr =~ s/\A\s+//;
        $XMLstr =~ s/\s+\z//;
    }

    # Emit the trace before returning; it used to sit after the return
    # statement and was never reached.
    $self->logger(DEBUG4, 'RETURN[clean()]: '.$self->error_code().'/'.$self->error_msg());
    return($XMLstr);
}
658              
659             #################################################################################
660              
661              
662             =head1 HISTORY
663              
664             =head2 See Changes file in distribution
665              
666             =head1 TODO
667              
668             =over 1
669              
670             =item *
671              
672             Deal with already formatted or mis-formatted XML.
673              
674             =item *
675              
676             Along those lines... Seriously consider reformatting each element in the XML from the data variable returned with the callback function instead of trying to miraculously use the original string.
677              
678             =item *
679              
680             Take an array ref, filehandle, etc., in addition to the scalar ref, as an argument to C<beautify()>.
681              
682             =item *
683              
684             Better documentation.
685              
686             =item *
687              
688             Replace static variables. (process started in ver 0.03)
689              
690             =item *
691              
692             See if the current state tracking can be replaced with expat's own state functions, e.g. C<context>, C<depth> and C<in_element>. (Related to the item above about static variables.)
693              
694             =back
695              
696             =head1 AUTHOR
697              
698             =over 1
699              
700             Thomas Bolioli
701              
702             =back
703              
704             =head1 COPYRIGHT
705              
706             Copyright (c) 2001 Thomas Bolioli. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
707              
708             =head1 SEE ALSO
709              
710             =over 1
711              
712             =item *
713              
714             Log::AndError
715              
716             =item *
717              
718             Log::AndError::Constants
719              
720             =item *
721              
722             XML::Parser::Expat
723              
724             =cut
725              
726             1;