File Coverage

blib/lib/XML/Easy/Transform/RationalizeNamespacePrefixes.pm
Criterion Covered Total %
statement 96 97 98.9
branch 61 70 87.1
condition 11 12 91.6
subroutine 7 7 100.0
pod 1 1 100.0
total 176 187 94.1


line stmt bran cond sub pod time code
1             package XML::Easy::Transform::RationalizeNamespacePrefixes;
2 1     1   87166 use base qw(Exporter);
  1         3  
  1         179  
3              
4 1     1   7 use strict;
  1         3  
  1         38  
5 1     1   6 use warnings;
  1         2  
  1         64  
6              
7             our $VERSION = "1.22";
8             our @EXPORT_OK;
9              
10 1     1   1487 use Carp::Clan;
  1         6016  
  1         8  
11              
12             =head1 NAME
13              
14             XML::Easy::Transform::RationalizeNamespacePrefixes - rationalize namespaces prefixes
15              
16             =head1 SYNOPSIS
17              
18             use XML::Easy::Transform::RationalizeNamespacePrefixes qw(
19             rationalize_namespace_prefixes
20             );
21              
22             my $doc = rationalize_namespace_prefixes(
23             xml10_read_document($text)
24             );
25              
26             =head1 DESCRIPTION
27              
28             This code creates a new tree of B nodes by examining
29             an existing B tree and producing a new tree that is
30             schemantically identical under the XML Namespaces 1.0 specification
31             but with all namespace declartions moved to the top node of the tree
32             (this may involve renaming several elements in the tree to have different
33             prefixes.)
34              
35             It supplies one public function that can do this transformation which is
36             exported on request:
37              
38             =over
39              
40             =item rationalize_namespace_prefixes($easy_element)
41              
42             =item rationalize_namespace_prefixes($easy_element, $generator_subref)
43              
44             =item rationalize_namespace_prefixes($easy_element, $options_hashref)
45              
46             The first argument is a B that you wish a transformed
47             copy of to be returned. An exception will be thrown if thrown if the
48             XML document is not namespace-well-formed (i.e. it breaches the XML
49             Namespaces 1.0 specification.)
50              
51             The second (optional) argument may be a reference to a function that should,
52             when passed a string containing a xml prefix as its first argument,
53             return a string containing an alternative xml prefix. If no function is
54             passed in then the default renaming function is used, which will append
55             or replace trailing numbers with higher numbers to the prefix.
56              
57             Alternativly, a hashref may be passed as the (optional) second arguement.
58             The keys of this hash may be:
59              
60             =over
61              
62             =item generator
63              
64             The prefix generating subroutine reference, as previously described.
65              
66             =item namespaces
67              
68             A hashref containing a mapping of namespace to prefixes that you want
69             to force to be declared. This enables you to control exactly what
70             prefixes are used for what namespaces and to force additional namespace
71             declarations for namespaces not otherwise mentioned in the XML
72             document you are transforming. Specifying more than one namespace that
73             maps to the same prefix will cause an exception to be thrown.
74              
75             =item force_attribute_prefix
76              
77             By default attributes without a prefix have the same namespace as the
78             element that they belong to. Setting this to a true value will force
79             prefixes to be prepended to attribute names even if they could be
80             ommited.
81              
82             =back
83              
84             The new B will be returned as the only return value
85             of this function.
86              
87             =cut
88              
89             {
90             my %default_known_prefixes = (
91             # include these as we're not meant to freak out if these namespaces are used
92             xml => "http://www.w3.org/XML/1998/namespace",
93             xmlns => "http://www.w3.org/2000/xmlns/",
94              
95             # by default the empty string is bound to ""
96             "" => "",
97             );
98              
99             # this holds the namespaces that we've assigned.
100              
101             sub rationalize_namespace_prefixes ($;$) {
102 21     21 1 4856 my $source_element = shift;
103              
104             # optional argument parsing
105              
106 21 100       59 my $args = @_ ? shift : {};
107 21 50       70 $args = { generator => $args } if ref($args) eq "CODE";
108 21 50       51 croak "Invalid second parameter passed to rationalize_namespace_prefixes: must be hashref or subroutine reference"
109             unless ref($args) eq "HASH";
110              
111 21 50       169 my $prefix_generator = exists $args->{generator} ? $args->{generator} : \&_prefix_generator;
112 21 50       53 croak "Argument 'generator' must be a subroutine reference"
113             unless ref($prefix_generator) eq "CODE";
114              
115 21 100       46 my $force_attr_prefixd_namespaces = exists $args->{namespaces} ? $args->{namespaces} : {};
116 21 50       49 croak "Argument 'namespaces' must be a hash reference"
117             unless ref($force_attr_prefixd_namespaces) eq "HASH";
118              
119             # create the modified tree and populate our two local hashes with
120             # the namespaces we should have
121              
122 21         25 my %assigned_prefixes;
123             my %assigned_ns;
124 21         164 foreach my $ns (keys %{ $force_attr_prefixd_namespaces }) {
  21         67  
125 1         3 $assigned_ns{ $ns } = $force_attr_prefixd_namespaces->{ $ns };
126 1 50       3 croak("Cannot assign namespace '$ns' to prefix '$force_attr_prefixd_namespaces->{ $ns }' as already assigned to '$assigned_prefixes{ $force_attr_prefixd_namespaces->{ $ns } }'")
127             if exists $assigned_prefixes{ $force_attr_prefixd_namespaces->{ $ns } };
128 1         7 $assigned_prefixes{ $force_attr_prefixd_namespaces->{ $ns } } = $ns;
129             }
130              
131 21         90 my $dest_element = _rnp($source_element, $prefix_generator, \%default_known_prefixes, \%assigned_prefixes, \%assigned_ns, $args->{force_attribute_prefix});
132              
133             # we now have a tree with *no* namespaces. Replace the top of that
134             # tree with a new element that is the same as the top element of the tree but
135             # with the needed namespace declarations
136              
137 14         53 my $attr = { ## no critic (stupid comma statement rule misfiring)
138 31 100       133 %{ $dest_element->attributes },
    100          
139 14         38 map { ($_ ne "") ? ("xmlns:$_" => $assigned_prefixes{$_}) :
140             ($assigned_prefixes{""} ne "") ? ( xmlns => $assigned_prefixes{""} ) : () }
141             keys %assigned_prefixes
142             };
143              
144 14         286 return XML::Easy::Element->new($dest_element->type_name, $attr, $dest_element->content_object);
145             }
146             push @EXPORT_OK, "rationalize_namespace_prefixes";
147              
148             sub _rnp {
149 43     43   59 my $element = shift;
150 43         41 my $prefix_generator = shift;
151 43         46 my $known_prefixes = shift;
152 43         45 my $assigned_prefixes = shift;
153 43         44 my $assigned_ns = shift;
154 43         57 my $force_attr_prefix = shift;
155              
156             # boolean that indicates if known_* is our copy or the
157             # version passed in (has it been copy-on-write-ed)
158 43         54 my $cowed = 0;
159              
160             # change the name of the element
161 43         96 my $attr = $element->attributes;
162 43         49 foreach (sort keys %{ $attr }) {
  43         152  
163 43 100       156 croak "Specification violation: Can't have more than one colon in attribute name '$_'"
164             if tr/:/:/ > 1;
165 40 100       177 next unless my ($prefix) = /\Axmlns(?::(.*))?\z/msx;
166 30 100       62 $prefix = "" unless defined $prefix;
167 30         57 my $ns = $attr->{$_};
168              
169             # check for things assigning namespaces to reserved places
170 30 100 100     89 croak "Specification violation: Can't assign '$ns' to prefix 'xml'"
171             if $prefix eq "xml" && $ns ne 'http://www.w3.org/XML/1998/namespace';
172 29 100       55 croak "Specification violation: Can't assign 'http://www.w3.org/2000/xmlns/' to any prefix"
173             if $ns eq 'http://www.w3.org/2000/xmlns/';
174              
175             # check we're not assigning things to the xmlns prefix
176 28 100       54 croak "Specification violation: Can't assign any namespace to prefix 'xmlns'"
177             if $prefix eq 'xmlns';
178              
179             # copy the hash if we haven't done so already
180 27 100       49 unless ($cowed) {
181 24         23 $known_prefixes = +{ %{ $known_prefixes } };
  24         93  
182 24         45 $cowed = 1;
183             }
184              
185             # record that this prefix maps to this namespace;
186 27         58 $known_prefixes->{ $prefix } = $ns;
187              
188 27 100       76 unless (exists $assigned_ns->{ $ns }) {
189             # find an unused unique prefix in the destination.
190 20         45 while (exists $assigned_prefixes->{ $prefix }) {
191 8         15 $prefix = $prefix_generator->($prefix);
192             }
193              
194             # remember that we're mapping that way
195 20         35 $assigned_prefixes->{ $prefix } = $ns;
196 20         66 $assigned_ns->{ $ns } = $prefix;
197             }
198              
199             }
200              
201             # munge the prefix on the main element
202 37 50       261 my ($efront, $eback) = $element->type_name =~ /\A([^:]+)(?::(.*))?\z/msx
203             or croak "Invalid element name '".$element->type_name."'";
204 37 100       88 my $prefix = defined ($eback) ? $efront : "";
205 37 100       82 my $local_name = defined ($eback) ? $eback : $efront;
206              
207             # map the prefix in the source document to a namespace,
208             # then look up the corrisponding prefix in the destination document
209 37         62 my $element_ns;
210             my $new_element_prefix;
211 37 100 100     126 if ($prefix eq "" && !exists($assigned_prefixes->{""})) {
212             # someone just used the default (empty) prefix for the first time without having
213             # declared an explict namespace. Remember that the empty namespace exists.
214 10         19 $element_ns = $assigned_prefixes->{""} = "";
215 10         18 $new_element_prefix = $assigned_ns->{""} = "";
216             } else {
217 27         47 $element_ns = $known_prefixes->{ $prefix };
218 27 100       57 unless (defined $element_ns) { croak "Prefix '$prefix' has no registered namespace" }
  1         8  
219 26         56 $new_element_prefix = $assigned_ns->{ $element_ns };
220             }
221 36 100       114 my $new_element_name = (length $new_element_prefix) ? "$new_element_prefix:$local_name" : $local_name;
222              
223             # munge the prefix on the attribute elements
224 36         58 my $new_attr = {};
225 36         48 foreach (keys %{ $attr }) {
  36         115  
226 37 50       177 my ($afront, $aback) = /\A([^:]+)(?::(.*))?\z/msx
227             or croak "Invalid attribute name '$_'";
228 37 100       73 my $aprefix = defined ($aback) ? $afront : "";
229 37 100       57 my $alocal_name = defined ($aback) ? $aback : $afront;
230              
231             # skip the namespaces
232 37 100 100     109 next if $aprefix eq "" && $alocal_name eq "xmlns";
233 33 100       83 next if $aprefix eq "xmlns";
234              
235             # map the prefix in the source document to a namespace,
236             # then look up the corrisponding prefix in the destination document
237 10 100       19 my $ns = $aprefix eq "" ? $element_ns : $known_prefixes->{ $aprefix };
238 10 50       21 unless (defined $ns) { croak "Prefix '$aprefix' has no registered namespace" }
  0         0  
239 10         17 my $new_prefix = $assigned_ns->{ $ns };
240              
241 10 100 66     56 my $final_name = (($force_attr_prefix && length $new_prefix) || $new_prefix ne $new_element_prefix) ? "$new_prefix:$alocal_name" : $alocal_name;
242 10         36 $new_attr->{ $final_name } = $attr->{ $_ };
243              
244             }
245              
246 36         54 my @content = @{ $element->content };
  36         117  
247 36         38 my @new_content;
248 36         68 while (@content) {
249 58         86 push @new_content, shift @content;
250 58 100       141 if (@content) {
251 22         82 push @new_content, _rnp((shift @content), $prefix_generator, $known_prefixes, $assigned_prefixes, $assigned_ns, $force_attr_prefix);
252             }
253             }
254              
255 36         348 return XML::Easy::Element->new( $new_element_name, $new_attr, \@new_content );
256             }
257             }
258              
259             sub _prefix_generator {
260 8     8   11 my $prefix = shift;
261              
262             # "" => default2 (the 2 is concatinated later)
263 8 100       16 $prefix = "default" if $prefix eq "";
264              
265             # turn foo into foo2 and foo2 into foo3, etc.
266 8 100       34 $prefix .= "2" unless $prefix =~ s/(\d+)$/ $1 + 1 /mxse;
  2         13  
267              
268 8         29 return $prefix;
269             }
270              
271             =back
272              
273             =head1 EXAMPLES
274              
275             =head2 A Basic Transform
276              
277             After defining a handy utility function:
278              
279             sub process($) {
280             return xml10_write_document(
281             rationalize_namespace_prefixes(
282             xml10_read_document( $_[0] )
283             ),"UTF-8"
284             );
285             }
286              
287             This code:
288              
289             print process <<'XML';
290            
291            
292            
293             XML
294              
295             Moves the namespace up and prints:
296              
297            
298            
299            
300              
301             =head2 Creating Prefixes
302              
303             If you use the same prefix twice in the document to refer to different namespaces
304             then the function will rename one of the prefixes:
305              
306             print process <<'XML';
307            
308            
309            
310             XML
311              
312             Prints
313              
314            
315            
316            
317              
318             This works for the default namespace too:
319              
320             print process <<'XML';
321            
322            
323            
324             XML
325              
326             Prints
327              
328            
329            
330            
331              
332             If you want control on how your prefixes will be renamed you can supply
333             a function as the second arguement to C.
334              
335             my $transformed = rationalize_namespace_prefixes(
336             $xml_easy_element,
337             sub {
338             my $name = shift;
339             $name =~ s/\d+\Z//;
340             return $name . int(rand(10000));
341             }
342             );
343              
344             If your function returns a prefix that has already been used it will be
345             called again and again until it returns an unused prefix. The first time
346             the function is called it will be passed the prefix from the source, and
347             if it is called subsequent times after that because the new prefix it
348             previously returned is already in use it will be passed the prefix the
349             previous call to the function created.
350              
351             =head2 Removing Unneeded Prefixes
352              
353             This module also removes all unnecessary prefixes:
354              
355            
356            
357            
358            
359              
360             Will be transformed into
361              
362            
363            
364            
365            
366              
367             =head1 AUTHOR
368              
369             Written by Mark Fowler Emark@twoshortplanks.comE
370              
371             Copyright Photobox 2009. All Rights Reserved.
372              
373             This program is free software; you can redistribute it
374             and/or modify it under the same terms as Perl itself.
375              
376             =head1 BUGS
377              
378             None known.
379              
380             Please report bugs via RT L.
381              
382             The version control system for this module is hosted on github. Please feel free to fork L and send pull requests.
383              
384             =head1 SEE ALSO
385              
386             L, L
387              
388             =cut
389              
390             1;