| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package WebSource::Extract::xslt; |
|
2
|
1
|
|
|
1
|
|
4157
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
47
|
|
|
3
|
1
|
|
|
1
|
|
1300
|
use XML::LibXML; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
use XML::LibXSLT; |
|
5
|
|
|
|
|
|
|
use Carp; |
|
6
|
|
|
|
|
|
|
use Date::Language; |
|
7
|
|
|
|
|
|
|
use Date::Format; |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @ISA = ('WebSource::Module'); |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
WebSource::Extract::xslt - Apply an XSL Stylesheet to the input |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This flavor of the B<Extract> operator applies an XSL stylesheet to the input |
|
18
|
|
|
|
|
|
|
and returns the transformation result. |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Such an extraction operator should be described as follows : |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
... |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
where the xsl prefix should be associated to the URI http://www.w3.org/1999/XSL/Transform |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 METHODS |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Constructor.
#
# Takes a flat list of key/value parameters, blesses them into the object
# and runs the parent class initialisation (SUPER::_init_).
#
# When a 'wsdnode' parameter is present it is used as the description node:
#   * the "xsl" prefix is bound to the XSLT namespace on that node;
#   * every xsl:stylesheet/xsl:param is recorded in $self->{xslparams} as
#     name => key, where key is the value of the websource "mapped-from"
#     attribute, or the parameter name itself when that attribute is
#     missing or false;
#   * the first xsl:stylesheet child is cloned into a document of its own,
#     the three extension functions are registered, and the parsed
#     stylesheet is stored in $self->{xsl};
#   * the node's "format" attribute is stored in $self->{format}.
#
# Croaks when the description node contains no stylesheet, or when no
# stylesheet is available at all once construction is finished.
sub new {
    my ($class, %params) = @_;
    my $self = bless \%params, $class;
    $self->SUPER::_init_;

    my $wsd = $self->{wsdnode};
    if ($wsd) {
        $wsd->setNamespace("http://www.w3.org/1999/XSL/Transform", "xsl", 0);

        # Stylesheet parameter name => envelope key it is filled from.
        my %env_key_for;
        for my $param_node ($wsd->findnodes('xsl:stylesheet/xsl:param')) {
            my $paramName = $param_node->getAttribute('name');
            my $wsEnvKey  = $param_node->getAttributeNS(
                "http://wwwsource.free.fr/ns/websource", "mapped-from");
            $wsEnvKey ||= $paramName;
            $self->log(2, "Found parameter : $paramName (mapped from $wsEnvKey)");
            $env_key_for{$paramName} = $wsEnvKey;
        }
        $self->{xslparams} = \%env_key_for;

        my @sheets = $wsd->findnodes('xsl:stylesheet');
        unless (@sheets) {
            croak "No stylesheet found\n";
        }

        # Move a deep copy of the stylesheet into a standalone document
        # that mirrors the version/encoding of the description document.
        my $owner     = $wsd->ownerDocument;
        my $sheet_doc = XML::LibXML::Document->new($owner->version, $owner->encoding);
        $sheet_doc->setDocumentElement($sheets[0]->cloneNode(1));

        my $engine    = XML::LibXSLT->new();
        my $ext_ns    = 'http://wwwsource.free.fr/ns/websource/xslt-ext';
        my @functions = (
            [ 'reformat-date',  'WebSource::Extract::xslt::reformatDate'  ],
            [ 'string-replace', 'WebSource::Extract::xslt::stringReplace' ],
            [ 'html-lint',      'WebSource::Extract::xslt::htmlLint'      ],
        );
        for my $function (@functions) {
            $engine->register_function($ext_ns, $function->[0], $function->[1]);
        }

        $self->{xsl}    = $engine->parse_stylesheet($sheet_doc);
        $self->{format} = $wsd->getAttribute("format");
    }

    croak "No XSLT stylesheet given" unless $self->{xsl};
    return $self;
}
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Apply the stylesheet held in $self->{xsl} to the data of an envelope.
#
# Arguments:
#   $env - the incoming envelope; its data() must be a DOM node or document.
#
# A non-document node is deep-copied into a fresh "1.0"/"UTF-8" document
# before the transformation.  Each stylesheet parameter listed in
# $self->{xslparams} is filled from the envelope entry it is mapped to and
# passed as an XPath string.
#
# Returns a new WebSource::Envelope of type "object/dom-node" holding the
# result document when $self->{format} is "document", or its document
# element otherwise.
sub handle {
    my ($self, $env) = @_;

    $self->log(5, "Got document ", $env->{baseuri});

    my $data = $env->data;
    unless ($data->isa("XML::LibXML::Document")) {
        $self->log(5, "Creating document from DOM node");
        my $wrapper = XML::LibXML::Document->new("1.0", "UTF-8");
        $wrapper->setDocumentElement($data->cloneNode(1));
        $data = $wrapper;
    }
    $self->log(6, "We have : \n" . $data->toString(1, 'utf-8') . "\n");
    $self->log(6, ".. encoding: " . $data->ownerDocument->actualEncoding() . "\n");

    # Collect the value of every declared stylesheet parameter.
    my $mapping = $self->{xslparams};
    my %parameters;
    for my $xsl_name (keys %$mapping) {
        my $env_key = $mapping->{$xsl_name};
        my $value   = $env->{$env_key};
        $self->log(2, "Found value for $xsl_name (using $env_key) : ", $value);
        $parameters{$xsl_name} = $value;
    }

    my $result = $self->{xsl}->transform(
        $data,
        XML::LibXSLT::xpath_to_string(%parameters),
    );
    unless ($self->{format} eq "document") {
        $result = $result->documentElement;
    }
    $self->log(6, "Produced :\n", $result->toString(1, 'UTF-8'));

    return WebSource::Envelope->new(type => "object/dom-node", data => $result);
}
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 XSLT EXTENSIONS |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The module implements extra practical XSLT extension functions. |
|
106
|
|
|
|
|
|
|
These can be used by declaring a prefix for these extensions whose namespace |
|
107
|
|
|
|
|
|
|
is C<http://wwwsource.free.fr/ns/websource/xslt-ext> and declaring that this prefix is |
|
108
|
|
|
|
|
|
|
an extension prefix. For example: |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
xmlns:wsx="http://wwwsource.free.fr/ns/websource/xslt-ext" |
|
112
|
|
|
|
|
|
|
extension-element-prefixes="wsx" |
|
113
|
|
|
|
|
|
|
> |
|
114
|
|
|
|
|
|
|
... |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 reformat-date |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Extension function to reformat dates |
|
123
|
|
|
|
|
|
|
{http://wwwsource.free.fr/ns/websource/xslt-ext}reformat-date( |
|
124
|
|
|
|
|
|
|
date, targetTemplate, sourceLanguage? |
|
125
|
|
|
|
|
|
|
) |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# XSLT extension function: parse a date string and print it again using
# another template.
#
# Arguments:
#   $srcdate  - the date text to parse
#   $template - a Date::Format template handed to time2str()
#   @langs    - optional Date::Language names, tried in order until one of
#               them parses $srcdate; defaults to English when omitted
#
# Returns the reformatted date, or "" when the date is missing/empty or
# none of the languages could parse it.
sub reformatDate {
    my ($srcdate, $template, @langs) = @_;

    # Nothing to parse: same outcome as an unparsable date.
    return "" unless defined $srcdate && length $srcdate;

    # The language argument is documented as optional; without a fallback
    # no parse would ever be attempted and the result would always be "".
    @langs = ('English') unless @langs;

    my $epoch;
    for my $lang_name (@langs) {
        my $parser = Date::Language->new($lang_name);
        $epoch = $parser->str2time($srcdate);
        last if defined $epoch;
    }

    # Test definedness, not truth: an epoch of 0 is a valid parse result.
    return defined $epoch ? time2str($template, $epoch) : "";
}
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head2 string-replace |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Extension function to do a string replacement using a perl regular expression |
|
148
|
|
|
|
|
|
|
{http://wwwsource.free.fr/ns/websource/xslt-ext}string-replace(regexp, replacement, data) |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=cut |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# XSLT extension function: global regular-expression replacement.
#
# Arguments:
#   $regexp  - Perl regular expression (interpolated into the pattern)
#   $replace - replacement text; inserted as-is, so capture references
#              such as $1 written inside it are not expanded
#   $data    - the string to work on
#
# Returns $data with every match of $regexp replaced by $replace.
sub stringReplace {
    my ($regexp, $replace, $data) = @_;

    # The pattern is wrapped in a non-capturing group so that it is never
    # textually empty: Perl treats an empty pattern in s/// as "reuse the
    # last successfully matched regex", which would make the result depend
    # on unrelated matches performed earlier.
    $data =~ s/(?:$regexp)/$replace/g;

    return $data;
}
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head2 html-lint |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Extension function html-lint which parses an encoded XML string and returns a cleaned-up version |
|
161
|
|
|
|
|
|
|
{http://wwwsource.free.fr/ns/websource/xslt-ext}html-lint |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# XSLT extension function: parse a markup string with a recovering XML
# parser and return the serialisation of the children of the resulting
# document element, joined with newlines.
#
# Arguments:
#   $string - the markup to clean up
#
# Returns "" when the input is undefined or empty, or when parsing yields
# no document element.
sub htmlLint {
    my ($string) = @_;

    # NOTE(review): both wrapper literals are empty here; since only the
    # children of the document element are returned, they presumably once
    # held an enclosing root element - confirm against the upstream source.
    my $markup = defined $string ? "" . $string . "" : "";

    # The parser rejects an empty string; answer it directly instead of
    # letting the exception abort the whole transformation.
    return "" unless length $markup;

    # Best-effort debug trace, skipped when the log file cannot be opened.
    # NOTE(review): fixed /tmp path written on every call - consider
    # removing it or routing it through the module's logger.
    if (open(my $log_fh, ">>", '/tmp/ws-xslt.log')) {
        print {$log_fh} $markup, "\n==============================\n";
        close($log_fh);
    }

    my $parser = XML::LibXML->new(recover => 2);
    my $doc    = $parser->load_xml(string => $markup);

    my $root = $doc->documentElement;
    return "" unless defined $root;

    return join("\n", map { $_->toString(1, 'utf-8') } $root->childNodes());
}
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
WebSource |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=cut |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
1; |