| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
7
|
|
|
7
|
|
825
|
use strict; |
|
|
7
|
|
|
|
|
14
|
|
|
|
7
|
|
|
|
|
209
|
|
|
2
|
7
|
|
|
7
|
|
34
|
use warnings; |
|
|
7
|
|
|
|
|
11
|
|
|
|
7
|
|
|
|
|
297
|
|
|
3
|
|
|
|
|
|
|
package RDF::Flow::Source; |
|
4
|
|
|
|
|
|
|
{ |
|
5
|
|
|
|
|
|
|
$RDF::Flow::Source::VERSION = '0.178'; |
|
6
|
|
|
|
|
|
|
} |
|
7
|
|
|
|
|
|
|
#ABSTRACT: Source of RDF data |
|
8
|
|
|
|
|
|
|
|
|
9
|
7
|
|
|
7
|
|
6330
|
use Log::Contextual::WarnLogger; |
|
|
7
|
|
|
|
|
7367
|
|
|
|
7
|
|
|
|
|
373
|
|
|
10
|
7
|
|
|
|
|
90
|
use Log::Contextual qw(:log), -default_logger |
|
11
|
7
|
|
|
7
|
|
12278
|
=> Log::Contextual::WarnLogger->new({ env_prefix => __PACKAGE__ }); |
|
|
7
|
|
|
|
|
783420
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
7
|
|
|
7
|
|
451392
|
use 5.010; |
|
|
7
|
|
|
|
|
29
|
|
|
|
7
|
|
|
|
|
321
|
|
|
14
|
7
|
|
|
7
|
|
43
|
use re qw(is_regexp); |
|
|
7
|
|
|
|
|
23
|
|
|
|
7
|
|
|
|
|
1260
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
7
|
|
|
7
|
|
11718
|
use RDF::Trine qw(iri); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use Scalar::Util qw(blessed refaddr reftype); |
|
18
|
|
|
|
|
|
|
use Try::Tiny; |
|
19
|
|
|
|
|
|
|
use Carp; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use URI; |
|
22
|
|
|
|
|
|
|
use URI::Escape; |
|
23
|
|
|
|
|
|
|
use File::Spec::Functions; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use parent 'Exporter'; |
|
26
|
|
|
|
|
|
|
our @EXPORT_OK = qw(sourcelist_args iterator_to_model empty_rdf rdflow_uri); |
|
27
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
|
28
|
|
|
|
|
|
|
util => [qw(sourcelist_args iterator_to_model empty_rdf rdflow_uri)], |
|
29
|
|
|
|
|
|
|
); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use RDF::Trine::Model; |
|
32
|
|
|
|
|
|
|
use RDF::Trine::Parser; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#require RDF::Flow::Pipeline; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub new { |
|
37
|
|
|
|
|
|
|
my $class = shift; |
|
38
|
|
|
|
|
|
|
my ($src, %args) = ref($_[0]) ? @_ : (undef,@_); |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$src = delete $args{from} unless defined $src; |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
my $match = delete $args{match}; |
|
43
|
|
|
|
|
|
|
my $code; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $self = bless { }, $class; |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
if ( $src and not ref $src ) { # load from file or directory |
|
48
|
|
|
|
|
|
|
my $model = RDF::Trine::Model->new; |
|
49
|
|
|
|
|
|
|
my @files; |
|
50
|
|
|
|
|
|
|
if ( $src =~ /^https?:\/\// ) { |
|
51
|
|
|
|
|
|
|
eval { RDF::Trine::Parser->parse_url_into_model( $src, $model ); }; |
|
52
|
|
|
|
|
|
|
goto CHECK; |
|
53
|
|
|
|
|
|
|
} else { |
|
54
|
|
|
|
|
|
|
if ( -d $src ) { |
|
55
|
|
|
|
|
|
|
if ( opendir(DIR, $src) ) { |
|
56
|
|
|
|
|
|
|
my $ext = join ('|', keys %RDF::Trine::Parser::file_extensions); |
|
57
|
|
|
|
|
|
|
@files = map { catfile($src,$_) } grep(/\.($ext)$/,readdir(DIR)); |
|
58
|
|
|
|
|
|
|
closedir DIR; |
|
59
|
|
|
|
|
|
|
} else { |
|
60
|
|
|
|
|
|
|
log_warn { "failed to open directory $src"; } |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
} else { |
|
63
|
|
|
|
|
|
|
@files = ($src); |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
while ($src = shift @files) { |
|
68
|
|
|
|
|
|
|
eval { RDF::Trine::Parser->parse_file_into_model( "file:///$src", $src, $model ); }; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
CHECK: # yes, it's an evil goto statement jump target :-) |
|
71
|
|
|
|
|
|
|
if ( @_ ) { |
|
72
|
|
|
|
|
|
|
log_warn { "failed to load from $src"; } |
|
73
|
|
|
|
|
|
|
} else { |
|
74
|
|
|
|
|
|
|
log_info { "loaded from $src"; } |
|
75
|
|
|
|
|
|
|
} |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
$src = $model; |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
if (blessed $src and $src->isa('RDF::Flow::Source')) { |
|
82
|
|
|
|
|
|
|
$self->{from} = $src; |
|
83
|
|
|
|
|
|
|
$code = sub { |
|
84
|
|
|
|
|
|
|
$src->retrieve( @_ ); |
|
85
|
|
|
|
|
|
|
}; |
|
86
|
|
|
|
|
|
|
# return $src; # don't wrap |
|
87
|
|
|
|
|
|
|
# TODO: use args to modify object! |
|
88
|
|
|
|
|
|
|
} elsif ( blessed $src and $src->isa('RDF::Trine::Model') ) { |
|
89
|
|
|
|
|
|
|
$self->{from} = $src; |
|
90
|
|
|
|
|
|
|
$code = sub { |
|
91
|
|
|
|
|
|
|
my $uri = rdflow_uri( shift ); |
|
92
|
|
|
|
|
|
|
iterator_to_model( $src->bounded_description( |
|
93
|
|
|
|
|
|
|
iri( $uri ) |
|
94
|
|
|
|
|
|
|
) ); |
|
95
|
|
|
|
|
|
|
}; |
|
96
|
|
|
|
|
|
|
} elsif ( ref $src and ref $src eq 'CODE' ) { |
|
97
|
|
|
|
|
|
|
$code = $src; |
|
98
|
|
|
|
|
|
|
} elsif (not defined $src) { |
|
99
|
|
|
|
|
|
|
carp 'Missing RDF source in plain RDF::Flow::Source' |
|
100
|
|
|
|
|
|
|
if $class eq 'RDF::Flow::Source'; |
|
101
|
|
|
|
|
|
|
$code = sub { }; |
|
102
|
|
|
|
|
|
|
} else { |
|
103
|
|
|
|
|
|
|
croak 'expected RDF::Source, RDF::Trine::Model, or code reference' |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
$self->{name} = $args{name} if defined $args{name}; |
|
107
|
|
|
|
|
|
|
$self->{code} = $code; |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
$self->match( $match ); |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
$self->init(); |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
$self; |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub init { } |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub match { # accessor |
|
119
|
|
|
|
|
|
|
my $self = shift; |
|
120
|
|
|
|
|
|
|
return $self->{match} unless @_; |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
my $match = shift; |
|
123
|
|
|
|
|
|
|
if ( defined $match ) { |
|
124
|
|
|
|
|
|
|
my $pattern = $match; |
|
125
|
|
|
|
|
|
|
$match = sub { $_[0] =~ $pattern; } |
|
126
|
|
|
|
|
|
|
if is_regexp($match); |
|
127
|
|
|
|
|
|
|
croak 'url parameter must be code or regexp'.reftype($match). ": $match" |
|
128
|
|
|
|
|
|
|
if reftype $match ne 'CODE'; |
|
129
|
|
|
|
|
|
|
$self->{match} = $match; |
|
130
|
|
|
|
|
|
|
} else { |
|
131
|
|
|
|
|
|
|
$self->{match} = undef; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub retrieve { |
|
136
|
|
|
|
|
|
|
my ($self, $env) = @_; |
|
137
|
|
|
|
|
|
|
$env = { 'rdflow.uri' => $env } if ($env and not ref $env); |
|
138
|
|
|
|
|
|
|
log_trace { |
|
139
|
|
|
|
|
|
|
sprintf "retrieve from %s with %s", about($self), rdflow_uri($env); |
|
140
|
|
|
|
|
|
|
}; |
|
141
|
|
|
|
|
|
|
$self->timestamp( $env ); |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my $result; |
|
144
|
|
|
|
|
|
|
if ( $self->{match} ) { |
|
145
|
|
|
|
|
|
|
my $uri = $env->{'rdflow.uri'}; |
|
146
|
|
|
|
|
|
|
if ( $self->{match}->( $env->{'rdflow.uri'} ) ) { |
|
147
|
|
|
|
|
|
|
$result = $self->retrieve_rdf( $env ); |
|
148
|
|
|
|
|
|
|
$env->{'rdflow.uri'} = $uri; |
|
149
|
|
|
|
|
|
|
} else { |
|
150
|
|
|
|
|
|
|
log_trace { "URI did not match: " . $env->{'rdflow.uri'} }; |
|
151
|
|
|
|
|
|
|
$result = RDF::Trine::Model->new; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
} else { |
|
154
|
|
|
|
|
|
|
$result = $self->retrieve_rdf( $env ); |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
return $self->trigger_retrieved( $result ); |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub retrieve_rdf { |
|
161
|
|
|
|
|
|
|
my ($self, $env) = @_; |
|
162
|
|
|
|
|
|
|
return try { |
|
163
|
|
|
|
|
|
|
$self->{code}->( $env ); |
|
164
|
|
|
|
|
|
|
} catch { |
|
165
|
|
|
|
|
|
|
s/[.]?\s+$//s; |
|
166
|
|
|
|
|
|
|
RDF::Flow::Source::trigger_error( $self, $_, $env ); |
|
167
|
|
|
|
|
|
|
RDF::Trine::Model->new; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub trigger_error { |
|
172
|
|
|
|
|
|
|
my ($self, $message, $env) = @_; |
|
173
|
|
|
|
|
|
|
$message = 'unknown error' unless $message; |
|
174
|
|
|
|
|
|
|
$env->{'rdflow.error'} = $message if $env; |
|
175
|
|
|
|
|
|
|
log_error { $message }; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub trigger_retrieved { |
|
179
|
|
|
|
|
|
|
my ($self, $result, $msg) = @_; |
|
180
|
|
|
|
|
|
|
log_trace { |
|
181
|
|
|
|
|
|
|
$msg = "%s returned %s" unless $msg; |
|
182
|
|
|
|
|
|
|
my $size = 'no'; |
|
183
|
|
|
|
|
|
|
if ( $result ) { |
|
184
|
|
|
|
|
|
|
$size = (blessed $result and $result->can('size')) |
|
185
|
|
|
|
|
|
|
? $result->size : 'some'; |
|
186
|
|
|
|
|
|
|
}; |
|
187
|
|
|
|
|
|
|
sprintf $msg, name($self), "$size triples"; |
|
188
|
|
|
|
|
|
|
}; |
|
189
|
|
|
|
|
|
|
return $result; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
sub id { |
|
193
|
|
|
|
|
|
|
return "source".refaddr(shift); |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub graphviz { |
|
198
|
|
|
|
|
|
|
return scalar shift->graphviz_addnode( @_ ); |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub graphviz_addnode { |
|
202
|
|
|
|
|
|
|
my $self = shift; |
|
203
|
|
|
|
|
|
|
my $g = ( blessed $_[0] and $_[0]->isa('GraphViz') ) |
|
204
|
|
|
|
|
|
|
? shift : eval { GraphViz->new( @_ ) }; |
|
205
|
|
|
|
|
|
|
return unless $g; |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
$g->add_node( $self->id, $self->_graphviz_nodeattr ); |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
my $i=1; |
|
210
|
|
|
|
|
|
|
foreach my $s ( $self->inputs ) { |
|
211
|
|
|
|
|
|
|
$s->graphviz($g); |
|
212
|
|
|
|
|
|
|
$g->add_edge( $s->id, $self->id, $self->_graphviz_edgeattr($i++) ); |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
return $g; |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub _graphviz_nodeattr { |
|
219
|
|
|
|
|
|
|
return (label => shift->name); |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub _graphviz_edgeattr { } |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
use POSIX qw(strftime); |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub timestamp { |
|
227
|
|
|
|
|
|
|
my ($self, $env) = @_; |
|
228
|
|
|
|
|
|
|
my $now = time(); |
|
229
|
|
|
|
|
|
|
my $tz = strftime("%z", localtime($now)); |
|
230
|
|
|
|
|
|
|
$tz =~ s/(\d{2})(\d{2})/$1:$2/; |
|
231
|
|
|
|
|
|
|
$tz =~ s/00:00/Z/; # UTC aka Z-Time |
|
232
|
|
|
|
|
|
|
my $timestamp = strftime("%Y-%m-%dT%H:%M:%S", localtime($now)) . $tz; |
|
233
|
|
|
|
|
|
|
$env->{'rdflow.timestamp'} = $timestamp if $env; |
|
234
|
|
|
|
|
|
|
return $timestamp; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub name { |
|
238
|
|
|
|
|
|
|
shift->{name} || 'anonymous source'; |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub about { |
|
242
|
|
|
|
|
|
|
shift->name; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
sub inputs { |
|
246
|
|
|
|
|
|
|
my $self = shift; |
|
247
|
|
|
|
|
|
|
return $self->{inputs} ? @{ $self->{inputs} } : (); |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub size { |
|
251
|
|
|
|
|
|
|
my $self = shift; |
|
252
|
|
|
|
|
|
|
return 1 unless $self->{inputs}; |
|
253
|
|
|
|
|
|
|
return scalar @{ $self->{inputs} }; |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub sourcelist_args { |
|
257
|
|
|
|
|
|
|
my ($inputs, $args) = ([],{}); |
|
258
|
|
|
|
|
|
|
while ( @_ ) { |
|
259
|
|
|
|
|
|
|
my $s = shift @_; |
|
260
|
|
|
|
|
|
|
if ( ref $s ) { |
|
261
|
|
|
|
|
|
|
push @$inputs, map { RDF::Flow::Source->new($_) } $s; |
|
262
|
|
|
|
|
|
|
} elsif ( defined $s ) { |
|
263
|
|
|
|
|
|
|
$args->{$s} = shift @_; |
|
264
|
|
|
|
|
|
|
} else { |
|
265
|
|
|
|
|
|
|
croak 'undefined parameter'; |
|
266
|
|
|
|
|
|
|
} |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
return ($inputs, $args); |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub iterator_to_model { |
|
272
|
|
|
|
|
|
|
my $iterator = shift; |
|
273
|
|
|
|
|
|
|
return $iterator if $iterator->isa('RDF::Trine::Model'); |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
my $model = shift || RDF::Trine::Model->new; |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
$model->begin_bulk_ops; |
|
278
|
|
|
|
|
|
|
while (my $st = $iterator->next) { |
|
279
|
|
|
|
|
|
|
$model->add_statement( $st ); |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
$model->end_bulk_ops; |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
$model; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub empty_rdf { |
|
287
|
|
|
|
|
|
|
my $rdf = shift; |
|
288
|
|
|
|
|
|
|
return 1 unless blessed $rdf; |
|
289
|
|
|
|
|
|
|
return !($rdf->isa('RDF::Trine::Model') and $rdf->size > 0) && |
|
290
|
|
|
|
|
|
|
!($rdf->isa('RDF::Trine::Iterator') and $rdf->peek); |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub rdflow_uri { |
|
294
|
|
|
|
|
|
|
my $env = shift; |
|
295
|
|
|
|
|
|
|
return ($env || '') unless ref $env; # plain scalar or undef |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
return $env->{'rdflow.uri'} if defined $env->{'rdflow.uri'}; |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# a few lines of code from Plack::Request, so we don't require all of Plack |
|
300
|
|
|
|
|
|
|
my $base = ($env->{'psgi.url_scheme'} || "http") . |
|
301
|
|
|
|
|
|
|
"://" . ($env->{HTTP_HOST} || (($env->{SERVER_NAME} || "") . |
|
302
|
|
|
|
|
|
|
":" . ($env->{SERVER_PORT} || 80))) . ($env->{SCRIPT_NAME} || '/'); |
|
303
|
|
|
|
|
|
|
$base = URI->new($base)->canonical; |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
my $path_escape_class = '^A-Za-z0-9\-\._~/'; |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
my $path = URI::Escape::uri_escape( $env->{PATH_INFO} || '', $path_escape_class ); |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
$path .= '?' . $env->{QUERY_STRING} if !$env->{'rdflow.ignorepath'} && |
|
310
|
|
|
|
|
|
|
defined $env->{QUERY_STRING} && $env->{QUERY_STRING} ne ''; |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
$base =~ s!/$!! if $path =~ m!^/!; |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
$env->{'rdflow.uri'} = URI->new( $base . $path )->canonical->as_string; |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
$env->{'rdflow.uri'} =~ s/^https?:\/\/\/$//; |
|
317
|
|
|
|
|
|
|
$env->{'rdflow.uri'}; |
|
318
|
|
|
|
|
|
|
} |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# put at the end to prevent circular references in require |
|
321
|
|
|
|
|
|
|
require RDF::Flow::Pipeline; |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub pipe_to { |
|
324
|
|
|
|
|
|
|
my ($self, $next) = @_; |
|
325
|
|
|
|
|
|
|
return RDF::Flow::Pipeline->new( $self, $next ); |
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
1; |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
__END__ |
|
332
|
|
|
|
|
|
|
=pod |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head1 NAME |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
RDF::Flow::Source - Source of RDF data |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head1 VERSION |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
version 0.178 |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
$src = rdflow( "mydata.ttl", name => "RDF file as source" ); |
|
345
|
|
|
|
|
|
|
$src = rdflow( "mydirectory", name => "directory with RDF files as source" ); |
|
346
|
|
|
|
|
|
|
$src = rdflow( \&mysource, name => "code reference as source" ); |
|
347
|
|
|
|
|
|
|
$src = rdflow( $model, name => "RDF::Trine::Model as source" ); |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
package MySource; |
|
350
|
|
|
|
|
|
|
use parent 'RDF::Flow::Source'; |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub retrieve_rdf { |
|
353
|
|
|
|
|
|
|
my ($self, $env) = @_; |
|
354
|
|
|
|
|
|
|
my $uri = $env->{'rdflow.uri'}; |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# ... your logic here ... |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
return $model; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Each RDF::Flow::Source provides a C<retrieve> method, which returns RDF data on |
|
364
|
|
|
|
|
|
|
request. RDF data is always returned as instance of L<RDF::Trine::Model> or as |
|
365
|
|
|
|
|
|
|
instance of L<RDF::Trine::Iterator> with simple statements. The |
|
366
|
|
|
|
|
|
|
L<request format|/REQUEST FORMAT> is specified below. Sources can access RDF |
|
367
|
|
|
|
|
|
|
for instance parsed from a file or multiple files in a directory, via HTTP, |
|
368
|
|
|
|
|
|
|
from a L<RDF::Trine::Store>, or from a custom method. All sources share a set |
|
369
|
|
|
|
|
|
|
of common configurations options. |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=head1 METHODS |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head2 new ( $from {, %configuration } ) |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Create a new RDF source by wrapping a code reference, a L<RDF::Trine::Model>, |
|
376
|
|
|
|
|
|
|
or loading RDF data from a file or URL. |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
If you pass an existing RDF::Flow::Source object, it will not be wrapped. |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
A source returns RDF data as instance of L<RDF::Trine::Model> or |
|
381
|
|
|
|
|
|
|
L<RDF::Trine::Iterator> when queried by a L<PSGI> requests. This is |
|
382
|
|
|
|
|
|
|
similar to PSGI applications, which return HTTP responses instead of |
|
383
|
|
|
|
|
|
|
RDF data. RDF::Light supports three types of sources: code references, |
|
384
|
|
|
|
|
|
|
instances of RDF::Flow, and instances of RDF::Trine::Model. |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
This constructor is exported as function C<rdflow> by L<RDF::Flow>: |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
use RDF::Flow qw(rdflow); |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
$src = rdflow( @args ); # short form |
|
391
|
|
|
|
|
|
|
$src = RDF:Source->new( @args ); # explicit constructor |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 init |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Called from the constructor. Can be used in your sources. |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head2 retrieve |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Retrieve RDF data. Always returns an instance of L<RDF::Trine::Model> or |
|
400
|
|
|
|
|
|
|
L<RDF::Trine::Iterator>. You can use the method L</empty_rdf> to check |
|
401
|
|
|
|
|
|
|
whether the RDF data contains some triples or not. |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 retrieve_rdf |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Internal method to retrieve RDF data. You should define this when |
|
406
|
|
|
|
|
|
|
L<subclassing RDF::Flow::Source|RDF::Flow/DEFINING NEW SOURCE TYPES>, it |
|
407
|
|
|
|
|
|
|
is called by method C<retrieve>. |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 trigger_retrieved ( $source, $result [, $message ] ) |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Creates a logging event at trace level to log that some result has been |
|
412
|
|
|
|
|
|
|
retrieved from a source. Returns the result. By default the logging messages is |
|
413
|
|
|
|
|
|
|
constructed from the source's name and the result's size. This function is |
|
414
|
|
|
|
|
|
|
automatically called at the end of method C<retrieve>, so you do not have to |
|
415
|
|
|
|
|
|
|
call it, if your source only implements the method C<retrieve_rdf>. |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 name |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Returns the name of the source. |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head2 about |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Returns a string with short information (name and size) of the source. |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 size |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Returns the number of inputs (for multi-part sources, such as |
|
428
|
|
|
|
|
|
|
L<RDF::Flow::Source::Union>). |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 inputs |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
Returns a list of inputs (unstable). |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=head2 id |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Returns a unique id of the source, based on its memory address. |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 pipe_to |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Pipes the source to another source (L<RDF::Flow::Pipeline>). |
|
441
|
|
|
|
|
|
|
C<< $a->pipe_to($b) >> is equivalent to C<< RDF::Flow::Pipeline->new($a,$b) >>. |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=head2 timestamp |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
Returns an ISO 8601 timestamp and possibly sets in |
|
446
|
|
|
|
|
|
|
C<rdflow.timestamp> environment variable. |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head2 trigger_error |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
Triggers an error and possibly sets the C<rdflow.error> environment variable. |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=head2 graphviz |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Purely experimental method for visualizing nets of sources. |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 graphviz_addnode |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Purely experimental method for visualizing nets of sources. |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head1 CONFIGURATION |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=over 4 |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item name |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Name of the source. Defaults to "anonymous source". |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=item from |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Filename, URL, directory, L<RDF::Trine::Model> or code reference to retrieve |
|
471
|
|
|
|
|
|
|
RDF from. This option is not supported by all source types. |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=item match |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
Optional regular expression or code reference to match and/or map request URIs. |
|
476
|
|
|
|
|
|
|
For instance you can rewrite URNs to HTTP URIs like this: |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
match => sub { $_[0] =~ s/^urn:isbn:/http://example.org/isbn/; } |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
The URI in C<rdflow.uri> is set back to its original value after retrieval. |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=back |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=head1 REQUEST FORMAT |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
A valid request can either by an URI (as byte string) or a hash reference, that |
|
487
|
|
|
|
|
|
|
is called an environment. The environment must be a specific subset of a |
|
488
|
|
|
|
|
|
|
L<PSGI> environment with the following variables: |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=over 4 |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=item C<rdflow.uri> |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
A request URI as byte string. If this variable is provided, no other variables |
|
495
|
|
|
|
|
|
|
are needed and the following variables will not modify this value. |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=item C<psgi.url_scheme> |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
A string C<http> (assumed if not set) or C<https>. |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=item C<HTTP_HOST> |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
The base URL of the host for constructing an URI. This or SERVER_NAME is |
|
504
|
|
|
|
|
|
|
required unless rdflow.uri is set. |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=item C<SERVER_NAME> |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
Name of the host for construction an URI. Only used if HTTP_HOST is not set. |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=item C<SERVER_PORT> |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Port of the host for constructing an URI. By default C<80> is used, but not |
|
513
|
|
|
|
|
|
|
kept as part of an HTTP-URI due to URI normalization. |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=item C<SCRIPT_NAME> |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
Path for constructing an URI. Must start with C</> if given. |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=item C<QUERY_STRING> |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
Portion of the request URI that follows the ?, if any. |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=item C<rdflow.ignorepath> |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
If this variable is set, no query part is used when constructing an URI. |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=back |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
The method reuses code from L<Plack::Request> by Tatsuhiko Miyagawa. Note that |
|
530
|
|
|
|
|
|
|
the environment variable REQUEST_URI is not included. When this method |
|
531
|
|
|
|
|
|
|
constructs a request URI from a given environment hash, it always sets the |
|
532
|
|
|
|
|
|
|
variable C<rdflow.uri>, so it is always guaranteed to be set after calling. |
|
533
|
|
|
|
|
|
|
However it may be the empty string, if an environment without HTTP_HOST or |
|
534
|
|
|
|
|
|
|
SERVER_NAME was provided. |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
The following functions are defined to be used in custom source types. |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head2 rdflow_uri ( $env | $uri ) |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Prepares and returns a request URI, as given by an evironment hash or by an |
|
543
|
|
|
|
|
|
|
existing URI. Sets C<rdflow.uri> if an environment has been given. URI |
|
544
|
|
|
|
|
|
|
construction is based on code from L<Plack>, as described in the L</REQUEST |
|
545
|
|
|
|
|
|
|
FORMAT>. The following environment variables are used: C<psgi.url_scheme>, |
|
546
|
|
|
|
|
|
|
C<HTTP_HOST> or C<SERVER_NAME>, C<SERVER_PORT>, C<SCRIPT_NAME>, C<PATH_INFO>, |
|
547
|
|
|
|
|
|
|
C<QUERY_STRING>, and C<rdflow.ignorepath>. |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head2 sourcelist_args ( @_ ) |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Parses a list of inputs (code or other references) mixed with key-value pairs |
|
552
|
|
|
|
|
|
|
and returns both separated in an array and and hash. |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head2 iterator_to_model ( [ $iterator ] [, $model ] ) |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
Adds all statements from a L<RDF::Trine::Iterator> to a (possibly new) |
|
557
|
|
|
|
|
|
|
L<RDF::Trine::Model> model and returns the model. |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=head2 empty_rdf ( $rdf ) |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Returns true if the argument is an empty L<RDF::Trine::Model>, an |
|
562
|
|
|
|
|
|
|
empty L<RDF::Trine::Iterator>, or no RDF data at all. |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=head1 AUTHOR |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
Jakob Voà <voss@gbv.de> |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
This software is copyright (c) 2011 by Jakob VoÃ. |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
573
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=cut |
|
576
|
|
|
|
|
|
|
|