line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
7
|
|
|
7
|
|
825
|
use strict; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
209
|
|
2
|
7
|
|
|
7
|
|
34
|
use warnings; |
|
7
|
|
|
|
|
11
|
|
|
7
|
|
|
|
|
297
|
|
3
|
|
|
|
|
|
|
package RDF::Flow::Source; |
4
|
|
|
|
|
|
|
{ |
5
|
|
|
|
|
|
|
$RDF::Flow::Source::VERSION = '0.178'; |
6
|
|
|
|
|
|
|
} |
7
|
|
|
|
|
|
|
#ABSTRACT: Source of RDF data |
8
|
|
|
|
|
|
|
|
9
|
7
|
|
|
7
|
|
6330
|
use Log::Contextual::WarnLogger; |
|
7
|
|
|
|
|
7367
|
|
|
7
|
|
|
|
|
373
|
|
10
|
7
|
|
|
|
|
90
|
use Log::Contextual qw(:log), -default_logger |
11
|
7
|
|
|
7
|
|
12278
|
=> Log::Contextual::WarnLogger->new({ env_prefix => __PACKAGE__ }); |
|
7
|
|
|
|
|
783420
|
|
12
|
|
|
|
|
|
|
|
13
|
7
|
|
|
7
|
|
451392
|
use 5.010; |
|
7
|
|
|
|
|
29
|
|
|
7
|
|
|
|
|
321
|
|
14
|
7
|
|
|
7
|
|
43
|
use re qw(is_regexp); |
|
7
|
|
|
|
|
23
|
|
|
7
|
|
|
|
|
1260
|
|
15
|
|
|
|
|
|
|
|
16
|
7
|
|
|
7
|
|
11718
|
use RDF::Trine qw(iri); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use Scalar::Util qw(blessed refaddr reftype); |
18
|
|
|
|
|
|
|
use Try::Tiny; |
19
|
|
|
|
|
|
|
use Carp; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use URI; |
22
|
|
|
|
|
|
|
use URI::Escape; |
23
|
|
|
|
|
|
|
use File::Spec::Functions; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use parent 'Exporter'; |
26
|
|
|
|
|
|
|
our @EXPORT_OK = qw(sourcelist_args iterator_to_model empty_rdf rdflow_uri); |
27
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
28
|
|
|
|
|
|
|
util => [qw(sourcelist_args iterator_to_model empty_rdf rdflow_uri)], |
29
|
|
|
|
|
|
|
); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use RDF::Trine::Model; |
32
|
|
|
|
|
|
|
use RDF::Trine::Parser; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#require RDF::Flow::Pipeline; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Constructor: new( $from {, %configuration } ) or new( from => $from, ... ).
#
# $from may be
#   * a plain string (http/https URL, file name or directory), whose RDF
#     content is parsed into a new RDF::Trine::Model,
#   * another RDF::Flow::Source (wrapped; retrieval is delegated to it),
#   * an RDF::Trine::Model (the bounded description of the request URI is
#     returned on retrieval),
#   * a code reference (called with the request environment), or
#   * undefined (an empty source; plain RDF::Flow::Source warns about this).
#
# Recognised configuration keys are "name" and "match". Croaks on any other
# kind of source. Returns the new object after calling its init() hook.
sub new {
    my $class = shift;

    # A reference as first argument, or an odd number of arguments, means the
    # source was passed positionally; otherwise only key-value pairs were given.
    my ($src, %args) = ( ref($_[0]) || @_ % 2 ) ? @_ : (undef, @_);

    $src = delete $args{from} unless defined $src;

    my $match = delete $args{match};
    my $code;

    my $self = bless { }, $class;

    # plain string: load from URL, file or directory
    $src = _load_rdf_model($src) if $src and not ref $src;

    if (blessed $src and $src->isa('RDF::Flow::Source')) {
        $self->{from} = $src;
        $code = sub {
            $src->retrieve( @_ );
        };
        # return $src; # don't wrap
        # TODO: use args to modify object!
    } elsif ( blessed $src and $src->isa('RDF::Trine::Model') ) {
        $self->{from} = $src;
        $code = sub {
            my $uri = rdflow_uri( shift );
            iterator_to_model( $src->bounded_description(
                iri( $uri )
            ) );
        };
    } elsif ( ref $src and ref $src eq 'CODE' ) {
        $code = $src;
    } elsif (not defined $src) {
        carp 'Missing RDF source in plain RDF::Flow::Source'
            if $class eq 'RDF::Flow::Source';
        $code = sub { };
    } else {
        croak 'expected RDF::Source, RDF::Trine::Model, or code reference'
    }

    $self->{name} = $args{name} if defined $args{name};
    $self->{code} = $code;

    $self->match( $match );

    $self->init();

    $self;
}

# Parses the RDF found at $location (http/https URL, single file, or all files
# with a known parser extension in a directory) into a fresh
# RDF::Trine::Model and returns that model. Parse failures are logged as
# warnings and do not abort loading of the remaining files.
sub _load_rdf_model {
    my ($location) = @_;
    my $model = RDF::Trine::Model->new;

    if ( $location =~ /^https?:\/\// ) {
        my $ok = eval {
            RDF::Trine::Parser->parse_url_into_model( $location, $model );
            1;
        };
        _log_load_result( $location, $ok );
        return $model;
    }

    my @files;
    if ( -d $location ) {
        if ( opendir( my $dh, $location ) ) {
            my $ext = join '|', map { quotemeta }
                keys %RDF::Trine::Parser::file_extensions;
            @files = map { catfile( $location, $_ ) }
                     grep { /\.($ext)$/ } readdir $dh;
            closedir $dh;
        } else {
            log_warn { "failed to open directory $location"; };
        }
    } else {
        @files = ($location);
    }

    for my $file (@files) {
        # eval returns 1 only if parsing did not die
        my $ok = eval {
            RDF::Trine::Parser->parse_file_into_model( "file:///$file", $file, $model );
            1;
        };
        _log_load_result( $file, $ok );
    }

    return $model;
}

# Logs whether loading RDF from $location succeeded ($ok true) or failed.
sub _log_load_result {
    my ($location, $ok) = @_;
    if ( $ok ) {
        log_info { "loaded from $location"; };
    } else {
        log_warn { "failed to load from $location"; };
    }
    return;
}
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Hook called at the end of the constructor. Does nothing by default;
# subclasses can override it to perform their own initialisation.
sub init {
    return;
}
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Accessor for the URI matcher.
#
# Without arguments, returns the current matcher (a code reference or undef).
# With an argument, sets the matcher: a compiled regular expression is wrapped
# into a code reference that matches its first argument against it, a code
# reference is stored as given, and undef removes the matcher. Croaks on any
# other value. Returns the stored value.
sub match { # accessor
    my $self = shift;
    return $self->{match} unless @_;

    my $match = shift;
    return $self->{match} = undef unless defined $match;

    if ( is_regexp($match) ) {
        my $pattern = $match;
        $match = sub { $_[0] =~ $pattern; };
    }

    # reftype is undef for plain scalars, so normalise before comparing
    my $type = reftype($match) // '';
    croak 'url parameter must be code or regexp, got '
        . ( $type ne '' ? $type : 'plain scalar' ) . ": $match"
        if $type ne 'CODE';

    return $self->{match} = $match;
}
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Retrieves RDF data for a request.
#
# $env is either a request URI (plain string) or an environment hash
# reference. The request URI is derived and stored in 'rdflow.uri', a
# timestamp is stored in 'rdflow.timestamp', and retrieve_rdf is called
# unless a configured matcher rejects the URI, in which case an empty model
# is returned. The result is passed through trigger_retrieved and returned.
sub retrieve {
    my ($self, $env) = @_;
    $env = { 'rdflow.uri' => $env } if ($env and not ref $env);

    # Derive 'rdflow.uri' eagerly. The log_trace block below is only
    # evaluated when trace logging is enabled, so relying on it would leave
    # the URI unset for the matcher and for retrieve_rdf.
    rdflow_uri($env) if ref $env and reftype($env) eq 'HASH';

    log_trace {
        sprintf "retrieve from %s with %s", about($self), rdflow_uri($env);
    };
    $self->timestamp( $env );

    my $result;
    if ( $self->{match} ) {
        # the matcher may rewrite the URI in place (it gets an alias to the
        # hash element), so remember the original and restore it afterwards
        my $uri = $env->{'rdflow.uri'};
        if ( $self->{match}->( $env->{'rdflow.uri'} ) ) {
            $result = $self->retrieve_rdf( $env );
            $env->{'rdflow.uri'} = $uri;
        } else {
            log_trace { "URI did not match: " . $env->{'rdflow.uri'} };
            $result = RDF::Trine::Model->new;
        }
    } else {
        $result = $self->retrieve_rdf( $env );
    }

    return $self->trigger_retrieved( $result );
}
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Calls the source's code reference with the request environment and returns
# its result. If the code dies, the error (with a trailing full stop and
# trailing whitespace removed) is reported via trigger_error and an empty
# RDF::Trine::Model is returned instead.
sub retrieve_rdf {
    my ($self, $env) = @_;

    return try {
        my $handler = $self->{code};
        $handler->( $env );
    } catch {
        my $error = $_;
        $error =~ s/[.]?\s+$//s;
        RDF::Flow::Source::trigger_error( $self, $error, $env );
        RDF::Trine::Model->new;
    };
}
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Reports an error: stores the message (default 'unknown error') in
# 'rdflow.error' of the environment, if one is given, and logs it at error
# level.
sub trigger_error {
    my ($self, $message, $env) = @_;

    $message ||= 'unknown error';

    if ( $env ) {
        $env->{'rdflow.error'} = $message;
    }

    log_error { $message };
}
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# Logs at trace level that $result has been retrieved from this source and
# returns $result unchanged. $msg is an optional sprintf format taking the
# source name and a "<size> triples" string; the size is 'no' for a false
# result, the result's size() if it has one, and 'some' otherwise.
sub trigger_retrieved {
    my ($self, $result, $msg) = @_;

    log_trace {
        $msg ||= "%s returned %s";

        my $size = !$result                                    ? 'no'
                 : (blessed $result and $result->can('size'))  ? $result->size
                 :                                               'some';

        sprintf $msg, name($self), "$size triples";
    };

    return $result;
}
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# Returns an identifier for the source: the string "source" followed by the
# object's reference address.
sub id {
    my ($self) = @_;
    my $address = refaddr($self);
    return "source" . $address;
}
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Experimental: adds this source to a graph via graphviz_addnode, passing on
# all arguments, and returns the result in scalar context.
sub graphviz {
    my ($self, @args) = @_;
    return scalar $self->graphviz_addnode( @args );
}
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# Experimental: adds this source as a node to a GraphViz graph, together with
# all of its inputs and an edge from each input to this source.
#
# The first argument may be an existing GraphViz object; otherwise a new one
# is created from the arguments. Returns the graph, or nothing if no graph
# could be created (for instance because GraphViz is not installed).
sub graphviz_addnode {
    my $self = shift;

    # GraphViz is an optional dependency that is not loaded at file level,
    # so load it on demand before trying to construct a new graph.
    my $g = ( blessed $_[0] and $_[0]->isa('GraphViz') )
          ? shift
          : eval { require GraphViz; GraphViz->new( @_ ) };
    return unless $g;

    $g->add_node( $self->id, $self->_graphviz_nodeattr );

    my $i = 1;
    foreach my $s ( $self->inputs ) {
        $s->graphviz($g);
        $g->add_edge( $s->id, $self->id, $self->_graphviz_edgeattr($i++) );
    }

    return $g;
}
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# Returns the GraphViz node attributes of this source: its name as label.
sub _graphviz_nodeattr {
    my ($self) = @_;
    return ( label => $self->name );
}
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Returns the GraphViz attributes for the edge from an input to this source.
# None by default; graphviz_addnode passes the 1-based input position.
sub _graphviz_edgeattr {
    return;
}
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
use POSIX qw(strftime); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# Returns the current local time as ISO 8601 timestamp with time zone
# designator ("Z" for UTC, otherwise an offset such as "+01:00"). If an
# environment is given, the timestamp is also stored in 'rdflow.timestamp'.
sub timestamp {
    my ($self, $env) = @_;

    # break down the time once, so date and offset refer to the same instant
    my @now = localtime( time() );

    my $tz = strftime("%z", @now);
    $tz =~ s/(\d{2})(\d{2})/$1:$2/;   # +hhmm => +hh:mm
    $tz =~ s/^[+-]00:00$/Z/;          # UTC aka Z-Time (replace sign as well)

    my $timestamp = strftime("%Y-%m-%dT%H:%M:%S", @now) . $tz;
    $env->{'rdflow.timestamp'} = $timestamp if $env;
    return $timestamp;
}
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# Returns the name of the source, or 'anonymous source' if none was set.
sub name {
    my ($self) = @_;
    return $self->{name} || 'anonymous source';
}
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Returns a short description of the source; here simply its name.
sub about {
    my ($self) = @_;
    return $self->name;
}
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Returns the list of input sources stored in this source (empty if none).
sub inputs {
    my ($self) = @_;
    my $list = $self->{inputs};
    return () unless $list;
    return @$list;
}
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
# Returns the number of inputs of this source, or 1 if it has no input list.
sub size {
    my ($self) = @_;
    my $list = $self->{inputs};
    return $list ? scalar @$list : 1;
}
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Splits an argument list of inputs mixed with key-value pairs.
#
# Every reference is wrapped with RDF::Flow::Source->new and collected as
# input; every defined non-reference is taken as a key whose value is the
# following argument. Croaks on an undefined argument in key position.
# Returns an array reference of inputs and a hash reference of arguments.
sub sourcelist_args {
    my @sources;
    my %options;

    while ( @_ ) {
        my $item = shift @_;

        croak 'undefined parameter' unless ref $item or defined $item;

        if ( ref $item ) {
            push @sources, RDF::Flow::Source->new($item);
        } else {
            $options{$item} = shift @_;
        }
    }

    return (\@sources, \%options);
}
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# Adds all statements of an iterator to a model and returns the model.
# If the first argument already is an RDF::Trine::Model it is returned as
# is; the target model is the second argument or a newly created model.
sub iterator_to_model {
    my ($iterator, $model) = @_;

    if ( $iterator->isa('RDF::Trine::Model') ) {
        return $iterator;
    }

    $model ||= RDF::Trine::Model->new;

    $model->begin_bulk_ops;
    while ( my $statement = $iterator->next ) {
        $model->add_statement( $statement );
    }
    $model->end_bulk_ops;

    return $model;
}
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Returns true unless the argument is an RDF::Trine::Model with at least one
# statement or an RDF::Trine::Iterator that still has a statement to peek at.
# Anything that is not a blessed object counts as empty.
sub empty_rdf {
    my ($rdf) = @_;

    return 1 unless blessed $rdf;

    # non-empty model
    return !1 if $rdf->isa('RDF::Trine::Model') and $rdf->size > 0;

    # non-empty iterator
    return !1 if $rdf->isa('RDF::Trine::Iterator') and $rdf->peek;

    return !0;
}
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# Returns the request URI for a plain URI string or an environment hash.
#
# A non-reference argument is returned as is (or '' if false). For an
# environment, an already defined 'rdflow.uri' is returned unchanged;
# otherwise the URI is built from psgi.url_scheme, HTTP_HOST or
# SERVER_NAME/SERVER_PORT, SCRIPT_NAME, PATH_INFO and QUERY_STRING, stored
# in 'rdflow.uri' and returned.
sub rdflow_uri {
    my ($env) = @_;

    # plain scalar or undef
    unless ( ref $env ) {
        return ($env || '');
    }

    if ( defined $env->{'rdflow.uri'} ) {
        return $env->{'rdflow.uri'};
    }

    # a few lines of code from Plack::Request, so we don't require all of Plack
    my $scheme    = $env->{'psgi.url_scheme'} || "http";
    my $authority = $env->{HTTP_HOST}
        || ( ($env->{SERVER_NAME} || "") . ":" . ($env->{SERVER_PORT} || 80) );
    my $script    = $env->{SCRIPT_NAME} || '/';

    my $base = URI->new( $scheme . "://" . $authority . $script )->canonical;

    my $path_escape_class = '^A-Za-z0-9\-\._~/';
    my $path = URI::Escape::uri_escape( $env->{PATH_INFO} || '', $path_escape_class );

    # the query part is appended unless rdflow.ignorepath is set
    my $query = $env->{QUERY_STRING};
    if ( !$env->{'rdflow.ignorepath'} && defined $query && $query ne '' ) {
        $path .= '?' . $query;
    }

    # avoid a double slash between base and path
    $base =~ s!/$!! if $path =~ m!^/!;

    my $uri = URI->new( $base . $path )->canonical->as_string;
    $env->{'rdflow.uri'} = $uri;

    # an environment without host information yields the empty string
    $env->{'rdflow.uri'} =~ s/^https?:\/\/\/$//;

    return $env->{'rdflow.uri'};
}
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# put at the end to prevent circular references in require |
321
|
|
|
|
|
|
|
require RDF::Flow::Pipeline; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# Pipes this source to another one: returns a new RDF::Flow::Pipeline made
# of this source followed by $next.
sub pipe_to {
    my $self = shift;
    my $next = shift;

    my $pipeline = RDF::Flow::Pipeline->new( $self, $next );
    return $pipeline;
}
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
1; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
__END__ |
332
|
|
|
|
|
|
|
=pod |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head1 NAME |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
RDF::Flow::Source - Source of RDF data |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head1 VERSION |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
version 0.178 |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head1 SYNOPSIS |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
$src = rdflow( "mydata.ttl", name => "RDF file as source" ); |
345
|
|
|
|
|
|
|
$src = rdflow( "mydirectory", name => "directory with RDF files as source" ); |
346
|
|
|
|
|
|
|
$src = rdflow( \&mysource, name => "code reference as source" ); |
347
|
|
|
|
|
|
|
$src = rdflow( $model, name => "RDF::Trine::Model as source" ); |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
package MySource; |
350
|
|
|
|
|
|
|
use parent 'RDF::Flow::Source'; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub retrieve_rdf { |
353
|
|
|
|
|
|
|
my ($self, $env) = @_; |
354
|
|
|
|
|
|
|
my $uri = $env->{'rdflow.uri'}; |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# ... your logic here ... |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
return $model; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=head1 DESCRIPTION |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Each RDF::Flow::Source provides a C<retrieve> method, which returns RDF data on |
364
|
|
|
|
|
|
|
request. RDF data is always returned as instance of L<RDF::Trine::Model> or as |
365
|
|
|
|
|
|
|
instance of L<RDF::Trine::Iterator> with simple statements. The |
366
|
|
|
|
|
|
|
L<request format|/REQUEST FORMAT> is specified below. Sources can access RDF |
367
|
|
|
|
|
|
|
for instance parsed from a file or multiple files in a directory, via HTTP, |
368
|
|
|
|
|
|
|
from a L<RDF::Trine::Store>, or from a custom method. All sources share a set |
369
|
|
|
|
|
|
|
of common configuration options. |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=head1 METHODS |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head2 new ( $from {, %configuration } ) |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Create a new RDF source by wrapping a code reference, a L<RDF::Trine::Model>, |
376
|
|
|
|
|
|
|
or loading RDF data from a file or URL. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
If you pass an existing RDF::Flow::Source object, it is wrapped in a new source that delegates to its C<retrieve> method. |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
A source returns RDF data as instance of L<RDF::Trine::Model> or |
381
|
|
|
|
|
|
|
L<RDF::Trine::Iterator> when queried by a L<PSGI> request. This is |
382
|
|
|
|
|
|
|
similar to PSGI applications, which return HTTP responses instead of |
383
|
|
|
|
|
|
|
RDF data. RDF::Flow supports three types of sources: code references, |
384
|
|
|
|
|
|
|
instances of RDF::Flow::Source, and instances of RDF::Trine::Model. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
This constructor is exported as function C<rdflow> by L<RDF::Flow>: |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
use RDF::Flow qw(rdflow); |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
$src = rdflow( @args ); # short form |
391
|
|
|
|
|
|
|
$src = RDF::Flow::Source->new( @args ); # explicit constructor |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 init |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Called from the constructor. Can be used in your sources. |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head2 retrieve |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Retrieve RDF data. Always returns an instance of L<RDF::Trine::Model> or |
400
|
|
|
|
|
|
|
L<RDF::Trine::Iterator>. You can use the method L</empty_rdf> to check |
401
|
|
|
|
|
|
|
whether the RDF data contains some triples or not. |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 retrieve_rdf |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Internal method to retrieve RDF data. You should define this when |
406
|
|
|
|
|
|
|
L<subclassing RDF::Flow::Source|RDF::Flow/DEFINING NEW SOURCE TYPES>, it |
407
|
|
|
|
|
|
|
is called by method C<retrieve>. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 trigger_retrieved ( $source, $result [, $message ] ) |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Creates a logging event at trace level to log that some result has been |
412
|
|
|
|
|
|
|
retrieved from a source. Returns the result. By default the logging message is |
413
|
|
|
|
|
|
|
constructed from the source's name and the result's size. This function is |
414
|
|
|
|
|
|
|
automatically called at the end of method C<retrieve>, so you do not have to |
415
|
|
|
|
|
|
|
call it, if your source only implements the method C<retrieve_rdf>. |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 name |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Returns the name of the source. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head2 about |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Returns a string with short information (name and size) of the source. |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 size |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Returns the number of inputs (for multi-part sources, such as |
428
|
|
|
|
|
|
|
L<RDF::Flow::Source::Union>). |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 inputs |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
Returns a list of inputs (unstable). |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=head2 id |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Returns a unique id of the source, based on its memory address. |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 pipe_to |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Pipes the source to another source (L<RDF::Flow::Pipeline>). |
441
|
|
|
|
|
|
|
C<< $a->pipe_to($b) >> is equivalent to C<< RDF::Flow::Pipeline->new($a,$b) >>. |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=head2 timestamp |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
Returns an ISO 8601 timestamp and, if an environment is given, stores it in the |
446
|
|
|
|
|
|
|
C<rdflow.timestamp> environment variable. |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head2 trigger_error |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
Triggers an error and possibly sets the C<rdflow.error> environment variable. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=head2 graphviz |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Purely experimental method for visualizing nets of sources. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 graphviz_addnode |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Purely experimental method for visualizing nets of sources. |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head1 CONFIGURATION |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=over 4 |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item name |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Name of the source. Defaults to "anonymous source". |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=item from |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Filename, URL, directory, L<RDF::Trine::Model> or code reference to retrieve |
471
|
|
|
|
|
|
|
RDF from. This option is not supported by all source types. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=item match |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
Optional regular expression or code reference to match and/or map request URIs. |
476
|
|
|
|
|
|
|
For instance you can rewrite URNs to HTTP URIs like this: |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
match => sub { $_[0] =~ s{^urn:isbn:}{http://example.org/isbn/}; } |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
The URI in C<rdflow.uri> is set back to its original value after retrieval. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=back |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=head1 REQUEST FORMAT |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
A valid request can either be a URI (as byte string) or a hash reference, that |
487
|
|
|
|
|
|
|
is called an environment. The environment must be a specific subset of a |
488
|
|
|
|
|
|
|
L<PSGI> environment with the following variables: |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=over 4 |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=item C<rdflow.uri> |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
A request URI as byte string. If this variable is provided, no other variables |
495
|
|
|
|
|
|
|
are needed and the following variables will not modify this value. |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=item C<psgi.url_scheme> |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
A string C<http> (assumed if not set) or C<https>. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=item C<HTTP_HOST> |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
The base URL of the host for constructing an URI. This or SERVER_NAME is |
504
|
|
|
|
|
|
|
required unless rdflow.uri is set. |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=item C<SERVER_NAME> |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
Name of the host for constructing a URI. Only used if HTTP_HOST is not set. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=item C<SERVER_PORT> |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Port of the host for constructing an URI. By default C<80> is used, but not |
513
|
|
|
|
|
|
|
kept as part of an HTTP-URI due to URI normalization. |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=item C<SCRIPT_NAME> |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
Path for constructing an URI. Must start with C</> if given. |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=item C<QUERY_STRING> |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
Portion of the request URI that follows the ?, if any. |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=item C<rdflow.ignorepath> |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
If this variable is set, no query part is used when constructing an URI. |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=back |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
The method reuses code from L<Plack::Request> by Tatsuhiko Miyagawa. Note that |
530
|
|
|
|
|
|
|
the environment variable REQUEST_URI is not included. When this method |
531
|
|
|
|
|
|
|
constructs a request URI from a given environment hash, it always sets the |
532
|
|
|
|
|
|
|
variable C<rdflow.uri>, so it is always guaranteed to be set after calling. |
533
|
|
|
|
|
|
|
However it may be the empty string, if an environment without HTTP_HOST or |
534
|
|
|
|
|
|
|
SERVER_NAME was provided. |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head1 FUNCTIONS |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
The following functions are defined to be used in custom source types. |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head2 rdflow_uri ( $env | $uri ) |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Prepares and returns a request URI, as given by an environment hash or by an |
543
|
|
|
|
|
|
|
existing URI. Sets C<rdflow.uri> if an environment has been given. URI |
544
|
|
|
|
|
|
|
construction is based on code from L<Plack>, as described in the L</REQUEST |
545
|
|
|
|
|
|
|
FORMAT>. The following environment variables are used: C<psgi.url_scheme>, |
546
|
|
|
|
|
|
|
C<HTTP_HOST> or C<SERVER_NAME>, C<SERVER_PORT>, C<SCRIPT_NAME>, C<PATH_INFO>, |
547
|
|
|
|
|
|
|
C<QUERY_STRING>, and C<rdflow.ignorepath>. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head2 sourcelist_args ( @_ ) |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Parses a list of inputs (code or other references) mixed with key-value pairs |
552
|
|
|
|
|
|
|
and returns both separated in an array and a hash. |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head2 iterator_to_model ( [ $iterator ] [, $model ] ) |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
Adds all statements from a L<RDF::Trine::Iterator> to a (possibly new) |
557
|
|
|
|
|
|
|
L<RDF::Trine::Model> model and returns the model. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=head2 empty_rdf ( $rdf ) |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Returns true if the argument is an empty L<RDF::Trine::Model>, an |
562
|
|
|
|
|
|
|
empty L<RDF::Trine::Iterator>, or no RDF data at all. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=head1 AUTHOR |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
Jakob Voß <voss@gbv.de> |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
This software is copyright (c) 2011 by Jakob Voß. |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
573
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=cut |
576
|
|
|
|
|
|
|
|