line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RDF::Generator::Void; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
20412
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
34
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
29
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
31
|
|
6
|
1
|
|
|
1
|
|
413
|
use Moose; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Moose::Util::TypeConstraints; |
8
|
|
|
|
|
|
|
use Data::UUID; |
9
|
|
|
|
|
|
|
use RDF::Trine qw[iri literal blank variable statement]; |
10
|
|
|
|
|
|
|
use RDF::Generator::Void::Stats; |
11
|
|
|
|
|
|
|
# use less (); |
12
|
|
|
|
|
|
|
use utf8; |
13
|
|
|
|
|
|
|
use URI::Split qw(uri_split uri_join); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use aliased 'RDF::Generator::Void::Meta::Attribute::ObjectList'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Namespace helpers for the vocabularies used when emitting statements,
# in order: VoID, RDF, XML Schema datatypes, Dublin Core terms and PROV.
my ($void, $rdf, $xsd, $dct, $prov) = map { RDF::Trine::Namespace->new($_) } (
    'http://rdfs.org/ns/void#',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'http://www.w3.org/2001/XMLSchema#',
    'http://purl.org/dc/terms/',
    'http://www.w3.org/ns/prov#',
);
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 NAME |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
RDF::Generator::Void - Generate VoID descriptions based on data in an RDF model |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 VERSION |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Version 0.13 |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=cut |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
our $VERSION = '0.13'; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use RDF::Generator::Void; |
39
|
|
|
|
|
|
|
use RDF::Trine::Model; |
40
|
|
|
|
|
|
|
my $mymodel = RDF::Trine::Model->temporary_model; |
41
|
|
|
|
|
|
|
[add some data to $mymodel here] |
42
|
|
|
|
|
|
|
my $generator = RDF::Generator::Void->new(inmodel => $mymodel); |
43
|
|
|
|
|
|
|
$generator->urispace('http://example.org'); |
44
|
|
|
|
|
|
|
$generator->add_endpoints('http://example.org/sparql'); |
45
|
|
|
|
|
|
|
my $voidmodel = $generator->generate; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 DESCRIPTION |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This module takes a L<RDF::Trine::Model> object as input to the |
50
|
|
|
|
|
|
|
constructor, and based on the data in that model as well as data |
51
|
|
|
|
|
|
|
supplied by the user, it creates a new model with a VoID description |
52
|
|
|
|
|
|
|
of the data in the model. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
For a description of VoID, see L<http://www.w3.org/TR/void/>. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 METHODS |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 new(inmodel => $mymodel, dataset_uri => URI->new($dataset_uri), level => 1); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The constructor. It can be called with two parameters, namely, |
61
|
|
|
|
|
|
|
C<inmodel> which is a model we want to describe and C<dataset_uri>, |
62
|
|
|
|
|
|
|
which is the URI we want to use for the description. Users should make |
63
|
|
|
|
|
|
|
sure it is possible to get this with HTTP. If this is not possible, |
64
|
|
|
|
|
|
|
you may leave this field empty so that a simple URN can be created for |
65
|
|
|
|
|
|
|
you as a default. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 C<inmodel> |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Read-only accessor for the model used in description creation. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 C<dataset_uri> |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Read-only accessor for the URI to the dataset. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=cut |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# The model to describe. It must be passed to the constructor and is
# read-only afterwards.
has 'inmodel' => (
    isa      => 'RDF::Trine::Model',
    is       => 'ro',
    required => 1,
);
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Type machinery behind the dataset_uri attribute: a DatasetURI is a
# resource or blank node, and the coercions make it possible to pass a
# plain string or a URI object instead.
class_type('URI');

subtype 'DatasetURI'
    => as 'Object',
    where {
        $_->isa('RDF::Trine::Node::Resource')
            || $_->isa('RDF::Trine::Node::Blank')
    };

coerce 'DatasetURI'
    => from 'URI', via { iri("$_") },
    from 'Str', via { iri($_) };
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Node identifying the dataset being described. Strings and URI objects
# are coerced; when nothing is supplied the value is built on first
# access by _build_dataset_uri.
has 'dataset_uri' => (
    isa     => 'DatasetURI',
    is      => 'ro',
    coerce  => 1,
    lazy    => 1,
    builder => '_build_dataset_uri',
);
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Default builder for dataset_uri: returns a resource node holding a
# freshly generated urn:uuid: URN.
sub _build_dataset_uri {
    my $self = shift;
    my $uuid = Data::UUID->new->create_str;
    return iri("urn:uuid:$uuid");
}
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 Property Attributes |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The below attributes concern some essential properties in the VoID |
112
|
|
|
|
|
|
|
vocabulary. They are mostly arrays, and can be manipulated using array |
113
|
|
|
|
|
|
|
methods. Methods starting with C<all_> will return an array of unique |
114
|
|
|
|
|
|
|
values. Methods starting with C<add_> take a list of values to add, |
115
|
|
|
|
|
|
|
and those starting with C<has_no_> return a boolean value, false if |
116
|
|
|
|
|
|
|
the array is empty. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head3 C<all_vocabularies>, C<add_vocabularies>, C<has_no_vocabularies> |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Methods to manipulate a list of vocabularies used in the dataset. The |
121
|
|
|
|
|
|
|
values should be a string that represents the URI of a vocabulary. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# The list attributes that follow all use the ObjectList trait.
# NOTE(review): the all_*, add_* and has_no_* methods described in the
# POD are presumably installed by that trait -- confirm in
# RDF::Generator::Void::Meta::Attribute::ObjectList.
has '_vocabularies' => (
    traits => [ObjectList],
);
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head3 C<all_endpoints>, C<add_endpoints>, C<has_no_endpoints> |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Methods to manipulate a list of SPARQL endpoints that can be used to |
133
|
|
|
|
|
|
|
query the dataset. The values should be a string that represents the |
134
|
|
|
|
|
|
|
URI of a SPARQL endpoint. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# SPARQL endpoint URIs; generate() emits one void:sparqlEndpoint
# statement per entry.
has '_endpoints' => (
    traits => [ObjectList],
);
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head3 C<all_titles>, C<add_titles>, C<has_no_titles> |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Methods to manipulate the titles of the datasets. The values should be |
144
|
|
|
|
|
|
|
L<RDF::Trine::Node::Literal> objects, and should be set with |
145
|
|
|
|
|
|
|
language. Typically, you would have a value per language. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Dataset titles as literal nodes; generate() emits one dct:title
# statement per entry.
has '_titles' => (
    isa    => 'ArrayRef[RDF::Trine::Node::Literal]',
    traits => [ObjectList],
);
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head3 C<all_licenses>, C<add_licenses>, C<has_no_licenses> |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Methods to manipulate a list of licenses that regulate the use of the |
159
|
|
|
|
|
|
|
dataset. The values should be a string that represents the URI of a |
160
|
|
|
|
|
|
|
license. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=cut |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# License URIs; generate() emits one dct:license statement per entry.
has '_licenses' => (
    traits => [ObjectList],
);
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head3 C<urispace>, C<has_urispace> |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
This method is used to set the URI prefix string that will match the |
169
|
|
|
|
|
|
|
entities in your dataset. The computation of the number of entities |
170
|
|
|
|
|
|
|
depends on this being set. C<has_urispace> can be used to check if it |
171
|
|
|
|
|
|
|
is set. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# A dataset should have only one uriSpace (subsets may have their own),
# so a plain scalar attribute is enough here.
has 'urispace' => (
    isa       => 'Str',
    is        => 'rw',
    predicate => 'has_urispace',
);
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head2 Running this stuff |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head3 C<level>, C<has_level> |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Set the level of detail. 0 doesn't do any statistics or heuristics, 1 |
188
|
|
|
|
|
|
|
has some statistics for the dataset as a whole, 2 will give some |
189
|
|
|
|
|
|
|
partition statistics and 3 will give subject and object counts for |
190
|
|
|
|
|
|
|
property partitions. Setting no level will give everything. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Optional level of detail consulted by generate(); has_level reports
# whether one was set.
has 'level' => (
    is        => 'rw',
    isa       => 'Int',
    predicate => 'has_level',
);
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head3 C<stats>, C<clear_stats>, C<has_stats> |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Method to compute a statistical summary for the data in the dataset, |
200
|
|
|
|
|
|
|
such as the number of entities, predicates, etc. C<clear_stats> will |
201
|
|
|
|
|
|
|
clear the statistics and C<has_stats> will return true if they exist. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# All statistics work is delegated to RDF::Generator::Void::Stats. The
# object is built on first access by _build_stats and can be dropped
# again with clear_stats.
has 'stats' => (
    isa       => 'RDF::Generator::Void::Stats',
    is        => 'rw',
    lazy      => 1,
    builder   => '_build_stats',
    predicate => 'has_stats',
    clearer   => 'clear_stats',
);
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Builder for the stats attribute: constructs the Stats object, handing
# it this generator.
sub _build_stats {
    my $self  = shift;
    my $stats = RDF::Generator::Void::Stats->new(generator => $self);
    return $stats;
}
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head3 generate( [ $model ] ) |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Returns the VoID as an RDF::Trine::Model. You may pass a model with |
225
|
|
|
|
|
|
|
statements as argument to this method. This model may then contain |
226
|
|
|
|
|
|
|
arbitrary RDF that will be added to the RDF model. If you do not send |
227
|
|
|
|
|
|
|
a model, one will be created for you. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=cut |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
# Build the VoID description and return it as an RDF::Trine::Model.
#
# Arguments:
#   $void_model - optional model to add the statements to; a temporary
#                 model is created when it is omitted.
#
# How much is emitted depends on the level attribute: level 0 stops
# after the basic dataset statements, level 1 or lower stops after the
# dataset-wide counts and vocabularies, and any other level (or none)
# also adds property and class partitions.
sub generate {
    my $self = shift;
    my $void_model = shift || RDF::Trine::Model->temporary_model;

    # The _generate_* helpers write to $self->{void_model}; local makes
    # sure the previous value is restored when this call ends.
    local $self->{void_model} = $void_model;

    my $dataset = $self->dataset_uri;
    my $level   = $self->has_level ? $self->level : undef;

    # Start generating the actual VoID statements
    $void_model->add_statement(statement(
        $dataset,
        $rdf->type,
        $void->Dataset,
    ));

    # The DatasetURI type also admits blank nodes, so only split the
    # URI when the node actually offers uri_value.
    my ($scheme, $auth, $path, $query, $frag);
    if ($dataset->can('uri_value')) {
        ($scheme, $auth, $path, $query, $frag) = uri_split($dataset->uri_value);
    }

    # A fragment means the dataset lives inside a document that can be
    # described with provenance. Test for definedness and length so that
    # a fragment of "0" is not mistaken for "no fragment".
    if (defined $frag && length $frag) {
        my $document = iri(uri_join($scheme, $auth, $path, $query, undef));
        my $activity = blank();
        $void_model->add_statement(statement(
            $document,
            $prov->wasGeneratedBy,
            $activity,
        ));

        # Every dot in the version is replaced, not only the first one.
        (my $ver = $VERSION) =~ s/\./-/g;
        my $release_uri = iri("http://purl.org/NET/cpan-uri/dist/RDF-Generator-Void/v_$ver");
        $void_model->add_statement(statement(
            $activity,
            $prov->wasAssociatedWith,
            $release_uri,
        ));
        $void_model->add_statement(statement(
            $release_uri,
            $rdf->type,
            $prov->SoftwareAgent,
        ));
        $void_model->add_statement(statement(
            $release_uri,
            iri('http://www.w3.org/2000/01/rdf-schema#label'),
            literal("RDF::Generator::Void, Version $VERSION", 'en'),
        ));
    }

    foreach my $endpoint ($self->all_endpoints) {
        $void_model->add_statement(statement(
            $dataset,
            $void->sparqlEndpoint,
            iri($endpoint),
        ));
    }

    foreach my $title ($self->all_titles) {
        $void_model->add_statement(statement(
            $dataset,
            $dct->title,
            $title,
        ));
    }

    foreach my $license ($self->all_licenses) {
        $void_model->add_statement(statement(
            $dataset,
            $dct->license,
            iri($license),
        ));
    }

    $void_model->add_statement(statement(
        $dataset,
        $void->triples,
        literal($self->inmodel->size, undef, $xsd->integer),
    ));

    if ($self->has_urispace) {
        $void_model->add_statement(statement(
            $dataset,
            $void->uriSpace,
            literal($self->urispace),
        ));
    }

    # Level 0: no statistics or heuristics at all.
    return $void_model if (defined $level && $level == 0);

    # The entity count is only emitted when a URI space is set.
    if ($self->has_urispace) {
        $self->_generate_counts($void->entities, $self->stats->entities);
    }

    $self->_generate_counts($void->distinctSubjects, $self->stats->subjects);
    $self->_generate_counts($void->properties, $self->stats->properties);
    $self->_generate_counts($void->distinctObjects, $self->stats->objects);

    $self->_generate_most_common_vocabs($self->stats) if $self->has_stats;

    # Level 1 or lower: dataset-wide statistics only, no partitions.
    return $void_model if (defined $level && $level <= 1);

    $self->_generate_propertypartitions;
    $self->_generate_classpartitions;
    return $void_model;
}
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Add one integer-typed count statement about the dataset.
#
# Arguments:
#   $predicate - the predicate node to use for the statement
#   $count     - the value, emitted as an xsd:integer literal
#
# Does nothing unless statistics have been built. Writes to
# $self->{void_model}, which generate() sets up. No meaningful return
# value.
sub _generate_counts {
    my ($self, $predicate, $count) = @_;

    # Bare return, so that a list-context caller gets an empty list
    # rather than a one-element list holding undef.
    return unless $self->has_stats;

    $self->{void_model}->add_statement(statement(
        $self->dataset_uri,
        $predicate,
        literal($count, undef, $xsd->integer),
    ));
    return;
}
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# Add one void:propertyPartition per property found by the statistics.
#
# Each partition is a blank node carrying the property URI and its
# triple count and, when the statistics hold subject counts for it, the
# number of distinct subjects and objects.
#
# Does nothing unless statistics have been built. Writes to
# $self->{void_model}, which generate() sets up. No meaningful return
# value.
sub _generate_propertypartitions {
    my ($self) = @_;
    return unless $self->has_stats;

    my $properties = $self->stats->propertyPartitions;
    my $model      = $self->{void_model};

    # Iterate with keys rather than each: keys resets the hash iterator,
    # so a previous pass that was cut short cannot make this one start
    # in the middle of the hash.
    foreach my $uri (keys %{$properties}) {
        my $counts    = $properties->{$uri};
        my $partition = blank();

        $model->add_statement(statement(
            $self->dataset_uri,
            $void->propertyPartition,
            $partition,
        ));
        $model->add_statement(statement(
            $partition,
            $void->property,
            iri($uri),
        ));
        $model->add_statement(statement(
            $partition,
            $void->triples,
            literal($counts->{'triples'}, undef, $xsd->integer),
        ));

        # OK, so sometimes, one has to balance elegance and performance...
        if ($counts->{'countsubjects'}) {
            $model->add_statement(statement(
                $partition,
                $void->distinctSubjects,
                literal(scalar keys %{$counts->{'countsubjects'}}, undef, $xsd->integer),
            ));
            $model->add_statement(statement(
                $partition,
                $void->distinctObjects,
                literal(scalar keys %{$counts->{'countobjects'}}, undef, $xsd->integer),
            ));
        }
    }
    return;
}
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Add one void:classPartition per class found by the statistics.
#
# Each partition is a blank node carrying the class URI and the count
# stored for it, emitted as void:triples.
#
# Does nothing unless statistics have been built. Writes to
# $self->{void_model}, which generate() sets up. No meaningful return
# value.
sub _generate_classpartitions {
    my ($self) = @_;
    return unless $self->has_stats;

    my $classes = $self->stats->classPartitions;
    my $model   = $self->{void_model};

    # Iterate with keys rather than each: keys resets the hash iterator,
    # so a previous pass that was cut short cannot make this one start
    # in the middle of the hash.
    foreach my $uri (keys %{$classes}) {
        my $count     = $classes->{$uri};
        my $partition = blank();

        $model->add_statement(statement(
            $self->dataset_uri,
            $void->classPartition,
            $partition,
        ));
        $model->add_statement(statement(
            $partition,
            $void->class,
            iri($uri),
        ));
        $model->add_statement(statement(
            $partition,
            $void->triples,
            literal($count, undef, $xsd->integer),
        ));
    }
    return;
}
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# Register the vocabularies that dominate the dataset's predicates and
# emit a void:vocabulary statement for every vocabulary known to the
# generator, including any added by the user beforehand.
sub _generate_most_common_vocabs {
    my $self = shift;

    # A vocabulary qualifies when its count exceeds 1% of the triples in
    # the input model; anything at or below that is left out.
    my $min_triples = $self->inmodel->size / 100;
    my %triples_for = %{ $self->stats->vocabularies };
    my @frequent    = grep { $triples_for{$_} > $min_triples } keys %triples_for;
    $self->add_vocabularies(@frequent);

    foreach my $vocab_uri ($self->all_vocabularies) {
        my $triple = statement(
            $self->dataset_uri,
            $void->vocabulary,
            iri($vocab_uri),
        );
        $self->{void_model}->add_statement($triple);
    }
}
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=head1 AUTHORS |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
Kjetil Kjernsmo C<< <kjetilk@cpan.org> >> |
403
|
|
|
|
|
|
|
Toby Inkster C<< <tobyink@cpan.org> >> |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 TODO |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=over |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item * URI regexps support. |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item * Technical features (esp. serializations). |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item * Example resources and root resources. |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item * Data dumps. |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item * Subject classification. |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=item * Method to disable heuristics. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item * More heuristics. |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=item * Linkset descriptions. |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item * Set URI space on partitions. |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=item * Use L<CHI> to cache? |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=item * Use schema introspection to generate property attributes with L<MooseX::Semantics>. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=back |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=head1 BUGS |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Please report any bugs you find to L<https://github.com/kjetilk/RDF-Generator-Void/issues> |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Note that any claim that this module will generate a void in |
441
|
|
|
|
|
|
|
spacetime, a wormhole, black hole, or funny philosophy is totally |
442
|
|
|
|
|
|
|
bogus and without any scientific merit whatsoever. The lead author has |
443
|
|
|
|
|
|
|
made elaborate precautions to avoid any such issues, and expects |
444
|
|
|
|
|
|
|
everyone to take his word for it. Oh, BTW, should it just happen |
445
|
|
|
|
|
|
|
anyway, it won't L<hurt much|http://news.sciencemag.org/sciencenow/2012/03/scienceshot-one-black-hole-wont-.html>. |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head1 SUPPORT |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
perldoc RDF::Generator::Void |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
The Perl and RDF community website is at L<http://www.perlrdf.org/> |
455
|
|
|
|
|
|
|
where you can also find a mailing list to direct questions to. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
You can also look for information at: |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over 4 |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
L<http://annocpan.org/dist/RDF-Generator-Void> |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item * CPAN Ratings |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/RDF-Generator-Void> |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item * MetaCPAN |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
L<https://metacpan.org/module/RDF::Generator::Void> |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=back |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Many thanks to Konstantin Baierer for help with L<RDF::Generator::Void::Meta::Attribute::ObjectList>. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Copyright 2012 Toby Inkster. |
483
|
|
|
|
|
|
|
Copyright 2012-2013 Kjetil Kjernsmo. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
486
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
487
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=cut |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
1; # End of RDF::Generator::Void |