| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package RDF::Generator::Void; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
20412
|
use 5.006; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
34
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
29
|
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
6
|
|
|
|
1
|
|
|
|
|
31
|
|
|
6
|
1
|
|
|
1
|
|
413
|
use Moose; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Moose::Util::TypeConstraints; |
|
8
|
|
|
|
|
|
|
use Data::UUID; |
|
9
|
|
|
|
|
|
|
use RDF::Trine qw[iri literal blank variable statement]; |
|
10
|
|
|
|
|
|
|
use RDF::Generator::Void::Stats; |
|
11
|
|
|
|
|
|
|
# use less (); |
|
12
|
|
|
|
|
|
|
use utf8; |
|
13
|
|
|
|
|
|
|
use URI::Split qw(uri_split uri_join); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use aliased 'RDF::Generator::Void::Meta::Attribute::ObjectList'; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Define some namespace prefixes |
|
18
|
|
|
|
|
|
|
# Namespace helper objects for the vocabularies used in the generated
# description: VoID, RDF, XML Schema datatypes, Dublin Core terms and PROV.
my ($void, $rdf, $xsd, $dct, $prov) = map { RDF::Trine::Namespace->new($_) } (
    'http://rdfs.org/ns/void#',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'http://www.w3.org/2001/XMLSchema#',
    'http://purl.org/dc/terms/',
    'http://www.w3.org/ns/prov#',
);
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 NAME |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
RDF::Generator::Void - Generate VoID descriptions based on data in an RDF model |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 VERSION |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Version 0.13 |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=cut |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
our $VERSION = '0.13'; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use RDF::Generator::Void; |
|
39
|
|
|
|
|
|
|
use RDF::Trine::Model; |
|
40
|
|
|
|
|
|
|
my $mymodel = RDF::Trine::Model->temporary_model; |
|
41
|
|
|
|
|
|
|
[add some data to $mymodel here] |
|
42
|
|
|
|
|
|
|
my $generator = RDF::Generator::Void->new(inmodel => $mymodel); |
|
43
|
|
|
|
|
|
|
$generator->urispace('http://example.org'); |
|
44
|
|
|
|
|
|
|
$generator->add_endpoints('http://example.org/sparql'); |
|
45
|
|
|
|
|
|
|
my $voidmodel = $generator->generate; |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This module takes a L<RDF::Trine::Model> object as input to the |
|
50
|
|
|
|
|
|
|
constructor, and based on the data in that model as well as data |
|
51
|
|
|
|
|
|
|
supplied by the user, it creates a new model with a VoID description |
|
52
|
|
|
|
|
|
|
of the data in the model. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
For a description of VoID, see L<http://www.w3.org/TR/void/>. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 METHODS |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 new(inmodel => $mymodel, dataset_uri => URI->new($dataset_uri), level => 1); |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The constructor. It can be called with two parameters, namely, |
|
61
|
|
|
|
|
|
|
C<inmodel> which is a model we want to describe and C<dataset_uri>, |
|
62
|
|
|
|
|
|
|
which is the URI we want to use for the description. Users should make |
|
63
|
|
|
|
|
|
|
sure it is possible to get this with HTTP. If this is not possible, |
|
64
|
|
|
|
|
|
|
you may leave this field empty so that a simple URN can be created for |
|
65
|
|
|
|
|
|
|
you as a default. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 C<inmodel> |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Read-only accessor for the model used in description creation. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 C<dataset_uri> |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Read-only accessor for the URI to the dataset. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=cut |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# The model to be described; it must be supplied to the constructor and
# cannot be replaced afterwards.
has inmodel => (
    required => 1,
    isa      => 'RDF::Trine::Model',
    is       => 'ro',
);
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# This sets up the dataset_uri method, and makes it possible to |
|
84
|
|
|
|
|
|
|
# create a resource of it from strings or URI objects. |
|
85
|
|
|
|
|
|
|
# Register the URI class as a type so that it can be used as a coercion
# source below.
class_type 'URI';

# A DatasetURI is any object that is either an RDF::Trine resource node or
# an RDF::Trine blank node.
subtype 'DatasetURI',
    as 'Object',
    where {
        my $node = $_;
        return $node->isa('RDF::Trine::Node::Blank')
            || $node->isa('RDF::Trine::Node::Resource');
    };

# URI objects are stringified before being wrapped; plain strings are
# wrapped directly.
coerce 'DatasetURI',
    from 'URI', via { my $uri = $_; iri("$uri") },
    from 'Str', via { my $string = $_; iri($string) };
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# The node identifying the dataset in the description. Strings and URI
# objects are coerced to resources; when nothing is supplied, the value is
# built on first access by _build_dataset_uri.
has dataset_uri => (
    is      => 'ro',
    isa     => 'DatasetURI',
    coerce  => 1,
    lazy    => 1,
    builder => '_build_dataset_uri',
);
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# This will create a URN with a UUID by default |
|
104
|
|
|
|
|
|
|
# Builder for the dataset_uri attribute: returns a resource node for a
# freshly created "urn:uuid:" URN.
sub _build_dataset_uri {
    my $self = shift;
    my $uuid = Data::UUID->new->create_str;
    return iri("urn:uuid:$uuid");
}
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 Property Attributes |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The below attributes concern some essential properties in the VoID |
|
112
|
|
|
|
|
|
|
vocabulary. They are mostly arrays, and can be manipulated using array |
|
113
|
|
|
|
|
|
|
methods. Methods starting with C<all_> will return an array of unique |
|
114
|
|
|
|
|
|
|
values. Methods starting with C<add_> take a list of values to add, |
|
115
|
|
|
|
|
|
|
and those starting with C<has_no_> return a boolean value, true if |
|
116
|
|
|
|
|
|
|
the array is empty. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head3 C<all_vocabularies>, C<add_vocabularies>, C<has_no_vocabularies> |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Methods to manipulate a list of vocabularies used in the dataset. The |
|
121
|
|
|
|
|
|
|
values should be a string that represents the URI of a vocabulary. |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# All the following attributes have that in common that they |
|
126
|
|
|
|
|
|
|
# automatically get the method names also specified in handles, to |
|
127
|
|
|
|
|
|
|
# manipulate and query the data. |
|
128
|
|
|
|
|
|
|
# Vocabularies used in the dataset. The all_/add_/has_no_ methods
# documented above are presumably installed by the ObjectList trait.
has _vocabularies => (
    traits => [ObjectList],
);
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head3 C<all_endpoints>, C<add_endpoints>, C<has_no_endpoints> |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Methods to manipulate a list of SPARQL endpoints that can be used to |
|
133
|
|
|
|
|
|
|
query the dataset. The values should be a string that represents the |
|
134
|
|
|
|
|
|
|
URI of a SPARQL endpoint. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# SPARQL endpoints for the dataset. The all_/add_/has_no_ methods
# documented above are presumably installed by the ObjectList trait.
has _endpoints => (
    traits => [ObjectList],
);
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head3 C<all_titles>, C<add_titles>, C<has_no_titles> |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Methods to manipulate the titles of the datasets. The values should be |
|
144
|
|
|
|
|
|
|
L<RDF::Trine::Node::Literal> objects, and should be set with |
|
145
|
|
|
|
|
|
|
language. Typically, you would have a value per language. |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Titles of the dataset; unlike the other list attributes, the elements
# are constrained to RDF::Trine::Node::Literal objects.
has _titles => (
    isa    => 'ArrayRef[RDF::Trine::Node::Literal]',
    traits => [ObjectList],
);
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head3 C<all_licenses>, C<add_licenses>, C<has_no_licenses> |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Methods to manipulate a list of licenses that regulate the use of the |
|
159
|
|
|
|
|
|
|
dataset. The values should be a string that represents the URI of a |
|
160
|
|
|
|
|
|
|
license. |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=cut |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Licenses that apply to the dataset. The all_/add_/has_no_ methods
# documented above are presumably installed by the ObjectList trait.
has _licenses => (
    traits => [ObjectList],
);
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head3 C<urispace>, C<has_urispace> |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
This method is used to set the URI prefix string that will match the |
|
169
|
|
|
|
|
|
|
entities in your dataset. The computation of the number of entities |
|
170
|
|
|
|
|
|
|
depends on this being set. C<has_urispace> can be used to check if it |
|
171
|
|
|
|
|
|
|
is set. |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# There should only be a single uriSpace per Dataset (but there may be |
|
176
|
|
|
|
|
|
|
# more for subsets), thus this is a simple scalar attribute. |
|
177
|
|
|
|
|
|
|
# Read-write string attribute; has_urispace reports whether a value has
# been set.
has urispace => (
    predicate => 'has_urispace',
    isa       => 'Str',
    is        => 'rw',
);
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head2 Running this stuff |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head3 C<level>, C<has_level> |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Set the level of detail. 0 doesn't do any statistics or heuristics, 1 |
|
188
|
|
|
|
|
|
|
has some statistics for the dataset as a whole, 2 will give some |
|
189
|
|
|
|
|
|
|
partition statistics and 3 will give subject and object counts for |
|
190
|
|
|
|
|
|
|
property partitions. Setting no level will give everything. |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Level of detail for generate(); has_level reports whether a level has
# been set at all.
has level => (
    is        => 'rw',
    isa       => 'Int',
    predicate => 'has_level',
);
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head3 C<stats>, C<clear_stats>, C<has_stats> |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Method to compute a statistical summary for the data in the dataset, |
|
200
|
|
|
|
|
|
|
such as the number of entities, predicates, etc. C<clear_stats> will |
|
201
|
|
|
|
|
|
|
clear the statistics and C<has_stats> will return true if they exist. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# In practice, this method just calls the ::Stats class to do |
|
206
|
|
|
|
|
|
|
# everything. |
|
207
|
|
|
|
|
|
|
# The statistics object is built on first access by _build_stats; it can be
# discarded with clear_stats and probed with has_stats.
has stats => (
    is        => 'rw',
    isa       => 'RDF::Generator::Void::Stats',
    predicate => 'has_stats',
    clearer   => 'clear_stats',
    builder   => '_build_stats',
    lazy      => 1,
);
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Builder for the stats attribute: creates the statistics object, handing
# it this generator.
sub _build_stats {
    my $self = shift;
    return RDF::Generator::Void::Stats->new(generator => $self);
}
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head3 generate( [ $model ] ) |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Returns the VoID as an RDF::Trine::Model. You may pass a model with |
|
225
|
|
|
|
|
|
|
statements as argument to this method. This model may then contain |
|
226
|
|
|
|
|
|
|
arbitrary RDF that will be added to the RDF model. If you do not send |
|
227
|
|
|
|
|
|
|
a model, one will be created for you. |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=cut |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
# Build the VoID description of the input model.
#
# Arguments: an optional RDF::Trine::Model to add the statements to; a
#            temporary model is created when none (or a false value) is
#            given.
# Returns:   the model holding the VoID description.
#
# How much is generated depends on the level attribute: with level 0 only
# the user-supplied properties, the triple count and the uriSpace are
# emitted; with level 1 the dataset-wide counts and the vocabularies are
# added; otherwise the property and class partitions are generated as well.
sub generate {
    my $self       = shift;
    my $void_model = shift || RDF::Trine::Model->temporary_model;

    # Make the output model reachable from the private _generate_* helpers
    # for the duration of this call only; 'local' restores the slot on exit.
    local $self->{void_model} = $void_model;

    my $dataset = $self->dataset_uri;

    # Start generating the actual VoID statements
    $void_model->add_statement(statement(
        $dataset,
        $rdf->type,
        $void->Dataset,
    ));

    # The DatasetURI type also admits blank nodes, which have no URI to
    # split, so only resources are inspected for a fragment.
    my ($scheme, $auth, $path, $query, $frag)
        = $dataset->isa('RDF::Trine::Node::Resource')
        ? uri_split($dataset->uri_value)
        : ();

    # Test for a non-empty fragment rather than a true one, so that a
    # fragment of "0" is recognised as well.
    if (defined $frag && length $frag) {
        # Then, we have a document that could be described with provenance
        my $uri   = iri(uri_join($scheme, $auth, $path, $query, undef));
        my $blank = blank();
        $void_model->add_statement(statement(
            $uri,
            $prov->wasGeneratedBy,
            $blank,
        ));

        # Replace every dot, so that versions with more than one dot are
        # converted completely.
        (my $ver = $VERSION) =~ s/\./-/g;
        my $release_uri = iri("http://purl.org/NET/cpan-uri/dist/RDF-Generator-Void/v_$ver");
        $void_model->add_statement(statement(
            $blank,
            $prov->wasAssociatedWith,
            $release_uri,
        ));
        $void_model->add_statement(statement(
            $release_uri,
            $rdf->type,
            $prov->SoftwareAgent,
        ));
        $void_model->add_statement(statement(
            $release_uri,
            iri('http://www.w3.org/2000/01/rdf-schema#label'),
            literal("RDF::Generator::Void, Version $VERSION", 'en'),
        ));
    }

    foreach my $endpoint ($self->all_endpoints) {
        $void_model->add_statement(statement(
            $dataset,
            $void->sparqlEndpoint,
            iri($endpoint),
        ));
    }

    foreach my $title ($self->all_titles) {
        $void_model->add_statement(statement(
            $dataset,
            $dct->title,
            $title,
        ));
    }

    foreach my $license ($self->all_licenses) {
        $void_model->add_statement(statement(
            $dataset,
            $dct->license,
            iri($license),
        ));
    }

    $void_model->add_statement(statement(
        $dataset,
        $void->triples,
        literal($self->inmodel->size, undef, $xsd->integer),
    ));

    if ($self->has_urispace) {
        $void_model->add_statement(statement(
            $dataset,
            $void->uriSpace,
            literal($self->urispace),
        ));
    }

    # Level 0: no statistics at all.
    return $void_model if ($self->has_level && $self->level == 0);

    # The entity count is only generated when a URI space has been set.
    if ($self->has_urispace) {
        $self->_generate_counts($void->entities, $self->stats->entities);
    }

    $self->_generate_counts($void->distinctSubjects, $self->stats->subjects);
    $self->_generate_counts($void->properties, $self->stats->properties);
    $self->_generate_counts($void->distinctObjects, $self->stats->objects);

    $self->_generate_most_common_vocabs($self->stats) if $self->has_stats;

    # Level 1: dataset-wide statistics only, no partitions.
    return $void_model if ($self->has_level && $self->level <= 1);

    $self->_generate_propertypartitions;
    $self->_generate_classpartitions;
    return $void_model;
}
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Add one "<dataset> <predicate> <count>" statement, with the count typed
# as xsd:integer, to the model currently being generated. Returns undef
# without adding anything when no statistics object exists.
sub _generate_counts {
    my $self = shift;
    my ($predicate, $count) = @_;

    return undef unless $self->has_stats;

    my $model = $self->{void_model};
    return $model->add_statement(
        statement(
            $self->dataset_uri,
            $predicate,
            literal($count, undef, $xsd->integer),
        )
    );
}
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# Describe every property partition reported by the statistics object: a
# blank node linked from the dataset, carrying the property URI, its triple
# count and, when subject counts were collected, the numbers of distinct
# subjects and objects. Returns undef when no statistics object exists.
sub _generate_propertypartitions {
    my $self = shift;
    return undef unless $self->has_stats;

    # Wrap a number in an xsd:integer literal.
    my $as_integer = sub { literal($_[0], undef, $xsd->integer) };

    my $partitions = $self->stats->propertyPartitions;
    while (my ($property, $counts) = each %{$partitions}) {
        my $model = $self->{void_model};
        my $node  = blank();

        $model->add_statement(
            statement($self->dataset_uri, $void->propertyPartition, $node));
        $model->add_statement(
            statement($node, $void->property, iri($property)));
        $model->add_statement(
            statement($node, $void->triples, $as_integer->($counts->{triples})));

        # OK, so sometimes, one has to balance elegance and performance...
        next unless $counts->{countsubjects};

        $model->add_statement(statement(
            $node,
            $void->distinctSubjects,
            $as_integer->(scalar keys %{ $counts->{countsubjects} }),
        ));
        $model->add_statement(statement(
            $node,
            $void->distinctObjects,
            $as_integer->(scalar keys %{ $counts->{countobjects} }),
        ));
    }
}
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Describe every class partition reported by the statistics object: a blank
# node linked from the dataset, carrying the class URI and the count
# recorded for it. Returns undef when no statistics object exists.
sub _generate_classpartitions {
    my $self = shift;
    return undef unless $self->has_stats;

    my $partitions = $self->stats->classPartitions;
    while (my ($class, $total) = each %{$partitions}) {
        my $model = $self->{void_model};
        my $node  = blank();

        $model->add_statement(
            statement($self->dataset_uri, $void->classPartition, $node));
        $model->add_statement(
            statement($node, $void->class, iri($class)));
        $model->add_statement(
            statement($node, $void->triples, literal($total, undef, $xsd->integer)));
    }
}
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# Add the vocabularies whose count in the statistics exceeds 1% of the
# model size to the generator's vocabulary list, then emit one
# void:vocabulary statement for every vocabulary on that list.
sub _generate_most_common_vocabs {
    my $self = shift;

    # Which vocabularies are most commonly used for predicates in the
    # dataset? Vocabularies used for less than 1% of triples need not
    # apply.
    my $minimum  = $self->inmodel->size / 100;
    my $usage_of = $self->stats->vocabularies;
    my @frequent = grep { $usage_of->{$_} > $minimum } keys %{$usage_of};
    $self->add_vocabularies(@frequent);

    for my $vocabulary ($self->all_vocabularies) {
        my $model = $self->{void_model};
        $model->add_statement(
            statement($self->dataset_uri, $void->vocabulary, iri($vocabulary)));
    }
}
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=head1 AUTHORS |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
Kjetil Kjernsmo C<< <kjetilk@cpan.org> >> |
|
403
|
|
|
|
|
|
|
Toby Inkster C<< <tobyink@cpan.org> >> |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 TODO |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=over |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item * URI regexps support. |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item * Technical features (esp. serializations). |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item * Example resources and root resources. |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item * Data dumps. |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item * Subject classification. |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=item * Method to disable heuristics. |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item * More heuristics. |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=item * Linkset descriptions. |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item * Set URI space on partitions. |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=item * Use L<CHI> to cache? |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=item * Use schema introspection to generate property attributes with L<MooseX::Semantics>. |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=back |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=head1 BUGS |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Please report any bugs you find to L<https://github.com/kjetilk/RDF-Generator-Void/issues> |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Note that any claim that this module will generate a void in |
|
441
|
|
|
|
|
|
|
spacetime, a wormhole, black hole, or funny philosophy is totally |
|
442
|
|
|
|
|
|
|
bogus and without any scientific merit whatsoever. The lead author has |
|
443
|
|
|
|
|
|
|
made elaborate precautions to avoid any such issues, and expects |
|
444
|
|
|
|
|
|
|
everyone to take his word for it. Oh, BTW, should it just happen |
|
445
|
|
|
|
|
|
|
anyway, it won't L<hurt much|http://news.sciencemag.org/sciencenow/2012/03/scienceshot-one-black-hole-wont-.html>. |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head1 SUPPORT |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
perldoc RDF::Generator::Void |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
The Perl and RDF community website is at L<http://www.perlrdf.org/> |
|
455
|
|
|
|
|
|
|
where you can also find a mailing list to direct questions to. |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
You can also look for information at: |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over 4 |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
L<http://annocpan.org/dist/RDF-Generator-Void> |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/RDF-Generator-Void> |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item * MetaCPAN |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
L<https://metacpan.org/module/RDF::Generator::Void> |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=back |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Many thanks to Konstantin Baierer for help with L<RDF::Generator::Void::Meta::Attribute::ObjectList>. |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Copyright 2012 Toby Inkster. |
|
483
|
|
|
|
|
|
|
Copyright 2012-2013 Kjetil Kjernsmo. |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
486
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
|
487
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=cut |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
1; # End of RDF::Generator::Void |