| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package RDF::Generator::Void::Stats; |
|
2
|
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
85
|
use 5.006; |
|
|
5
|
|
|
|
|
31
|
|
|
4
|
5
|
|
|
5
|
|
17
|
use strict; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
76
|
|
|
5
|
5
|
|
|
5
|
|
15
|
use warnings; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
91
|
|
|
6
|
5
|
|
|
5
|
|
15
|
use Moose; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
24
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 NAME |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
RDF::Generator::Void::Stats - Generate statistics needed for good VoID descriptions |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Typically called for you by L<RDF::Generator::Void> as: |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $stats = RDF::Generator::Void::Stats->new(generator => $self); |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head2 METHODS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head3 C<< BUILD >> |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Called by Moose to initialize an object. |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head3 C<generator> |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Parameter to the constructor, to pass a L<RDF::Generator::Void> object. |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head3 C<vocabularies> |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
A hashref used to find common vocabularies in the data. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head3 C<entities> |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
The number of distinct entities, as defined in the specification. |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head3 C<properties> |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
The number of distinct properties, as defined in the specification. |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head3 C<subjects> |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
The number of distinct subjects, as defined in the specification. |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head3 C<objects> |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
The number of distinct objects, as defined in the specification. |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head3 C<propertyPartitions> |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
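A hashref keyed by property URI. Each value is a hashref holding the number of C<triples> using that property and, when detailed statistics are gathered, the distinct subjects (C<countsubjects>) and objects (C<countobjects>) seen with it. |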
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head3 C<classPartitions> |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
A hashref containing, for each class URI, the number of C<rdf:type> triples that have that class as their object. |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Statistics attributes. Each one doubles as a read-write accessor
# method; BUILD fills them in.
has vocabularies => (
    is  => 'rw',
    isa => 'HashRef',
);

has [qw(entities properties subjects objects)] => (
    is  => 'rw',
    isa => 'Int',
);

has [qw(propertyPartitions classPartitions)] => (
    is  => 'rw',
    isa => 'HashRef',
);

# The generator must be passed to the constructor. It is read-only:
# once the object exists it can be read from but not replaced.
has generator => (
    required => 1,
    isa      => 'RDF::Generator::Void',
    is       => 'ro',
);
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# The BUILD method is kinda the constructor. It is called when the |
|
78
|
|
|
|
|
|
|
# user calls the constructor. This is where the statistics are generated. |
|
79
|
|
|
|
|
|
|
# Walks every statement of the generator's input model exactly once and
# stores the resulting statistics in this object's attributes.
#
# Attributes set:
#   vocabularies       - usage count per first element of the predicate's qname
#   entities           - number of distinct resource subjects whose URI
#                        starts with the generator's urispace (if one is set)
#   properties         - number of distinct predicates
#   subjects / objects - number of distinct subjects / objects
#   propertyPartitions - per predicate: 'triples' count, plus 'countsubjects'
#                        and 'countobjects' sets when no level is set or
#                        level > 2
#   classPartitions    - per class: number of rdf:type statements
# The two partition attributes are only set when the generator has no level
# or a level >= 1.
sub BUILD {
    my ($self) = @_;

    # Local hashes to count stuff; hash keys give us uniqueness for free.
    my (%vocab_counter, %entities, %properties, %subjects, %objects, %classes);

    my $gen = $self->generator;

    # These settings do not change while we iterate, so look them up once
    # rather than once per statement.
    my $has_urispace    = $gen->has_urispace;
    my $urispace        = $has_urispace ? $gen->urispace : undef;
    my $no_level        = !$gen->has_level;
    my $want_partitions = $no_level || $gen->level >= 1;
    my $want_details    = $no_level || $gen->level > 2;

    # Iterate over the data of the model once, so that all statistics are
    # generated in a single pass.
    $gen->inmodel->get_statements->each(sub {
        my $st = shift;

        # To allow for non-RDF data models (e.g. N3). This has to be
        # "return" and not "next": we are inside a callback sub, not
        # directly inside a loop block.
        return unless $st->rdf_compatible;

        my $subject   = $st->subject;
        my $predicate = $st->predicate;
        my $object    = $st->object;

        # Wrap in eval, as qname can potentially throw an exception.
        # Predicates for which it does are deliberately left uncounted.
        eval {
            my ($vocab_uri) = $predicate->qname;
            $vocab_counter{$vocab_uri}++;
        };

        # Compute entities. We assume that all entities are resource
        # subjects whose URI starts with the urispace. The prefix is
        # compared literally, so characters that are special in regular
        # expressions (".", "?", "+", ...) cannot distort the match.
        if ($has_urispace && $subject->is_resource) {
            my $subject_uri = $subject->uri_value;
            $entities{$subject_uri} = 1 if index($subject_uri, $urispace) == 0;
        }

        my $subject_key   = $subject->sse;
        my $predicate_uri = $predicate->uri_value;
        my $object_key    = $object->sse;

        $subjects{$subject_key} = 1;
        $properties{$predicate_uri}{'triples'}++;
        $objects{$object_key} = 1;

        # Class partitions: count rdf:type statements per class resource.
        if ($want_partitions
            && $predicate_uri eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
            && $object->is_resource) {
            $classes{$object->uri_value}++;
        }

        # Detailed property partitions: remember which distinct subjects
        # and objects occur with each predicate.
        if ($want_details) {
            $properties{$predicate_uri}{'countsubjects'}{$subject_key} = 1;
            $properties{$predicate_uri}{'countobjects'}{$object_key}   = 1;
        }
    });

    # Finally, publish the results through the attributes.
    $self->vocabularies(\%vocab_counter);
    $self->entities(scalar keys %entities);
    $self->properties(scalar keys %properties);
    $self->subjects(scalar keys %subjects);
    $self->objects(scalar keys %objects);
    if ($want_partitions) {
        $self->propertyPartitions(\%properties);
        $self->classPartitions(\%classes);
    }

    return;
}
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 FURTHER DOCUMENTATION |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Please see L<RDF::Generator::Void> for further documentation. |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head1 AUTHORS AND COPYRIGHT |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Please see L<RDF::Generator::Void> for information about authors and copyright for this module. |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
1; |