line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RDF::Generator::Void::Stats; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
85
|
use 5.006; |
|
5
|
|
|
|
|
31
|
|
4
|
5
|
|
|
5
|
|
17
|
use strict; |
|
5
|
|
|
|
|
5
|
|
|
5
|
|
|
|
|
76
|
|
5
|
5
|
|
|
5
|
|
15
|
use warnings; |
|
5
|
|
|
|
|
5
|
|
|
5
|
|
|
|
|
91
|
|
6
|
5
|
|
|
5
|
|
15
|
use Moose; |
|
5
|
|
|
|
|
6
|
|
|
5
|
|
|
|
|
24
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 NAME |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
RDF::Generator::Void::Stats - Generate statistics needed for good VoID descriptions |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Typically called for you by L<RDF::Generator::Void> as: |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $stats = RDF::Generator::Void::Stats->new(generator => $self); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head2 METHODS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head3 C<< BUILD >> |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Called by Moose to initialize an object. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head3 C<generator> |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Parameter to the constructor, to pass a L<RDF::Generator::Void> object. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head3 C<vocabularies> |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
A hashref used to find common vocabularies in the data. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head3 C<entities> |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
The number of distinct entities, as defined in the specification. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head3 C<properties> |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
The number of distinct properties, as defined in the specification. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head3 C<subjects> |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
The number of distinct subjects, as defined in the specification. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head3 C<objects> |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
The number of distinct objects, as defined in the specification. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head3 C<propertyPartitions> |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
A hashref containing the number of triples for each property. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head3 C<classPartitions> |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
A hashref containing the number of triples for each class. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Statistics attributes. All of them are read-write, so each name below is
# both a constructor parameter and a getter/setter method.
has vocabularies => (
    is  => 'rw',
    isa => 'HashRef',
);

has [qw(entities properties subjects objects)] => (
    is  => 'rw',
    isa => 'Int',
);

has [qw(propertyPartitions classPartitions)] => (
    is  => 'rw',
    isa => 'HashRef',
);

# The generator is mandatory and read-only: it must be handed to the
# constructor and can only be read afterwards.
has generator => (
    is       => 'ro',
    isa      => 'RDF::Generator::Void',
    required => 1,
);
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# BUILD is called by Moose once the object has been constructed, i.e. when
# the user calls the constructor. It walks every statement of the
# generator's input model exactly once, tallies the statistics in local
# hashes, and finally stores the results in the attributes of $self.
#
# Takes no arguments besides the invocant and returns nothing useful.
sub BUILD {
    my ($self) = @_;

    # Local hashes to count stuff. Hash keys are unique, so the number of
    # keys is the number of distinct values seen.
    my (%vocab_counter, %entities, %properties, %subjects, %objects, %classes);

    my $gen = $self->generator;

    # None of the following depends on the statement being inspected, so it
    # is resolved once here rather than for every statement. A generator
    # without a level gets all statistics.
    my $want_classes     = !$gen->has_level || $gen->level >= 1;
    my $want_prop_detail = !$gen->has_level || $gen->level > 2;
    my $urispace         = $gen->has_urispace ? $gen->urispace : undef;

    $gen->inmodel->get_statements->each(sub {
        my $st = shift;

        # To allow for non-RDF data models (e.g. N3). This is a callback,
        # not a loop body, so it is left with return; loop control such as
        # next would warn and depend on how each() is implemented.
        return unless $st->rdf_compatible;

        # Wrapped in eval, as qname can throw an exception. Such a
        # predicate is deliberately just not counted as a vocabulary.
        eval {
            my ($vocab_uri) = $st->predicate->qname;
            $vocab_counter{$vocab_uri}++;
        };

        # Compute entities. We assume that all entities are subjects with
        # a prefix matching the uriSpace. The value is just some true
        # value, since we don't need to know how often each entity occurs.
        if (defined $urispace && $st->subject->is_resource) {
            my $subject_uri = $st->subject->uri_value;

            # Literal prefix comparison: the uriSpace is a plain string,
            # and characters such as "?" or "+" must not act as regex
            # metacharacters.
            $entities{$subject_uri} = 1
                if index($subject_uri, $urispace) == 0;
        }

        my $subject_key   = $st->subject->sse;
        my $predicate_uri = $st->predicate->uri_value;
        my $object_key    = $st->object->sse;

        $subjects{$subject_key} = 1;
        $properties{$predicate_uri}{'triples'}++;
        $objects{$object_key} = 1;

        # Count the instances of each class, i.e. the resource objects of
        # rdf:type statements.
        if ($want_classes
            && $predicate_uri eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
            && $st->object->is_resource) {
            $classes{$st->object->uri_value}++;
        }

        # Record the distinct subjects and objects seen with each property.
        if ($want_prop_detail) {
            $properties{$predicate_uri}{'countsubjects'}{$subject_key} = 1;
            $properties{$predicate_uri}{'countobjects'}{$object_key}   = 1;
        }

        return;
    });

    # Finally, update the attributes; callers read the statistics from them.
    $self->vocabularies(\%vocab_counter);
    $self->entities(scalar keys %entities);
    $self->properties(scalar keys %properties);
    $self->subjects(scalar keys %subjects);
    $self->objects(scalar keys %objects);
    if ($want_classes) {
        $self->propertyPartitions(\%properties);
        $self->classPartitions(\%classes);
    }

    return;
}
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 FURTHER DOCUMENTATION |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Please see L<RDF::Generator::Void> for further documentation. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head1 AUTHORS AND COPYRIGHT |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Please see L<RDF::Generator::Void> for information about authors and copyright for this module. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
1; |