File Coverage

blib/lib/RDF/Generator/Void/Stats.pm
Criterion Covered Total %
statement 46 46 100.0
branch 10 14 71.4
condition 14 24 58.3
subroutine 7 7 100.0
pod 1 1 100.0
total 78 92 84.7


line stmt bran cond sub pod time code
1             package RDF::Generator::Void::Stats;
2 5     5   1821 use Progress::Any;
  5         17002  
  5         26  
3              
4 5     5   182 use 5.006;
  5         11  
5 5     5   17 use strict;
  5         7  
  5         76  
6 5     5   17 use warnings;
  5         3  
  5         100  
7 5     5   24 use Moose;
  5         5  
  5         30  
8              
9             =head1 NAME
10              
11             RDF::Generator::Void::Stats - Generate statistics needed for good VoID descriptions
12              
13             =head1 SYNOPSIS
14              
15             Typically called for you by L<RDF::Generator::Void> as:
16              
17             my $stats = RDF::Generator::Void::Stats->new(generator => $self);
18              
19              
20             =head2 METHODS
21              
22             =head3 C<< BUILD >>
23              
24             Called by Moose to initialize an object.
25              
26             =head3 C<generator>
27              
28             Parameter to the constructor, to pass a L<RDF::Generator::Void> object.
29              
30             =head3 C<vocabularies>
31              
32             A hashref used to find common vocabularies in the data.
33              
34             =head3 C<entities>
35              
36             The number of distinct entities, as defined in the specification.
37              
38             =head3 C<properties>
39              
40             The number of distinct properties, as defined in the specification.
41              
42             =head3 C<subjects>
43              
44             The number of distinct subjects, as defined in the specification.
45              
46             =head3 C<objects>
47              
48             The number of distinct objects, as defined in the specification.
49              
50             =head3 C<propertyPartitions>
51              
52             A hashref containing the number of triples for each property.
53              
54             =head3 C<classPartitions>
55              
56             A hashref containing the number of triples for each class.
57              
58              
59             =cut
60              
61             # The following attributes also act as read-write methods.
62             has vocabularies => ( is => 'rw', isa => 'HashRef' );
63              
64             has ['entities', 'properties', 'subjects', 'objects'] => ( is => 'rw', isa => 'Int' );
65              
66             has propertyPartitions => (is => 'rw', isa => 'HashRef' );
67              
68             has classPartitions => (is => 'rw', isa => 'HashRef' );
69              
70             # This is a read-only method, meaning that the constructor has it as a
71             # parameter, but then it can only be read from.
72             has generator => (
73             is => 'ro',
74             isa => 'RDF::Generator::Void',
75             required => 1,
76             );
77              
78             # The BUILD method acts as the constructor hook. It is called when the
79             # user calls the constructor. This is where the statistics are generated.
80             sub BUILD {
81 6     6 1 8065 my ($self) = @_;
82              
83             # Initialize local hashes to count stuff.
84 6         10 my (%vocab_counter, %entities, %properties, %subjects, %objects, %classes);
85 6         22 my $progress = Progress::Any->get_indicator(task => "compute");
86            
87 6         301 my $gen = $self->generator;
88              
89             # Here, we take the data in the model we want to generate
90             # statistics for and we iterate over it. Doing it this way, we
91             # should be able to generate all statistics in a single pass of the
92             # data.
93             $gen->inmodel->get_statements->each(sub {
94 4687     4687   459702 my $st = shift;
95 4687 50       8011 next unless $st->rdf_compatible; # To allow for non-RDF data models (e.g. N3)
96            
97             # wrap in eval, as this can potentially throw an exception.
98 4687         113598 eval {
99 4687         6559 my ($vocab_uri) = $st->predicate->qname;
100             # Hash keys are unique, so this counts how many times each vocabulary (the first value returned by qname) occurs among the predicates
101 4687         701327 $vocab_counter{$vocab_uri}++;
102             };
103              
104            
105              
106 4687 100 66     137397 if ($gen->has_urispace && $st->subject->is_resource) {
107             # Compute entities. We assume that all entities are subjects
108             # with a prefix matching the uriSpace. Again, we use the
109             # property that keys are unique, but we just set it to some
110             # true value since we don't need to count how frequently each
111             # entity is present.
112 4291         130641 (my $urispace = $gen->urispace) =~ s/\./\\./g;
113 4291 50       7443 $entities{$st->subject->uri_value} = 1 if ($st->subject->uri_value =~ m/^$urispace/);
114             }
115            
116 4687         59404 $subjects{$st->subject->sse} = 1;
117 4687         39809 $properties{$st->predicate->uri_value}{'triples'}++;
118 4687         26001 $objects{$st->object->sse} = 1;
119              
120 4687 50 33     178118 if ((!$gen->has_level) || ($gen->has_level && $gen->level >= 1)) {
      66        
121 4687 100 66     7099 if (($st->predicate->uri_value eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
122             && $st->object->is_resource) {
123 692         9272 $classes{$st->object->uri_value}++
124             }
125             }
126              
127 4687 100 66     136715 if ((!$gen->has_level) || ($gen->has_level && $gen->level > 2)) {
      66        
128 4675         7297 $properties{$st->predicate->uri_value}{'countsubjects'}{$st->subject->sse} = 1;
129 4675         55733 $properties{$st->predicate->uri_value}{'countobjects'}{$st->object->sse} = 1;
130             }
131 4687         74482 $progress->update(message => "Examening triples");
132 6         154 });
133              
134              
135             # Finally, we update the attributes above; they are returned as a side effect
136 6         2519 $self->vocabularies(\%vocab_counter);
137 6         162 $self->entities(scalar keys %entities);
138 6         186 $self->properties(scalar keys %properties);
139 6         159 $self->subjects(scalar keys %subjects);
140 6         153 $self->objects(scalar keys %objects);
141 6 50 33     185 if ((!$gen->has_level) || ($gen->has_level && $gen->level >= 1)) {
      66        
142 6         163 $self->propertyPartitions(\%properties);
143 6         172 $self->classPartitions(\%classes);
144             }
145 6         21 $progress->update(message => "Data transfer");
146             }
147              
148             =head1 FURTHER DOCUMENTATION
149              
150             Please see L<RDF::Generator::Void> for further documentation.
151              
152             =head1 AUTHORS AND COPYRIGHT
153              
154              
155             Please see L<RDF::Generator::Void> for information about authors and copyright for this module.
156              
157              
158             =cut
159              
160             1;