File Coverage

blib/lib/RDF/Generator/Void/Stats.pm
Criterion Covered Total %
statement 40 40 100.0
branch 10 14 71.4
condition 14 24 58.3
subroutine 6 6 100.0
pod 1 1 100.0
total 71 85 83.5


line stmt bran cond sub pod time code
1             package RDF::Generator::Void::Stats;
2              
3 5     5   85 use 5.006;
  5         31  
4 5     5   17 use strict;
  5         5  
  5         76  
5 5     5   15 use warnings;
  5         5  
  5         91  
6 5     5   15 use Moose;
  5         6  
  5         24  
7              
8             =head1 NAME
9              
10             RDF::Generator::Void::Stats - Generate statistics needed for good VoID descriptions
11              
12             =head1 SYNOPSIS
13              
14             Typically called for you by L<RDF::Generator::Void> as:
15              
16             my $stats = RDF::Generator::Void::Stats->new(generator => $self);
17              
18              
19             =head2 METHODS
20              
21             =head3 C<< BUILD >>
22              
23             Called by Moose to initialize an object.
24              
25             =head3 C<generator>
26              
27             Parameter to the constructor, to pass a L<RDF::Generator::Void> object.
28              
29             =head3 C<vocabularies>
30              
31             A hashref used to find common vocabularies in the data.
32              
33             =head3 C<entities>
34              
35             The number of distinct entities, as defined in the specification.
36              
37             =head3 C<properties>
38              
39             The number of distinct properties, as defined in the specification.
40              
41             =head3 C<subjects>
42              
43             The number of distinct subjects, as defined in the specification.
44              
45             =head3 C<objects>
46              
47             The number of distinct objects, as defined in the specification.
48              
49             =head3 C<propertyPartitions>
50              
51             A hashref keyed by property URI. Each value is a hashref holding the number of triples for that property and, when collected, its distinct subjects and objects.
52              
53             =head3 C<classPartitions>
54              
55             A hashref containing the number of triples for each class.
56              
57              
58             =cut
59              
60             # The following attributes also act as read-write methods.
61             has vocabularies => ( is => 'rw', isa => 'HashRef' );
62              
63             has ['entities', 'properties', 'subjects', 'objects'] => ( is => 'rw', isa => 'Int' );
64              
65             has propertyPartitions => (is => 'rw', isa => 'HashRef' );
66              
67             has classPartitions => (is => 'rw', isa => 'HashRef' );
68              
69             # This is a read-only attribute: it is passed to the constructor as a
70             # parameter, and afterwards it can only be read.
71             has generator => (
72             is => 'ro',
73             isa => 'RDF::Generator::Void',
74             required => 1,
75             );
76              
77             # The BUILD method acts as the constructor body. It is called when the
78             # user calls the constructor. In here, the statistics are generated.
79             sub BUILD {
80 11     11 1 14423 my ($self) = @_;
81              
82             # Initialize local hashes to count stuff.
83 11         20 my (%vocab_counter, %entities, %properties, %subjects, %objects, %classes);
84              
85 11         353 my $gen = $self->generator;
86             # Here, we take the data in the model we want to generate
87             # statistics for and we iterate over it. Doing it this way, we
88             # should be able to generate all statistics in a single pass of the
89             # data.
90             $gen->inmodel->get_statements->each(sub {
91 4687     4687   389199 my $st = shift;
92 4687 50       8403 next unless $st->rdf_compatible; # To allow for non-RDF data models (e.g. N3)
93            
94             # wrap in eval, as this can potentially throw an exception.
95 4687         114281 eval {
96 4687         6637 my ($vocab_uri) = $st->predicate->qname;
97             # Hash keys are unique, so this counts how many times each vocabulary URI occurs in the data
98 4687         705653 $vocab_counter{$vocab_uri}++;
99             };
100              
101            
102              
103 4687 100 66     141557 if ($gen->has_urispace && $st->subject->is_resource) {
104             # Compute entities. We assume that all entities are subjects
105             # with a prefix matching the uriSpace. Again, we use the
106             # property that keys are unique, but we just set it to some
107             # true value since we don't need to count how frequently each
108             # entity is present.
109 4291         132536 (my $urispace = $gen->urispace) =~ s/\./\\./g;
110 4291 50       7814 $entities{$st->subject->uri_value} = 1 if ($st->subject->uri_value =~ m/^$urispace/);
111             }
112            
113 4687         61200 $subjects{$st->subject->sse} = 1;
114 4687         41295 $properties{$st->predicate->uri_value}{'triples'}++;
115 4687         27275 $objects{$st->object->sse} = 1;
116              
117 4687 50 33     178766 if ((!$gen->has_level) || ($gen->has_level && $gen->level >= 1)) {
      66        
118 4687 100 66     7694 if (($st->predicate->uri_value eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
119             && $st->object->is_resource) {
120 692         9588 $classes{$st->object->uri_value}++
121             }
122             }
123              
124 4687 100 66     139436 if ((!$gen->has_level) || ($gen->has_level && $gen->level > 2)) {
      66        
125 4675         7468 $properties{$st->predicate->uri_value}{'countsubjects'}{$st->subject->sse} = 1;
126 4675         58552 $properties{$st->predicate->uri_value}{'countobjects'}{$st->object->sse} = 1;
127             }
128              
129 11         295 });
130              
131             # Finally, we update the attributes above; the results are made available through them as a side effect
132 11         28939 $self->vocabularies(\%vocab_counter);
133 11         313 $self->entities(scalar keys %entities);
134 11         306 $self->properties(scalar keys %properties);
135 11         285 $self->subjects(scalar keys %subjects);
136 11         283 $self->objects(scalar keys %objects);
137 11 50 33     290 if ((!$gen->has_level) || ($gen->has_level && $gen->level >= 1)) {
      66        
138 11         304 $self->propertyPartitions(\%properties);
139 11         317 $self->classPartitions(\%classes);
140             }
141             }
142              
143             =head1 FURTHER DOCUMENTATION
144              
145             Please see L<RDF::Generator::Void> for further documentation.
146              
147             =head1 AUTHORS AND COPYRIGHT
148              
149              
150             Please see L<RDF::Generator::Void> for information about authors and copyright for this module.
151              
152              
153             =cut
154              
155             1;