File Coverage

blib/lib/RDF/Trine/Serializer/NTriples/Canonical.pm
Criterion Covered Total %
statement 96 106 90.5
branch 32 42 76.1
condition 2 3 66.6
subroutine 10 10 100.0
pod 3 3 100.0
total 143 164 87.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             RDF::Trine::Serializer::NTriples::Canonical - Canonical representation of an RDF model
4              
5             =head1 VERSION
6              
7             This document describes RDF::Trine::Serializer::NTriples::Canonical version 1.018
8              
9             =head1 SYNOPSIS
10              
11             use RDF::Trine::Serializer::NTriples::Canonical;
12             my $serializer = RDF::Trine::Serializer::NTriples->new( onfail=>'truncate' );
13             $serializer->serialize_model_to_file(FH, $model);
14              
15             =head1 DESCRIPTION
16              
17             This module produces a canonical string representation of an RDF graph.
18             If the graph contains blank nodes, sometimes there is no canonical
19             representation that can be produced. The 'onfail' option allows you to
20             decide what is done in those circumstances:
21              
22             =over 8
23              
24             =item * truncate - drop problematic triples and only serialize a subgraph.
25              
26             =item * append - append problematic triples to the end of graph. The result will be non-canonical. This is the default behaviour.
27              
28             =item * space - As with 'append', but leave a blank line separating the canonical and non-canonical portions of the graph.
29              
30             =item * die - cause a fatal error.
31              
32             =back
33              
34             Other than the 'onfail' option, this package has exactly the same
35             interface as L<RDF::Trine::Serializer::NTriples>, providing
36             C<serialize_model_to_file> and C<serialize_model_to_string> methods.
37              
38             This package will be considerably slower than the non-canonicalising
39             serializer though, so should only be used for small to medium-sized
40             graphs, and only when you need canonicalisation (e.g. for side-by-side
41             comparison of two graphs to check they're isomorphic; or creating a
42             canonical representation for digital signing).
43              
44             =head1 METHODS
45              
46             Beyond the methods documented below, this class inherits methods from the
47             L<RDF::Trine::Serializer::NTriples> class.
48              
49             =over 4
50              
51             =cut
52              
53             package RDF::Trine::Serializer::NTriples::Canonical;
54              
55 68     68   19825 use 5.010;
  68         248  
56 68     68   348 use strict;
  68         156  
  68         1229  
57 68     68   353 use warnings;
  68         156  
  68         1464  
58              
59 68     68   336 use Carp;
  68         142  
  68         3296  
60 68     68   1148 use RDF::Trine;
  68         146  
  68         2648  
61 68     68   381 use base qw(RDF::Trine::Serializer::NTriples);
  68         153  
  68         6015  
62              
63             ######################################################################
64              
65             our ($VERSION);
66             BEGIN {
67 68     68   297 $VERSION = '1.018';
68 68         61344 $RDF::Trine::Serializer::serializer_names{ 'ntriples-canonical' } = __PACKAGE__;
69             # foreach my $type (qw(text/plain)) {
70             # $RDF::Trine::Serializer::media_types{ $type } = __PACKAGE__;
71             # }
72             }
73              
74             ######################################################################
75              
76             =item C<< new ( [ onfail => $rule ] ) >>
77              
78             Returns a new Canonical N-Triples serializer object. If specified, the value of
79             the 'onfail' argument dictates the handling of blank nodes with no canonical
80             representation. The allowable rule values are 'truncate', 'append', 'space',
81             and 'die', and their respective behaviour is described in L</DESCRIPTION> above.
82              
83             =cut
84              
85             sub new {
86 779     779 1 6579 my $class = shift;
87 779         2557 my %opts = (onfail => '');
88            
89 779         2362 while (@_) {
90 2         7 my $field = lc shift;
91 2         4 my $value = shift;
92 2         8 $opts{$field} = $value;
93             }
94            
95 779         2970 return bless \%opts, $class;
96             }
97              
98             =item C<< serialize_model_to_file ( $fh, $model ) >>
99              
100             Serializes the C<$model> to canonical NTriples, printing the results to the
101             supplied filehandle C<<$fh>>.
102              
103             =cut
104              
105             sub serialize_model_to_file {
106 1     1 1 619 my $self = shift;
107 1         2 my $file = shift;
108 1         2 my $model = shift;
109            
110 1         3 my $string = $self->serialize_model_to_string($model);
111 1         3 print {$file} $string;
  1         19  
112             }
113              
114             =item C<< serialize_model_to_string ( $model ) >>
115              
116             Serializes the C<$model> to canonical NTriples, returning the result as a string.
117              
118             =cut
119              
120             sub serialize_model_to_string {
121 778     778 1 1400 my $self = shift;
122 778         1384 my $model = shift;
123            
124 778         1440 my $blankNodes = {};
125 778         1434 my @statements;
126            
127 778         2675 my $stream = $model->as_stream;
128 778         2499 while (my $ST = $stream->next) {
129 1402         3681 push @statements, { 'trine' => $ST };
130            
131 1402 100       4325 if ($ST->subject->isa('RDF::Trine::Node::Blank')) {
132 330         819 $blankNodes->{ $ST->subject->blank_identifier }->{'trine'} = $ST->subject;
133             }
134            
135 1402 100       4131 if ($ST->object->isa('RDF::Trine::Node::Blank')) {
136 182         468 $blankNodes->{ $ST->object->blank_identifier }->{'trine'} = $ST->object;
137             }
138             }
139            
140 778         1398 my %lexCounts;
141            
142 778         1621 foreach my $st (@statements) {
143             # Really need to canonicalise typed literals as per XSD.
144            
145             $st->{'lex'} = sprintf('%s %s %s',
146             ($st->{'trine'}->subject->isa('RDF::Trine::Node::Blank') ? '~' : $st->{'trine'}->subject->as_ntriples),
147             $st->{'trine'}->predicate->as_ntriples,
148 1402 100       4442 ($st->{'trine'}->object->isa('RDF::Trine::Node::Blank') ? '~' : $st->{'trine'}->object->as_ntriples)
    100          
149             );
150 1402         5990 $lexCounts{ $st->{'lex'} }++;
151             }
152              
153 778         2056 my $blankNodeCount = scalar keys %$blankNodes;
154 778         1762 my $blankNodeLength = length "$blankNodeCount";
155 778         1805 my $blankNodePattern = '_:g%0'.$blankNodeLength.'d';
156 778         1723 my $hardNodePattern = '_:h%0'.$blankNodeLength.'d';
157            
158 778         2130 @statements = sort { $a->{'lex'} cmp $b->{'lex'} } @statements;
  1071         2216  
159            
160 778         1381 my $genSymCounter = 1;
161            
162 778         1831 foreach my $st (@statements) {
163 1402 100       4513 next unless $lexCounts{ $st->{'lex'} } == 1;
164            
165 1370 100       4125 if ($st->{'trine'}->object->isa('RDF::Trine::Node::Blank')) {
166 182 100       543 unless (defined $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'}) {
167 128         538 $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'} =
168             sprintf($blankNodePattern, $genSymCounter);
169 128         261 $genSymCounter++;
170             }
171 182         478 my $b = $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'};
172 182         821 $st->{'lex'} =~ s/\~$/$b/;
173             }
174            
175 1370 100       3906 if ($st->{'trine'}->subject->isa('RDF::Trine::Node::Blank')) {
176 298 100       769 unless (defined $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'}) {
177 118         510 $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'} =
178             sprintf($blankNodePattern, $genSymCounter);
179 118         249 $genSymCounter++;
180             }
181 298         764 my $b = $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'};
182 298         1328 $st->{'lex'} =~ s/^\~/$b/;
183             }
184             }
185            
186 778         1691 foreach my $st (@statements) {
187 1402 50       3936 if ($st->{'lex'} =~ /\~$/) {
188 0 0       0 if (defined $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'}) {
189 0         0 my $b = $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'};
190 0         0 $st->{'lex'} =~ s/\~$/$b/;
191             }
192             }
193            
194 1402 100       3757 if ($st->{'lex'} =~ /^\~/) {
195 32 100       104 if (defined $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'}) {
196 30         86 my $b = $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'};
197 30         122 $st->{'lex'} =~ s/^\~/$b/;
198             }
199             }
200             }
201            
202 778         1595 @statements = sort { $a->{'lex'} cmp $b->{'lex'} } @statements;
  1043         1899  
203            
204 778         1548 my @canonicalStatements;
205             my @otherStatements;
206 778         1543 foreach my $st (@statements) {
207 1402 100       18491 if ($st->{'lex'} =~ /(^\~)|(\~$)/) {
208 2 50       20 if (lc $self->{'onfail'} eq 'die') {
    50          
209 0         0 croak "Model could not be canonicalised";
210             } elsif (lc $self->{'onfail'} eq 'truncate') {
211 0         0 next;
212             }
213            
214 2 50       7 if ($st->{'lex'} =~ /\~$/) {
215 0 0       0 unless (defined $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'}) {
216 0         0 $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'} =
217             sprintf($hardNodePattern, $genSymCounter);
218 0         0 $genSymCounter++;
219             }
220 0         0 my $b = $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'};
221 0         0 $st->{'lex'} =~ s/\~$/$b/;
222             }
223              
224 2 50       7 if ($st->{'lex'} =~ /^\~/) {
225 2 50       7 unless (defined $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'}) {
226 2         7 $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'} =
227             sprintf($hardNodePattern, $genSymCounter);
228 2         4 $genSymCounter++;
229             }
230 2         6 my $b = $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'};
231 2         7 $st->{'lex'} =~ s/^\~/$b/;
232             }
233              
234 2         7 push @otherStatements, $st;
235             } else {
236 1400         2921 push @canonicalStatements, $st;
237             }
238             }
239            
240 778         1472 my $rv = '';
241 778         1409 foreach my $st (@canonicalStatements) {
242 1400         3599 $rv .= $st->{'lex'} . " .\r\n";
243             }
244              
245             $rv .= "\r\n"
246 778 100 66     4861 if (defined($self->{'onfail'}) && (lc $self->{'onfail'} eq 'space'));
247            
248 778         1638 foreach my $st (@otherStatements) {
249 2         5 $rv .= $st->{'lex'} . " .\r\n";
250             }
251              
252 778         11287 return $rv;
253             }
254              
255             1;
256             __END__
257              
258             =back
259              
260             =head1 BUGS
261              
262             Please report any bugs or feature requests to through the GitHub web interface
263             at L<https://github.com/kasei/perlrdf/issues>.
264              
265             =head1 SEE ALSO
266              
267             I<Signing RDF Graphs>, Jeremey J Carroll,
268             Digital Media Systems Laboratory, HB Laboratories Bristol.
269             HPL-2003-142, 23 July 2003.
270             L<http://www.hpl.hp.com/techreports/2003/HPL-2003-142.pdf>.
271              
272             L<RDF::Trine>, L<RDF::Trine::Serializer::NTriples>.
273              
274             L<http://www.perlrdf.org/>.
275              
276             =head1 AUTHOR
277              
278             Toby Inkster, E<lt>tobyink@cpan.orgE<gt>
279              
280             =head1 COPYRIGHT AND LICENSE
281              
282             Copyright (c) 2010 Toby Inkster
283              
284             This library is free software; you can redistribute it and/or modify
285             it under the same terms as Perl itself, either Perl version 5.8.1 or,
286             at your option, any later version of Perl 5 you may have available.
287              
288             =cut