File Coverage

blib/lib/RDF/Trine/Serializer/NTriples/Canonical.pm
Criterion Covered Total %
statement 96 106 90.5
branch 32 42 76.1
condition 2 3 66.6
subroutine 10 10 100.0
pod 3 3 100.0
total 143 164 87.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             RDF::Trine::Serializer::NTriples::Canonical - Canonical representation of an RDF model
4              
5             =head1 VERSION
6              
7             This document describes RDF::Trine::Serializer::NTriples::Canonical version 1.017
8              
9             =head1 SYNOPSIS
10              
11             use RDF::Trine::Serializer::NTriples::Canonical;
12             my $serializer = RDF::Trine::Serializer::NTriples->new( onfail=>'truncate' );
13             $serializer->serialize_model_to_file(FH, $model);
14              
15             =head1 DESCRIPTION
16              
17             This module produces a canonical string representation of an RDF graph.
18             If the graph contains blank nodes, sometimes there is no canonical
19             representation that can be produced. The 'onfail' option allows you to
20             decide what is done in those circumstances:
21              
22             =over 8
23              
24             =item * truncate - drop problematic triples and only serialize a subgraph.
25              
26             =item * append - append problematic triples to the end of graph. The result will be non-canonical. This is the default behaviour.
27              
28             =item * space - As with 'append', but leave a blank line separating the canonical and non-canonical portions of the graph.
29              
30             =item * die - cause a fatal error.
31              
32             =back
33              
34             Other than the 'onfail' option, this package has exactly the same
35             interface as L<RDF::Trine::Serializer::NTriples>, providing
36             C<serialize_model_to_file> and C<serialize_model_to_string> methods.
37              
38             This package will be considerably slower than the non-canonicalising
39             serializer though, so should only be used for small to medium-sized
40             graphs, and only when you need canonicalisation (e.g. for side-by-side
41             comparison of two graphs to check they're isomorphic; or creating a
42             canonical representation for digital signing).
43              
44             =head1 METHODS
45              
46             Beyond the methods documented below, this class inherits methods from the
47             L<RDF::Trine::Serializer::NTriples> class.
48              
49             =over 4
50              
51             =cut
52              
53             package RDF::Trine::Serializer::NTriples::Canonical;
54              
55 68     68   16439 use 5.010;
  68         247  
56 68     68   371 use strict;
  68         152  
  68         1279  
57 68     68   386 use warnings;
  68         176  
  68         1624  
58              
59 68     68   361 use Carp;
  68         154  
  68         3370  
60 68     68   1066 use RDF::Trine;
  68         177  
  68         2697  
61 68     68   401 use base qw(RDF::Trine::Serializer::NTriples);
  68         157  
  68         6149  
62              
63             ######################################################################
64              
65             our ($VERSION);
66             BEGIN {
67 68     68   261 $VERSION = '1.017';
68 68         62477 $RDF::Trine::Serializer::serializer_names{ 'ntriples-canonical' } = __PACKAGE__;
69             # foreach my $type (qw(text/plain)) {
70             # $RDF::Trine::Serializer::media_types{ $type } = __PACKAGE__;
71             # }
72             }
73              
74             ######################################################################
75              
76             =item C<< new ( [ onfail => $rule ] ) >>
77              
78             Returns a new Canonical N-Triples serializer object. If specified, the value of
79             the 'onfail' argument dictates the handling of blank nodes with no canonical
80             representation. The allowable rule values are 'truncate', 'append', 'space',
81             and 'die', and their respective behaviour is described in L</DESCRIPTION> above.
82              
83             =cut
84              
85             sub new {
86 779     779 1 7860 my $class = shift;
87 779         3074 my %opts = (onfail => '');
88            
89 779         2740 while (@_) {
90 2         10 my $field = lc shift;
91 2         7 my $value = shift;
92 2         10 $opts{$field} = $value;
93             }
94            
95 779         3556 return bless \%opts, $class;
96             }
97              
98             =item C<< serialize_model_to_file ( $fh, $model ) >>
99              
100             Serializes the C<$model> to canonical NTriples, printing the results to the
101             supplied filehandle C<<$fh>>.
102              
103             =cut
104              
105             sub serialize_model_to_file {
106 1     1 1 952 my $self = shift;
107 1         2 my $file = shift;
108 1         3 my $model = shift;
109            
110 1         4 my $string = $self->serialize_model_to_string($model);
111 1         3 print {$file} $string;
  1         18  
112             }
113              
114             =item C<< serialize_model_to_string ( $model ) >>
115              
116             Serializes the C<$model> to canonical NTriples, returning the result as a string.
117              
118             =cut
119              
120             sub serialize_model_to_string {
121 778     778 1 1529 my $self = shift;
122 778         1778 my $model = shift;
123            
124 778         1557 my $blankNodes = {};
125 778         1647 my @statements;
126            
127 778         3252 my $stream = $model->as_stream;
128 778         3060 while (my $ST = $stream->next) {
129 1402         3968 push @statements, { 'trine' => $ST };
130            
131 1402 100       4708 if ($ST->subject->isa('RDF::Trine::Node::Blank')) {
132 330         906 $blankNodes->{ $ST->subject->blank_identifier }->{'trine'} = $ST->subject;
133             }
134            
135 1402 100       4455 if ($ST->object->isa('RDF::Trine::Node::Blank')) {
136 182         547 $blankNodes->{ $ST->object->blank_identifier }->{'trine'} = $ST->object;
137             }
138             }
139            
140 778         1614 my %lexCounts;
141            
142 778         1941 foreach my $st (@statements) {
143             # Really need to canonicalise typed literals as per XSD.
144            
145             $st->{'lex'} = sprintf('%s %s %s',
146             ($st->{'trine'}->subject->isa('RDF::Trine::Node::Blank') ? '~' : $st->{'trine'}->subject->as_ntriples),
147             $st->{'trine'}->predicate->as_ntriples,
148 1402 100       5136 ($st->{'trine'}->object->isa('RDF::Trine::Node::Blank') ? '~' : $st->{'trine'}->object->as_ntriples)
    100          
149             );
150 1402         6505 $lexCounts{ $st->{'lex'} }++;
151             }
152              
153 778         2653 my $blankNodeCount = scalar keys %$blankNodes;
154 778         1870 my $blankNodeLength = length "$blankNodeCount";
155 778         2074 my $blankNodePattern = '_:g%0'.$blankNodeLength.'d';
156 778         1977 my $hardNodePattern = '_:h%0'.$blankNodeLength.'d';
157            
158 778         2475 @statements = sort { $a->{'lex'} cmp $b->{'lex'} } @statements;
  1071         2526  
159            
160 778         1565 my $genSymCounter = 1;
161            
162 778         1849 foreach my $st (@statements) {
163 1402 100       4958 next unless $lexCounts{ $st->{'lex'} } == 1;
164            
165 1370 100       4493 if ($st->{'trine'}->object->isa('RDF::Trine::Node::Blank')) {
166 182 100       590 unless (defined $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'}) {
167 128         645 $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'} =
168             sprintf($blankNodePattern, $genSymCounter);
169 128         289 $genSymCounter++;
170             }
171 182         574 my $b = $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'};
172 182         955 $st->{'lex'} =~ s/\~$/$b/;
173             }
174            
175 1370 100       4429 if ($st->{'trine'}->subject->isa('RDF::Trine::Node::Blank')) {
176 298 100       841 unless (defined $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'}) {
177 118         523 $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'} =
178             sprintf($blankNodePattern, $genSymCounter);
179 118         240 $genSymCounter++;
180             }
181 298         834 my $b = $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'};
182 298         1488 $st->{'lex'} =~ s/^\~/$b/;
183             }
184             }
185            
186 778         2004 foreach my $st (@statements) {
187 1402 50       4689 if ($st->{'lex'} =~ /\~$/) {
188 0 0       0 if (defined $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'}) {
189 0         0 my $b = $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'};
190 0         0 $st->{'lex'} =~ s/\~$/$b/;
191             }
192             }
193            
194 1402 100       4150 if ($st->{'lex'} =~ /^\~/) {
195 32 100       108 if (defined $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'}) {
196 30         88 my $b = $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'};
197 30         129 $st->{'lex'} =~ s/^\~/$b/;
198             }
199             }
200             }
201            
202 778         1882 @statements = sort { $a->{'lex'} cmp $b->{'lex'} } @statements;
  1043         2065  
203            
204 778         1694 my @canonicalStatements;
205             my @otherStatements;
206 778         1661 foreach my $st (@statements) {
207 1402 100       19832 if ($st->{'lex'} =~ /(^\~)|(\~$)/) {
208 2 50       20 if (lc $self->{'onfail'} eq 'die') {
    50          
209 0         0 croak "Model could not be canonicalised";
210             } elsif (lc $self->{'onfail'} eq 'truncate') {
211 0         0 next;
212             }
213            
214 2 50       7 if ($st->{'lex'} =~ /\~$/) {
215 0 0       0 unless (defined $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'}) {
216 0         0 $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'} =
217             sprintf($hardNodePattern, $genSymCounter);
218 0         0 $genSymCounter++;
219             }
220 0         0 my $b = $blankNodes->{ $st->{'trine'}->object->blank_identifier }->{'lex'};
221 0         0 $st->{'lex'} =~ s/\~$/$b/;
222             }
223              
224 2 50       8 if ($st->{'lex'} =~ /^\~/) {
225 2 50       8 unless (defined $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'}) {
226 2         11 $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'} =
227             sprintf($hardNodePattern, $genSymCounter);
228 2         5 $genSymCounter++;
229             }
230 2         6 my $b = $blankNodes->{ $st->{'trine'}->subject->blank_identifier }->{'lex'};
231 2         8 $st->{'lex'} =~ s/^\~/$b/;
232             }
233              
234 2         6 push @otherStatements, $st;
235             } else {
236 1400         3212 push @canonicalStatements, $st;
237             }
238             }
239            
240 778         1568 my $rv = '';
241 778         1489 foreach my $st (@canonicalStatements) {
242 1400         4144 $rv .= $st->{'lex'} . " .\r\n";
243             }
244              
245             $rv .= "\r\n"
246 778 100 66     5049 if (defined($self->{'onfail'}) && (lc $self->{'onfail'} eq 'space'));
247            
248 778         1715 foreach my $st (@otherStatements) {
249 2         7 $rv .= $st->{'lex'} . " .\r\n";
250             }
251              
252 778         12727 return $rv;
253             }
254              
255             1;
256             __END__
257              
258             =back
259              
260             =head1 BUGS
261              
262             Please report any bugs or feature requests to through the GitHub web interface
263             at L<https://github.com/kasei/perlrdf/issues>.
264              
265             =head1 SEE ALSO
266              
267             I<Signing RDF Graphs>, Jeremey J Carroll,
268             Digital Media Systems Laboratory, HB Laboratories Bristol.
269             HPL-2003-142, 23 July 2003.
270             L<http://www.hpl.hp.com/techreports/2003/HPL-2003-142.pdf>.
271              
272             L<RDF::Trine>, L<RDF::Trine::Serializer::NTriples>.
273              
274             L<http://www.perlrdf.org/>.
275              
276             =head1 AUTHOR
277              
278             Toby Inkster, E<lt>tobyink@cpan.orgE<gt>
279              
280             =head1 COPYRIGHT AND LICENSE
281              
282             Copyright (c) 2010 Toby Inkster
283              
284             This library is free software; you can redistribute it and/or modify
285             it under the same terms as Perl itself, either Perl version 5.8.1 or,
286             at your option, any later version of Perl 5 you may have available.
287              
288             =cut