File Coverage

blib/lib/JSON/Schema/Draft201909.pm
Criterion Covered Total %
statement 231 231 100.0
branch 95 112 84.8
condition 26 30 86.6
subroutine 39 39 100.0
pod 5 5 100.0
total 396 417 94.9


line stmt bran cond sub pod time code
1 20     20   4823686 use strict;
  20         227  
  20         641  
2 20     20   110 use warnings;
  20         45  
  20         1183  
3             package JSON::Schema::Draft201909; # git description: v0.027-26-g81fb247
4             # vim: set ts=8 sts=2 sw=2 tw=100 et :
5             # ABSTRACT: Validate data against a schema
6             # KEYWORDS: JSON Schema data validation structure specification
7              
8             our $VERSION = '0.028';
9              
10 20     20   530 use 5.016; # for fc, unicode_strings features
  20         76  
11 20     20   124 no if "$]" >= 5.031009, feature => 'indirect';
  20         40  
  20         215  
12 20     20   1711 no if "$]" >= 5.033001, feature => 'multidimensional';
  20         50  
  20         212  
13 20     20   1023 no if "$]" >= 5.033006, feature => 'bareword_filehandles';
  20         59  
  20         116  
14 20     20   9278 use strictures 2;
  20         27538  
  20         911  
15 20     20   12369 use JSON::MaybeXS;
  20         92196  
  20         1364  
16 20     20   151 use Carp qw(croak carp);
  20         52  
  20         1334  
17 20     20   135 use List::Util 1.55 qw(pairs first uniqint);
  20         396  
  20         1720  
18 20     20   11113 use Ref::Util 0.100 qw(is_ref is_plain_hashref is_plain_coderef);
  20         29660  
  20         1518  
19 20     20   10704 use Mojo::URL;
  20         3875249  
  20         167  
20 20     20   9781 use Safe::Isa;
  20         7410  
  20         3004  
21 20     20   13379 use Path::Tiny;
  20         186257  
  20         1288  
22 20     20   12875 use Storable 'dclone';
  20         59236  
  20         1621  
23 20     20   9922 use File::ShareDir 'dist_dir';
  20         439383  
  20         1378  
24 20     20   8841 use Module::Runtime 'use_module';
  20         28286  
  20         159  
25 20     20   10697 use Moo;
  20         112065  
  20         138  
26 20     20   37102 use MooX::TypeTiny 0.002002;
  20         5316  
  20         156  
27 20     20   128271 use MooX::HandlesVia;
  20         15541  
  20         154  
28 20     20   13092 use Types::Standard 1.010002 qw(Bool Int Str HasMethods Enum InstanceOf HashRef Dict CodeRef Optional slurpy);
  20         1643415  
  20         282  
29 20     20   54470 use Feature::Compat::Try;
  20         5599  
  20         484  
30 20     20   53719 use JSON::Schema::Draft201909::Error;
  20         93  
  20         1013  
31 20     20   11753 use JSON::Schema::Draft201909::Result;
  20         181  
  20         978  
32 20     20   11756 use JSON::Schema::Draft201909::Document;
  20         102  
  20         291  
33 20     20   13438 use JSON::Schema::Draft201909::Utilities qw(get_type canonical_schema_uri E abort annotate_self);
  20         70  
  20         2068  
34 20     20   182 use namespace::clean;
  20         55  
  20         138  
35              
36             our @CARP_NOT = qw(JSON::Schema::Draft201909::Document);
37              
38             has output_format => (
39             is => 'ro',
40             isa => Enum(JSON::Schema::Draft201909::Result->OUTPUT_FORMATS),
41             default => 'basic',
42             );
43              
44             has short_circuit => (
45             is => 'ro',
46             isa => Bool,
47             lazy => 1,
48             default => sub { $_[0]->output_format eq 'flag' && !$_[0]->collect_annotations },
49             );
50              
51             has max_traversal_depth => (
52             is => 'ro',
53             isa => Int,
54             default => 50,
55             );
56              
57             has validate_formats => (
58             is => 'ro',
59             isa => Bool,
60             default => 0, # as specified by https://json-schema.org/draft/2019-09/schema#/$vocabulary
61             );
62              
63             has collect_annotations => (
64             is => 'ro',
65             isa => Bool,
66             );
67              
68             has annotate_unknown_keywords => (
69             is => 'ro',
70             isa => Bool,
71             );
72              
73             has _format_validations => (
74             is => 'bare',
75             isa => Dict[
76             (map +($_ => Optional[CodeRef]), qw(date-time date time duration email idn-email hostname idn-hostname ipv4 ipv6 uri uri-reference iri iri-reference uuid uri-template json-pointer relative-json-pointer regex)),
77             slurpy HashRef[Dict[type => Enum[qw(null object array boolean string number integer)], sub => CodeRef]],
78             ],
79             init_arg => 'format_validations',
80             handles_via => 'Hash',
81             handles => {
82             _get_format_validation => 'get',
83             },
84             lazy => 1,
85             default => sub { {} },
86             );
87              
88             sub add_schema {
89 4699 50   4699 1 144752 croak 'insufficient arguments' if @_ < 2;
90 4699         8819 my $self = shift;
91              
92             # TODO: resolve $uri against $self->base_uri
93 4699 100       17655 my $uri = !is_ref($_[0]) ? Mojo::URL->new(shift)
    100          
94             : $_[0]->$_isa('Mojo::URL') ? shift : Mojo::URL->new;
95              
96 4699 100       83345 croak 'cannot add a schema with a uri with a fragment' if defined $uri->fragment;
97              
98 4698 100       30191 if (not @_) {
99 2         10 my ($schema, $canonical_uri, $document, $document_path) = $self->_fetch_schema_from_uri($uri);
100 2 100 66     20 return if not defined $schema or not defined wantarray;
101 1         14 return $document;
102             }
103              
104 4696 50       12371 my $document = $_[0]->$_isa('JSON::Schema::Draft201909::Document') ? shift
    100          
105             : JSON::Schema::Draft201909::Document->new(
106             schema => shift,
107             $uri ? (canonical_uri => $uri) : (),
108             _evaluator => $self, # used only for traversal during document construction
109             );
110              
111 4696 100       190757 die JSON::Schema::Draft201909::Result->new(
112             output_format => $self->output_format,
113             valid => 0,
114             errors => [ $document->errors ],
115             ) if $document->has_errors;
116              
117 4644 100       248089 if (not grep $_->{document} == $document, $self->_resource_values) {
118 4640   66     406140 my $schema_content = $document->_serialized_schema
119             // $document->_serialized_schema($self->_json_decoder->encode($document->schema));
120              
121 4640 100       389550 if (my $existing_doc = first {
122 39996   66 39996   923684 my $existing_content = $_->_serialized_schema
123             // $_->_serialized_schema($self->_json_decoder->encode($_->schema));
124 39996         262179 $existing_content eq $schema_content
125             } uniqint map $_->{document}, $self->_resource_values) {
126             # we already have this schema content in another document object.
127 3399         27317 $document = $existing_doc;
128             }
129             else {
130 1241         25567 $self->_add_resources(map +($_->[0] => +{ %{$_->[1]}, document => $document }),
  1379         104581  
131             $document->resource_pairs);
132             }
133             }
134              
135 4638 100       196413 if ("$uri") {
136 33         6604 $self->_add_resources($uri => { path => '', canonical_uri => $document->canonical_uri, document => $document });
137             }
138              
139 4638         815392 return $document;
140             }
141              
142             sub evaluate_json_string {
143 3 50   3 1 11662 croak 'evaluate_json_string called in void context' if not defined wantarray;
144 3 50       16 croak 'insufficient arguments' if @_ < 3;
145 3         12 my ($self, $json_data, $schema, $config_override) = @_;
146              
147 3         7 my $data;
148             try {
149             $data = $self->_json_decoder->decode($json_data)
150             }
151 3         12 catch ($e) {
152             return JSON::Schema::Draft201909::Result->new(
153             output_format => $self->output_format,
154             valid => 0,
155             errors => [
156             JSON::Schema::Draft201909::Error->new(
157             keyword => undef,
158             instance_location => '',
159             keyword_location => '',
160             error => $e,
161             )
162             ],
163             );
164             }
165              
166 1         17 return $self->evaluate($data, $schema, $config_override);
167             }
168              
169             # this is called whenever we need to walk a document for something.
170             # for now it is just called when a ::Document object is created, to identify
171             # $id and $anchor keywords within.
172             # Returns the internal $state object accumulated during the traversal.
173             sub traverse {
174 4737 50   4737 1 129919 croak 'insufficient arguments' if @_ < 2;
175 4737         12467 my ($self, $schema_reference, $config_override) = @_;
176              
177 4737   100     19191 my $base_uri = Mojo::URL->new($config_override->{initial_schema_uri} // '');
178              
179             my $state = {
180             depth => 0,
181             data_path => '', # this never changes since we don't have an instance yet
182             traversed_schema_path => '', # the accumulated traversal path up to the last $ref traversal
183             initial_schema_uri => $base_uri, # the canonical URI as of the start or the last traversed $ref
184             schema_path => '', # the rest of the path, since the start or the last traversed $ref
185             errors => [],
186             # for now, this is hardcoded, but in the future we will wrap this in a dialect that starts off
187             # just with the Core vocabulary and then determine the actual vocabularies from the '$schema'
188             # keyword in the schema and the '$vocabulary' keyword in the metaschema.
189             vocabularies => [
190             (map use_module('JSON::Schema::Draft201909::Vocabulary::'.$_)->new,
191             qw(Core Applicator Validation Format Content MetaData)),
192             ],
193             identifiers => [],
194             configs => {},
195             callbacks => $config_override->{callbacks} // {},
196 4737   100     1689328 evaluator => $self,
197             };
198              
199             try {
200             $self->_traverse($schema_reference, $state);
201             }
202 4737         1155506 catch ($e) {
203             if ($e->$_isa('JSON::Schema::Draft201909::Error')) {
204             # note: we should never be here, since traversal subs are no longer be fatal
205             push @{$state->{errors}}, $e;
206             }
207             else {
208             E($state, 'EXCEPTION: '.$e);
209             }
210             }
211              
212 4737         18068 return $state;
213             }
214              
215             sub evaluate {
216 4670 50   4670 1 6011099 croak 'evaluate called in void context' if not defined wantarray;
217 4670 50       14299 croak 'insufficient arguments' if @_ < 3;
218 4670         13281 my ($self, $data, $schema_reference, $config_override) = @_;
219              
220 4670         19042 my $base_uri = Mojo::URL->new; # TODO: will be set by a global attribute
221              
222 4670         58701 my $state = {
223             data_path => '',
224             traversed_schema_path => '', # the accumulated traversal path up to the last $ref traversal
225             initial_schema_uri => $base_uri, # the canonical URI as of the start or the last traversed $ref
226             schema_path => '', # the rest of the path, since the start or the last traversed $ref
227             };
228              
229 4670         8618 my $valid;
230             try {
231             my ($schema, $canonical_uri, $document, $document_path);
232              
233             if (not is_ref($schema_reference) or $schema_reference->$_isa('Mojo::URL')) {
234             # TODO: resolve $uri against base_uri
235             ($schema, $canonical_uri, $document, $document_path) = $self->_fetch_schema_from_uri($schema_reference);
236             }
237             else {
238             # traverse is called via add_schema -> ::Document->new -> ::Document->BUILD
239             $document = $self->add_schema($base_uri, $schema_reference);
240             ($schema, $canonical_uri) = map $document->$_, qw(schema canonical_uri);
241             $document_path = '';
242             }
243              
244             abort($state, 'EXCEPTION: unable to find resource %s', $schema_reference)
245             if not defined $schema;
246              
247             $state = +{
248             %$state,
249             depth => 0,
250             initial_schema_uri => $canonical_uri, # the canonical URI as of the start or the last traversed $ref
251             document => $document, # the ::Document object containing this schema
252             document_path => $document_path, # the *initial* path within the document of this schema
253             errors => [],
254             annotations => [],
255             seen => {},
256             # for now, this is hardcoded, but in the future the dialect will be determined by the
257             # traverse() pass on the schema and examination of the referenced metaschema.
258             vocabularies => [
259             (map use_module('JSON::Schema::Draft201909::Vocabulary::'.$_)->new,
260             qw(Core Applicator Validation Format Content MetaData)),
261             ],
262             evaluator => $self,
263             %{$document->evaluation_configs},
264             (map {
265             my $val = $config_override->{$_} // $self->$_;
266             defined $val ? ( $_ => $val ) : ()
267             } qw(short_circuit collect_annotations validate_formats annotate_unknown_keywords)),
268             };
269              
270             $valid = $self->_eval($data, $schema, $state);
271             warn 'result is false but there are no errors' if not $valid and not @{$state->{errors}};
272             }
273 4670         13503 catch ($e) {
274             if ($e->$_isa('JSON::Schema::Draft201909::Result')) {
275             return $e;
276             }
277             elsif ($e->$_isa('JSON::Schema::Draft201909::Error')) {
278             push @{$state->{errors}}, $e;
279             }
280             else {
281             E($state, 'EXCEPTION: '.$e);
282             }
283              
284             $valid = 0;
285             }
286              
287             return JSON::Schema::Draft201909::Result->new(
288             output_format => $self->output_format,
289             valid => $valid,
290             $valid
291             # strip annotations from result if user didn't explicitly ask for them
292             ? ($config_override->{collect_annotations} // $self->collect_annotations
293             ? (annotations => $state->{annotations}) : ())
294 4618 100 100     137674 : (errors => $state->{errors}),
    100          
295             );
296             }
297              
298             sub get {
299 8 50   8 1 68206 croak 'insufficient arguments' if @_ < 2;
300 8         27 my ($self, $uri) = @_;
301              
302 8         33 my ($subschema, $canonical_uri) = $self->_fetch_schema_from_uri($uri);
303 8 100       678 $subschema = dclone($subschema) if is_ref($subschema);
304 8 100       218 return !defined $subschema ? () : wantarray ? ($subschema, $canonical_uri) : $subschema;
    100          
305             }
306              
307             ######## NO PUBLIC INTERFACES FOLLOW THIS POINT ########
308              
309             sub _traverse {
310 10192 50   10192   29689 croak 'insufficient arguments' if @_ < 3;
311 10192         20819 my ($self, $schema, $state) = @_;
312              
313 10192         19504 delete $state->{keyword};
314              
315             return E($state, 'EXCEPTION: maximum traversal depth exceeded')
316 10192 50       35761 if $state->{depth}++ > $self->max_traversal_depth;
317              
318 10192         31234 my $schema_type = get_type($schema);
319 10192 100       57615 return if $schema_type eq 'boolean';
320              
321 8152 100       18195 return E($state, 'invalid schema type: %s', $schema_type) if $schema_type ne 'object';
322              
323 8149         13170 foreach my $vocabulary (@{$state->{vocabularies}}) {
  8149         20322  
324 48894         139057 foreach my $keyword ($vocabulary->keywords) {
325 464493 100       898050 next if not exists $schema->{$keyword};
326              
327 12421         23853 $state->{keyword} = $keyword;
328 12421         35663 my $method = '_traverse_keyword_'.($keyword =~ s/^\$//r);
329              
330 12421 100       76111 $vocabulary->$method($schema, $state) if $vocabulary->can($method);
331              
332 12421 100       115194 if (my $sub = $state->{callbacks}{$keyword}) {
333 4         13 $sub->($schema, $state);
334             }
335             }
336             }
337             }
338              
339             # keyword => undef, or arrayref of alternatives
340             my %removed_keywords = (
341             definitions => [ '$defs' ],
342             dependencies => [ qw(dependentSchemas dependentRequired) ],
343             );
344              
345             sub _eval {
346 8937 50   8937   24958 croak '_eval called in void context' if not defined wantarray;
347 8937 50       23309 croak 'insufficient arguments' if @_ < 4;
348 8937         20167 my ($self, $data, $schema, $state) = @_;
349              
350             # do not propagate upwards changes to depth, traversed paths,
351             # but additions to annotations, errors are by reference and will be retained
352 8937         64607 $state = { %$state };
353 8937         64826 delete @{$state}{'keyword', grep /^_/, keys %$state};
  8937         22471  
354              
355             abort($state, 'EXCEPTION: maximum evaluation depth exceeded')
356 8937 100       38630 if $state->{depth}++ > $self->max_traversal_depth;
357              
358             # find all schema locations in effect at this data path + canonical_uri combination
359             # if any of them are absolute prefix of this schema location, we are in a loop.
360 8934         29588 my $canonical_uri = canonical_schema_uri($state);
361 8934         24025 my $schema_location = $state->{traversed_schema_path}.$state->{schema_path};
362             abort($state, 'EXCEPTION: infinite loop detected (same location evaluated twice)')
363             if grep substr($schema_location, 0, length) eq $_,
364 8934 100       14558 keys %{$state->{seen}{$state->{data_path}}{$canonical_uri}};
  8934         38480  
365 8933         1346702 $state->{seen}{$state->{data_path}}{$canonical_uri}{$schema_location}++;
366              
367 8933         1223884 my $schema_type = get_type($schema);
368 8933 100 66     29056 return $schema || E($state, 'subschema is false') if $schema_type eq 'boolean';
369              
370             # this should never happen, due to checks in traverse
371 8662 50       19611 abort($state, 'invalid schema type: %s', $schema_type) if $schema_type ne 'object';
372              
373 8662         14751 my $valid = 1;
374 8662         45898 my %unknown_keywords = map +($_ => undef), keys %$schema;
375 8662         21271 my $orig_annotations = $state->{annotations};
376 8662         18023 $state->{annotations} = [];
377 8662         14943 my @new_annotations;
378              
379             ALL_KEYWORDS:
380 8662         14543 foreach my $vocabulary (@{$state->{vocabularies}}) {
  8662         22939  
381 45941         143490 foreach my $keyword ($vocabulary->keywords) {
382 440711 100       832833 next if not exists $schema->{$keyword};
383              
384 15253         30226 delete $unknown_keywords{$keyword};
385              
386 15253         45833 my $method = '_eval_keyword_'.($keyword =~ s/^\$//r);
387 15253 100       64226 next if not $vocabulary->can($method);
388              
389 13828         30580 $state->{keyword} = $keyword;
390 13828         20844 my $error_count = @{$state->{errors}};
  13828         29334  
391 13828 100       47303 if (not $vocabulary->$method($data, $schema, $state)) {
392             warn 'result is false but there are no errors (keyword: '.$keyword.')'
393 3271 50       6605 if $error_count == @{$state->{errors}};
  3271         10572  
394 3271         7008 $valid = 0;
395             }
396              
397 13551 100 100     55010 last ALL_KEYWORDS if not $valid and $state->{short_circuit};
398              
399 12131         26203 push @new_annotations, @{$state->{annotations}}[$#new_annotations+1 .. $#{$state->{annotations}}];
  12131         30734  
  12131         25107  
400             }
401             }
402              
403             # check for previously-supported but now removed keywords
404 8385         38342 foreach my $keyword (sort keys %removed_keywords) {
405 16770 100       37733 next if not exists $schema->{$keyword};
406 2         28 my $message ='no-longer-supported "'.$keyword.'" keyword present (at location "'
407             .canonical_schema_uri($state).'")';
408 2 50       495 if ($removed_keywords{$keyword}) {
409 2         5 my @list = map '"'.$_.'"', @{$removed_keywords{$keyword}};
  2         13  
410 2 50       7 @list = ((map $_.',', @list[0..$#list-1]), $list[-1]) if @list > 2;
411 2 100       21 splice(@list, -1, 0, 'or') if @list > 1;
412 2         11 $message .= ': this should be rewritten as '.join(' ', @list);
413             }
414 2         435 carp $message;
415             }
416              
417 8385         18326 $state->{annotations} = $orig_annotations;
418              
419 8385 100       20037 if ($valid) {
420 5158         7684 push @{$state->{annotations}}, @new_annotations;
  5158         12500  
421 5158         12821 annotate_self(+{ %$state, keyword => $_ }, $schema) foreach sort keys %unknown_keywords;
422             }
423              
424 8385         82344 return $valid;
425             }
426              
427             has _resource_index => (
428             is => 'bare',
429             isa => HashRef[Dict[
430             canonical_uri => InstanceOf['Mojo::URL'],
431             path => Str,
432             document => InstanceOf['JSON::Schema::Draft201909::Document'],
433             ]],
434             handles_via => 'Hash',
435             handles => {
436             _add_resources => 'set',
437             _get_resource => 'get',
438             _remove_resource => 'delete',
439             _resource_index => 'elements',
440             _resource_keys => 'keys',
441             _add_resources_unsafe => 'set',
442             _resource_values => 'values',
443             },
444             lazy => 1,
445             default => sub { {} },
446             );
447              
448             around _add_resources => sub {
449             my ($orig, $self) = (shift, shift);
450              
451             my @resources;
452             foreach my $pair (sort { $a->[0] cmp $b->[0] } pairs @_) {
453             my ($key, $value) = @$pair;
454             if (my $existing = $self->_get_resource($key)) {
455             # we allow overwriting canonical_uri = '' to allow for ad hoc evaluation of schemas that
456             # lack all identifiers altogether, but preserve other resources from the original document
457             if ($key ne '') {
458             next if $existing->{path} eq $value->{path}
459             and $existing->{canonical_uri} eq $value->{canonical_uri}
460             and $existing->{document} == $value->{document};
461             croak 'uri "'.$key.'" conflicts with an existing schema resource';
462             }
463             }
464             elsif ($self->CACHED_METASCHEMAS->{$key}) {
465             croak 'uri "'.$key.'" conflicts with an existing meta-schema resource';
466             }
467              
468             my $fragment = $value->{canonical_uri}->fragment;
469             croak sprintf('canonical_uri cannot contain an empty fragment (%s)', $value->{canonical_uri})
470             if defined $fragment and $fragment eq '';
471              
472             croak sprintf('canonical_uri cannot contain a plain-name fragment (%s)', $value->{canonical_uri})
473             if ($fragment // '') =~ m{^[^/]};
474              
475             $self->$orig($key, $value);
476             }
477             };
478              
479 20         17290 use constant CACHED_METASCHEMAS => {
480             'https://json-schema.org/draft/2019-09/hyper-schema' => '2019-09/hyper-schema.json',
481             'https://json-schema.org/draft/2019-09/links' => '2019-09/links.json',
482             'https://json-schema.org/draft/2019-09/meta/applicator' => '2019-09/meta/applicator.json',
483             'https://json-schema.org/draft/2019-09/meta/content' => '2019-09/meta/content.json',
484             'https://json-schema.org/draft/2019-09/meta/core' => '2019-09/meta/core.json',
485             'https://json-schema.org/draft/2019-09/meta/format' => '2019-09/meta/format.json',
486             'https://json-schema.org/draft/2019-09/meta/hyper-schema' => '2019-09/meta/hyper-schema.json',
487             'https://json-schema.org/draft/2019-09/meta/meta-data' => '2019-09/meta/meta-data.json',
488             'https://json-schema.org/draft/2019-09/meta/validation' => '2019-09/meta/validation.json',
489             'https://json-schema.org/draft/2019-09/output/hyper-schema' => '2019-09/output/hyper-schema.json',
490             'https://json-schema.org/draft/2019-09/output/schema' => '2019-09/output/schema.json',
491             'https://json-schema.org/draft/2019-09/schema' => '2019-09/schema.json',
492 20     20   81171 };
  20         60  
493              
494             # returns the same as _get_resource
495             sub _get_or_load_resource {
496 1100     1100   3548 my ($self, $uri) = @_;
497              
498 1100         25788 my $resource = $self->_get_resource($uri);
499 1100 100       354573 return $resource if $resource;
500              
501 26 100       142 if (my $local_filename = $self->CACHED_METASCHEMAS->{$uri}) {
502 23         5817 my $file = path(dist_dir('JSON-Schema-Draft201909'), $local_filename);
503 23         4146 my $schema = $self->_json_decoder->decode($file->slurp_raw);
504 23         7016 my $document = JSON::Schema::Draft201909::Document->new(schema => $schema, _evaluator => $self);
505              
506             # this should be caught by the try/catch in evaluate()
507 23 50       1050 die JSON::Schema::Draft201909::Result->new(
508             output_format => $self->output_format,
509             valid => 0,
510             errors => [ $document->errors ],
511             ) if $document->has_errors;
512              
513             # we have already performed the appropriate collision checks, so we bypass them here
514             $self->_add_resources_unsafe(
515 23         1360 map +($_->[0] => +{ %{$_->[1]}, document => $document }),
  23         1883  
516             $document->resource_pairs
517             );
518              
519 23         2869 return $self->_get_resource($uri);
520             }
521              
522             # TODO:
523             # - load from network or disk
524             # - handle such resources with $anchor fragments
525              
526 3         583 return;
527             };
528              
529             # returns a schema (which may not be at a document root), the canonical uri for that schema,
530             # the JSON::Schema::Draft201909::Document object that holds that schema, and the path relative
531             # to the document root for this schema.
532             # creates a Document and adds it to the resource index, if not already present.
533             sub _fetch_schema_from_uri {
534 1139     1139   3146 my ($self, $uri) = @_;
535              
536 1139 100       3351 $uri = Mojo::URL->new($uri) if not is_ref($uri);
537 1139         6701 my $fragment = $uri->fragment;
538              
539 1139         5386 my ($subschema, $canonical_uri, $document, $document_path);
540 1139 100 100     6477 if (not length($fragment) or $fragment =~ m{^/}) {
541 1099         3487 my $base = $uri->clone->fragment(undef);
542 1099 100       139875 if (my $resource = $self->_get_or_load_resource($base)) {
543 1096   100     16378 $subschema = $resource->{document}->get($document_path = $resource->{path}.($fragment//''));
544 1096         20861 $document = $resource->{document};
545             my $closest_resource = first { !length($_->[1]{path}) # document root
546             || length($document_path)
547 1185 100 100 1185   106512 && path($_->[1]{path})->subsumes($document_path) } # path is above present location
548 497         34603 sort { length($b->[1]{path}) <=> length($a->[1]{path}) } # sort by length, descending
549 1096         26673 grep { not length Mojo::URL->new($_->[0])->fragment } # omit anchors
  1527         122192  
550             $document->resource_pairs;
551              
552             $canonical_uri = $closest_resource->[1]{canonical_uri}->clone
553 1096         48894 ->fragment(substr($document_path, length($closest_resource->[1]{path})));
554 1096 100       101358 $canonical_uri->fragment(undef) if not length($canonical_uri->fragment);
555             }
556             }
557             else { # we are following a URI with a plain-name fragment
558 40 100       979 if (my $resource = $self->_get_resource($uri)) {
559 35         10321 $subschema = $resource->{document}->get($document_path = $resource->{path});
560 35         1035 $canonical_uri = $resource->{canonical_uri}->clone; # this is *not* the anchor-containing URI
561 35         3229 $document = $resource->{document};
562             }
563             }
564              
565 1139 100       21937 return defined $subschema ? ($subschema, $canonical_uri, $document, $document_path) : ();
566             }
567              
568             has _json_decoder => (
569             is => 'ro',
570             isa => HasMethods[qw(encode decode)],
571             lazy => 1,
572             default => sub { JSON::MaybeXS->new(allow_nonref => 1, canonical => 1, utf8 => 1) },
573             );
574              
575             1;
576              
577             __END__
578              
579             =pod
580              
581             =encoding UTF-8
582              
583             =for stopwords schema subschema metaschema validator evaluator listref
584              
585             =head1 NAME
586              
587             JSON::Schema::Draft201909 - Validate data against a schema
588              
589             =head1 VERSION
590              
591             version 0.028
592              
593             =head1 SYNOPSIS
594              
595             use JSON::Schema::Draft201909;
596              
597             $js = JSON::Schema::Draft201909->new(
598             output_format => 'flag',
599             ... # other options
600             );
601             $result = $js->evaluate($instance_data, $schema_data);
602              
603             =head1 DESCRIPTION
604              
605             This module aims to be a fully-compliant L<JSON Schema|https://json-schema.org/> evaluator and
606             validator, targeting the currently-latest
607             L<Draft 2019-09|https://json-schema.org/specification-links.html#2019-09-formerly-known-as-draft-8>
608             version of the specification.
609              
610             =head1 CONFIGURATION OPTIONS
611              
612             =head2 output_format
613              
614             One of: C<flag>, C<basic>, C<strict_basic>, C<detailed>, C<verbose>, C<terse>. Defaults to C<basic>.
615             Passed to L<JSON::Schema::Draft201909::Result/output_format>.
616              
617             =head2 short_circuit
618              
619             When true, evaluation will return early in any execution path as soon as the outcome can be
620             determined, rather than continuing to find all errors or annotations. Be aware that this can result
621             in invalid results in the presence of keywords that depend on annotations, namely
622             C<unevaluatedItems> and C<unevaluatedProperties>.
623              
624             Defaults to true when C<output_format> is C<flag>, and false otherwise.
625              
626             =head2 max_traversal_depth
627              
628             The maximum number of levels deep a schema traversal may go, before evaluation is halted. This is to
629             protect against accidental infinite recursion, such as from two subschemas that each reference each
630             other, or badly-written schemas that could be optimized. Defaults to 50.
631              
632             =head2 validate_formats
633              
634             When true, the C<format> keyword will be treated as an assertion, not merely an annotation. Defaults
635             to false.
636              
637             =head2 format_validations
638              
639             An optional hashref that allows overriding the validation method for formats, or adding new ones.
640             Overrides to existing formats (see L</Format Validation>)
641             must be specified in the form of C<< { $format_name => $format_sub } >>, where
642             the format sub is a coderef that takes one argument and returns a boolean result. New formats must
643             be specified in the form of C<< { $format_name => { type => $type, sub => $format_sub } } >>,
644             where the type indicates which of the core JSON Schema types (null, object, array, boolean, string,
645             number, or integer) the instance value must be for the format validation to be considered.
646              
647             =head2 collect_annotations
648              
649             When true, annotations are collected from keywords that produce them, when validation succeeds.
650             These annotations are available in the returned result (see L<JSON::Schema::Draft201909::Result>).
651             Defaults to false.
652              
653             =head2 annotate_unknown_keywords
654              
655             When true, keywords that are not recognized by any vocabulary are collected as annotations (where
656             the value of the annotation is the value of the keyword). L</collect_annotations> must also be true
657             in order for this to have any effect.
658             Defaults to false (for now).
659              
660             =head1 METHODS
661              
662             =for Pod::Coverage keywords
663              
664             =head2 evaluate_json_string
665              
666             $result = $js->evaluate_json_string($data_as_json_string, $schema_data);
667             $result = $js->evaluate_json_string($data_as_json_string, $schema_data, { collect_annotations => 1});
668              
669             Evaluates the provided instance data against the known schema document.
670              
671             The data is in the form of a JSON-encoded string (in accordance with
672             L<RFC8259|https://tools.ietf.org/html/rfc8259>). B<The string is expected to be UTF-8 encoded.>
673              
674             The schema must represent a JSON Schema that respects the Draft 2019-09 meta-schema at
675             L<https://json-schema.org/draft/2019-09/schema>, in one of these forms:
676              
677             =over 4
678              
679             =item *
680              
681             a Perl data structure, such as what is returned from a JSON decode operation,
682              
683             =item *
684              
685             a L<JSON::Schema::Draft201909::Document> object,
686              
687             =item *
688              
689             or a URI string indicating the location where such a schema is located.
690              
691             =back
692              
693             Optionally, a hashref can be passed as a third parameter which allows changing the values of the
694             L</short_circuit>, L</collect_annotations>, L</annotate_unknown_keywords> and/or
695             L</validate_formats> settings for just this evaluation call.
696              
697             The result is a L<JSON::Schema::Draft201909::Result> object, which can also be used as a boolean.
698              
699             =head2 evaluate
700              
701             $result = $js->evaluate($instance_data, $schema_data);
702             $result = $js->evaluate($instance_data, $schema_data, { short_circuit => 0 });
703              
704             Evaluates the provided instance data against the known schema document.
705              
706             The data is in the form of an unblessed nested Perl data structure representing any type that JSON
707             allows: null, boolean, string, number, object, array. (See L</TYPES> below.)
708              
709             The schema must represent a JSON Schema that respects the Draft 2019-09 meta-schema at
710             L<https://json-schema.org/draft/2019-09/schema>, in one of these forms:
711              
712             =over 4
713              
714             =item *
715              
716             a Perl data structure, such as what is returned from a JSON decode operation,
717              
718             =item *
719              
720             a L<JSON::Schema::Draft201909::Document> object,
721              
722             =item *
723              
724             or a URI string indicating the location where such a schema is located.
725              
726             =back
727              
728             Optionally, a hashref can be passed as a third parameter which allows changing the values of the
729             L</short_circuit>, L</collect_annotations>, L</annotate_unknown_keywords> and/or
730             L</validate_formats> settings for just this
731             evaluation call.
732              
733             The result is a L<JSON::Schema::Draft201909::Result> object, which can also be used as a boolean.
734              
735             =head2 traverse
736              
737             $result = $js->traverse($schema_data);
738             $result = $js->traverse($schema_data, { initial_schema_uri => 'http://example.com' });
739              
740             Traverses the provided schema data without evaluating it against any instance data. Returns the
741             internal state object accumulated during the traversal, including any identifiers found therein, and
742             any errors found during parsing. For internal purposes only.
743              
744             You can pass a series of callback subs to this method corresponding to keywords, which is useful for
745             extracting data from within schemas and skipping properties that may look like keywords but actually
746             are not (for example C<{"const":{"$ref": "this is not actually a $ref"}}>). This feature is highly
747             experimental and is highly likely to change in the future.
748              
749             For example, to find the resolved targets of all C<$ref> keywords in a schema document:
750              
751             my @refs;
752             JSON::Schema::Draft201909->new->traverse($schema, {
753             callbacks => {
754             '$ref' => sub ($schema, $state) {
755             push @refs, Mojo::URL->new($schema->{'$ref'})
756             ->to_abs(JSON::Schema::Draft201909::Utilities::canonical_schema_uri($state));
757             }
758             },
759             });
760              
761             =head2 add_schema
762              
763             $js->add_schema($uri => $schema);
764             $js->add_schema($uri => $document);
765             $js->add_schema($schema);
766             $js->add_schema($document);
767              
768             Introduces the (unblessed, nested) Perl data structure or L<JSON::Schema::Draft201909::Document>
769             object, representing a JSON Schema, to the implementation, registering it under the indicated URI if
770             provided (and if not, C<''> will be used if no other identifier can be found within).
771              
772             You B<MUST> call C<add_schema> for any external resources that a schema may reference via C<$ref>
773             before calling L</evaluate>, other than the standard metaschemas which are loaded from a local cache
774             as needed.
775              
776             Returns C<undef> if the resource could not be found;
777             if there were errors in the document, will die with a L<JSON::Schema::Draft201909::Result> object
778             containing the errors;
779             otherwise returns the L<JSON::Schema::Draft201909::Document> that contains the added schema.
780              
781             =head2 get
782              
783             my $schema = $js->get($uri);
784             my ($schema, $canonical_uri) = $js->get($uri);
785              
786             Fetches the Perl data structure representing the JSON Schema at the indicated URI. When called in
787             list context, the canonical URI of that location is also returned, as a L<Mojo::URL>. Returns
788             C<undef> if the schema with that URI has not been loaded (or cached).
789              
790             =head1 LIMITATIONS
791              
792             =head2 Types
793              
794             Perl is a more loosely-typed language than JSON. This module delves into a value's internal
795             representation in an attempt to derive the true "intended" type of the value. However, if a value is
796             used in another context (for example, a numeric value is concatenated into a string, or a numeric
797             string is used in an arithmetic operation), additional flags can be added onto the variable causing
798             it to resemble the other type. This should not be an issue if data validation is occurring
799             immediately after decoding a JSON payload, or if the JSON string itself is passed to this module.
800             If this turns out to be an issue in real environments, I may have to implement a C<lax_scalars>
801             option.
802              
803             For more information, see L<Cpanel::JSON::XS/MAPPING>.
804              
805             =head2 Format Validation
806              
807             By default, formats are treated only as annotations, not assertions. When L</validate_format> is
808             true, strings are also checked against the format as specified in the schema. At present the
809             following formats are supported (use of any other formats than these will always evaluate as true):
810              
811             =over 4
812              
813             =item *
814              
815             C<date-time>
816              
817             =item *
818              
819             C<date>
820              
821             =item *
822              
823             C<time>
824              
825             =item *
826              
827             C<duration>
828              
829             =item *
830              
831             C<email>
832              
833             =item *
834              
835             C<idn-email>
836              
837             =item *
838              
839             C<hostname>
840              
841             =item *
842              
843             C<idn-hostname>
844              
845             =item *
846              
847             C<ipv4>
848              
849             =item *
850              
851             C<ipv6>
852              
853             =item *
854              
855             C<uri>
856              
857             =item *
858              
859             C<uri-reference>
860              
861             =item *
862              
863             C<iri>
864              
865             =item *
866              
867             C<uuid>
868              
869             =item *
870              
871             C<json-pointer>
872              
873             =item *
874              
875             C<relative-json-pointer>
876              
877             =item *
878              
879             C<regex>
880              
881             =back
882              
883             A few optional prerequisites are needed for some of these (if the prerequisite is missing,
884             validation will always succeed):
885              
886             =over 4
887              
888             =item *
889              
890             C<date-time>, C<date>, and C<time> require L<Time::Moment>
891              
892             =item *
893              
894             C<email> and C<idn-email> require L<Email::Address::XS> version 1.01 (or higher)
895              
896             =item *
897              
898             C<hostname> and C<idn-hostname> require L<Data::Validate::Domain>
899              
900             =item *
901              
902             C<idn-hostname> requires L<Net::IDN::Encode>
903              
904             =back
905              
906             =head2 Specification Compliance
907              
908             Until version 1.000 is released, this implementation is not fully specification-compliant.
909              
910             To date, missing features (some of which are optional, but still quite useful) include:
911              
912             =over 4
913              
914             =item *
915              
916             loading schema documents from disk
917              
918             =item *
919              
920             loading schema documents from the network
921              
922             =item *
923              
924             loading schema documents from a local web application (e.g. L<Mojolicious>)
925              
926             =item *
927              
928             additional output formats beyond C<flag>, C<basic>, C<strict_basic>, and C<terse> (L<https://json-schema.org/draft/2019-09/json-schema-core.html#rfc.section.10>)
929              
930             =item *
931              
932             examination of the C<$schema> keyword for deviation from the standard metaschema, including changes to vocabulary behaviour
933              
934             =back
935              
936             Additionally, some small errors in the specification (which have been fixed in the next draft
937             specification version) are fixed here rather than implementing the precise but unintended behaviour,
938             most notably in the use of json pointers rather than fragment-only URIs in C<instanceLocation> and
939             C<keywordLocation> in annotations and errors. (Use the C<strict_basic>
940             L<JSON::Schema::Draft201909/output_format> to revert this change.)
941              
942             =head1 SECURITY CONSIDERATIONS
943              
944             The C<pattern> and C<patternProperties> keywords, and the C<regex> format validator,
945             evaluate regular expressions from the schema.
946             No effort is taken (at this time) to sanitize the regular expressions for embedded code or
947             potentially pathological constructs that may pose a security risk, either via denial of service
948             or by allowing exposure to the internals of your application. B<DO NOT USE SCHEMAS FROM UNTRUSTED
949             SOURCES.>
950              
951             =head1 SEE ALSO
952              
953             =over 4
954              
955             =item *
956              
957             L<https://json-schema.org>
958              
959             =item *
960              
961             L<RFC8259: The JavaScript Object Notation (JSON) Data Interchange Format|https://tools.ietf.org/html/rfc8259>
962              
963             =item *
964              
965             L<RFC3986: Uniform Resource Identifier (URI): Generic Syntax|https://tools.ietf.org/html/rfc3986>
966              
967             =item *
968              
969             L<Test::JSON::Schema::Acceptance>: contains the official JSON Schema test suite
970              
971             =item *
972              
973             L<JSON::Schema::Tiny>: a more minimal implementation of the specification, with fewer dependencies
974              
975             =item *
976              
977             L<https://json-schema.org/draft/2019-09/release-notes.html>
978              
979             =item *
980              
981             L<Understanding JSON Schema|https://json-schema.org/understanding-json-schema>: tutorial-focused documentation
982              
983             =back
984              
985             =head1 SUPPORT
986              
987             Bugs may be submitted through L<https://github.com/karenetheridge/JSON-Schema-Draft201909/issues>.
988              
989             I am also usually active on irc, as 'ether' at C<irc.perl.org> and C<irc.libera.chat>.
990              
991             =head1 AUTHOR
992              
993             Karen Etheridge <ether@cpan.org>
994              
995             =head1 COPYRIGHT AND LICENCE
996              
997             This software is copyright (c) 2020 by Karen Etheridge.
998              
999             This is free software; you can redistribute it and/or modify it under
1000             the same terms as the Perl 5 programming language system itself.
1001              
1002             =cut