File Coverage

blib/lib/Cookie/Domain.pm
Criterion Covered Total %
statement 226 334 67.6
branch 79 188 42.0
condition 19 79 24.0
subroutine 38 42 90.4
pod 15 15 100.0
total 377 658 57.2


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## Cookies API for Server & Client - ~/lib/Cookie/Domain.pm
3             ## Version v0.1.4
4             ## Copyright(c) 2022 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2021/05/06
7             ## Modified 2023/09/19
8             ## You can use, copy, modify and redistribute this package and associated
9             ## files under the same terms as Perl itself.
10             ##----------------------------------------------------------------------------
11             package Cookie::Domain;
12             BEGIN
13             {
14 3     3   113916 use strict;
  3         15  
  3         94  
15 3     3   26 use warnings;
  3         6  
  3         94  
16 3     3   18 use warnings::register;
  3         6  
  3         347  
17 3     3   479 use parent qw( Module::Generic );
  3         290  
  3         16  
18 3     3   125320 use vars qw( $DOMAIN_RE $PUBLIC_SUFFIX_DATA $VERSION );
  3         6  
  3         204  
19 3     3   1042 use DateTime;
  3         548059  
  3         98  
20 3     3   771 use DateTime::Format::Strptime;
  3         213532  
  3         45  
21 3     3   4543 use Module::Generic::File qw( tempfile );
  3         214236  
  3         48  
22 3     3   3607 use JSON;
  3         32178  
  3         23  
23 3     3   2042 use Net::IDN::Encode ();
  3         312762  
  3         128  
24 3     3   41 use Want;
  3         9  
  3         254  
25 3     3   23 use constant URL => 'https://publicsuffix.org/list/effective_tld_names.dat';
  3         7  
  3         908  
26             # Properly formed domain name according to rfc1123
27 3     3   23 our $DOMAIN_RE = qr/^
28             (?:
29             [[:alnum:]]
30             (?:
31             (?:[[:alnum:]-]){0,61}
32             [[:alnum:]]
33             )?
34             (?:
35             \.[[:alnum:]]
36             (?:
37             (?:[[:alnum:]-]){0,61}
38             [[:alnum:]]
39             )?
40             )*
41             )
42             $/x;
43 3         84 our $VERSION = 'v0.1.4';
44             };
45              
46 3     3   34 use strict;
  3         9  
  3         88  
47 3     3   17 use warnings;
  3         6  
  3         10982  
48              
49             sub init
50             {
51 11     11 1 1143 my $self = shift( @_ );
52 11         77 my $base = Module::Generic::File::file( __FILE__ )->parent;
53 11         3005193 $self->{file} = $base->child( 'public_suffix_list.txt' );
54 11         672147 $self->{json_file} = Module::Generic::File->sys_tmpdir->child( 'public_suffix.json' );
55 11         2103325 $self->{meta} = {};
56 11         44737 $self->{min_suffix} = 0;
57 11         133 $self->{suffixes} = {};
58 11         123 $self->{_init_strict_use_sub} = 1;
59 11 50       261 $self->SUPER::init( @_ ) || return( $self->pass_error );
60 11 50       1397 unless( $self->{no_load} )
61             {
62 11 50       216 $self->load || return( $self->pass_error );
63             }
64 11         92 return( $self );
65             }
66              
67             sub cron_fetch
68             {
69             # Cookie::Domain->cron_fetch
70             # $obj->cron_fetch
71             # Cookie::Domain->cron_fetch( $hash_ref );
72             # $obj->cron_fetch( $hash_ref );
73             # Cookie::Domain->cron_fetch( %options );
74             # $obj->cron_fetch( %options );
75 0     0 1 0 my( $this, $self );
76 0         0 my $opts = {};
77 0 0 0     0 if( scalar( @_ ) && ( ref( $_[0] ) eq __PACKAGE__ || $_[0] eq __PACKAGE__ ) )
      0        
78             {
79 0         0 $this = shift( @_ );
80             }
81 0 0 0     0 if( @_ == 1 && ref( $_[0] ) eq 'HASH' )
    0          
82             {
83 0         0 $opts = shift( @_ );
84             }
85             elsif( !( scalar( @_ ) % 2 ) )
86             {
87 0         0 $opts = { @_ };
88             }
89            
90 0 0       0 if( ref( $this ) )
91             {
92 0         0 $self = $this;
93             }
94             else
95             {
96 0   0     0 $this //= __PACKAGE__;
97 0         0 $self = $this->new( $opts );
98             }
99 0   0     0 $opts->{file} //= '';
100 0   0     0 my $file = $opts->{file} || $self->file;
101 0 0       0 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( $file );
102 0         0 require LWP::UserAgent;
103 0         0 my $ua = LWP::UserAgent->new(
104             agent => "Cookie::Domain/" . $VERSION,
105             );
106 0         0 my $meta = $self->meta;
107 0         0 my $req_headers = {};
108 0         0 my $dont_have_etag = 0;
109             my $mtime = $meta->{db_last_modified}
110             ? $meta->{db_last_modified}
111 0 0 0     0 : ( $file->exists && !$file->is_empty )
    0          
112             ? $file->mtime
113             : undef;
114             # If we have already a local file and it is not empty, let's use the etag when making the request
115 0 0 0     0 if( $meta->{etag} && $file->exists && !$file->is_empty )
    0 0        
116             {
117 0         0 $meta->{etag} =~ s/^\"([^"]+)\"$/$1/;
118 0         0 $req_headers->{'If-None-Match'} = qq{"$meta->{etag}"};
119             }
120             elsif( !$meta->{etag} )
121             {
122 0         0 $dont_have_etag = 1;
123             # <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since>
124 0 0 0     0 if( defined( $mtime ) && $mtime )
125             {
126 0         0 my $dt = $self->_parse_timestamp( $mtime );
127 0 0       0 if( $dt )
128             {
129             # HTTP Date format
130 0         0 my $dt_fmt = DateTime::Format::Strptime->new(
131             pattern => '%a, %d %b %Y %H:%M:%S GMT',
132             locale => 'en_GB',
133             time_zone => 'GMT',
134             );
135 0         0 $dt->set_formatter( $dt_fmt );
136 0         0 $req_headers->{ 'If-Modified-Since' } = $dt;
137             }
138             }
139             }
140            
141             # try-catch
142 0         0 local $@;
143             my $resp = eval
144 0         0 {
145 0         0 $ua->get( URL, %$req_headers );
146             };
147 0 0       0 if( $@ )
148             {
149 0         0 return( $self->error( "Error trying to perform an HTTP GET request to ", URL, ": $@" ) );
150             }
151 0         0 my $code = $resp->code;
152             # try-catch
153             my $data = eval
154 0         0 {
155 0         0 $resp->decoded_content( default_charset => 'utf-8', alt_charset => 'utf-8' );
156             };
157 0 0       0 if( $@ )
158             {
159 0         0 return( $self->error( "Error decoding response content: $@" ) );
160             }
161 0         0 my $last_mod = $resp->header( 'Last-Modified' );
162              
163 0         0 my $tz;
164             # DateTime::TimeZone::Local will die ungracefully if the local timezeon is not set with the error:
165             # "Cannot determine local time zone"
166             # try-catch
167             $tz = eval
168 0         0 {
169 0         0 DateTime::TimeZone->new( name => 'local' );
170             };
171 0 0       0 if( $@ )
172             {
173 0         0 $tz = DateTime::TimeZone->new( name => 'UTC' );
174             }
175            
176 0 0       0 if( $last_mod )
177             {
178 0         0 $last_mod = $self->_parse_timestamp( $last_mod )->set_time_zone( $tz );
179             }
180             else
181             {
182 0         0 $last_mod = DateTime->now( time_zone => $tz );
183             }
184 0         0 my $epoch = $last_mod->epoch;
185 0 0       0 if( $resp->header( 'etag' ) )
186             {
187 0 0 0     0 $dont_have_etag = $resp->header( 'etag' ) eq ( $meta->{etag} // '' ) ? 0 : 1;
188 0         0 $meta->{etag} = $resp->header( 'etag' );
189 0         0 $meta->{etag} =~ s/^\"([^"]+)\"$/$1/;
190             }
191            
192 0 0 0     0 if( $code == 304 ||
    0 0        
    0 0        
193             ( !$file->is_empty && $mtime && $mtime == $epoch ) )
194             {
195 0 0       0 if( !$self->suffixes->length )
196             {
197 0 0       0 $self->load_public_suffix || return( $self->pass_error );
198             }
199             # Did not have an etag, but I do have one now
200 0 0 0     0 if( $dont_have_etag && $meta->{etag} )
201             {
202 0 0       0 $self->save_as_json || return( $self->pass_error );
203             }
204 0         0 return( $self );
205             }
206             elsif( $code ne 200 )
207             {
208 0         0 return( $self->error( "Failed to get the remote public domain list. Server responded with code '$code': ", $resp->as_string ) );
209             }
210             elsif( !length( $data ) )
211             {
212 0         0 return( $self->error( "Remote server returned no data." ) );
213             }
214 0 0       0 $file->unload_utf8( $data, { lock => 1 } ) || return( $self->error( "Unable to open public suffix data file \"$file\" in write mode: ", $file->error ) );
215 0         0 $file->unlock;
216 0         0 $file->utime( $epoch, $epoch );
217 0 0       0 $self->load_public_suffix || return( $self->pass_error );
218 0 0       0 $self->save_as_json || return( $self->pass_error );
219              
220 0         0 return( $self );
221             }
222              
223             sub decode
224             {
225 0     0 1 0 my $self = shift( @_ );
226 0         0 my $name = shift( @_ );
227 0 0       0 return( '' ) if( !length( $name ) );
228             # try-catch
229 0         0 local $@;
230             my $rv = eval
231 0         0 {
232 0         0 return( Net::IDN::Encode::domain_to_ascii( $name ) );
233             };
234 0 0       0 if( $@ )
235             {
236 0         0 return( $self->error( "An unexpected error occurred while decoding a domain name: $@" ) );
237             }
238 0         0 return( $rv );
239             }
240              
241             sub encode
242             {
243 0     0 1 0 my $self = shift( @_ );
244 0         0 my $name = shift( @_ );
245 0 0       0 return( '' ) if( !length( $name ) );
246             # try-catch
247 0         0 local $@;
248             my $rv = eval
249 0         0 {
250 0         0 return( Net::IDN::Encode::domain_to_unicode( $name ) );
251             };
252 0 0       0 if( $@ )
253             {
254 0         0 return( $self->error( "An unexpected error occurred while encoding a domain name: $@" ) );
255             }
256 0         0 return( $rv );
257             }
258              
259 14     14 1 1315 sub file { return( shift->_set_get_object_without_init( 'file', 'Module::Generic::File', @_ ) ); }
260              
261 11     11 1 63 sub json_file { return( shift->_set_get_object_without_init( 'json_file', 'Module::Generic::File', @_ ) ); }
262              
263             sub load
264             {
265 11     11 1 72 my $self = shift( @_ );
266 11         100 my $f = $self->file;
267 11         580 my $json_file = $self->json_file;
268 11 100 66     690 if( defined( $PUBLIC_SUFFIX_DATA ) && ref( $PUBLIC_SUFFIX_DATA ) eq 'HASH' )
    100 66        
269             {
270 9         230 $self->suffixes( $PUBLIC_SUFFIX_DATA );
271 9         309284 $self->meta( {} );
272             }
273             elsif( $json_file && $json_file->exists )
274             {
275 1 50       136 $self->load_json( $json_file ) || return( $self->pass_error );
276 1         4 my $meta = $self->meta;
277 1 50 33     829 if( $f && $f->exists )
278             {
279 1 50 33     125 if( defined( $meta->{db_last_modified} ) && $meta->{db_last_modified} =~ /^\d{10}$/ )
280             {
281 1         346 my $mtime = $f->mtime;
282 1 50       6244 if( $mtime > $meta->{db_last_modified} )
283             {
284 0 0       0 $self->load_public_suffix( $f ) || return( $self->pass_error );
285 0 0       0 $self->save_as_json( $json_file ) || return( $self->pass_error );
286             }
287             }
288             else
289             {
290 0 0       0 $self->load_public_suffix( $f ) || return( $self->pass_error );
291 0 0       0 $self->save_as_json( $json_file ) || return( $self->pass_error );
292             }
293             }
294             }
295             else
296             {
297 1 0 33     94 return( $self->error( "No public suffix data file or json cache data file was specified." ) ) if( !$json_file && !$f );
298 1 50       31 $self->load_public_suffix( $f ) || return( $self->pass_error );
299 1 50       28 $self->save_as_json( $json_file ) || return( $self->pass_error );
300             }
301 11         14806 return( $self );
302             }
303              
304             sub load_json
305             {
306 1     1 1 12 my $self = shift( @_ );
307 1   50     10 my $file = shift( @_ ) || $self->json_file || return( $self->error( "No json file was specified." ) );
308 1 50       40 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( "$file" );
309             # Basic error checking
310 1 50       124 if( !$file->exists )
    50          
    50          
311             {
312 0         0 return( $self->error( "Json data file provided \"$file\" does not exist." ) );
313             }
314             elsif( !$file->can_read )
315             {
316 0         0 return( $self->error( "Json data file provided \"$file\" lacks enough permission to read." ) );
317             }
318             elsif( $file->is_empty )
319             {
320 0         0 return( $self->error( "Json data file provided \"$file\" is empty." ) );
321             }
322 1         45186 my $json = $file->load_utf8;
323 1 50       8956 return( $self->error( "Unable to open the public suffix json data file in read mode: $!" ) ) if( !defined( $json ) );
324 1 50       1381 return( $self->error( "No data found from public domain json file \"$file\"." ) ) if( !CORE::length( $json ) );
325             # try-catch
326 1         8 local $@;
327             my $ref = eval
328 1         11 {
329 1         112 my $j = JSON->new->relaxed;
330 1         11425 return( $j->decode( $json ) );
331             };
332 1 50       14 if( $@ )
333             {
334 0         0 return( $self->error( "An unexpected error occurred while trying to load json data of public suffixes: $@" ) );
335             }
336 1 50       10 if( ref( $ref->{suffixes} ) eq 'HASH' )
337             {
338 1         7 $PUBLIC_SUFFIX_DATA = $ref->{suffixes};
339 1         21 $self->suffixes( $ref->{suffixes} );
340             }
341 1 50       463146 $ref->{meta} = {} if( ref( $ref->{meta} ) ne 'HASH' );
342 1         19 $self->meta( $ref->{metadata} );
343 1         1625 return( $self );
344             }
345              
346             sub load_public_suffix
347             {
348 1     1 1 9 my $self = shift( @_ );
349 1   50     10 my $file = shift( @_ ) || $self->file || return( $self->error( "No public suffix data file was provided." ) );
350 1 50       64 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( "$file" );
351             # Basic error checking
352 1 50       96 if( !$file->exists )
    50          
    50          
353             {
354 0         0 return( $self->error( "Public suffix data file provided \"$file\" does not exist." ) );
355             }
356             elsif( !$file->can_read )
357             {
358 0         0 return( $self->error( "Public suffix data file provided \"$file\" lacks enough permission to read." ) );
359             }
360             elsif( $file->is_empty )
361             {
362 0         0 return( $self->error( "Public suffix data file provided \"$file\" is empty." ) );
363             }
364 1 50       44840 $file->open( '<', { binmode => 'utf-8' }) || return( $self->error( "Unable to open the public suffix data file in read mode: ", $file->error ) );
365 1         8041 my $ref = {};
366             $file->line(sub
367             {
368 14952     14952   14891861 my $l = shift( @_ );
369 14952         26090 chomp( $l );
370 14952         42493 $l =~ s,//.*$,,;
371 14952         29451 $l =~ s,[[:blank:]\h]+$,,g;
372 14952 100       48223 return(1) if( !CORE::length( $l ) );
373 9105         13061 my $orig;
374 9105 100       33890 if( $l !~ /^[\x00-\x7f]*$/ )
375             {
376 461         755 $orig = $l;
377             # try-catch
378 461         631 local $@;
379             $l = eval
380 461         644 {
381 461         1413 Net::IDN::Encode::domain_to_ascii( $l );
382             };
383 461 50       256152 if( $@ )
384             {
385 0         0 return( $self->error( "An unexpected error occurred while parsing the public suffix data file content: $@" ) );
386             }
387             }
388 9105         16699 my $is_neg = $l =~ s,^\!,,;
389 9105         27379 my @labels = split( /\./, $l );
390 9105         12842 my $h = $ref;
391 9105         15080 foreach my $label ( reverse( @labels ) )
392             {
393 19625   100     99691 $h = $h->{ $label } ||= {};
394             }
395 9105 100       17248 $h->{_is_neg} = $is_neg if( $is_neg );
396 9105 100       33702 $h->{_original} = $orig if( defined( $orig ) );
397 1         54 });
398              
399 1         1049 $file->close;
400 1         2088 $self->suffixes( $ref );
401 1         463239 $PUBLIC_SUFFIX_DATA = $ref;
402 1         7 return( $self );
403             }
404              
405 12     12 1 106 sub meta { return( shift->_set_get_hash_as_mix_object( 'meta', @_ ) ); }
406              
407 82     82 1 456 sub min_suffix { return( shift->_set_get_number( 'min_suffix', @_ ) ); }
408              
409 0     0 1 0 sub no_load { return( shift->_set_get_boolean( 'no_load', @_ ) ); }
410              
411             sub save_as_json
412             {
413 1     1 1 16 my $self = shift( @_ );
414 1   50     87 my $file = shift( @_ ) || $self->json_file || return( $self->error( "No json file was specified." ) );
415 1 50       18 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( "$file" );
416 1         215 my $data = $self->suffixes;
417 1         975 my $tz;
418             # DateTime::TimeZone::Local will die ungracefully if the local timezeon is not set with the error:
419             # "Cannot determine local time zone"
420             # try-catch
421 1         1 local $@;
422             $tz = eval
423 1         5 {
424 1         36 DateTime::TimeZone->new( name => 'local' );
425             };
426 1 50       10942 if( $@ )
427             {
428 0         0 $tz = DateTime::TimeZone->new( name => 'UTC' );
429             }
430 1         20 my $dt_fmt = DateTime::Format::Strptime->new(
431             pattern => '%FT%T%z',
432             locale => 'en_GB',
433             time_zone => $tz->name,
434             );
435 1         5086 my $today = DateTime->now( time_zone => $tz, formatter => $dt_fmt );
436 1         1076 my $meta = $self->meta;
437             my $ref =
438             {
439             metadata =>
440             {
441             created => $today->stringify,
442             module => 'Cookie::Domain',
443             ( $self->file && $self->file->exists ? ( db_last_modified => $self->file->mtime ) : () ),
444 1 50 33     1430 ( $meta->{etag} ? ( etag => $meta->{etag} ) : () ),
    50          
445             },
446             suffixes => $data
447             };
448 1         17243 my $j = JSON->new->canonical->pretty->convert_blessed;
449             # try-catch
450             my $json = eval
451 1         4 {
452 1         22 $j->encode( $ref );
453             };
454 1 50       535794 if( $@ )
455             {
456 0         0 return( $self->error( "An error occurred while trying to save data to json file \"$file\": $@" ) );
457             }
458 1 50       27 $file->unload_utf8( $json ) ||
459             return( $self->error( "Unable to write json data to file \"$file\": ", $file->error ) );
460 1         50968 return( $self );
461             }
462              
463             sub stat
464             {
465 97     97 1 207536 my $self = shift( @_ );
466 97   100     481 my $name = shift( @_ ) || return( $self->error( "No host name was provided" ) );
467 95         545 my $opts = $self->_get_args_as_hash( @_ );
468 95 100       3021 $opts->{min_suffix} = $self->min_suffix if( !exists( $opts->{min_suffix} ) );
469 95         460542 my $idn;
470             # Punnycode
471 95 100       1034 if( $name !~ /^[\x00-\x7f]*$/ )
472             {
473 12         33 $idn = $name;
474 12         63 $name = Net::IDN::Encode::domain_to_ascii( $name );
475 12         5130 $name = lc( $name );
476 12         93 $name =~ s/^[[:blank:]\h]+|[[:blank:]\h]+$//g;
477 12         58 $name =~s/\.$//;
478             }
479             else
480             {
481 83         459 $name =~ s/^\.|\.$//g;
482 83         279 $name = lc( $name );
483             }
484 95 50       1379 return( $self->error( "Malformed domain name \"$name\"" ) ) if( $name !~ /$DOMAIN_RE/ );
485 95         786 my $labels = $self->new_array( [split( /\./, $name )] );
486 95         2933 my $any = {};
487 95         216 my $host = {};
488 95         229 my $expt = {};
489 95         364 my $ref = $self->suffixes;
490 95         75243 my $def = $ref;
491 95         290 my $stack = [];
492             # The following algorithm is borrowed from IO-Socket-SSL
493             # for( my $i = 0; $i < scalar( @$labels ); $i++ )
494             $labels->reverse->for(sub
495             {
496             # my $label = $labels->[$i];
497 201     201   6125 my( $i, $label ) = @_;
498 201         325 my $buff = [];
499 201 100       920 if( my $public_label_def = $def->{ $label } )
    100          
500             {
501             # name match, continue with next path element
502 121         3347 push( @$buff, $public_label_def );
503 121 100 66     763 if( exists( $public_label_def->{_is_neg} ) && $public_label_def->{_is_neg} )
504             {
505 4         208 $expt->{ $i + 1 }->{ $i + 1 } = -1;
506             }
507             else
508             {
509 117         3244 $host->{ $i + 1 }->{ $i + 1 } = 1;
510             }
511             }
512             elsif( exists( $def->{ '*' } ) )
513             {
514 8         345 my $public_label_def = $def->{ '*' };
515 8         155 push( @$buff, $public_label_def );
516 8 50 33     52 if( exists( $public_label_def->{_is_neg} ) && $public_label_def->{_is_neg} )
517             {
518 0         0 $expt->{ $i + 1 }->{ $i + 1 } = -1;
519             }
520             else
521             {
522 8         210 $any->{ $i + 1 }->{ $i + 1 } = 1;
523             }
524             }
525            
526 3     3   33 no warnings 'exiting';
  3         19  
  3         1773  
527             LABEL:
528             # We found something
529 201 100       3477 if( @$buff )
530             {
531             # take out the one we just added
532 129         291 $def = shift( @$buff );
533             # if we are circling within the next_choice loop, add the previous step to $stack
534 129 50       350 push( @$stack, [ $buff, $i ] ) if( @$buff );
535             # go deeper
536 129         454 next;
537             # The following works too by the way, but let's keep it traditional
538             # return(1);
539             }
540              
541             # We did not find anything, so we backtrack
542 72 50       436 last if( !scalar( @$stack ) );
543             # The following works too by the way, but let's keep it traditional
544             # return if( !scalar( @$stack ) );
545             # Recall our last entry
546 0         0 ( $buff, $_[0] ) = @{ pop( @$stack ) };
  0         0  
547 0         0 goto LABEL;
548 95         696 });
549            
550             # remove all exceptions from wildcards
551 95 100       1305 delete( @$any{ keys( %$expt ) } ) if( scalar( keys( %$expt ) ) );
552             # get longest match
553 51         217 my( $len ) = sort{ $b <=> $a } (
554 95         1680 keys( %$any ), keys( %$host ), map{ $_-1 } keys( %$expt )
  4         42  
555             );
556 95 100       328 $len = $opts->{min_suffix} if( !defined( $len ) );
557 95 100       409 $len += int( $opts->{add} ) if( $opts->{add} );
558 95         249 my $suffix;
559             my $sub;
560 95 100       374 if( $len < $labels->length )
    50          
561             {
562 71         2613600 $suffix = $self->new_array( [ $labels->splice( -$len, $len ) ] );
563             }
564             elsif( $len > 0 )
565             {
566 24         888707 $suffix = $labels;
567 24         128 $labels = $self->new_array;
568             }
569             else
570             {
571 0         0 $suffix = $self->new_array;
572             }
573 95 100       13268 if( !$suffix->length )
574             {
575 10 50       367988 if( want( 'OBJECT' ) )
576             {
577 0         0 rreturn( Module::Generic::Null->new );
578             }
579             else
580             {
581 10         573 return( '' );
582             }
583             }
584 85         3114766 $suffix = $suffix->join( '.' );
585 85         16896 $name = $labels->pop;
586 85 100       5951 $sub = $labels->join( '.' ) if( $labels->length );
587 85 100       3101980 if( defined( $idn ) )
588             {
589 12         2226 $suffix = Net::IDN::Encode::domain_to_unicode( $suffix );
590 12 100       3764 $name = Net::IDN::Encode::domain_to_unicode( $name ) if( defined( $name ) );
591 12 100       1732 $sub = Net::IDN::Encode::domain_to_unicode( $sub ) if( defined( $sub ) );
592             }
593 85         11730 return(Cookie::Domain::Result->new({ name => $name, sub => $sub, suffix => $suffix }));
594             }
595              
596 107     107 1 604 sub suffixes { return( shift->_set_get_hash_as_mix_object( 'suffixes', @_ ) ); }
597              
598             # NOTE: Cookie::Domain::Result class
599             {
600             package
601             Cookie::Domain::Result;
602             BEGIN
603             {
604 3     3   31 use strict;
  3         7  
  3         78  
605 3     3   30 use warnings;
  3         7  
  3         146  
606 3     3   22 use parent qw( Module::Generic::Hash );
  3         6  
  3         29  
607 3     3   323366 use Want;
  3         9  
  3         216  
608 3     3   523 our $VERSION = 'v0.1.0';
609             };
610            
611             sub domain
612             {
613 15     15   1112 my $self = shift( @_ );
614 15 50 33     123 if( !$self->name->length || !$self->suffix->length )
615             {
616 0         0 return( Module::Generic::Scalar->new( '' ) );
617             }
618 15         544166 return( $self->name->join( '.', $self->suffix ) );
619             }
620            
621 105     105   97186 sub name { return( shift->_set_get_scalar_as_object( 'name', @_ ) ); }
622              
623 75     75   132522 sub sub { return( shift->_set_get_scalar_as_object( 'sub', @_ ) ); }
624              
625 105     105   669943 sub suffix { return( shift->_set_get_scalar_as_object( 'suffix', @_ ) ); }
626             }
627              
628             1;
629             # NOTE: POD
630             __END__
631              
632             =encoding utf-8
633              
634             =head1 NAME
635              
636             Cookie::Domain - Domain Name Public Suffix Query Interface
637              
638             =head1 SYNOPSIS
639              
640             use Cookie::Domain;
641             my $dom = Cookie::Domain->new( min_suffix => 1, debug => 3 ) ||
642             die( Cookie::Domain->error, "\n" );
643             my $res = $dom->stat( 'www.example.or.uk' ) || die( $dom->error, "\n" );
644             # Check for potential errors;
645             die( $dom->error ) if( !defined( $res ) );
646             # stat() returns an empty string if nothing was found and undef upon error
647             print( "Nothing found\n" ), exit(0) if( !$res );
648             print( $res->domain, "\n" ); # example.co.uk
649             print( $res->name, "\n" ); # example
650             print( $res->sub, "\n" ); # www
651             print( $res->suffix, "\n" ); # co.uk
652              
653             # Load the public suffix. This is done automatically, so no need to do it
654             $dom->load_public_suffix( '/some/path/on/the/filesystem/data.txt' ) ||
655             die( $dom->error );
656             # Then, save it as json data for next time
657             $dom->save_as_json( '/var/domain/public_suffix.json' ) ||
658             die( $dom->error, "\n" );
659             say $dom->suffixes->length, " suffixes data loaded.";
660              
661             =head1 VERSION
662              
663             v0.1.4
664              
665             =head1 DESCRIPTION
666              
667             This is an interface to query the C<Public Suffix> list courtesy of the mozilla project.
668              
669             This list contains all the top level domains, a.k.a. zones and is used to determine what part of a domain name constitute the top level domain, what part is the domain, a.k.a. C<label> and what part (the rest) constitute the subdomain.
670              
671             Consider C<www.example.org>. In this example, C<org> is the top level domain, C<example> is the name, C<example.org> is the domain, and C<www> is the subdomain.
672              
673             This is easy enough, but there are cases where it is tricky to know which label (or part) is the domain part or the top level domain part. For example, C<www.example.com.sg>, C<com.sg> is the top level domain, C<example> the name, C<example.com.sg> is the domain, and C<www> the subdomain.
674              
675             This module will use a json cache data file to speed up the loading of the suffixes, a.k.a, top level domains, data.
676              
677             By default the location of this json file will be C<public_suffix.json> under your system temporary directory, but you can override this by specifying your own location upon object instantiation:
678              
679             my $dom = Cookie::Domain->new( json_file => '/home/joe/var/public_suffix.json' );
680              
681             =head1 METHODS
682              
683             =head2 new
684              
685             This initiates the package and take the following parameters either as an hash or hash reference:
686              
687             =over 4
688              
689             =item * C<debug>
690              
691             Optional. If set with a positive integer, this will activate verbose debugging message
692              
693             =item * C<file>
694              
695             Specify the location of the Public Suffix data file. The default one is under the same directory as this module with the file name C<public_suffix_list.txt>
696              
697             You can download a different (new) version and specify with this parameter where it will be found.
698              
699             =item * C<json_file>
700              
701             Specify the location of the json cache data file. The default location is set using L<Module::Generic::File> to get the system temporary directory and the file name C<public_suffix.json>.
702              
703             This json file is created once upon initiating an object and if it does not already exist. See the L</json_file> method for more information.
704              
705             =item * C<min_suffix>
706              
707             Sets the minimum suffix length required. Default to 0.
708              
709             =item * C<no_load>
710              
711             If this is set to true, this will prevent the object instantiation method from loading the public suffix file upon object instantiation. Normally you would not want to do that, unless you want to control when the file is loaded before you call L</stat>. This is primarily used by L</cron_fetch>
712              
713             =back
714              
715             =head2 cron_fetch
716              
717             You need to have installed the package L<LWP::UserAgent> to use this method.
718              
719             This method can also be called as a package subroutine, such as C<Cookie::Domain::cron_fetch>
720            
721             Its purpose is to perform a remote connection to L<https://publicsuffix.org/list/effective_tld_names.dat> and check for an updated copy of the public suffix data file.
722              
723             It checks if the remote file has changed by using the http header field C<Last-Modified> in the server response, or if there is already an C<etag> stored in the cache, it performs a conditional http query using C<If-None-Matched>. See L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag> for more information on those types of query.
724              
725             This is important to save bandwidth and useless processing.
726              
727             If the file has indeed changed, L</save_as_json> is invoked to refresh the cache.
728              
729             It returns the object it was called with for chaining.
730              
731             =head2 decode
732              
733             Takes a domain name, or rather called a host name, such as C<www.東京.jp> or C<今何年.jp> and this will return its punycode ascii representation prefixed with a so-called ASCII Compatible Encoding, a.k.a. C<ACE>. Thus, using our previous examples, this would produce respectively C<www.xn--1lqs71d.jp> and C<xn--wmq0m700b.jp>
734              
735             Even if the host name contains non-ascii dots, they will be recognised. For example C<www。東京。jp> would still be successfully decoded to C<www.xn--1lqs71d.jp>
736              
737             If the host name provided is not an international domain name (a.k.a. IDN), it is simply returned as is. Thus, if C<www.example.org> is provided, it would return C<www.example.org>
738              
739             If an error occurred, it sets an error object and returns L<perlfunc/undef>. The error can then be retrieved using L<Module::Generic/error> inherited by this module.
740              
741             It uses L<Net::IDN::Encode/domain_to_ascii> to perform the actual decoding.
742              
743             =head2 encode
744              
745             This does the reverse operation from L</decode>.
746              
747             It takes a domain name, or rather called a host name, already decoded, and with its so called ASCII Compatible Encoding a.k.a. C<ACE> prefix C<xn--> such as C<xn--wmq0m700b.jp> and returns its encoded version in perl internal utf8 encoding. Using the previous example, and this would return C<今何年.jp>. The C<ACE> prefix is required to tell apart international domain name (a.k.a. IDN) from other pure ascii domain names.
748              
749             Just like in L</decode>, if a non-international domain name is provided, it is returned as is. Thus, if C<www.example.org> is provided, it would return C<www.example.org>
750              
751             Note that this returns the name in perl's internal utf8 encoding, so if you need to save it to an utf8 file or print it out as utf8 string, you still need to encode it in utf8 before. For example:
752              
753             use Cookie::Domain;
754             use open ':std' => ':utf8';
755             my $d = Cookie::Domain->new;
756             say $d->encode( "xn--wmq0m700b.jp" );
757              
758             Or
759              
760             use Cookie::Domain;
761             use Encode;
762             my $d = Cookie::Domain->new;
763             my $encoded = $d->encode( "xn--wmq0m700b.jp" );
764             say Encode::encode_utf8( $encoded );
765              
766             If an error occurred, it sets an error object and returns L<perlfunc/undef>. The error can then be retrieved using L<Module::Generic/error> inherited by this module.
767              
768             It uses L<Net::IDN::Encode/domain_to_unicode> to perform the actual encoding.
769              
770             =head2 file
771              
772             Sets the file path to the Public Suffix file. This file is a public domain file at the initiative of Mozilla Foundation and its latest version can be accessed here: L<https://publicsuffix.org/list/>
773              
774             =head2 json_file
775              
776             Sets the file path of the json cache data file. THe purpose of this file is to contain a json representation of the parsed data from the Public Suffix data file. This is to avoid re-parsing it each time and instead load the json file using the XS module L<JSON>
777              
778             =head2 load
779              
780             This method takes no parameter and relies on the properties set with L</file> and L</json_file>.
781              
782             If the hash data is already accessibly in a module-wide variable, the data is taken from it.
783              
784             Otherwise, if json_file is set and accessible, this will load the data from it, otherwise, it will load the data from the file specified with L</file> and save it as json.
785              
786             If the json file meta data enclosed, specifically the property I<db_last_modified> has a unix timestamp value lower than the last modification timestamp of the public suffix data file, then, L</load> will reload that data file and save it as json again.
787              
788             That way, all you need to do is set up a crontab to fetch the latest version of that public suffix data file.
789              
790             For example, to fetch it every day at 1:00 in the morning:
791              
792             0 1 * * * perl -MCookie::Domain -e 'Cookie::Domain::cron_fetch' >/dev/null 2>&1
793              
794             But if you want to store the public suffix data file somewhere other than the default location:
795              
796             0 1 * * * perl -MCookie::Domain -e 'my $d=Cookie::Domain->new(file=>"/some/system/file.txt"); $d->cron_fetch' >/dev/null 2>&1
797              
798             See your machine manpage for C<crontab> for more detail.
799              
800             The data read are loaded into L</suffixes>.
801              
802             It returns the current object for chaining.
803              
804             =head2 load_json
805              
806             This takes a file path to the json cache data as the only argument, and attempt to read its content and set it onto the data accessible with L</suffixes>.
807              
808             If an error occurs, it set an error object using L<Module::Generic/error> and returns L<perlfunc/undef>
809              
810             It returns its current object for chaining.
811              
812             =head2 load_public_suffix
813              
814             This is similar to the method L</load_json> above.
815              
816             This takes a file path to the Public Suffix data as the only argument, read its content, parse it using the algorithm described at L<https://publicsuffix.org/list/> and set it onto the data accessible with L</suffixes> and also onto the package-wide global variable to make the data available across object instantiations.
817              
818             If an error occurs, it set an error object using L<Module::Generic/error> and returns L<perlfunc/undef>
819              
820             It returns its current object for chaining.
821              
822             =head2 meta
823              
824             Returns an L<hash object|Module::Generic::Hash> of meta information pertaining to the public suffix file. This is used primarily by L</cron_fetch>
825              
826             =head2 min_suffix
827              
828             Sets or gets the minimum suffix required as an integer value.
829              
830             It returns the current value as a L<Module::Generic::Number> object.
831              
832             =head2 no_load
833              
834             If this is set to true, this will prevent the object instantiation method from loading the public suffix file upon object instantiation. Normally you would not want to do that, unless you want to control when the file is loaded before you call L</stat>. This is primarily used by L</cron_fetch>
835              
836             =head2 save_as_json
837              
838             This takes as sole argument the file path where to save the json cache data and save the data accessible with L</suffixes>.
839              
840             It returns the current object for chaining.
841              
842             If an error occurs, it set an error object using L<Module::Generic/error> and returns L<perlfunc/undef>
843              
844             =head2 stat
845              
846             This takes a domain name, such as C<www.example.org> and optionally an hash reference of options and returns:
847              
848             =over 4
849              
850             =item C<undef()>
851              
852             If an error occurred.
853              
854             my $rv = $d->stat( 'www.example.org' );
855             die( "Error: ", $d->error ) if( !defined( $rv ) );
856              
857             =item empty string
858              
859             If there is no data available such as when querying a non existing top level domain.
860              
861             =item A C<Cookie::Domain::Result> object
862              
863             An object with the following properties and methods, although not all are necessarily defined, depending on the results.
864              
865             Accessed as an hash property and this return a regular string, but accessed as a method and they will return a L<Module::Generic::Scalar> object.
866              
867             =over 8
868              
869             =item I<name>
870              
871             The label that immediately follows the suffix (i.e. on its lefthand side).
872              
873             For example, in C<www.example.org>, the I<name> would be C<example>
874              
875             my $res = $dom->stat( 'www.example.org' ) || die( $dom->error );
876             say $res->{name}; # example
877             # or alternatively
878             say $res->name; # example
879              
880             =item I<sub>
881              
882             The sub domain or sub domains that follows the domain on its lefthand side.
883              
884             For example, in C<www.paris.example.fr>, C<www.paris> is the I<sub> and C<example> the I<name>
885              
886             my $res = $dom->stat( 'www.paris.example.fr' ) || die( $dom->error );
887             say $res->{sub}; # www.paris
888             # or alternatively
889             say $res->sub; # www.paris
890              
891             =item I<suffix>
892              
893             The top level domain or I<suffix>. For example, in C<example.com.sg>, C<com.sg> is the suffix and C<example> the I<name>
894              
895             my $res = $dom->stat( 'example.com.sg' ) || die( $dom->error );
896             say $res->{suffix}; # com.sg
897             # or alternatively
898             say $res->suffix; # com.sg
899              
900             What constitute a suffix varies from zone to zone or country to country, hence the necessity of this public domain suffix data file.
901              
902             =back
903              
904             C<Cookie::Domain::Result> objects inherit from L<Module::Generic::Hash>, thus you can do:
905              
906             my $res = $dom->stat( 'www.example.org' ) || die( $dom->error );
907             say $res->length, " properties set.";
908             # which should say 3 since we alway return suffix, name and sub
909              
910             The following additional method is also available as a convenience:
911              
912             =over 8
913              
914             =item I<domain>
915              
916             This is a read only method which returns and empty L<Module::Generic::Scalar> object if the I<name> property is empty, or the properties I<name> and I<suffix> join by a dot '.' and returned as a new L<Module::Generic::Scalar> object.
917              
918             my $res = $dom->stat( 'www.example.com.sg' ) || die( $dom->error );
919             say $res->domain; # example.com.sg
920             say $res->domain->length; # 14
921              
922             =back
923              
924             =back
925              
926             The options accepted are:
927              
928             =over 4
929              
930             =item I<add>
931              
932             This is an integer, and represent the additional length to be added, for the domain name.
933              
934             =item I<min_suffix>
935              
936             This is an integer, and if provided, will override the default value set with L</min_suffix>
937              
938             =back
939              
940             =head2 suffixes
941              
942             This method is used to access the hash repository of all the public suffix data.
943              
944             It is actually an L<Module::Generic::Hash> object. So you could do:
945              
946             say "There are ", $dom->suffixes->length, " rules.";
947              
948             =head1 AUTHOR
949              
950             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
951              
952             =head1 SEE ALSO
953              
954             L<Cookie>, L<Cookie::Jar>, L<Mozilla::PublicSuffix>, L<Domain::PublicSuffix>, L<Net::PublicSuffixList>
955              
956             L<https://publicsuffix.org/list/>
957              
958             =head1 COPYRIGHT & LICENSE
959              
960             Copyright (c) 2021 DEGUEST Pte. Ltd.
961              
962             You can use, copy, modify and redistribute this package and associated files under the same terms as Perl itself.
963              
964             =cut