File Coverage

blib/lib/Data/Validate/Sanctions.pm
Criterion Covered Total %
statement 170 220 77.2
branch 28 62 45.1
condition 42 81 51.8
subroutine 30 36 83.3
pod 9 9 100.0
total 279 408 68.3


line stmt bran cond sub pod time code
1             package Data::Validate::Sanctions;
2              
3 7     7   174569 use strict;
  7         47  
  7         205  
4 7     7   36 use warnings;
  7         9  
  7         443  
5              
6             require Exporter;
7             our @ISA = qw(Exporter);
8             our @EXPORT_OK = qw/is_sanctioned set_sanction_file get_sanction_file/;
9              
10 7     7   40 use Carp;
  7         14  
  7         570  
11 7     7   3362 use Data::Validate::Sanctions::Fetcher;
  7         29  
  7         394  
12 7     7   2440 use Data::Validate::Sanctions::Redis;
  7         21  
  7         227  
13 7     7   57 use File::stat;
  7         21  
  7         83  
14 7     7   503 use File::ShareDir;
  7         14  
  7         286  
15 7     7   831 use YAML::XS qw/DumpFile LoadFile/;
  7         5756  
  7         357  
16 7     7   48 use Scalar::Util qw(blessed);
  7         24  
  7         280  
17 7     7   53 use Date::Utility;
  7         22  
  7         147  
18 7     7   3386 use Data::Compare;
  7         82537  
  7         59  
19 7     7   26435 use List::Util qw(any uniq max min);
  7         17  
  7         549  
20 7     7   52 use Locale::Country;
  7         18  
  7         905  
21 7     7   55 use Text::Trim qw(trim);
  7         18  
  7         326  
22 7     7   42 use Clone qw(clone);
  7         20  
  7         530  
23              
24             our $VERSION = '0.16';
25              
26             my $sanction_file;
27             my $instance;
28              
29 7     7   52 use constant IGNORE_OPERATION_INTERVAL => 8 * 60; # 8 minutes
  7         21  
  7         17066  
30              
31             # for OO
32             sub new { ## no critic (RequireArgUnpacking)
33 5     5 1 24 my ($class, %args) = @_;
34              
35 5   50     28 my $storage = delete $args{storage} // '';
36              
37 5 50       21 return Data::Validate::Sanctions::Redis->new(%args) if $storage eq 'redis';
38              
39 5         14 my $self = {};
40              
41 5   33     24 $self->{sanction_file} = $args{sanction_file} // _default_sanction_file();
42              
43 5         25 $self->{args} = {%args};
44              
45 5         14 $self->{last_modification} = 0;
46 5         16 $self->{last_index} = 0;
47 5         14 $self->{last_data_load} = 0;
48              
49 5   33     35 return bless $self, ref($class) || $class;
50             }
51              
52             sub update_data {
53 0     0 1 0 my ($self, %args) = @_;
54              
55 0         0 $self->_load_data();
56              
57 0         0 my $new_data = Data::Validate::Sanctions::Fetcher::run($self->{args}->%*, %args);
58 0         0 my $updated = 0;
59 0         0 foreach my $k (keys %$new_data) {
60 0   0     0 $self->{_data}->{$k} //= {};
61 0   0     0 $self->{_data}->{$k}->{updated} //= 0;
62 0   0     0 $self->{_data}->{$k}->{content} //= [];
63              
64 0 0 0     0 if (!$new_data->{$k}->{error} && $self->{_data}->{$k}->{error}) {
65 0         0 delete $self->{_data}->{$k}->{error};
66 0         0 $updated = 1;
67             }
68              
69 0 0 0     0 if ($new_data->{$k}->{error}) {
    0          
70 0         0 warn "$k list update failed because: $new_data->{$k}->{error}";
71 0         0 $self->{_data}->{$k}->{error} = $new_data->{$k}->{error};
72 0         0 $updated = 1;
73             } elsif ($self->{_data}{$k}->{updated} != $new_data->{$k}->{updated}
74             || scalar $self->{_data}{$k}->{content}->@* != scalar $new_data->{$k}->{content}->@*)
75             {
76 0 0       0 print "Source $k is updated with new data \n" if $args{verbose};
77 0         0 $self->{_data}->{$k} = $new_data->{$k};
78 0         0 $updated = 1;
79             } else {
80 0 0       0 print "Source $k is not changed \n" if $args{verbose};
81             }
82             }
83              
84 0 0       0 if ($updated) {
85 0         0 $self->_save_data();
86 0         0 $self->_index_data();
87             }
88              
89 0         0 return;
90             }
91              
92             sub last_updated {
93 0     0 1 0 my $self = shift;
94 0         0 my $list = shift;
95              
96 0 0       0 if ($list) {
97 0         0 return $self->{_data}->{$list}->{updated};
98             } else {
99 0         0 $self->_load_data();
100 0   0     0 return max(map { $_->{updated} // 0 } values %{$self->{_data}});
  0         0  
  0         0  
101             }
102             }
103              
104             sub set_sanction_file { ## no critic (RequireArgUnpacking)
105 4   100 4 1 2411 $sanction_file = shift // die "sanction_file is needed";
106 3         192402 undef $instance;
107 3         58 return;
108             }
109              
110             sub get_sanction_file {
111 7   66 7 1 694 $sanction_file //= _default_sanction_file();
112 7 50       66 return $instance ? $instance->{sanction_file} : $sanction_file;
113             }
114              
115             =head2 is_sanctioned
116              
117             Checks if the input profile info matches a sanctioned entity.
118             The arguments are the same as those of B<get_sanctioned_info>.
119              
120             It returns 1 if a match is found, otherwise 0.
121              
122             =cut
123              
124             sub is_sanctioned { ## no critic (RequireArgUnpacking)
125 14     14 1 4704 return (get_sanctioned_info(@_))->{matched};
126             }
127              
128             sub data {
129 0     0 1 0 my ($self) = @_;
130              
131 0 0       0 $self->_load_data() unless $self->{_data};
132              
133 0         0 return $self->{_data};
134             }
135              
136             =head2 _match_other_fields
137              
138             Matches fields possibly available in addition to name and date of birth.
139              
140             Returns a a hash-ref reporting the matched fields if it succeeeds; otherwise returns false (undef).
141              
142             =cut
143              
144             sub _match_other_fields {
145 19     19   40 my ($self, $entry, $args) = @_;
146              
147 19         63 my @optional_fields = qw/place_of_birth residence nationality citizen postal_code national_id passport_no/;
148              
149 19         31 my $matched_args = {};
150 19         35 for my $field (@optional_fields) {
151 133 0 33     251 next unless ($args->{$field} && $entry->{$field} && $entry->{$field}->@*);
      0        
152              
153 0 0   0   0 return undef unless any { $args->{$field} eq $_ } $entry->{$field}->@*;
  0         0  
154 0         0 $matched_args->{$field} = $args->{$field};
155             }
156              
157 19         41 return $matched_args;
158             }
159              
160             =head2 get_sanctioned_info
161              
162             Tries to find a match a sanction entry matching the input profile args.
163             It takes arguments in two forms. In the new API, it takes a hashref containing the following named arguments:
164              
165             =over 4
166              
167             =item * first_name: first name
168              
169             =item * last_name: last name
170              
171             =item * date_of_birth: (optional) date of birth as a string or epoch
172              
173             =item * place_of_birth: (optional) place of birth as a country name or code
174              
175             =item * residence: (optional) name or code of the country of residence
176              
177             =item * nationality: (optional) name or code of the country of nationality
178              
179             =item * citizen: (optional) name or code of the country of citizenship
180              
181             =item * postal_code: (optional) postal/zip code
182              
183             =item * national_id: (optional) national ID number
184              
185             =item * passport_no: (oiptonal) passort number
186              
187             =back
188              
189             For backward compatibility it also supports the old API, taking the following args:
190              
191             =over 4
192              
193             =item * first_name: first name
194              
195             =item * last_name: last name
196              
197             =item * date_of_birth: (optional) date of birth as a string or epoch
198              
199             =back
200              
201             It returns a hash-ref containg the following data:
202              
203             =over 4
204              
205             =item - matched: 1 if a match was found; 0 otherwise
206              
207             =item - list: the source for the matched entry,
208              
209             =item - matched_args: a name-value hash-ref of the similar arguments,
210              
211             =item - comment: additional comments if necessary,
212              
213             =back
214              
215             =cut
216              
217             sub get_sanctioned_info { ## no critic (RequireArgUnpacking)
218 14 50   14 1 68 my $self = blessed($_[0]) ? shift : $instance;
219 14 100       54 unless ($self) {
220 5         32 $instance = __PACKAGE__->new(sanction_file => get_sanction_file());
221 5         16 $self = $instance;
222             }
223              
224             # It's the old interface
225 14         42 my ($first_name, $last_name, $date_of_birth) = @_;
226 14         30 my $args = {};
227              
228             # in the new interface we accept fields in a hashref
229 14 50       50 if (ref $_[0] eq 'HASH') {
230 0         0 ($args) = @_;
231 0         0 ($first_name, $last_name, $date_of_birth) = $args->@{qw/first_name last_name date_of_birth/};
232             }
233              
234             # convert country names to iso codes
235 14         37 for my $field (qw/place_of_birth residence nationality citizen/) {
236 56         89 my $value = $args->{$field};
237 56 50       123 next unless $value;
238              
239 0         0 $args->{$field} = Data::Validate::Sanctions::Fetcher::get_country_code($value);
240             }
241              
242 14         55 $self->_load_data();
243              
244 14   66     82 my $client_full_name = join(' ', $first_name, $last_name || ());
245              
246             # Split into tokens after cleaning
247 14         40 my @client_name_tokens = _clean_names($client_full_name);
248              
249 14         28 my @match_with_dob_text;
250              
251             # only pick the sanctioned names which have common token with the client tokens
252             # and deduplicate the list
253 14         31 my $filtered_sanctioned_names = {};
254 14         29 foreach my $token (@client_name_tokens) {
255 29         44 foreach my $name (keys %{$self->{_token_sanctioned_names}->{$token}}) {
  29         196  
256 313         636 $filtered_sanctioned_names->{$name} = 1;
257             }
258             }
259              
260 14         27 foreach my $sanctioned_name (keys %{$filtered_sanctioned_names}) {
  14         73  
261 165         394 my $sanctioned_name_tokens = $self->{_sanctioned_name_tokens}->{$sanctioned_name};
262 165 100       292 next unless _name_matches(\@client_name_tokens, $sanctioned_name_tokens);
263              
264 16         60 for my $entry ($self->{_index}->{$sanctioned_name}->@*) {
265 19         52 my $matched_args = $self->_match_other_fields($entry, $args);
266 19 50       45 next unless $matched_args;
267 19         43 $matched_args->{name} = $sanctioned_name;
268              
269             # dob is matched only if it's included in lookup args
270 19 100       59 return _possible_match($entry->{source}, \%$matched_args) unless defined $date_of_birth;
271              
272             # 1- Some entries in sanction list can have more than one date of birth
273             # 2- first epoch is compared, then year
274 14         55 my $client_dob_date = Date::Utility->new($date_of_birth);
275 14         1073 $args->{dob_epoch} = $client_dob_date->epoch;
276 14         393 $args->{dob_year} = $client_dob_date->year;
277              
278 14         370 for my $dob_field (qw/dob_epoch dob_year/) {
279 27   100     92 $entry->{$dob_field} //= [];
280 27     14   128 my $checked_dob = any { $_ eq $args->{$dob_field} } $entry->{$dob_field}->@*;
  14         40  
281              
282 27 100       108 return _possible_match($entry->{source}, {%$matched_args, $dob_field => $args->{$dob_field}}) if $checked_dob;
283             }
284              
285             # Saving names with dob_text for later check.
286 12 100 100     49 my $has_no_epoch_or_year = ($entry->{dob_epoch}->@* || $entry->{dob_year}->@*) ? 0 : 1;
287 12 50 50     18 my $has_dob_text = @{$entry->{dob_text} // []} ? 1 : 0;
  12         51  
288 12 100 66     55 if ($has_dob_text || $has_no_epoch_or_year) {
289 1         7 push @match_with_dob_text,
290             {
291             name => $sanctioned_name,
292             entry => $entry,
293             matched_args => $matched_args,
294             };
295             }
296             }
297             }
298              
299             # Return a possible match if the name matches and no date of birth is present in sanctions
300 7         25 for my $match (@match_with_dob_text) {
301             # We match only in case we have full match for the name
302             # in other case we may get to many false positive
303 1         10 my ($sanction_name, $client_name) = map { uc(s/[^[:alpha:]\s]//gr) } ($match->{name}, $client_full_name);
  2         10  
304              
305 1 50       5 next unless $sanction_name eq $client_name;
306              
307 1   50     10 my $dob_text = $match->{entry}->{dob_text} // [];
308              
309 1         2 my $comment;
310 1 50       5 if (@$dob_text) {
311 0         0 $comment = 'dob raw text: ' . join q{, } => @$dob_text;
312             }
313              
314 1         6 return _possible_match($match->{entry}->{source}, $match->{matched_args}, $comment);
315             }
316              
317             # Return if no possible match, regardless if date of birth is provided or not
318 6         91 return {matched => 0};
319             }
320              
321             sub _load_data {
322 14     14   32 my $self = shift;
323 14         49 my $sanction_file = $self->{sanction_file};
324 14   50     42 $self->{last_modification} //= 0;
325 14   50     48 $self->{last_index} //= 0;
326 14   50     42 $self->{last_data_load} //= 0;
327 14   100     54 $self->{_data} //= {};
328 14   100     55 $self->{_sanctioned_name_tokens} //= {};
329 14   100     58 $self->{_token_sanctioned_names} //= {};
330              
331 14 100 66     108 return $self->{_data} if $self->{_data} and $self->{last_data_load} + $self->IGNORE_OPERATION_INTERVAL > time;
332              
333 5 50       116 if (-e $sanction_file) {
334 5         41 my $file_modify_time = stat($sanction_file)->mtime;
335 5 0 33     1070 return $self->{_data} if $file_modify_time <= $self->{last_modification} && $self->{_data};
336 5         16 $self->{last_modification} = $file_modify_time;
337 5         25 $self->{_data} = LoadFile($sanction_file);
338 5         222989 $self->{last_data_load} = time;
339             }
340              
341 5         765 $self->_index_data();
342              
343 5         9721 foreach my $sanctioned_name (keys $self->{_index}->%*) {
344 23102         38195 my @tokens = _clean_names($sanctioned_name);
345 23102         61801 $self->{_sanctioned_name_tokens}->{$sanctioned_name} = \@tokens;
346 23102         36871 foreach my $token (@tokens) {
347 65499         227619 $self->{_token_sanctioned_names}->{$token}->{$sanctioned_name} = 1;
348             }
349             }
350              
351 5         2791 return $self->{_data};
352             }
353              
354             =head2 _index_data
355              
356             Indexes data by name. Each name may have multiple matching entries.
357              
358             =cut
359              
360             sub _index_data {
361 5     5   285 my $self = shift;
362              
363 5   50     112 $self->{_data} //= {};
364 5         21 $self->{_index} = {};
365 5         73 for my $source (keys $self->{_data}->%*) {
366 8   100     397447 my @content = clone($self->{_data}->{$source}->{content} // [])->@*;
367              
368 8         4687 for my $entry (@content) {
369 15668         36759 $entry->{source} = $source;
370 15668         30415 for my $name ($entry->{names}->@*) {
371 26569         43514 $name = ucfirst($name);
372 26569   100     82118 my $entry_list = $self->{_index}->{$name} // [];
373 26569         45603 push @$entry_list, $entry;
374 26569         72183 $self->{_index}->{$name} = $entry_list;
375             }
376             }
377             }
378              
379 5         19 $self->{last_index} = time;
380              
381 5         13 return;
382             }
383              
384             sub _save_data {
385 0     0   0 my $self = shift;
386              
387 0         0 my $sanction_file = $self->{sanction_file};
388 0         0 my $new_sanction_file = $sanction_file . ".tmp";
389              
390 0         0 DumpFile($new_sanction_file, $self->{_data});
391              
392 0 0       0 rename $new_sanction_file, $sanction_file or die "Can't rename $new_sanction_file to $sanction_file, please check it\n";
393 0         0 $self->{last_modification} = stat($sanction_file)->mtime;
394 0         0 return;
395             }
396              
397             sub _default_sanction_file {
398 2   33 2   15 return $ENV{SANCTION_FILE} // File::ShareDir::dist_file('Data-Validate-Sanctions', 'sanctions.yml');
399             }
400              
401             sub _possible_match {
402 8     8   23 my ($list, $matched_args, $comment) = @_;
403              
404             return +{
405 8         138 matched => 1,
406             list => $list,
407             matched_args => $matched_args,
408             comment => $comment,
409             };
410             }
411              
412             sub _clean_names {
413 23116     23116   43643 my ($full_name) = @_;
414              
415             # Remove non-alphabets
416 23116         93228 my @cleaned_full_name = split " ", uc($full_name =~ s/[^[:alpha:]\s]//gr);
417              
418 23116         79256 return @cleaned_full_name;
419             }
420              
421             sub _name_matches {
422 165     165   311 my ($small_tokens_list, $bigger_tokens_list) = @_;
423              
424 165         232 my $name_matches_count = 0;
425              
426 165         271 foreach my $token (@$small_tokens_list) {
427 482 100   1155   1281 $name_matches_count++ if any { $_ eq $token } @$bigger_tokens_list;
  1155         2220  
428             }
429              
430 165         360 my $small_tokens_size = min(scalar(@$small_tokens_list), scalar(@$bigger_tokens_list));
431              
432             # - If more than one word matches, return it as possible match
433             # - Some sanctioned individuals have only one name (ex. Hamza); this should be returned as well
434 165 100 66     625 return 1 if ($name_matches_count > 1) || ($name_matches_count == 1 && $small_tokens_size == 1);
      100        
435              
436 149         386 return 0;
437             }
438              
439             sub export_data {
440 0     0 1   my ($self, $path) = @_;
441              
442 0           return DumpFile($path, $self->{_data});
443             }
444              
445             1;
446             __END__
447              
448             =encoding utf-8
449              
450             =head1 NAME
451              
452             Data::Validate::Sanctions - Validate a name against sanctions lists
453              
454             =head1 SYNOPSIS
455              
456             # as exported function
457             use Data::Validate::Sanctions qw/is_sanctioned get_sanction_file set_sanction_file/;
458             set_sanction_file('/var/storage/sanction.csv');
459              
460             my ($first_name, $last_name) = ("First", "Last Name");
461             print 'BAD' if is_sanctioned($first_name, $last_name);
462              
463             # as OO
464             use Data::Validate::Sanctions;
465              
466             #You can also set sanction_file in the new method.
467             my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv');
468             print 'BAD' if $validator->is_sanctioned("$last_name $first_name");
469              
470             =head1 DESCRIPTION
471              
472             Data::Validate::Sanctions is a simple validitor to validate a name against sanctions lists.
473              
474             The list is from:
475             - L<https://www.treasury.gov/ofac/downloads/sdn.csv>,
476             - L<https://www.treasury.gov/ofac/downloads/consolidated/cons_prim.csv>
477             - L<https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv>
478             - L<https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token>
479              
480             run F<update_sanctions_csv> to update the bundled csv.
481              
482             The path of list can be set by function L</set_sanction_file> or by method L</new>. If not set, then environment variable $ENV{SANCTION_FILE} will be checked, at last
483             the default file in this package will be used.
484              
485             =head1 METHODS
486              
487             =head2 is_sanctioned
488              
489             is_sanctioned($last_name, $first_name);
490             is_sanctioned($first_name, $last_name);
491             is_sanctioned("$last_name $first_name");
492              
493             when one string is passed, please be sure last_name is before first_name.
494              
495             or you can pass first_name, last_name (last_name, first_name), we'll check both "$last_name $first_name" and "$first_name $last_name".
496              
497             retrun 1 if match is found and 0 if match is not found.
498              
499             It will remove all non-alpha chars and compare with the list we have.
500              
501             =head2 get_sanctioned_info
502              
503             my $result =get_sanctioned_info($last_name, $first_name, $date_of_birth);
504             print 'match: ', $result->{matched_args}->{name}, ' on list ', $result->{list} if $result->{matched};
505              
506             return hashref with keys:
507             B<matched> 1 or 0, depends if name has matched
508             B<list> name of list matched (present only if matched)
509             B<matched_args> The list of arguments matched (name, date of birth, residence, etc.)
510              
511             It will remove all non-alpha chars and compare with the list we have.
512              
513             =head2 update_data
514              
515             Fetches latest versions of sanction lists, and updates corresponding sections of stored file, if needed
516              
517             =head2 last_updated
518              
519             Returns timestamp of when the latest list was updated.
520             If argument is provided - return timestamp of when that list was updated.
521              
522             =head2 new
523              
524             Create the object, and set sanction_file
525              
526             my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv');
527              
528             =head2 get_sanction_file
529              
530             get sanction_file which is used by L</is_sanctioned> (procedure-oriented)
531              
532             =head2 set_sanction_file
533              
534             set sanction_file which is used by L</is_sanctioned> (procedure-oriented)
535              
536             =head2 _name_matches
537              
538             Pass in the client's name and sanctioned individual's name to see if they are similar or not
539              
540              
541             =head2 export_data
542              
543             Exports the sanction lists to a local file in YAML format.
544              
545             =head2 data
546              
547             Gets the sanction list content with lazy loading.
548              
549             =head1 AUTHOR
550              
551             Binary.com E<lt>fayland@binary.comE<gt>
552              
553             =head1 COPYRIGHT
554              
555             Copyright 2014- Binary.com
556              
557             =head1 LICENSE
558              
559             This library is free software; you can redistribute it and/or modify
560             it under the same terms as Perl itself.
561              
562             =head1 SEE ALSO
563              
564             L<Data::OFAC>
565              
566             =cut