File Coverage

blib/lib/Data/Validate/Sanctions.pm
Criterion Covered Total %
statement 159 209 76.0
branch 27 60 45.0
condition 39 74 52.7
subroutine 29 35 82.8
pod 9 9 100.0
total 263 387 67.9


line stmt bran cond sub pod time code
1             package Data::Validate::Sanctions;
2              
3 7     7   173178 use strict;
  7         46  
  7         216  
4 7     7   36 use warnings;
  7         15  
  7         394  
5              
6             require Exporter;
7             our @ISA = qw(Exporter);
8             our @EXPORT_OK = qw/is_sanctioned set_sanction_file get_sanction_file/;
9              
10 7     7   38 use Carp;
  7         17  
  7         499  
11 7     7   3418 use Data::Validate::Sanctions::Fetcher;
  7         25  
  7         374  
12 7     7   2377 use Data::Validate::Sanctions::Redis;
  7         24  
  7         227  
13 7     7   67 use File::stat;
  7         14  
  7         74  
14 7     7   504 use File::ShareDir;
  7         19  
  7         260  
15 7     7   861 use YAML::XS qw/DumpFile LoadFile/;
  7         6236  
  7         358  
16 7     7   50 use Scalar::Util qw(blessed);
  7         20  
  7         275  
17 7     7   52 use Date::Utility;
  7         16  
  7         164  
18 7     7   3548 use Data::Compare;
  7         82213  
  7         59  
19 7     7   26368 use List::Util qw(any uniq max min);
  7         18  
  7         500  
20 7     7   52 use Locale::Country;
  7         19  
  7         737  
21 7     7   47 use Text::Trim qw(trim);
  7         16  
  7         352  
22 7     7   52 use Clone qw(clone);
  7         23  
  7         16352  
23              
24             our $VERSION = '0.15';
25              
26             my $sanction_file;
27             my $instance;
28              
29             # for OO
30             sub new { ## no critic (RequireArgUnpacking)
31 5     5 1 25 my ($class, %args) = @_;
32              
33 5   50     33 my $storage = delete $args{storage} // '';
34              
35 5 50       22 return Data::Validate::Sanctions::Redis->new(%args) if $storage eq 'redis';
36              
37 5         14 my $self = {};
38              
39 5   33     21 $self->{sanction_file} = $args{sanction_file} // _default_sanction_file();
40              
41 5         22 $self->{args} = {%args};
42              
43 5         14 $self->{last_time} = 0;
44 5   33     33 return bless $self, ref($class) || $class;
45             }
46              
47             sub update_data {
48 0     0 1 0 my ($self, %args) = @_;
49              
50 0         0 $self->_load_data();
51              
52 0         0 my $new_data = Data::Validate::Sanctions::Fetcher::run($self->{args}->%*, %args);
53 0         0 my $updated = 0;
54 0         0 foreach my $k (keys %$new_data) {
55 0   0     0 $self->{_data}->{$k} //= {};
56 0   0     0 $self->{_data}->{$k}->{updated} //= 0;
57 0   0     0 $self->{_data}->{$k}->{content} //= [];
58              
59 0 0 0     0 if (!$new_data->{$k}->{error} && $self->{_data}->{$k}->{error}) {
60 0         0 delete $self->{_data}->{$k}->{error};
61 0         0 $updated = 1;
62             }
63              
64 0 0 0     0 if ($new_data->{$k}->{error}) {
    0          
65 0         0 warn "$k list update failed because: $new_data->{$k}->{error}";
66 0         0 $self->{_data}->{$k}->{error} = $new_data->{$k}->{error};
67 0         0 $updated = 1;
68             } elsif ($self->{_data}{$k}->{updated} != $new_data->{$k}->{updated}
69             || scalar $self->{_data}{$k}->{content}->@* != scalar $new_data->{$k}->{content}->@*)
70             {
71 0 0       0 print "Source $k is updated with new data \n" if $args{verbose};
72 0         0 $self->{_data}->{$k} = $new_data->{$k};
73 0         0 $updated = 1;
74             } else {
75 0 0       0 print "Source $k is not changed \n" if $args{verbose};
76             }
77             }
78              
79 0 0       0 if ($updated) {
80 0         0 $self->_save_data();
81 0         0 $self->_index_data();
82             }
83              
84 0         0 return;
85             }
86              
87             sub last_updated {
88 0     0 1 0 my $self = shift;
89 0         0 my $list = shift;
90              
91 0 0       0 if ($list) {
92 0         0 return $self->{_data}->{$list}->{updated};
93             } else {
94 0         0 $self->_load_data();
95 0   0     0 return max(map { $_->{updated} // 0 } values %{$self->{_data}});
  0         0  
  0         0  
96             }
97             }
98              
99             sub set_sanction_file { ## no critic (RequireArgUnpacking)
100 4   100 4 1 19446 $sanction_file = shift // die "sanction_file is needed";
101 3         159000 undef $instance;
102 3         46 return;
103             }
104              
105             sub get_sanction_file {
106 7   66 7 1 645 $sanction_file //= _default_sanction_file();
107 7 50       61 return $instance ? $instance->{sanction_file} : $sanction_file;
108             }
109              
110             =head2 is_sanctioned
111              
112             Checks if the input profile info matches a sanctioned entity.
113             The arguments are the same as those of B<get_sanctioned_info>.
114              
115             It returns 1 if a match is found, otherwise 0.
116              
117             =cut
118              
119             sub is_sanctioned { ## no critic (RequireArgUnpacking)
120 14     14 1 5447 return (get_sanctioned_info(@_))->{matched};
121             }
122              
123             sub data {
124 0     0 1 0 my ($self) = @_;
125              
126 0 0       0 $self->_load_data() unless $self->{_data};
127              
128 0         0 return $self->{_data};
129             }
130              
131             =head2 _match_other_fields
132              
133             Matches fields possibly available in addition to name and date of birth.
134              
135             Returns a a hash-ref reporting the matched fields if it succeeeds; otherwise returns false (undef).
136              
137             =cut
138              
139             sub _match_other_fields {
140 18     18   40 my ($self, $entry, $args) = @_;
141              
142 18         51 my @optional_fields = qw/place_of_birth residence nationality citizen postal_code national_id passport_no/;
143              
144 18         32 my $matched_args = {};
145 18         35 for my $field (@optional_fields) {
146 126 0 33     246 next unless ($args->{$field} && $entry->{$field} && $entry->{$field}->@*);
      0        
147              
148 0 0   0   0 return undef unless any { $args->{$field} eq $_ } $entry->{$field}->@*;
  0         0  
149 0         0 $matched_args->{$field} = $args->{$field};
150             }
151              
152 18         38 return $matched_args;
153             }
154              
155             =head2 get_sanctioned_info
156              
157             Tries to find a match a sanction entry matching the input profile args.
158             It takes arguments in two forms. In the new API, it takes a hashref containing the following named arguments:
159              
160             =over 4
161              
162             =item * first_name: first name
163              
164             =item * last_name: last name
165              
166             =item * date_of_birth: (optional) date of birth as a string or epoch
167              
168             =item * place_of_birth: (optional) place of birth as a country name or code
169              
170             =item * residence: (optional) name or code of the country of residence
171              
172             =item * nationality: (optional) name or code of the country of nationality
173              
174             =item * citizen: (optional) name or code of the country of citizenship
175              
176             =item * postal_code: (optional) postal/zip code
177              
178             =item * national_id: (optional) national ID number
179              
180             =item * passport_no: (oiptonal) passort number
181              
182             =back
183              
184             For backward compatibility it also supports the old API, taking the following args:
185              
186             =over 4
187              
188             =item * first_name: first name
189              
190             =item * last_name: last name
191              
192             =item * date_of_birth: (optional) date of birth as a string or epoch
193              
194             =back
195              
196             It returns a hash-ref containg the following data:
197              
198             =over 4
199              
200             =item - matched: 1 if a match was found; 0 otherwise
201              
202             =item - list: the source for the matched entry,
203              
204             =item - matched_args: a name-value hash-ref of the similar arguments,
205              
206             =item - comment: additional comments if necessary,
207              
208             =back
209              
210             =cut
211              
212             sub get_sanctioned_info { ## no critic (RequireArgUnpacking)
213 14 50   14 1 62 my $self = blessed($_[0]) ? shift : $instance;
214 14 100       48 unless ($self) {
215 5         18 $instance = __PACKAGE__->new(sanction_file => get_sanction_file());
216 5         13 $self = $instance;
217             }
218              
219             # It's the old interface
220 14         33 my ($first_name, $last_name, $date_of_birth) = @_;
221 14         32 my $args = {};
222              
223             # in the new interface we accept fields in a hashref
224 14 50       48 if (ref $_[0] eq 'HASH') {
225 0         0 ($args) = @_;
226 0         0 ($first_name, $last_name, $date_of_birth) = $args->@{qw/first_name last_name date_of_birth/};
227             }
228              
229             # convert country names to iso codes
230 14         35 for my $field (qw/place_of_birth residence nationality citizen/) {
231 56         87 my $value = $args->{$field};
232 56 50       113 next unless $value;
233              
234 0         0 $args->{$field} = Data::Validate::Sanctions::Fetcher::get_country_code($value);
235             }
236              
237 14         44 $self->_load_data();
238              
239 14   66     1506 my $client_full_name = join(' ', $first_name, $last_name || ());
240              
241             # Split into tokens after cleaning
242 14         36 my @client_name_tokens = _clean_names($client_full_name);
243              
244 14         21 my @match_with_dob_text;
245              
246             # only pick the sanctioned names which have common token with the client tokens
247             # and deduplicate the list
248 14         29 my $filtered_sanctioned_names = {};
249 14         31 foreach my $token (@client_name_tokens) {
250 29         42 foreach my $name (keys %{$self->{_token_sanctioned_names}->{$token}}) {
  29         171  
251 313         575 $filtered_sanctioned_names->{$name} = 1;
252             }
253             }
254              
255 14         26 foreach my $sanctioned_name (keys %{$filtered_sanctioned_names}) {
  14         59  
256 144         335 my $sanctioned_name_tokens = $self->{_sanctioned_name_tokens}->{$sanctioned_name};
257 144 100       262 next unless _name_matches(\@client_name_tokens, $sanctioned_name_tokens);
258              
259 15         44 for my $entry ($self->{_index}->{$sanctioned_name}->@*) {
260 18         52 my $matched_args = $self->_match_other_fields($entry, $args);
261 18 50       41 next unless $matched_args;
262 18         40 $matched_args->{name} = $sanctioned_name;
263              
264             # dob is matched only if it's included in lookup args
265 18 100       51 return _possible_match($entry->{source}, \%$matched_args) unless defined $date_of_birth;
266              
267             # 1- Some entries in sanction list can have more than one date of birth
268             # 2- first epoch is compared, then year
269 13         54 my $client_dob_date = Date::Utility->new($date_of_birth);
270 13         999 $args->{dob_epoch} = $client_dob_date->epoch;
271 13         358 $args->{dob_year} = $client_dob_date->year;
272              
273 13         360 for my $dob_field (qw/dob_epoch dob_year/) {
274 25   100     94 $entry->{$dob_field} //= [];
275 25     13   117 my $checked_dob = any { $_ eq $args->{$dob_field} } $entry->{$dob_field}->@*;
  13         31  
276              
277 25 100       91 return _possible_match($entry->{source}, {%$matched_args, $dob_field => $args->{$dob_field}}) if $checked_dob;
278             }
279              
280             # Saving names with dob_text for later check.
281 11 100 100     45 my $has_no_epoch_or_year = ($entry->{dob_epoch}->@* || $entry->{dob_year}->@*) ? 0 : 1;
282 11 50 50     15 my $has_dob_text = @{$entry->{dob_text} // []} ? 1 : 0;
  11         46  
283 11 100 66     52 if ($has_dob_text || $has_no_epoch_or_year) {
284 1         6 push @match_with_dob_text,
285             {
286             name => $sanctioned_name,
287             entry => $entry,
288             matched_args => $matched_args,
289             };
290             }
291             }
292             }
293              
294             # Return a possible match if the name matches and no date of birth is present in sanctions
295 7         22 for my $match (@match_with_dob_text) {
296             # We match only in case we have full match for the name
297             # in other case we may get to many false positive
298 1         4 my ($sanction_name, $client_name) = map { uc(s/[^[:alpha:]\s]//gr) } ($match->{name}, $client_full_name);
  2         8  
299              
300 1 50       5 next unless $sanction_name eq $client_name;
301              
302 1   50     6 my $dob_text = $match->{entry}->{dob_text} // [];
303              
304 1         2 my $comment;
305 1 50       5 if (@$dob_text) {
306 0         0 $comment = 'dob raw text: ' . join q{, } => @$dob_text;
307             }
308              
309 1         4 return _possible_match($match->{entry}->{source}, $match->{matched_args}, $comment);
310             }
311              
312             # Return if no possible match, regardless if date of birth is provided or not
313 6         56 return {matched => 0};
314             }
315              
316             sub _load_data {
317 14     14   28 my $self = shift;
318 14         40 my $sanction_file = $self->{sanction_file};
319 14   50     53 $self->{last_time} //= 0;
320 14   100     55 $self->{_data} //= {};
321 14   100     47 $self->{_sanctioned_name_tokens} //= {};
322 14   100     48 $self->{_token_sanctioned_names} //= {};
323              
324 14 50       297 if (-e $sanction_file) {
325 14 50 66     86 return $self->{_data} if stat($sanction_file)->mtime <= $self->{last_time} && $self->{_data};
326 5         1055 $self->{last_time} = stat($sanction_file)->mtime;
327 5         785 $self->{_data} = LoadFile($sanction_file);
328             }
329 5         216716 $self->_index_data();
330              
331 5         9393 foreach my $sanctioned_name (keys $self->{_index}->%*) {
332 23102         37907 my @tokens = _clean_names($sanctioned_name);
333 23102         56622 $self->{_sanctioned_name_tokens}->{$sanctioned_name} = \@tokens;
334 23102         36274 foreach my $token (@tokens) {
335 65499         198534 $self->{_token_sanctioned_names}->{$token}->{$sanctioned_name} = 1;
336             }
337             }
338              
339 5         2270 return $self->{_data};
340             }
341              
342             =head2 _index_data
343              
344             Indexes data by name. Each name may have multiple matching entries.
345              
346             =cut
347              
348             sub _index_data {
349 5     5   273 my $self = shift;
350              
351 5   50     319 $self->{_data} //= {};
352 5         18 $self->{_index} = {};
353 5         34 for my $source (keys $self->{_data}->%*) {
354 8   100     362600 my @content = clone($self->{_data}->{$source}->{content} // [])->@*;
355              
356 8         4251 for my $entry (@content) {
357 15668         31463 $entry->{source} = $source;
358 15668         31840 for my $name ($entry->{names}->@*) {
359 26569         43894 $name = ucfirst($name);
360 26569   100     81848 my $entry_list = $self->{_index}->{$name} // [];
361 26569         44885 push @$entry_list, $entry;
362 26569         67531 $self->{_index}->{$name} = $entry_list;
363             }
364             }
365             }
366 5         19 return;
367             }
368              
369             sub _save_data {
370 0     0   0 my $self = shift;
371              
372 0         0 my $sanction_file = $self->{sanction_file};
373 0         0 my $new_sanction_file = $sanction_file . ".tmp";
374              
375 0         0 DumpFile($new_sanction_file, $self->{_data});
376              
377 0 0       0 rename $new_sanction_file, $sanction_file or die "Can't rename $new_sanction_file to $sanction_file, please check it\n";
378 0         0 $self->{last_time} = stat($sanction_file)->mtime;
379 0         0 return;
380             }
381              
382             sub _default_sanction_file {
383 2   33 2   15 return $ENV{SANCTION_FILE} // File::ShareDir::dist_file('Data-Validate-Sanctions', 'sanctions.yml');
384             }
385              
386             sub _possible_match {
387 8     8   21 my ($list, $matched_args, $comment) = @_;
388              
389             return +{
390 8         120 matched => 1,
391             list => $list,
392             matched_args => $matched_args,
393             comment => $comment,
394             };
395             }
396              
397             sub _clean_names {
398 23116     23116   40936 my ($full_name) = @_;
399              
400             # Remove non-alphabets
401 23116         90287 my @cleaned_full_name = split " ", uc($full_name =~ s/[^[:alpha:]\s]//gr);
402              
403 23116         77235 return @cleaned_full_name;
404             }
405              
406             sub _name_matches {
407 144     144   233 my ($small_tokens_list, $bigger_tokens_list) = @_;
408              
409 144         199 my $name_matches_count = 0;
410              
411 144         221 foreach my $token (@$small_tokens_list) {
412 421 100   1006   1106 $name_matches_count++ if any { $_ eq $token } @$bigger_tokens_list;
  1006         1888  
413             }
414              
415 144         271 my $small_tokens_size = min(scalar(@$small_tokens_list), scalar(@$bigger_tokens_list));
416              
417             # - If more than one word matches, return it as possible match
418             # - Some sanctioned individuals have only one name (ex. Hamza); this should be returned as well
419 144 100 66     537 return 1 if ($name_matches_count > 1) || ($name_matches_count == 1 && $small_tokens_size == 1);
      100        
420              
421 129         304 return 0;
422             }
423              
424             sub export_data {
425 0     0 1   my ($self, $path) = @_;
426              
427 0           return DumpFile($path, $self->{_data});
428             }
429              
430             1;
431             __END__
432              
433             =encoding utf-8
434              
435             =head1 NAME
436              
437             Data::Validate::Sanctions - Validate a name against sanctions lists
438              
439             =head1 SYNOPSIS
440              
441             # as exported function
442             use Data::Validate::Sanctions qw/is_sanctioned get_sanction_file set_sanction_file/;
443             set_sanction_file('/var/storage/sanction.csv');
444              
445             my ($first_name, $last_name) = ("First", "Last Name");
446             print 'BAD' if is_sanctioned($first_name, $last_name);
447              
448             # as OO
449             use Data::Validate::Sanctions;
450              
451             #You can also set sanction_file in the new method.
452             my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv');
453             print 'BAD' if $validator->is_sanctioned("$last_name $first_name");
454              
455             =head1 DESCRIPTION
456              
457             Data::Validate::Sanctions is a simple validitor to validate a name against sanctions lists.
458              
459             The list is from:
460             - L<https://www.treasury.gov/ofac/downloads/sdn.csv>,
461             - L<https://www.treasury.gov/ofac/downloads/consolidated/cons_prim.csv>
462             - L<https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv>
463             - L<https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token>
464              
465             run F<update_sanctions_csv> to update the bundled csv.
466              
467             The path of list can be set by function L</set_sanction_file> or by method L</new>. If not set, then environment variable $ENV{SANCTION_FILE} will be checked, at last
468             the default file in this package will be used.
469              
470             =head1 METHODS
471              
472             =head2 is_sanctioned
473              
474             is_sanctioned($last_name, $first_name);
475             is_sanctioned($first_name, $last_name);
476             is_sanctioned("$last_name $first_name");
477              
478             when one string is passed, please be sure last_name is before first_name.
479              
480             or you can pass first_name, last_name (last_name, first_name), we'll check both "$last_name $first_name" and "$first_name $last_name".
481              
482             retrun 1 if match is found and 0 if match is not found.
483              
484             It will remove all non-alpha chars and compare with the list we have.
485              
486             =head2 get_sanctioned_info
487              
488             my $result =get_sanctioned_info($last_name, $first_name, $date_of_birth);
489             print 'match: ', $result->{matched_args}->{name}, ' on list ', $result->{list} if $result->{matched};
490              
491             return hashref with keys:
492             B<matched> 1 or 0, depends if name has matched
493             B<list> name of list matched (present only if matched)
494             B<matched_args> The list of arguments matched (name, date of birth, residence, etc.)
495              
496             It will remove all non-alpha chars and compare with the list we have.
497              
498             =head2 update_data
499              
500             Fetches latest versions of sanction lists, and updates corresponding sections of stored file, if needed
501              
502             =head2 last_updated
503              
504             Returns timestamp of when the latest list was updated.
505             If argument is provided - return timestamp of when that list was updated.
506              
507             =head2 new
508              
509             Create the object, and set sanction_file
510              
511             my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv');
512              
513             =head2 get_sanction_file
514              
515             get sanction_file which is used by L</is_sanctioned> (procedure-oriented)
516              
517             =head2 set_sanction_file
518              
519             set sanction_file which is used by L</is_sanctioned> (procedure-oriented)
520              
521             =head2 _name_matches
522              
523             Pass in the client's name and sanctioned individual's name to see if they are similar or not
524              
525              
526             =head2 export_data
527              
528             Exports the sanction lists to a local file in YAML format.
529              
530             =head2 data
531              
532             Gets the sanction list content with lazy loading.
533              
534             =head1 AUTHOR
535              
536             Binary.com E<lt>fayland@binary.comE<gt>
537              
538             =head1 COPYRIGHT
539              
540             Copyright 2014- Binary.com
541              
542             =head1 LICENSE
543              
544             This library is free software; you can redistribute it and/or modify
545             it under the same terms as Perl itself.
546              
547             =head1 SEE ALSO
548              
549             L<Data::OFAC>
550              
551             =cut