File Coverage

blib/lib/Data/Validate/Sanctions.pm
Criterion Covered Total %
statement 197 228 86.4
branch 44 68 64.7
condition 49 70 70.0
subroutine 40 44 90.9
pod 8 8 100.0
total 338 418 80.8


line stmt bran cond sub pod time code
1              
2             use strict;
3 4     4   112593 use warnings;
  4     1   20  
  4         85  
  1106         5505  
  1106         1021  
  1106         1292  
4 4     4   15  
  4     1   5  
  4         201  
  2266         4318  
  5806         7967  
  1106         1503  
5             require Exporter;
6             our @ISA = qw(Exporter);
7             our @EXPORT_OK = qw/is_sanctioned set_sanction_file get_sanction_file/;
8              
9             use Carp;
10 4     4   18 use Data::Validate::Sanctions::Fetcher;
  4     1   6  
  4         226  
  1106         2775  
  1072         1788  
  1         67  
11 4     4   1036 use File::stat;
  4     1   8  
  4         131  
  1         343  
  1         2  
  1         31  
12 4     4   26 use File::ShareDir;
  4     1   8  
  4         30  
  1         7  
  1         2  
  1         8  
13 4     4   216 use YAML::XS qw/DumpFile LoadFile/;
  4     1   8  
  4         111  
  1         76  
  1         2  
  1         35  
14 4     4   605 use Scalar::Util qw(blessed);
  4     1   4106  
  4         162  
  1         292  
  1         2034  
  1         46  
15 4     4   20 use Date::Utility;
  4     1   6  
  4         116  
  1         6  
  1         2  
  1         28  
16 4     4   19 use Data::Compare;
  4     1   7  
  4         66  
  1         5  
  1         2  
  1         14  
17 4     4   1021 use List::Util qw(any uniq max min);
  4     1   25268  
  4         25  
  1         349  
  1         8947  
  1         7  
18 4     4   8999 use Locale::Country;
  4     1   7  
  4         212  
  1         2662  
  1         2  
  1         59  
19 4     4   19 use Text::Trim qw(trim);
  4     1   8  
  4         314  
  1         5  
  1         1  
  1         78  
20 4     4   17  
  4     1   9  
  4         5823  
  1         5  
  1         1  
  1         1608  
21             our $VERSION = '0.14';
22              
23             my $sanction_file = _default_sanction_file();
24             my $instance;
25              
26             # for OO
27             my ($class, %args) = @_;
28              
29 9     9 1 3582 my $self = {};
30             $self->{sanction_file} = $args{sanction_file} // _default_sanction_file();
31 9         21  
32 9   66     34 $self->{args} = {%args};
      66        
33              
34 9         182 $self->{last_time} = 0;
35             return bless $self, ref($class) || $class;
36 9         19 }
37 9   33     95935  
      100        
38             my ($self, %args) = @_;
39              
40             $self->_load_data();
41 0     0 1 0  
42             my $new_data = Data::Validate::Sanctions::Fetcher::run($self->{args}->%*, %args);
43 0         0  
44             my $updated;
45 0         0 foreach my $k (keys %$new_data) {
46             $self->{_data}->{$k} //= {};
47 0         0 $self->{_data}->{$k}->{updated} //= 0;
48 0         0 $self->{_data}->{$k}->{content} //= [];
49 0   0     0 if ($self->{_data}{$k}->{updated} != $new_data->{$k}->{updated}
50 0   0     0 || scalar $self->{_data}{$k}->{content}->@* != scalar $new_data->{$k}->{content}->@*)
51 0   0     0 {
52 0 0 0     0 $self->{_data}->{$k} = $new_data->{$k};
    0          
53             $updated = 1;
54             print "Source $k is updated with new data \n" if $args{verbose};
55 0         0 } else {
56 0         0 print "Source $k is not changed \n" if $args{verbose};
57 0 0       0 }
    100          
58             }
59 0 0       0  
    100          
60             if ($updated) {
61             $self->_save_data();
62             $self->_index_data();
63 0 0       0 }
    0          
64 0         0  
65 0         0 return;
66             }
67              
68 0         0 my $self = shift;
69             my $list = shift;
70              
71             if ($list) {
72 0     0 1 0 return $self->{_data}->{$list}->{updated};
73 0         0 } else {
74             $self->_load_data();
75 0 0       0 return max(map { $_->{updated} } values %{$self->{_data}});
    0          
76 0         0 }
77             }
78 0         0  
79 0         0 $sanction_file = shift // die "sanction_file is needed";
  0         0  
  0         0  
80             undef $instance;
81             return;
82             }
83              
84 4   100 4 1 1391 return $instance ? $instance->{sanction_file} : $sanction_file;
85 3         90021 }
86 3         34  
87             =head2 is_sanctioned
88              
89             Checks if the input profile info matches a sanctioned entity.
90 2 50   2 1 427 The arguments are the same as those of B<get_sanctioned_info>.
    0          
91              
92             It returns 1 if a match is found, otherwise 0.
93              
94             =cut
95              
96             return (get_sanctioned_info(@_))->{matched};
97             }
98              
99             =head2 _match_other_fields
100              
101             Matches fields possibly available in addition to name and date of birth.
102              
103 26     26 1 4056 Returns a a hash-ref reporting the matched fields if it succeeeds; otherwise returns false (undef).
104              
105             =cut
106              
107             my ($self, $entry, $args) = @_;
108              
109             my @optional_fields = qw/place_of_birth residence nationality citizen postal_code national_id passport_no/;
110              
111             my $matched_args = {};
112             for my $field (@optional_fields) {
113             next unless ($args->{$field} && $entry->{$field} && $entry->{$field}->@*);
114              
115 56     56   89 return undef unless any { $args->{$field} eq $_ } $entry->{$field}->@*;
116             $matched_args->{$field} = $args->{$field};
117 56         117 }
118              
119 56         78 return $matched_args;
120 56         78 }
121 371 50 66     651  
    0 33        
122             =head2 get_sanctioned_info
123 77 100   0   186  
  79         163  
124 70         144 Tries to find a match a sanction entry matching the input profile args.
125             It takes arguments in two forms. In the new API, it takes a hashref containing the following named arguments:
126              
127 49         94 =over 4
128              
129             =item * first_name: first name
130              
131             =item * last_name: last name
132              
133             =item * date_of_birth: (optional) date of birth as a string or epoch
134              
135             =item * place_of_birth: (optional) place of birth as a country name or code
136              
137             =item * residence: (optional) name or code of the country of residence
138              
139             =item * nationality: (optional) name or code of the country of nationality
140              
141             =item * citizen: (optional) name or code of the country of citizenship
142              
143             =item * postal_code: (optional) postal/zip code
144              
145             =item * national_id: (optional) national ID number
146              
147             =item * passport_no: (oiptonal) passort number
148              
149             =back
150              
151             For backward compatibility it also supports the old API, taking the following args:
152              
153             =over 4
154              
155             =item * first_name: first name
156              
157             =item * last_name: last name
158              
159             =item * date_of_birth: (optional) date of birth as a string or epoch
160              
161             =back
162              
163             It returns a hash-ref containg the following data:
164              
165             =over 4
166              
167             =item - matched: 1 if a match was found; 0 otherwise
168             list: the source for the matched entry,
169             matched_args: a name-value hash-ref of the similar arguments,
170             comment: additional comments if necessary,
171              
172             =back
173              
174             =cut
175              
176             my $self = blessed($_[0]) ? shift : $instance;
177             unless ($self) {
178             $instance = __PACKAGE__->new(sanction_file => $sanction_file);
179             $self = $instance;
180             }
181              
182             # It's the old interface
183             my ($first_name, $last_name, $date_of_birth) = @_;
184             my $args = {};
185 47 100   47 1 5210  
          1    
186 47 100       126 # in the new interface we accept fields in a hashref
187 5         31 if (ref $_[0] eq 'HASH') {
188 5         8 ($args) = @_;
189             ($first_name, $last_name, $date_of_birth) = $args->@{qw/first_name last_name date_of_birth/};
190             }
191              
192 47         97 # convert country names to iso codes
193 47         63 for my $field (qw/place_of_birth residence nationality citizen/) {
194             my $value = $args->{$field};
195             next unless $value;
196 47 100       109  
197 15         26 $args->{$field} = Data::Validate::Sanctions::Fetcher::get_country_code($value);
198 15         33 }
199              
200             $self->_load_data();
201              
202 47         74 my $client_full_name = join(' ', $first_name, $last_name || ());
203 188         1855  
204 188 100       296 # Split into tokens after cleaning
205             my @client_name_tokens = _clean_names($client_full_name);
206 56         91  
207             my @match_with_dob_text;
208              
209 47         588 # only pick the sanctioned names which have common token with the client tokens
210             # and deduplicate the list
211 47   66     4718 my $filtered_sanctioned_names = {};
212             foreach my $token (@client_name_tokens) {
213             foreach my $name (keys %{$self->{_token_sanctioned_names}->{$token}}) {
214 47         104 $filtered_sanctioned_names->{$name} = 1;
215             }
216 47         61 }
217              
218             foreach my $sanctioned_name (keys %{$filtered_sanctioned_names}) {
219             my $sanctioned_name_tokens = $self->{_sanctioned_name_tokens}->{$sanctioned_name};
220 47         61 next unless _name_matches(\@client_name_tokens, $sanctioned_name_tokens);
221 47         72  
222 97         105 for my $entry ($self->{_index}->{$sanctioned_name}->@*) {
  97         1272  
223 3772         4787 my $matched_args = $self->_match_other_fields($entry, $args);
224             next unless $matched_args;
225             $matched_args->{name} = $sanctioned_name;
226              
227 47         56 # dob is matched only if it's included in lookup args
  47         338  
228 1231         1856 return _possible_match($entry->{source}, \%$matched_args) unless defined $date_of_birth;
229 1231 100       1427  
230             # 1- Some entries in sanction list can have more than one date of birth
231 50         108 # 2- first epoch is compared, then year
232 54         104 my $client_dob_date = Date::Utility->new($date_of_birth);
233 54 100       99 $args->{dob_epoch} = $client_dob_date->epoch;
234 47         86 $args->{dob_year} = $client_dob_date->year;
235              
236             for my $dob_field (qw/dob_epoch dob_year/) {
237 47 100       132 $entry->{$dob_field} //= [];
238             my $checked_dob = any { $_ eq $args->{$dob_field} } $entry->{$dob_field}->@*;
239              
240             return _possible_match($entry->{source}, {%$matched_args, $dob_field => $args->{$dob_field}}) if $checked_dob;
241 30         123 }
242 30         2652  
243 30         621 # Saving names with dob_text for later check.
244             my $has_no_epoch_or_year = ($entry->{dob_epoch}->@* || $entry->{dob_year}->@*) ? 0 : 1;
245 30         613 my $has_dob_text = @{$entry->{dob_text} // []} ? 1 : 0;
246 57   100     160 if ($has_dob_text || $has_no_epoch_or_year) {
247 57     16   196 push @match_with_dob_text,
  27         52  
248             {
249 57 100       165 name => $sanctioned_name,
250             entry => $entry,
251             matched_args => $matched_args,
252             };
253 21 100 100     60 }
254 21 100 100     26 }
  21         86  
255 21 100 100     77 }
256 4         18  
257             # Return a possible match if the name matches and no date of birth is present in sanctions
258             for my $match (@match_with_dob_text) {
259             # We match only in case we have full match for the name
260             # in other case we may get to many false positive
261             my ($sanction_name, $client_name) = map { uc(s/[^[:alpha:]\s]//gr) } ($match->{name}, $client_full_name);
262              
263             next unless $sanction_name eq $client_name;
264              
265             my $dob_text = $match->{entry}->{dob_text} // [];
266              
267 21         59 my $comment;
268             if (@$dob_text) {
269             $comment = 'dob raw text: ' . join q{, } => @$dob_text;
270 4         11 }
  8         28  
271              
272 4 100       22 return _possible_match($match->{entry}->{source}, $match->{matched_args}, $comment);
273             }
274 3   100     14  
275             # Return if no possible match, regardless if date of birth is provided or not
276 3         7 return {matched => 0};
277 3 100       20 }
278 1         4  
279             my $self = shift;
280             my $sanction_file = $self->{sanction_file};
281 3         9 $self->{last_time} //= 0;
282             $self->{_data} //= {};
283             $self->{_sanctioned_name_tokens} //= {};
284             $self->{_token_sanctioned_names} //= {};
285 18         143  
286             if (-e $sanction_file) {
287             return $self->{_data} if stat($sanction_file)->mtime <= $self->{last_time} && $self->{_data};
288             $self->{last_time} = stat($sanction_file)->mtime;
289 51     49   71 $self->{_data} = LoadFile($sanction_file);
290 51         90 }
291 49   66     95 $self->_index_data();
292 49   100     107  
293 51   66     109 foreach my $sanctioned_name (keys $self->{_index}->%*) {
294 51   100     119 my @tokens = _clean_names($sanctioned_name);
295             $self->{_sanctioned_name_tokens}->{$sanctioned_name} = \@tokens;
296 51 100       693 foreach my $token (@tokens) {
297 49 50 66     185 $self->{_token_sanctioned_names}->{$token}->{$sanctioned_name} = 1;
298 9         1383 }
299 11         1026 }
300              
301 17         314874 return $self->{_data};
302             }
303 17         11439  
304 46212         56165 =head2 _index_data
305 46214         73906  
306 46214         52747 Indexes data by name. Each name may have multiple matching entries.
307 131021         273981  
308             =cut
309              
310             my $self = shift;
311 11         3493  
312             $self->{_index} = {};
313             for my $source (keys $self->{_data}->%*) {
314             my @content = ($self->{_data}->{$source}->{content} // [])->@*;
315             warn "Content is empty for the sanction source $source. The sanctions file should be updated." unless @content;
316             for my $entry (@content) {
317             $entry->{source} = $source;
318             for my $name ($entry->{names}->@*) {
319             $name = ucfirst($name);
320             my $entry_list = $self->{_index}->{$name} // [];
321 11     9   430 push @$entry_list, $entry;
322             $self->{_index}->{$name} = $entry_list;
323 11         420 }
324 11         109 }
325 17   100     2556 }
326 17 100       69 return;
327 17         39 }
328 31347         40719  
329 31347         41325 my $self = shift;
330 53148         59246  
331 53148   100     117612 my $sanction_file = $self->{sanction_file};
332 53148         63166 my $new_sanction_file = $sanction_file . ".tmp";
333 53148         95297  
334             DumpFile($new_sanction_file, $self->{_data});
335              
336             rename $new_sanction_file, $sanction_file or die "Can't rename $new_sanction_file to $sanction_file, please check it\n";
337 11         27 $self->{last_time} = stat($sanction_file)->mtime;
338             return;
339             }
340              
341 2     0   7 return $ENV{SANCTION_FILE} // File::ShareDir::dist_file('Data-Validate-Sanctions', 'sanctions.yml');
342             }
343 0         0  
344 0         0 my ($list, $matched_args, $comment) = @_;
345              
346 0         0 return +{
347             matched => 1,
348 0 50       0 list => $list,
349 0         0 matched_args => $matched_args,
350 0         0 comment => $comment,
351             };
352             }
353              
354 5   66 5   35 my ($full_name) = @_;
355              
356             # Remove non-alphabets
357             my @cleaned_full_name = split " ", uc($full_name =~ s/[^[:alpha:]\s]//gr);
358 31     31   61  
359             return @cleaned_full_name;
360             }
361 29         489  
362             my ($small_tokens_list, $bigger_tokens_list) = @_;
363              
364             my $name_matches_count = 0;
365              
366             foreach my $token (@$small_tokens_list) {
367             $name_matches_count++ if any { $_ eq $token } @$bigger_tokens_list;
368             }
369 46254     46254   59004  
370             my $small_tokens_size = min(scalar(@$small_tokens_list), scalar(@$bigger_tokens_list));
371              
372 46254         130019 # - If more than one word matches, return it as possible match
373             # - Some sanctioned individuals have only one name (ex. Hamza); this should be returned as well
374 46252         112871 return 1 if ($name_matches_count > 1) || ($name_matches_count == 1 && $small_tokens_size == 1);
375              
376             return 0;
377             }
378 137     137   172  
379             1;
380 137         145  
381             =encoding utf-8
382 137         166  
383 368 100   7785   746 =head1 NAME
  877         1260  
384              
385             Data::Validate::Sanctions - Validate a name against sanctions lists
386 128         205  
387             =head1 SYNOPSIS
388              
389             # as exported function
390 128 100 100     350 use Data::Validate::Sanctions qw/is_sanctioned get_sanction_file set_sanction_file/;
      100        
391             set_sanction_file('/var/storage/sanction.csv');
392 110         194  
393             my ($first_name, $last_name) = ("First", "Last Name");
394             print 'BAD' if is_sanctioned($first_name, $last_name);
395              
396             # as OO
397             use Data::Validate::Sanctions;
398              
399             #You can also set sanction_file in the new method.
400             my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv');
401             print 'BAD' if $validator->is_sanctioned("$last_name $first_name");
402              
403             =head1 DESCRIPTION
404              
405             Data::Validate::Sanctions is a simple validitor to validate a name against sanctions lists.
406              
407             The list is from:
408             - L<https://www.treasury.gov/ofac/downloads/sdn.csv>,
409             - L<https://www.treasury.gov/ofac/downloads/consolidated/cons_prim.csv>
410             - L<https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv>
411             - L<https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token>
412              
413             run F<update_sanctions_csv> to update the bundled csv.
414              
415             The path of list can be set by function L</set_sanction_file> or by method L</new>. If not set, then environment variable $ENV{SANCTION_FILE} will be checked, at last
416             the default file in this package will be used.
417              
418             =head1 METHODS
419              
420             =head2 is_sanctioned
421              
422             is_sanctioned($last_name, $first_name);
423             is_sanctioned($first_name, $last_name);
424             is_sanctioned("$last_name $first_name");
425              
426             when one string is passed, please be sure last_name is before first_name.
427              
428             or you can pass first_name, last_name (last_name, first_name), we'll check both "$last_name $first_name" and "$first_name $last_name".
429              
430             retrun 1 if match is found and 0 if match is not found.
431              
432             It will remove all non-alpha chars and compare with the list we have.
433              
434             =head2 get_sanctioned_info
435              
436             my $result =get_sanctioned_info($last_name, $first_name, $date_of_birth);
437             print 'match: ', $result->{matched_args}->{name}, ' on list ', $result->{list} if $result->{matched};
438              
439             return hashref with keys:
440             B<matched> 1 or 0, depends if name has matched
441             B<list> name of list matched (present only if matched)
442             B<matched_args> The list of arguments matched (name, date of birth, residence, etc.)
443              
444             It will remove all non-alpha chars and compare with the list we have.
445              
446             =head2 update_data
447              
448             Fetches latest versions of sanction lists, and updates corresponding sections of stored file, if needed
449              
450             =head2 last_updated
451              
452             Returns timestamp of when the latest list was updated.
453             If argument is provided - return timestamp of when that list was updated.
454              
455             =head2 new
456              
457             Create the object, and set sanction_file
458              
459             my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv');
460              
461             =head2 get_sanction_file
462              
463             get sanction_file which is used by L</is_sanctioned> (procedure-oriented)
464              
465             =head2 set_sanction_file
466              
467             set sanction_file which is used by L</is_sanctioned> (procedure-oriented)
468              
469             =head2 _name_matches
470              
471             Pass in the client's name and sanctioned individual's name to see if they are similar or not
472              
473             =head1 AUTHOR
474              
475             Binary.com E<lt>fayland@binary.comE<gt>
476              
477             =head1 COPYRIGHT
478              
479             Copyright 2014- Binary.com
480              
481             =head1 LICENSE
482              
483             This library is free software; you can redistribute it and/or modify
484             it under the same terms as Perl itself.
485              
486             =head1 SEE ALSO
487              
488             L<Data::OFAC>
489              
490             =cut