line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Validate::Sanctions; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
173178
|
use strict; |
|
7
|
|
|
|
|
46
|
|
|
7
|
|
|
|
|
216
|
|
4
|
7
|
|
|
7
|
|
36
|
use warnings; |
|
7
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
394
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
require Exporter; |
7
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
8
|
|
|
|
|
|
|
our @EXPORT_OK = qw/is_sanctioned set_sanction_file get_sanction_file/; |
9
|
|
|
|
|
|
|
|
10
|
7
|
|
|
7
|
|
38
|
use Carp; |
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
499
|
|
11
|
7
|
|
|
7
|
|
3418
|
use Data::Validate::Sanctions::Fetcher; |
|
7
|
|
|
|
|
25
|
|
|
7
|
|
|
|
|
374
|
|
12
|
7
|
|
|
7
|
|
2377
|
use Data::Validate::Sanctions::Redis; |
|
7
|
|
|
|
|
24
|
|
|
7
|
|
|
|
|
227
|
|
13
|
7
|
|
|
7
|
|
67
|
use File::stat; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
74
|
|
14
|
7
|
|
|
7
|
|
504
|
use File::ShareDir; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
260
|
|
15
|
7
|
|
|
7
|
|
861
|
use YAML::XS qw/DumpFile LoadFile/; |
|
7
|
|
|
|
|
6236
|
|
|
7
|
|
|
|
|
358
|
|
16
|
7
|
|
|
7
|
|
50
|
use Scalar::Util qw(blessed); |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
275
|
|
17
|
7
|
|
|
7
|
|
52
|
use Date::Utility; |
|
7
|
|
|
|
|
16
|
|
|
7
|
|
|
|
|
164
|
|
18
|
7
|
|
|
7
|
|
3548
|
use Data::Compare; |
|
7
|
|
|
|
|
82213
|
|
|
7
|
|
|
|
|
59
|
|
19
|
7
|
|
|
7
|
|
26368
|
use List::Util qw(any uniq max min); |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
500
|
|
20
|
7
|
|
|
7
|
|
52
|
use Locale::Country; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
737
|
|
21
|
7
|
|
|
7
|
|
47
|
use Text::Trim qw(trim); |
|
7
|
|
|
|
|
16
|
|
|
7
|
|
|
|
|
352
|
|
22
|
7
|
|
|
7
|
|
52
|
use Clone qw(clone); |
|
7
|
|
|
|
|
23
|
|
|
7
|
|
|
|
|
16352
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $VERSION = '0.15'; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $sanction_file; |
27
|
|
|
|
|
|
|
my $instance; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# for OO |
30
|
|
|
|
|
|
|
sub new { ## no critic (RequireArgUnpacking) |
31
|
5
|
|
|
5
|
1
|
25
|
my ($class, %args) = @_; |
32
|
|
|
|
|
|
|
|
33
|
5
|
|
50
|
|
|
33
|
my $storage = delete $args{storage} // ''; |
34
|
|
|
|
|
|
|
|
35
|
5
|
50
|
|
|
|
22
|
return Data::Validate::Sanctions::Redis->new(%args) if $storage eq 'redis'; |
36
|
|
|
|
|
|
|
|
37
|
5
|
|
|
|
|
14
|
my $self = {}; |
38
|
|
|
|
|
|
|
|
39
|
5
|
|
33
|
|
|
21
|
$self->{sanction_file} = $args{sanction_file} // _default_sanction_file(); |
40
|
|
|
|
|
|
|
|
41
|
5
|
|
|
|
|
22
|
$self->{args} = {%args}; |
42
|
|
|
|
|
|
|
|
43
|
5
|
|
|
|
|
14
|
$self->{last_time} = 0; |
44
|
5
|
|
33
|
|
|
33
|
return bless $self, ref($class) || $class; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub update_data { |
48
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
49
|
|
|
|
|
|
|
|
50
|
0
|
|
|
|
|
0
|
$self->_load_data(); |
51
|
|
|
|
|
|
|
|
52
|
0
|
|
|
|
|
0
|
my $new_data = Data::Validate::Sanctions::Fetcher::run($self->{args}->%*, %args); |
53
|
0
|
|
|
|
|
0
|
my $updated = 0; |
54
|
0
|
|
|
|
|
0
|
foreach my $k (keys %$new_data) { |
55
|
0
|
|
0
|
|
|
0
|
$self->{_data}->{$k} //= {}; |
56
|
0
|
|
0
|
|
|
0
|
$self->{_data}->{$k}->{updated} //= 0; |
57
|
0
|
|
0
|
|
|
0
|
$self->{_data}->{$k}->{content} //= []; |
58
|
|
|
|
|
|
|
|
59
|
0
|
0
|
0
|
|
|
0
|
if (!$new_data->{$k}->{error} && $self->{_data}->{$k}->{error}) { |
60
|
0
|
|
|
|
|
0
|
delete $self->{_data}->{$k}->{error}; |
61
|
0
|
|
|
|
|
0
|
$updated = 1; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
0
|
0
|
0
|
|
|
0
|
if ($new_data->{$k}->{error}) { |
|
|
0
|
|
|
|
|
|
65
|
0
|
|
|
|
|
0
|
warn "$k list update failed because: $new_data->{$k}->{error}"; |
66
|
0
|
|
|
|
|
0
|
$self->{_data}->{$k}->{error} = $new_data->{$k}->{error}; |
67
|
0
|
|
|
|
|
0
|
$updated = 1; |
68
|
|
|
|
|
|
|
} elsif ($self->{_data}{$k}->{updated} != $new_data->{$k}->{updated} |
69
|
|
|
|
|
|
|
|| scalar $self->{_data}{$k}->{content}->@* != scalar $new_data->{$k}->{content}->@*) |
70
|
|
|
|
|
|
|
{ |
71
|
0
|
0
|
|
|
|
0
|
print "Source $k is updated with new data \n" if $args{verbose}; |
72
|
0
|
|
|
|
|
0
|
$self->{_data}->{$k} = $new_data->{$k}; |
73
|
0
|
|
|
|
|
0
|
$updated = 1; |
74
|
|
|
|
|
|
|
} else { |
75
|
0
|
0
|
|
|
|
0
|
print "Source $k is not changed \n" if $args{verbose}; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
0
|
0
|
|
|
|
0
|
if ($updated) { |
80
|
0
|
|
|
|
|
0
|
$self->_save_data(); |
81
|
0
|
|
|
|
|
0
|
$self->_index_data(); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
0
|
|
|
|
|
0
|
return; |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
sub last_updated { |
88
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
89
|
0
|
|
|
|
|
0
|
my $list = shift; |
90
|
|
|
|
|
|
|
|
91
|
0
|
0
|
|
|
|
0
|
if ($list) { |
92
|
0
|
|
|
|
|
0
|
return $self->{_data}->{$list}->{updated}; |
93
|
|
|
|
|
|
|
} else { |
94
|
0
|
|
|
|
|
0
|
$self->_load_data(); |
95
|
0
|
|
0
|
|
|
0
|
return max(map { $_->{updated} // 0 } values %{$self->{_data}}); |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub set_sanction_file { ## no critic (RequireArgUnpacking) |
100
|
4
|
|
100
|
4
|
1
|
19446
|
$sanction_file = shift // die "sanction_file is needed"; |
101
|
3
|
|
|
|
|
159000
|
undef $instance; |
102
|
3
|
|
|
|
|
46
|
return; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub get_sanction_file { |
106
|
7
|
|
66
|
7
|
1
|
645
|
$sanction_file //= _default_sanction_file(); |
107
|
7
|
50
|
|
|
|
61
|
return $instance ? $instance->{sanction_file} : $sanction_file; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 is_sanctioned |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Checks if the input profile info matches a sanctioned entity. |
113
|
|
|
|
|
|
|
The arguments are the same as those of B<get_sanctioned_info>. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
It returns 1 if a match is found, otherwise 0. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub is_sanctioned { ## no critic (RequireArgUnpacking) |
120
|
14
|
|
|
14
|
1
|
5447
|
return (get_sanctioned_info(@_))->{matched}; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub data { |
124
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
125
|
|
|
|
|
|
|
|
126
|
0
|
0
|
|
|
|
0
|
$self->_load_data() unless $self->{_data}; |
127
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
0
|
return $self->{_data}; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 _match_other_fields |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Matches fields possibly available in addition to name and date of birth. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Returns a a hash-ref reporting the matched fields if it succeeeds; otherwise returns false (undef). |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=cut |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub _match_other_fields { |
140
|
18
|
|
|
18
|
|
40
|
my ($self, $entry, $args) = @_; |
141
|
|
|
|
|
|
|
|
142
|
18
|
|
|
|
|
51
|
my @optional_fields = qw/place_of_birth residence nationality citizen postal_code national_id passport_no/; |
143
|
|
|
|
|
|
|
|
144
|
18
|
|
|
|
|
32
|
my $matched_args = {}; |
145
|
18
|
|
|
|
|
35
|
for my $field (@optional_fields) { |
146
|
126
|
0
|
33
|
|
|
246
|
next unless ($args->{$field} && $entry->{$field} && $entry->{$field}->@*); |
|
|
|
0
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
0
|
0
|
|
0
|
|
0
|
return undef unless any { $args->{$field} eq $_ } $entry->{$field}->@*; |
|
0
|
|
|
|
|
0
|
|
149
|
0
|
|
|
|
|
0
|
$matched_args->{$field} = $args->{$field}; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
18
|
|
|
|
|
38
|
return $matched_args; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 get_sanctioned_info |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Tries to find a match a sanction entry matching the input profile args. |
158
|
|
|
|
|
|
|
It takes arguments in two forms. In the new API, it takes a hashref containing the following named arguments: |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=over 4 |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item * first_name: first name |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item * last_name: last name |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item * date_of_birth: (optional) date of birth as a string or epoch |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item * place_of_birth: (optional) place of birth as a country name or code |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=item * residence: (optional) name or code of the country of residence |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=item * nationality: (optional) name or code of the country of nationality |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item * citizen: (optional) name or code of the country of citizenship |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item * postal_code: (optional) postal/zip code |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item * national_id: (optional) national ID number |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item * passport_no: (oiptonal) passort number |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=back |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
For backward compatibility it also supports the old API, taking the following args: |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=over 4 |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item * first_name: first name |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item * last_name: last name |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=item * date_of_birth: (optional) date of birth as a string or epoch |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=back |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
It returns a hash-ref containg the following data: |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=over 4 |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=item - matched: 1 if a match was found; 0 otherwise |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item - list: the source for the matched entry, |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=item - matched_args: a name-value hash-ref of the similar arguments, |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item - comment: additional comments if necessary, |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=back |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub get_sanctioned_info { ## no critic (RequireArgUnpacking) |
213
|
14
|
50
|
|
14
|
1
|
62
|
my $self = blessed($_[0]) ? shift : $instance; |
214
|
14
|
100
|
|
|
|
48
|
unless ($self) { |
215
|
5
|
|
|
|
|
18
|
$instance = __PACKAGE__->new(sanction_file => get_sanction_file()); |
216
|
5
|
|
|
|
|
13
|
$self = $instance; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# It's the old interface |
220
|
14
|
|
|
|
|
33
|
my ($first_name, $last_name, $date_of_birth) = @_; |
221
|
14
|
|
|
|
|
32
|
my $args = {}; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# in the new interface we accept fields in a hashref |
224
|
14
|
50
|
|
|
|
48
|
if (ref $_[0] eq 'HASH') { |
225
|
0
|
|
|
|
|
0
|
($args) = @_; |
226
|
0
|
|
|
|
|
0
|
($first_name, $last_name, $date_of_birth) = $args->@{qw/first_name last_name date_of_birth/}; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# convert country names to iso codes |
230
|
14
|
|
|
|
|
35
|
for my $field (qw/place_of_birth residence nationality citizen/) { |
231
|
56
|
|
|
|
|
87
|
my $value = $args->{$field}; |
232
|
56
|
50
|
|
|
|
113
|
next unless $value; |
233
|
|
|
|
|
|
|
|
234
|
0
|
|
|
|
|
0
|
$args->{$field} = Data::Validate::Sanctions::Fetcher::get_country_code($value); |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
14
|
|
|
|
|
44
|
$self->_load_data(); |
238
|
|
|
|
|
|
|
|
239
|
14
|
|
66
|
|
|
1506
|
my $client_full_name = join(' ', $first_name, $last_name || ()); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Split into tokens after cleaning |
242
|
14
|
|
|
|
|
36
|
my @client_name_tokens = _clean_names($client_full_name); |
243
|
|
|
|
|
|
|
|
244
|
14
|
|
|
|
|
21
|
my @match_with_dob_text; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# only pick the sanctioned names which have common token with the client tokens |
247
|
|
|
|
|
|
|
# and deduplicate the list |
248
|
14
|
|
|
|
|
29
|
my $filtered_sanctioned_names = {}; |
249
|
14
|
|
|
|
|
31
|
foreach my $token (@client_name_tokens) { |
250
|
29
|
|
|
|
|
42
|
foreach my $name (keys %{$self->{_token_sanctioned_names}->{$token}}) { |
|
29
|
|
|
|
|
171
|
|
251
|
313
|
|
|
|
|
575
|
$filtered_sanctioned_names->{$name} = 1; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
14
|
|
|
|
|
26
|
foreach my $sanctioned_name (keys %{$filtered_sanctioned_names}) { |
|
14
|
|
|
|
|
59
|
|
256
|
144
|
|
|
|
|
335
|
my $sanctioned_name_tokens = $self->{_sanctioned_name_tokens}->{$sanctioned_name}; |
257
|
144
|
100
|
|
|
|
262
|
next unless _name_matches(\@client_name_tokens, $sanctioned_name_tokens); |
258
|
|
|
|
|
|
|
|
259
|
15
|
|
|
|
|
44
|
for my $entry ($self->{_index}->{$sanctioned_name}->@*) { |
260
|
18
|
|
|
|
|
52
|
my $matched_args = $self->_match_other_fields($entry, $args); |
261
|
18
|
50
|
|
|
|
41
|
next unless $matched_args; |
262
|
18
|
|
|
|
|
40
|
$matched_args->{name} = $sanctioned_name; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# dob is matched only if it's included in lookup args |
265
|
18
|
100
|
|
|
|
51
|
return _possible_match($entry->{source}, \%$matched_args) unless defined $date_of_birth; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# 1- Some entries in sanction list can have more than one date of birth |
268
|
|
|
|
|
|
|
# 2- first epoch is compared, then year |
269
|
13
|
|
|
|
|
54
|
my $client_dob_date = Date::Utility->new($date_of_birth); |
270
|
13
|
|
|
|
|
999
|
$args->{dob_epoch} = $client_dob_date->epoch; |
271
|
13
|
|
|
|
|
358
|
$args->{dob_year} = $client_dob_date->year; |
272
|
|
|
|
|
|
|
|
273
|
13
|
|
|
|
|
360
|
for my $dob_field (qw/dob_epoch dob_year/) { |
274
|
25
|
|
100
|
|
|
94
|
$entry->{$dob_field} //= []; |
275
|
25
|
|
|
13
|
|
117
|
my $checked_dob = any { $_ eq $args->{$dob_field} } $entry->{$dob_field}->@*; |
|
13
|
|
|
|
|
31
|
|
276
|
|
|
|
|
|
|
|
277
|
25
|
100
|
|
|
|
91
|
return _possible_match($entry->{source}, {%$matched_args, $dob_field => $args->{$dob_field}}) if $checked_dob; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# Saving names with dob_text for later check. |
281
|
11
|
100
|
100
|
|
|
45
|
my $has_no_epoch_or_year = ($entry->{dob_epoch}->@* || $entry->{dob_year}->@*) ? 0 : 1; |
282
|
11
|
50
|
50
|
|
|
15
|
my $has_dob_text = @{$entry->{dob_text} // []} ? 1 : 0; |
|
11
|
|
|
|
|
46
|
|
283
|
11
|
100
|
66
|
|
|
52
|
if ($has_dob_text || $has_no_epoch_or_year) { |
284
|
1
|
|
|
|
|
6
|
push @match_with_dob_text, |
285
|
|
|
|
|
|
|
{ |
286
|
|
|
|
|
|
|
name => $sanctioned_name, |
287
|
|
|
|
|
|
|
entry => $entry, |
288
|
|
|
|
|
|
|
matched_args => $matched_args, |
289
|
|
|
|
|
|
|
}; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Return a possible match if the name matches and no date of birth is present in sanctions |
295
|
7
|
|
|
|
|
22
|
for my $match (@match_with_dob_text) { |
296
|
|
|
|
|
|
|
# We match only in case we have full match for the name |
297
|
|
|
|
|
|
|
# in other case we may get to many false positive |
298
|
1
|
|
|
|
|
4
|
my ($sanction_name, $client_name) = map { uc(s/[^[:alpha:]\s]//gr) } ($match->{name}, $client_full_name); |
|
2
|
|
|
|
|
8
|
|
299
|
|
|
|
|
|
|
|
300
|
1
|
50
|
|
|
|
5
|
next unless $sanction_name eq $client_name; |
301
|
|
|
|
|
|
|
|
302
|
1
|
|
50
|
|
|
6
|
my $dob_text = $match->{entry}->{dob_text} // []; |
303
|
|
|
|
|
|
|
|
304
|
1
|
|
|
|
|
2
|
my $comment; |
305
|
1
|
50
|
|
|
|
5
|
if (@$dob_text) { |
306
|
0
|
|
|
|
|
0
|
$comment = 'dob raw text: ' . join q{, } => @$dob_text; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
1
|
|
|
|
|
4
|
return _possible_match($match->{entry}->{source}, $match->{matched_args}, $comment); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# Return if no possible match, regardless if date of birth is provided or not |
313
|
6
|
|
|
|
|
56
|
return {matched => 0}; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub _load_data { |
317
|
14
|
|
|
14
|
|
28
|
my $self = shift; |
318
|
14
|
|
|
|
|
40
|
my $sanction_file = $self->{sanction_file}; |
319
|
14
|
|
50
|
|
|
53
|
$self->{last_time} //= 0; |
320
|
14
|
|
100
|
|
|
55
|
$self->{_data} //= {}; |
321
|
14
|
|
100
|
|
|
47
|
$self->{_sanctioned_name_tokens} //= {}; |
322
|
14
|
|
100
|
|
|
48
|
$self->{_token_sanctioned_names} //= {}; |
323
|
|
|
|
|
|
|
|
324
|
14
|
50
|
|
|
|
297
|
if (-e $sanction_file) { |
325
|
14
|
50
|
66
|
|
|
86
|
return $self->{_data} if stat($sanction_file)->mtime <= $self->{last_time} && $self->{_data}; |
326
|
5
|
|
|
|
|
1055
|
$self->{last_time} = stat($sanction_file)->mtime; |
327
|
5
|
|
|
|
|
785
|
$self->{_data} = LoadFile($sanction_file); |
328
|
|
|
|
|
|
|
} |
329
|
5
|
|
|
|
|
216716
|
$self->_index_data(); |
330
|
|
|
|
|
|
|
|
331
|
5
|
|
|
|
|
9393
|
foreach my $sanctioned_name (keys $self->{_index}->%*) { |
332
|
23102
|
|
|
|
|
37907
|
my @tokens = _clean_names($sanctioned_name); |
333
|
23102
|
|
|
|
|
56622
|
$self->{_sanctioned_name_tokens}->{$sanctioned_name} = \@tokens; |
334
|
23102
|
|
|
|
|
36274
|
foreach my $token (@tokens) { |
335
|
65499
|
|
|
|
|
198534
|
$self->{_token_sanctioned_names}->{$token}->{$sanctioned_name} = 1; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
5
|
|
|
|
|
2270
|
return $self->{_data}; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head2 _index_data |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
Indexes data by name. Each name may have multiple matching entries. |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
=cut |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
sub _index_data { |
349
|
5
|
|
|
5
|
|
273
|
my $self = shift; |
350
|
|
|
|
|
|
|
|
351
|
5
|
|
50
|
|
|
319
|
$self->{_data} //= {}; |
352
|
5
|
|
|
|
|
18
|
$self->{_index} = {}; |
353
|
5
|
|
|
|
|
34
|
for my $source (keys $self->{_data}->%*) { |
354
|
8
|
|
100
|
|
|
362600
|
my @content = clone($self->{_data}->{$source}->{content} // [])->@*; |
355
|
|
|
|
|
|
|
|
356
|
8
|
|
|
|
|
4251
|
for my $entry (@content) { |
357
|
15668
|
|
|
|
|
31463
|
$entry->{source} = $source; |
358
|
15668
|
|
|
|
|
31840
|
for my $name ($entry->{names}->@*) { |
359
|
26569
|
|
|
|
|
43894
|
$name = ucfirst($name); |
360
|
26569
|
|
100
|
|
|
81848
|
my $entry_list = $self->{_index}->{$name} // []; |
361
|
26569
|
|
|
|
|
44885
|
push @$entry_list, $entry; |
362
|
26569
|
|
|
|
|
67531
|
$self->{_index}->{$name} = $entry_list; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
} |
366
|
5
|
|
|
|
|
19
|
return; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
sub _save_data { |
370
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
371
|
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
0
|
my $sanction_file = $self->{sanction_file}; |
373
|
0
|
|
|
|
|
0
|
my $new_sanction_file = $sanction_file . ".tmp"; |
374
|
|
|
|
|
|
|
|
375
|
0
|
|
|
|
|
0
|
DumpFile($new_sanction_file, $self->{_data}); |
376
|
|
|
|
|
|
|
|
377
|
0
|
0
|
|
|
|
0
|
rename $new_sanction_file, $sanction_file or die "Can't rename $new_sanction_file to $sanction_file, please check it\n"; |
378
|
0
|
|
|
|
|
0
|
$self->{last_time} = stat($sanction_file)->mtime; |
379
|
0
|
|
|
|
|
0
|
return; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
sub _default_sanction_file { |
383
|
2
|
|
33
|
2
|
|
15
|
return $ENV{SANCTION_FILE} // File::ShareDir::dist_file('Data-Validate-Sanctions', 'sanctions.yml'); |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
sub _possible_match { |
387
|
8
|
|
|
8
|
|
21
|
my ($list, $matched_args, $comment) = @_; |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
return +{ |
390
|
8
|
|
|
|
|
120
|
matched => 1, |
391
|
|
|
|
|
|
|
list => $list, |
392
|
|
|
|
|
|
|
matched_args => $matched_args, |
393
|
|
|
|
|
|
|
comment => $comment, |
394
|
|
|
|
|
|
|
}; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub _clean_names { |
398
|
23116
|
|
|
23116
|
|
40936
|
my ($full_name) = @_; |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# Remove non-alphabets |
401
|
23116
|
|
|
|
|
90287
|
my @cleaned_full_name = split " ", uc($full_name =~ s/[^[:alpha:]\s]//gr); |
402
|
|
|
|
|
|
|
|
403
|
23116
|
|
|
|
|
77235
|
return @cleaned_full_name; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub _name_matches { |
407
|
144
|
|
|
144
|
|
233
|
my ($small_tokens_list, $bigger_tokens_list) = @_; |
408
|
|
|
|
|
|
|
|
409
|
144
|
|
|
|
|
199
|
my $name_matches_count = 0; |
410
|
|
|
|
|
|
|
|
411
|
144
|
|
|
|
|
221
|
foreach my $token (@$small_tokens_list) { |
412
|
421
|
100
|
|
1006
|
|
1106
|
$name_matches_count++ if any { $_ eq $token } @$bigger_tokens_list; |
|
1006
|
|
|
|
|
1888
|
|
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
144
|
|
|
|
|
271
|
my $small_tokens_size = min(scalar(@$small_tokens_list), scalar(@$bigger_tokens_list)); |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# - If more than one word matches, return it as possible match |
418
|
|
|
|
|
|
|
# - Some sanctioned individuals have only one name (ex. Hamza); this should be returned as well |
419
|
144
|
100
|
66
|
|
|
537
|
return 1 if ($name_matches_count > 1) || ($name_matches_count == 1 && $small_tokens_size == 1); |
|
|
|
100
|
|
|
|
|
420
|
|
|
|
|
|
|
|
421
|
129
|
|
|
|
|
304
|
return 0; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub export_data { |
425
|
0
|
|
|
0
|
1
|
|
my ($self, $path) = @_; |
426
|
|
|
|
|
|
|
|
427
|
0
|
|
|
|
|
|
return DumpFile($path, $self->{_data}); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
1; |
431
|
|
|
|
|
|
|
__END__ |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=encoding utf-8 |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=head1 NAME |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Data::Validate::Sanctions - Validate a name against sanctions lists |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=head1 SYNOPSIS |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
# as exported function |
442
|
|
|
|
|
|
|
use Data::Validate::Sanctions qw/is_sanctioned get_sanction_file set_sanction_file/; |
443
|
|
|
|
|
|
|
set_sanction_file('/var/storage/sanction.csv'); |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
my ($first_name, $last_name) = ("First", "Last Name"); |
446
|
|
|
|
|
|
|
print 'BAD' if is_sanctioned($first_name, $last_name); |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
# as OO |
449
|
|
|
|
|
|
|
use Data::Validate::Sanctions; |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
#You can also set sanction_file in the new method. |
452
|
|
|
|
|
|
|
my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv'); |
453
|
|
|
|
|
|
|
print 'BAD' if $validator->is_sanctioned("$last_name $first_name"); |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head1 DESCRIPTION |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Data::Validate::Sanctions is a simple validitor to validate a name against sanctions lists. |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
The list is from: |
460
|
|
|
|
|
|
|
- L<https://www.treasury.gov/ofac/downloads/sdn.csv>, |
461
|
|
|
|
|
|
|
- L<https://www.treasury.gov/ofac/downloads/consolidated/cons_prim.csv> |
462
|
|
|
|
|
|
|
- L<https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv> |
463
|
|
|
|
|
|
|
- L<https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token> |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
run F<update_sanctions_csv> to update the bundled csv. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
The path of list can be set by function L</set_sanction_file> or by method L</new>. If not set, then environment variable $ENV{SANCTION_FILE} will be checked, at last |
468
|
|
|
|
|
|
|
the default file in this package will be used. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=head1 METHODS |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 is_sanctioned |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
is_sanctioned($last_name, $first_name); |
475
|
|
|
|
|
|
|
is_sanctioned($first_name, $last_name); |
476
|
|
|
|
|
|
|
is_sanctioned("$last_name $first_name"); |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
when one string is passed, please be sure last_name is before first_name. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
or you can pass first_name, last_name (last_name, first_name), we'll check both "$last_name $first_name" and "$first_name $last_name". |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
retrun 1 if match is found and 0 if match is not found. |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
It will remove all non-alpha chars and compare with the list we have. |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=head2 get_sanctioned_info |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
my $result =get_sanctioned_info($last_name, $first_name, $date_of_birth); |
489
|
|
|
|
|
|
|
print 'match: ', $result->{matched_args}->{name}, ' on list ', $result->{list} if $result->{matched}; |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
return hashref with keys: |
492
|
|
|
|
|
|
|
B<matched> 1 or 0, depends if name has matched |
493
|
|
|
|
|
|
|
B<list> name of list matched (present only if matched) |
494
|
|
|
|
|
|
|
B<matched_args> The list of arguments matched (name, date of birth, residence, etc.) |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
It will remove all non-alpha chars and compare with the list we have. |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 update_data |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Fetches latest versions of sanction lists, and updates corresponding sections of stored file, if needed |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head2 last_updated |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Returns timestamp of when the latest list was updated. |
505
|
|
|
|
|
|
|
If argument is provided - return timestamp of when that list was updated. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
=head2 new |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
Create the object, and set sanction_file |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv'); |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=head2 get_sanction_file |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
get sanction_file which is used by L</is_sanctioned> (procedure-oriented) |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head2 set_sanction_file |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
set sanction_file which is used by L</is_sanctioned> (procedure-oriented) |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head2 _name_matches |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
Pass in the client's name and sanctioned individual's name to see if they are similar or not |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head2 export_data |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Exports the sanction lists to a local file in YAML format. |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 data |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Gets the sanction list content with lazy loading. |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head1 AUTHOR |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Binary.com E<lt>fayland@binary.comE<gt> |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head1 COPYRIGHT |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Copyright 2014- Binary.com |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
=head1 LICENSE |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
545
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=head1 SEE ALSO |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
L<Data::OFAC> |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=cut |