| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Data::Validate::Sanctions::Fetcher; |
|
2
|
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
588
|
use strict; |
|
|
8
|
|
|
|
|
24
|
|
|
|
8
|
|
|
|
|
224
|
|
|
4
|
8
|
|
|
8
|
|
40
|
use warnings; |
|
|
8
|
|
|
|
|
23
|
|
|
|
8
|
|
|
|
|
203
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
8
|
|
|
8
|
|
4773
|
use DateTime::Format::Strptime; |
|
|
8
|
|
|
|
|
6046199
|
|
|
|
8
|
|
|
|
|
42
|
|
|
7
|
8
|
|
|
8
|
|
6325
|
use Date::Utility; |
|
|
8
|
|
|
|
|
5960908
|
|
|
|
8
|
|
|
|
|
452
|
|
|
8
|
8
|
|
|
8
|
|
7158
|
use IO::Uncompress::Unzip qw(unzip $UnzipError); |
|
|
8
|
|
|
|
|
445011
|
|
|
|
8
|
|
|
|
|
1029
|
|
|
9
|
8
|
|
|
8
|
|
82
|
use List::Util qw(uniq any); |
|
|
8
|
|
|
|
|
24
|
|
|
|
8
|
|
|
|
|
570
|
|
|
10
|
8
|
|
|
8
|
|
4717
|
use Mojo::UserAgent; |
|
|
8
|
|
|
|
|
2918587
|
|
|
|
8
|
|
|
|
|
93
|
|
|
11
|
8
|
|
|
8
|
|
6479
|
use Text::CSV; |
|
|
8
|
|
|
|
|
123947
|
|
|
|
8
|
|
|
|
|
405
|
|
|
12
|
8
|
|
|
8
|
|
3389
|
use Text::Trim qw(trim); |
|
|
8
|
|
|
|
|
4239
|
|
|
|
8
|
|
|
|
|
559
|
|
|
13
|
8
|
|
|
8
|
|
76
|
use Syntax::Keyword::Try; |
|
|
8
|
|
|
|
|
21
|
|
|
|
8
|
|
|
|
|
98
|
|
|
14
|
8
|
|
|
8
|
|
4228
|
use XML::Fast; |
|
|
8
|
|
|
|
|
7518
|
|
|
|
8
|
|
|
|
|
463
|
|
|
15
|
8
|
|
|
8
|
|
3436
|
use Locale::Country; |
|
|
8
|
|
|
|
|
351984
|
|
|
|
8
|
|
|
|
|
30805
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $VERSION = '0.16'; # VERSION |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head2 config |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Creastes a hash-ref of sanction source configuration, including their url, description and parser callback. |
|
22
|
|
|
|
|
|
|
It accepts the following list of named args: |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=over 4 |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=item B<-eu_token>: required if B<eu_url> is empty |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
The token required for accessing EU sanctions (usually added as an arg to URL). |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=item <eu_url>: required if B<eu_token> is empty |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
EU Sanctions full url, token included. |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=item B<ofac_sdn_url>: optional |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
OFAC-SDN download url. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=item B<ofac_consolidated_url>: optional |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
OFAC Consilidated download url. |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=item B<hmt_url>: optional |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
MHT Sanctions download url. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=back |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub config { |
|
51
|
8
|
|
|
8
|
1
|
28
|
my %args = @_; |
|
52
|
|
|
|
|
|
|
|
|
53
|
8
|
|
66
|
|
|
50
|
my $eu_token = $args{eu_token} // $ENV{EU_SANCTIONS_TOKEN}; |
|
54
|
8
|
|
66
|
|
|
41
|
my $eu_url = $args{eu_url} || $ENV{EU_SANCTIONS_URL}; |
|
55
|
|
|
|
|
|
|
|
|
56
|
8
|
100
|
100
|
|
|
48
|
warn 'EU Sanctions will fail whithout eu_token or eu_url' unless $eu_token or $eu_url; |
|
57
|
|
|
|
|
|
|
|
|
58
|
8
|
100
|
|
|
|
94
|
if ($eu_token) { |
|
59
|
2
|
|
66
|
|
|
11
|
$eu_url ||= "https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token"; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
return { |
|
63
|
|
|
|
|
|
|
'OFAC-SDN' => { |
|
64
|
|
|
|
|
|
|
description => 'TREASURY.GOV: Specially Designated Nationals List with a.k.a included', |
|
65
|
|
|
|
|
|
|
url => $args{ofac_sdn_url} |
|
66
|
|
|
|
|
|
|
|| 'https://www.treasury.gov/ofac/downloads/sdn_xml.zip', #let's be polite and use zippped version of this 7mb+ file |
|
67
|
|
|
|
|
|
|
parser => \&_ofac_xml_zip, |
|
68
|
|
|
|
|
|
|
}, |
|
69
|
|
|
|
|
|
|
'OFAC-Consolidated' => { |
|
70
|
|
|
|
|
|
|
description => 'TREASURY.GOV: Consolidated Sanctions List Data Files', |
|
71
|
|
|
|
|
|
|
url => $args{ofac_consolidated_url} || 'https://www.treasury.gov/ofac/downloads/consolidated/consolidated.xml', |
|
72
|
|
|
|
|
|
|
parser => \&_ofac_xml, |
|
73
|
|
|
|
|
|
|
}, |
|
74
|
|
|
|
|
|
|
'HMT-Sanctions' => { |
|
75
|
|
|
|
|
|
|
description => 'GOV.UK: Financial sanctions: consolidated list of targets', |
|
76
|
8
|
|
100
|
|
|
156
|
url => $args{hmt_url} || 'https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv', |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
77
|
|
|
|
|
|
|
parser => \&_hmt_csv, |
|
78
|
|
|
|
|
|
|
}, |
|
79
|
|
|
|
|
|
|
'EU-Sanctions' => { |
|
80
|
|
|
|
|
|
|
description => 'EUROPA.EU: Consolidated list of persons, groups and entities subject to EU financial sanctions', |
|
81
|
|
|
|
|
|
|
url => $eu_url, |
|
82
|
|
|
|
|
|
|
parser => \&_eu_xml, |
|
83
|
|
|
|
|
|
|
}, |
|
84
|
|
|
|
|
|
|
}; |
|
85
|
|
|
|
|
|
|
} |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# |
|
88
|
|
|
|
|
|
|
# Parsers - returns timestamp of last update and arrayref of names |
|
89
|
|
|
|
|
|
|
# |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub _process_name { |
|
92
|
13903
|
|
|
13903
|
|
39647
|
my $r = join ' ', @_; |
|
93
|
13903
|
|
|
|
|
74529
|
$r =~ s/^\s+|\s+$//g; |
|
94
|
13903
|
|
|
|
|
34050
|
return $r; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub _ofac_xml_zip { |
|
98
|
7
|
|
|
7
|
|
31
|
my $raw_data = shift; |
|
99
|
7
|
|
|
|
|
14
|
my $output; |
|
100
|
7
|
50
|
|
|
|
46
|
unzip \$raw_data => \$output or die "unzip failed: $UnzipError\n"; |
|
101
|
7
|
|
|
|
|
178269
|
return _ofac_xml($output); |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub _date_to_epoch { |
|
105
|
5444
|
|
|
5444
|
|
10650
|
my $date = shift; |
|
106
|
|
|
|
|
|
|
|
|
107
|
5444
|
100
|
|
|
|
19060
|
$date = "$3-$2-$1" if $date =~ m/^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{4})$/; |
|
108
|
|
|
|
|
|
|
|
|
109
|
5444
|
|
|
|
|
10545
|
my $result = eval { Date::Utility->new($date)->epoch; }; |
|
|
5444
|
|
|
|
|
21823
|
|
|
110
|
5444
|
|
|
|
|
4692362
|
return $result; |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head2 get_country_code |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
If the arg is a country code, it's returned in lower case; otherwise the arg is converted to country code. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub get_country_code { |
|
120
|
11680
|
|
|
11680
|
1
|
27994
|
my $value = trim shift; |
|
121
|
|
|
|
|
|
|
|
|
122
|
11680
|
100
|
100
|
|
|
218341
|
return lc(code2country($value) ? $value : country2code($value) // ''); |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head2 _process_sanction_entry |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
Processes an entry retrieved from sanction resources and saves it into the specified key-value dataset. |
|
128
|
|
|
|
|
|
|
An entry may have multilpe names (aliases), each of which will be taken as a key in the dataset with the same values/info. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
It takes following list of args: |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=over 4 |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item - dataset: A hash ref of form [ name => info ] in which the entry will be saved |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item - data: a hash of entry data that may contain: |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=over 4 |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=item * name: an array of names/aliases |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=item * date_of_birth: an array of dates of birth |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Dates of birth are not of standardized format in some data sources; so they are processed in three steps: |
|
145
|
|
|
|
|
|
|
1- as a first step it will be tried to converetd them into epoch, saved as B<dob_epoch>; |
|
146
|
|
|
|
|
|
|
2- otherwise to extract year (or an array of years) of birth, saved as B<dob_year>; and |
|
147
|
|
|
|
|
|
|
3- finally, to saved as raw text in B<dob_text>. |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item * place_of_birth: an array of country names or codes |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item * residence: an array of country names or codes |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item * nationality: an array of country names or codes |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item * citizen: an array of country names or codes |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item * postal_code: an array of postal/zip codes |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=item * national_id: an array of national ID numbers |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item * passport_no: an array of passort numbers |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=back |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=back |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=cut |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub _process_sanction_entry { |
|
170
|
6216
|
|
|
6216
|
|
36264
|
my ($dataset, %data) = @_; |
|
171
|
|
|
|
|
|
|
|
|
172
|
6216
|
|
|
|
|
15451
|
my @dob_list = $data{date_of_birth}->@*; |
|
173
|
6216
|
|
|
|
|
10735
|
my (@dob_epoch, @dob_year, @dob_text); |
|
174
|
|
|
|
|
|
|
|
|
175
|
6216
|
|
|
|
|
11932
|
for my $dob (@dob_list) { |
|
176
|
6874
|
|
|
|
|
19843
|
$dob = trim($dob); |
|
177
|
6874
|
50
|
|
|
|
131863
|
next unless $dob; |
|
178
|
|
|
|
|
|
|
|
|
179
|
6874
|
|
|
|
|
32448
|
$dob =~ s/[ \/]/-/g; |
|
180
|
|
|
|
|
|
|
#dobs with month = day = 0 are converted to year. |
|
181
|
6874
|
100
|
|
|
|
35832
|
if ($dob =~ m/^(\d{1,2})-(\d{1,2})-(\d{4})$/) { |
|
|
|
50
|
|
|
|
|
|
|
182
|
161
|
100
|
100
|
|
|
877
|
$dob = $3 if $1 == 0 or $2 == 0; |
|
183
|
|
|
|
|
|
|
} elsif ($dob =~ m/^(\d{4})-(\d0{1,2})-(\d{1,2})$/) { |
|
184
|
0
|
0
|
0
|
|
|
0
|
$dob = $1 if $2 == 0 or $3 == 0; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
6874
|
100
|
|
|
|
17827
|
$dob = $1 if $dob =~ m/^[A-Z][a-z]{2}-(\d{4})$/; |
|
187
|
|
|
|
|
|
|
|
|
188
|
6874
|
100
|
|
|
|
21428
|
if ($dob =~ m/^\d{4}$/) { |
|
|
|
100
|
|
|
|
|
|
|
189
|
1269
|
|
|
|
|
3344
|
push @dob_year, $dob; |
|
190
|
|
|
|
|
|
|
} elsif ($dob =~ m/(\d{4}).*to.*(\d{4})$/) { |
|
191
|
193
|
|
|
|
|
1261
|
push @dob_year, ($1 .. $2); |
|
192
|
|
|
|
|
|
|
} else { |
|
193
|
5412
|
|
|
|
|
11147
|
my $epoch = _date_to_epoch($dob); |
|
194
|
5412
|
100
|
|
|
|
20432
|
(defined $epoch) ? push(@dob_epoch, $epoch) : push(@dob_text, $dob); |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
} |
|
197
|
6216
|
|
|
|
|
13278
|
delete $data{date_of_birth}; |
|
198
|
6216
|
|
|
|
|
13487
|
$data{dob_epoch} = \@dob_epoch; |
|
199
|
6216
|
|
|
|
|
11811
|
$data{dob_year} = \@dob_year; |
|
200
|
6216
|
|
|
|
|
12813
|
$data{dob_text} = \@dob_text; |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# convert all country names to iso codes |
|
203
|
6216
|
|
|
|
|
12338
|
for my $field (qw/place_of_birth residence nationality citizen/) { |
|
204
|
24864
|
|
|
|
|
52280
|
$data{$field} = [map { get_country_code($_) } $data{$field}->@*]; |
|
|
11680
|
|
|
|
|
176598
|
|
|
205
|
24864
|
|
|
|
|
737079
|
$data{$field} = [grep { $_ } $data{$field}->@*]; |
|
|
11680
|
|
|
|
|
33831
|
|
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# remove commas |
|
209
|
6216
|
|
|
|
|
12483
|
$data{names} = [map { trim($_) =~ s/,//gr } $data{names}->@*]; |
|
|
13998
|
|
|
|
|
147909
|
|
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# make values unique |
|
212
|
6216
|
|
|
|
|
138358
|
%data = map { $_ => [uniq $data{$_}->@*] } keys %data; |
|
|
68215
|
|
|
|
|
221535
|
|
|
213
|
|
|
|
|
|
|
# remove empty values |
|
214
|
6216
|
|
|
|
|
25135
|
for (keys %data) { |
|
215
|
|
|
|
|
|
|
# dob = 0 is acceptable |
|
216
|
68215
|
100
|
|
|
|
127887
|
next if $_ eq 'dob_epoch'; |
|
217
|
|
|
|
|
|
|
|
|
218
|
61999
|
|
|
|
|
109745
|
$data{$_} = [grep { $_ } $data{$_}->@*]; |
|
|
28061
|
|
|
|
|
63490
|
|
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
# remove fields with empty list |
|
221
|
6216
|
|
|
|
|
18926
|
%data = %data{grep { $data{$_}->@* } keys %data}; |
|
|
68215
|
|
|
|
|
115350
|
|
|
222
|
|
|
|
|
|
|
|
|
223
|
6216
|
50
|
|
|
|
21701
|
push $dataset->@*, \%data if $data{names}; |
|
224
|
|
|
|
|
|
|
|
|
225
|
6216
|
|
|
|
|
32328
|
return $dataset; |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub _ofac_xml { |
|
229
|
14
|
|
|
14
|
|
17847
|
my $raw_data = shift; |
|
230
|
|
|
|
|
|
|
|
|
231
|
14
|
|
|
|
|
89
|
my $ref = xml2hash($raw_data, array => ['aka'])->{sdnList}; |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
my $publish_epoch = |
|
234
|
14
|
50
|
|
|
|
637045
|
$ref->{publshInformation}{Publish_Date} =~ m/(\d{1,2})\/(\d{1,2})\/(\d{4})/ |
|
235
|
|
|
|
|
|
|
? _date_to_epoch("$3-$1-$2") |
|
236
|
|
|
|
|
|
|
: undef; # publshInformation is a typo in ofac xml tags |
|
237
|
14
|
50
|
|
|
|
51
|
die "Corrupt data. Release date is invalid\n" unless defined $publish_epoch; |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
my $parse_list_node = sub { |
|
240
|
36162
|
|
|
36162
|
|
71800
|
my ($entry, $parent, $child, $attribute) = @_; |
|
241
|
|
|
|
|
|
|
|
|
242
|
36162
|
|
100
|
|
|
123017
|
my $node = $entry->{$parent}->{$child} // []; |
|
243
|
36162
|
100
|
|
|
|
83264
|
$node = [$node] if (ref $node eq 'HASH'); |
|
244
|
|
|
|
|
|
|
|
|
245
|
36162
|
|
66
|
|
|
66508
|
return map { $_->{$attribute} // () } @$node; |
|
|
24316
|
|
|
|
|
96362
|
|
|
246
|
14
|
|
|
|
|
86
|
}; |
|
247
|
|
|
|
|
|
|
|
|
248
|
14
|
|
|
|
|
34
|
my $dataset = []; |
|
249
|
|
|
|
|
|
|
|
|
250
|
14
|
|
|
|
|
34
|
foreach my $entry (@{$ref->{sdnEntry}}) { |
|
|
14
|
|
|
|
|
49
|
|
|
251
|
11961
|
100
|
|
|
|
46867
|
next unless $entry->{sdnType} eq 'Individual'; |
|
252
|
|
|
|
|
|
|
|
|
253
|
6027
|
|
|
|
|
9689
|
my @names; |
|
254
|
6027
|
|
100
|
|
|
10356
|
for ($entry, @{$entry->{akaList}{aka} // []}) { |
|
|
6027
|
|
|
|
|
30476
|
|
|
255
|
15565
|
|
100
|
|
|
53947
|
my $category = $_->{category} // 'strong'; |
|
256
|
15565
|
100
|
100
|
|
|
61174
|
push @names, _process_name($_->{firstName} // '', $_->{lastName} // '') if $category eq 'strong'; |
|
|
|
|
50
|
|
|
|
|
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# my @dob_list; |
|
260
|
|
|
|
|
|
|
# my $dobs = $entry->{dateOfBirthList}{dateOfBirthItem}; |
|
261
|
|
|
|
|
|
|
# # In one of the xml files, some of the clients have more than one date of birth |
|
262
|
|
|
|
|
|
|
# # Hence, $dob can be either an array or a hashref |
|
263
|
|
|
|
|
|
|
# foreach my $dob (map { $_->{dateOfBirth} || () } (ref($dobs) eq 'ARRAY' ? @$dobs : $dobs)) { |
|
264
|
|
|
|
|
|
|
# push @dob_list, $dob; |
|
265
|
|
|
|
|
|
|
# } |
|
266
|
6027
|
|
|
|
|
14767
|
my @dob_list = $parse_list_node->($entry, 'dateOfBirthList', 'dateOfBirthItem', 'dateOfBirth'); |
|
267
|
6027
|
|
|
|
|
12607
|
my @citizen = $parse_list_node->($entry, 'citizenshipList', 'citizenship', 'country'); |
|
268
|
6027
|
|
|
|
|
11054
|
my @residence = $parse_list_node->($entry, 'addressList', 'address', 'country'); |
|
269
|
6027
|
|
|
|
|
13157
|
my @postal_code = $parse_list_node->($entry, 'addressList', 'address', 'postalCode'); |
|
270
|
6027
|
|
|
|
|
11839
|
my @nationality = $parse_list_node->($entry, 'naationalityList', 'nationality', 'country'); |
|
271
|
|
|
|
|
|
|
|
|
272
|
6027
|
|
|
|
|
12263
|
my @place_of_birth = $parse_list_node->($entry, 'placeOfBirthList', 'placeOfBirthItem', 'placeOfBirth'); |
|
273
|
6027
|
|
|
|
|
11783
|
@place_of_birth = map { my @parts = split ',', $_; $parts[-1] } @place_of_birth; |
|
|
3708
|
|
|
|
|
15675
|
|
|
|
3708
|
|
|
|
|
11658
|
|
|
274
|
|
|
|
|
|
|
|
|
275
|
6027
|
|
100
|
|
|
26433
|
my $id_list = $entry->{idList}->{id} // []; |
|
276
|
6027
|
100
|
|
|
|
14678
|
$id_list = [$id_list] if ref $id_list eq 'HASH'; |
|
277
|
6027
|
100
|
|
|
|
12562
|
my @passport_no = map { $_->{idType} eq 'Passport' ? $_->{idNumber} : () } @$id_list; |
|
|
9249
|
|
|
|
|
30210
|
|
|
278
|
6027
|
100
|
|
|
|
10592
|
my @national_id = map { $_->{idType} =~ 'National ID' ? $_->{idNumber} : () } @$id_list; |
|
|
9249
|
|
|
|
|
22717
|
|
|
279
|
|
|
|
|
|
|
|
|
280
|
6027
|
|
|
|
|
17483
|
_process_sanction_entry( |
|
281
|
|
|
|
|
|
|
$dataset, |
|
282
|
|
|
|
|
|
|
names => \@names, |
|
283
|
|
|
|
|
|
|
date_of_birth => \@dob_list, |
|
284
|
|
|
|
|
|
|
place_of_birth => \@place_of_birth, |
|
285
|
|
|
|
|
|
|
residence => \@residence, |
|
286
|
|
|
|
|
|
|
nationality => \@nationality, |
|
287
|
|
|
|
|
|
|
citizen => \@citizen, |
|
288
|
|
|
|
|
|
|
postal_code => \@postal_code, |
|
289
|
|
|
|
|
|
|
national_id => \@national_id, |
|
290
|
|
|
|
|
|
|
passport_no => \@passport_no, |
|
291
|
|
|
|
|
|
|
); |
|
292
|
|
|
|
|
|
|
} |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
return { |
|
295
|
14
|
|
|
|
|
116722
|
updated => $publish_epoch, |
|
296
|
|
|
|
|
|
|
content => $dataset, |
|
297
|
|
|
|
|
|
|
}; |
|
298
|
|
|
|
|
|
|
} |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
sub _hmt_csv { |
|
301
|
7
|
|
|
7
|
|
20
|
my $raw_data = shift; |
|
302
|
7
|
|
|
|
|
23
|
my $dataset = []; |
|
303
|
|
|
|
|
|
|
|
|
304
|
7
|
50
|
|
|
|
79
|
my $csv = Text::CSV->new({binary => 1}) or die "Cannot use CSV: " . Text::CSV->error_diag() . "\n"; |
|
305
|
|
|
|
|
|
|
|
|
306
|
7
|
|
|
|
|
1350
|
my @lines = split("\n", $raw_data); |
|
307
|
|
|
|
|
|
|
|
|
308
|
7
|
|
|
|
|
37
|
my $parsed = $csv->parse(trim(shift @lines)); |
|
309
|
7
|
50
|
|
|
|
515
|
my @info = $parsed ? $csv->fields() : (); |
|
310
|
7
|
50
|
33
|
|
|
130
|
die "Currupt data. Release date was not found\n" unless @info && _date_to_epoch($info[1]); |
|
311
|
|
|
|
|
|
|
|
|
312
|
7
|
|
|
|
|
33
|
my $publish_epoch = _date_to_epoch($info[1]); |
|
313
|
7
|
50
|
|
|
|
26
|
die "Currupt data. Release date is invalid\n" unless defined $publish_epoch; |
|
314
|
|
|
|
|
|
|
|
|
315
|
7
|
|
|
|
|
26
|
$parsed = $csv->parse(trim(shift @lines)); |
|
316
|
7
|
|
|
|
|
438
|
my @row = $csv->fields(); |
|
317
|
7
|
|
|
|
|
99
|
my %column = map { trim($row[$_]) => $_ } (0 .. @row - 1); |
|
|
252
|
|
|
|
|
2794
|
|
|
318
|
|
|
|
|
|
|
|
|
319
|
7
|
|
|
|
|
228
|
foreach my $line (@lines) { |
|
320
|
161
|
|
|
|
|
411
|
$line = trim($line); |
|
321
|
|
|
|
|
|
|
|
|
322
|
161
|
|
|
|
|
3609
|
$parsed = $csv->parse($line); |
|
323
|
161
|
50
|
|
|
|
5867
|
next unless $parsed; |
|
324
|
|
|
|
|
|
|
|
|
325
|
161
|
|
|
|
|
412
|
my @row = $csv->fields(); |
|
326
|
|
|
|
|
|
|
|
|
327
|
161
|
|
|
|
|
1913
|
@row = map { trim($_ =~ s/\([^(]*\)$//r) } @row; |
|
|
5796
|
|
|
|
|
63265
|
|
|
328
|
|
|
|
|
|
|
|
|
329
|
161
|
50
|
|
|
|
2516
|
($row[$column{'Group Type'}] eq "Individual") or next; |
|
330
|
161
|
|
|
|
|
506
|
my $name = _process_name @row[0 .. 5]; |
|
331
|
|
|
|
|
|
|
|
|
332
|
161
|
50
|
|
|
|
506
|
next if $name =~ /^\s*$/; |
|
333
|
|
|
|
|
|
|
|
|
334
|
161
|
|
|
|
|
319
|
my $date_of_birth = $row[$column{'DOB'}]; |
|
335
|
161
|
|
|
|
|
309
|
my $place_of_birth = $row[$column{'Country of Birth'}]; |
|
336
|
|
|
|
|
|
|
# nationality is saved as an adjective (Iranian, American, etc); let's ignore it. |
|
337
|
161
|
|
|
|
|
245
|
my $nationality = ''; |
|
338
|
161
|
|
|
|
|
296
|
my $residence = $row[$column{'Country'}]; |
|
339
|
161
|
|
|
|
|
275
|
my $postal_code = $row[$column{'Post/Zip Code'}]; |
|
340
|
161
|
|
|
|
|
284
|
my $national_id = $row[$column{'National Identification Number'}]; |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# Fields to be added in the new file format (https://redmine.deriv.cloud/issues/51922) |
|
343
|
|
|
|
|
|
|
# We can read these fields normally after the data is released in the new format |
|
344
|
161
|
|
|
|
|
282
|
my ($passport_no, $non_latin_alias); |
|
345
|
161
|
50
|
|
|
|
401
|
$passport_no = $row[$column{'Passport Number'}] if defined $column{'Passport Number'}; |
|
346
|
161
|
50
|
|
|
|
375
|
$non_latin_alias = $row[$column{'Name Non-Latin Script'}] if defined $column{'Name Non-Latin Script'}; |
|
347
|
|
|
|
|
|
|
|
|
348
|
161
|
100
|
|
|
|
830
|
_process_sanction_entry( |
|
|
|
50
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
$dataset, |
|
350
|
|
|
|
|
|
|
names => [$name, $non_latin_alias ? $non_latin_alias : ()], |
|
351
|
|
|
|
|
|
|
date_of_birth => [$date_of_birth], |
|
352
|
|
|
|
|
|
|
place_of_birth => [$place_of_birth], |
|
353
|
|
|
|
|
|
|
residence => [$residence], |
|
354
|
|
|
|
|
|
|
nationality => [$nationality], |
|
355
|
|
|
|
|
|
|
postal_code => [$postal_code], |
|
356
|
|
|
|
|
|
|
national_id => [$national_id], |
|
357
|
|
|
|
|
|
|
$passport_no ? (passport_no => [$passport_no]) : (), |
|
358
|
|
|
|
|
|
|
); |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
return { |
|
362
|
7
|
|
|
|
|
221
|
updated => $publish_epoch, |
|
363
|
|
|
|
|
|
|
content => $dataset, |
|
364
|
|
|
|
|
|
|
}; |
|
365
|
|
|
|
|
|
|
} |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub _eu_xml { |
|
368
|
4
|
|
|
4
|
|
14
|
my $raw_data = shift; |
|
369
|
4
|
|
|
|
|
27
|
my $ref = xml2hash($raw_data, array => ['nameAlias', 'birthdate'])->{export}; |
|
370
|
4
|
|
|
|
|
2469
|
my $dataset = []; |
|
371
|
|
|
|
|
|
|
|
|
372
|
4
|
|
|
|
|
12
|
foreach my $entry (@{$ref->{sanctionEntity}}) { |
|
|
4
|
|
|
|
|
19
|
|
|
373
|
28
|
50
|
|
|
|
93
|
next unless $entry->{subjectType}->{'-code'} eq 'person'; |
|
374
|
|
|
|
|
|
|
|
|
375
|
28
|
|
|
|
|
58
|
for (qw/birthdate citizenship address identification/) { |
|
376
|
112
|
|
100
|
|
|
371
|
$entry->{$_} //= []; |
|
377
|
112
|
100
|
|
|
|
295
|
$entry->{$_} = [$entry->{$_}] if ref $entry->{$_} eq 'HASH'; |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
28
|
|
|
|
|
53
|
my @names; |
|
381
|
28
|
|
50
|
|
|
45
|
for (@{$entry->{nameAlias} // []}) { |
|
|
28
|
|
|
|
|
87
|
|
|
382
|
60
|
|
|
|
|
105
|
my $name = $_->{'-wholeName'}; |
|
383
|
60
|
50
|
0
|
|
|
118
|
$name = join ' ', ($_->{'-firstName'} // '', $_->{'-lastName'} // '') unless $name; |
|
|
|
|
0
|
|
|
|
|
|
384
|
60
|
50
|
|
|
|
149
|
push @names, $name if $name ne ' '; |
|
385
|
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
|
|
387
|
28
|
|
|
|
|
44
|
my @dob_list; |
|
388
|
28
|
|
|
|
|
73
|
foreach my $dob ($entry->{birthdate}->@*) { |
|
389
|
36
|
100
|
|
|
|
114
|
push @dob_list, $dob->{'-birthdate'} if $dob->{'-birthdate'}; |
|
390
|
36
|
50
|
66
|
|
|
121
|
push @dob_list, $dob->{'-year'} if not $dob->{'-birthdate'} and $dob->{'-year'}; |
|
391
|
|
|
|
|
|
|
} |
|
392
|
|
|
|
|
|
|
|
|
393
|
28
|
50
|
|
|
|
63
|
my @place_of_birth = map { $_->{'-countryIso2Code'} || () } $entry->{birthdate}->@*; |
|
|
36
|
|
|
|
|
105
|
|
|
394
|
28
|
50
|
|
|
|
59
|
my @citizen = map { $_->{'-countryIso2Code'} || () } $entry->{citizenship}->@*; |
|
|
12
|
|
|
|
|
40
|
|
|
395
|
28
|
50
|
|
|
|
56
|
my @residence = map { $_->{'-countryIso2Code'} || () } $entry->{address}->@*; |
|
|
8
|
|
|
|
|
28
|
|
|
396
|
28
|
50
|
33
|
|
|
57
|
my @postal_code = map { $_->{'-zipCode'} || $_->{'-poBox'} || () } $entry->{address}->@*; |
|
|
8
|
|
|
|
|
58
|
|
|
397
|
28
|
50
|
|
|
|
58
|
my @nationality = map { $_->{'-countryIso2Code'} || () } $entry->{identification}->@*; |
|
|
12
|
|
|
|
|
44
|
|
|
398
|
28
|
100
|
33
|
|
|
50
|
my @national_id = map { $_->{'-identificationTypeCode'} eq 'id' ? $_->{'-number'} || () : () } $entry->{identification}->@*; |
|
|
12
|
|
|
|
|
41
|
|
|
399
|
28
|
100
|
33
|
|
|
56
|
my @passport_no = map { $_->{'-identificationTypeCode'} eq 'passport' ? $_->{'-number'} || () : () } $entry->{identification}->@*; |
|
|
12
|
|
|
|
|
45
|
|
|
400
|
|
|
|
|
|
|
|
|
401
|
28
|
|
|
|
|
82
|
_process_sanction_entry( |
|
402
|
|
|
|
|
|
|
$dataset, |
|
403
|
|
|
|
|
|
|
names => \@names, |
|
404
|
|
|
|
|
|
|
date_of_birth => \@dob_list, |
|
405
|
|
|
|
|
|
|
place_of_birth => \@place_of_birth, |
|
406
|
|
|
|
|
|
|
residence => \@residence, |
|
407
|
|
|
|
|
|
|
nationality => \@nationality, |
|
408
|
|
|
|
|
|
|
citizen => \@citizen, |
|
409
|
|
|
|
|
|
|
postal_code => \@postal_code, |
|
410
|
|
|
|
|
|
|
national_id => \@national_id, |
|
411
|
|
|
|
|
|
|
passport_no => \@passport_no, |
|
412
|
|
|
|
|
|
|
); |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
4
|
|
50
|
|
|
58
|
my @date_parts = split('T', $ref->{'-generationDate'} // ''); |
|
416
|
4
|
|
50
|
|
|
21
|
my $publish_epoch = _date_to_epoch($date_parts[0] // ''); |
|
417
|
|
|
|
|
|
|
|
|
418
|
4
|
50
|
|
|
|
18
|
die "Corrupt data. Release date is invalid\n" unless $publish_epoch; |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
return { |
|
421
|
4
|
|
|
|
|
312
|
updated => $publish_epoch, |
|
422
|
|
|
|
|
|
|
content => $dataset, |
|
423
|
|
|
|
|
|
|
}; |
|
424
|
|
|
|
|
|
|
} |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=head2 run |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Fetches latest version of lists, and returns combined hash of successfully downloaded ones |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=cut |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
sub run { |
|
433
|
8
|
|
|
8
|
1
|
59472
|
my %args = @_; |
|
434
|
|
|
|
|
|
|
|
|
435
|
8
|
|
|
|
|
24
|
my $result = {}; |
|
436
|
|
|
|
|
|
|
|
|
437
|
8
|
|
|
|
|
39
|
my $config = config(%args); |
|
438
|
8
|
|
50
|
|
|
44
|
my $retries = $args{retries} // 3; |
|
439
|
|
|
|
|
|
|
|
|
440
|
8
|
|
|
|
|
44
|
foreach my $id (sort keys %$config) { |
|
441
|
32
|
|
|
|
|
69
|
my $source = $config->{$id}; |
|
442
|
|
|
|
|
|
|
try { |
|
443
|
|
|
|
|
|
|
die "Url is empty for $id\n" unless $source->{url}; |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
my $raw_data; |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
if ($source->{url} =~ m/^file:\/\/(.*)$/) { |
|
448
|
|
|
|
|
|
|
$raw_data = _entries_from_file($id); |
|
449
|
|
|
|
|
|
|
} else { |
|
450
|
|
|
|
|
|
|
$raw_data = _entries_from_remote_src({ |
|
451
|
|
|
|
|
|
|
id => $id, |
|
452
|
|
|
|
|
|
|
source => $source->{url}, |
|
453
|
|
|
|
|
|
|
retries => $retries |
|
454
|
|
|
|
|
|
|
}); |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
my $data = $source->{parser}->($raw_data); |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
if ($data->{updated} > 1) { |
|
460
|
|
|
|
|
|
|
$result->{$id} = $data; |
|
461
|
|
|
|
|
|
|
my $count = $data->{content}->@*; |
|
462
|
|
|
|
|
|
|
print "Source $id: $count entries fetched \n" if $args{verbose}; |
|
463
|
|
|
|
|
|
|
} |
|
464
|
32
|
|
|
|
|
81
|
} catch ($e) { |
|
465
|
|
|
|
|
|
|
$result->{$id}->{error} = $e; |
|
466
|
|
|
|
|
|
|
} |
|
467
|
|
|
|
|
|
|
} |
|
468
|
|
|
|
|
|
|
|
|
469
|
8
|
|
|
|
|
215
|
return $result; |
|
470
|
|
|
|
|
|
|
} |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 _entries_from_file |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Get the sanction entries from a file locally |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=cut |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
sub _entries_from_file { |
|
479
|
23
|
|
|
23
|
|
57
|
my ($id) = @_; |
|
480
|
|
|
|
|
|
|
|
|
481
|
23
|
|
|
|
|
31
|
my $entries; |
|
482
|
|
|
|
|
|
|
|
|
483
|
23
|
50
|
|
|
|
1122
|
open my $fh, '<', "$1" or die "Can't open $id file $1 $!\n"; |
|
484
|
23
|
|
|
|
|
72
|
$entries = do { local $/; <$fh> }; |
|
|
23
|
|
|
|
|
111
|
|
|
|
23
|
|
|
|
|
1310
|
|
|
485
|
23
|
|
|
|
|
279
|
close $fh; |
|
486
|
|
|
|
|
|
|
|
|
487
|
23
|
|
|
|
|
168
|
return $entries; |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 _entries_from_remote_src |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Get the sanction entries from a remote source includes retry mechanism |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=cut |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
sub _entries_from_remote_src { |
|
497
|
8
|
|
|
8
|
|
22
|
my ($args) = @_; |
|
498
|
|
|
|
|
|
|
|
|
499
|
8
|
|
|
|
|
18
|
my ($id, $src_url, $retries) = @{$args}{qw/ id source retries /}; |
|
|
8
|
|
|
|
|
26
|
|
|
500
|
8
|
|
50
|
|
|
24
|
$retries //= 3; |
|
501
|
|
|
|
|
|
|
|
|
502
|
8
|
|
|
|
|
15
|
my $entries; |
|
503
|
8
|
|
|
|
|
16
|
my $error_log = 'Unknown Error'; |
|
504
|
|
|
|
|
|
|
|
|
505
|
8
|
|
|
|
|
89
|
my $ua = Mojo::UserAgent->new; |
|
506
|
8
|
|
|
|
|
88
|
$ua->connect_timeout(15); |
|
507
|
8
|
|
|
|
|
86
|
$ua->inactivity_timeout(60); |
|
508
|
|
|
|
|
|
|
|
|
509
|
8
|
|
|
|
|
46
|
my $retry_counter = 0; |
|
510
|
8
|
|
|
|
|
24
|
while ($retry_counter < $retries) { |
|
511
|
20
|
|
|
|
|
34
|
$retry_counter++; |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
try { |
|
514
|
|
|
|
|
|
|
my $resp = $ua->get($src_url); |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
die "File not downloaded for $id\n" if $resp->result->is_error; |
|
517
|
|
|
|
|
|
|
$entries = $resp->result->body; |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
last; |
|
520
|
|
|
|
|
|
|
} catch ($e) { |
|
521
|
|
|
|
|
|
|
$error_log = $e; |
|
522
|
|
|
|
|
|
|
} |
|
523
|
20
|
|
|
|
|
32
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
8
|
|
100
|
|
|
97
|
return $entries // die "An error occurred while fetching data from '$src_url' due to $error_log\n"; |
|
526
|
|
|
|
|
|
|
} |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
1; |