File Coverage

blib/lib/Parse/PhoneNumber/ID.pm
Criterion Covered Total %
statement 284 286 99.3
branch 59 78 75.6
condition 36 54 66.6
subroutine 14 14 100.0
pod 1 1 100.0
total 394 433 90.9


line stmt bran cond sub pod time code
1             package Parse::PhoneNumber::ID;
2              
3             our $DATE = '2015-01-04'; # DATE
4             our $VERSION = '0.13'; # VERSION
5              
6 1     1   30414 use 5.010001;
  1         5  
  1         66  
7 1     1   8 use strict;
  1         2  
  1         59  
8 1     1   8 use warnings;
  1         2  
  1         51  
9 1     1   6 use Log::Any '$log';
  1         1  
  1         17  
10              
11 1     1   916 use Function::Fallback::CoreOrPP qw(clone);
  1         878  
  1         116  
12 1     1   888 use Perinci::Sub::Util qw(gen_modified_sub);
  1         2830  
  1         9275  
13              
14             require Exporter;
15             our @ISA = qw(Exporter);
16             our @EXPORT_OK = qw(extract_id_phones parse_id_phone
17             list_id_operators list_id_area_codes);
18              
19             # from: http://id.wikipedia.org/wiki/Daftar_kode_telepon_di_Indonesia
20             # last updated: 2011-03-08
21             my %area_codes = (
22             '0627' => {province=>'aceh', cities=>'Kota Subulussalam'},
23             '0629' => {province=>'aceh', cities=>'Kutacane (Kabupaten Aceh Tenggara)'},
24             '0641' => {province=>'aceh', cities=>'Kota Langsa'},
25             '0642' => {province=>'aceh', cities=>'Blang Kejeren (Kabupaten Gayo Lues)'},
26             '0643' => {province=>'aceh', cities=>'Takengon (Kabupaten Aceh Tengah)'},
27             '0644' => {province=>'aceh', cities=>'Bireuen (Kabupaten Bireuen)'},
28             '0645' => {province=>'aceh', cities=>'Kota Lhokseumawe'},
29             '0646' => {province=>'aceh', cities=>'Idi (Kabupaten Aceh Timur)'},
30             '0650' => {province=>'aceh', cities=>'Sinabang (Kabupaten Simeulue)'},
31             '0651' => {province=>'aceh', cities=>'Kota Banda Aceh - Jantho (Kabupaten Aceh Besar) - Lamno (Kabupaten Aceh Jaya)'},
32             '0652' => {province=>'aceh', cities=>'Kota Sabang'},
33             '0653' => {province=>'aceh', cities=>'Sigli (Kabupaten Pidie)'},
34             '0654' => {province=>'aceh', cities=>'Calang (Kabupaten Aceh Jaya)'},
35             '0655' => {province=>'aceh', cities=>'Meulaboh (Kabupaten Aceh Barat)'},
36             '0656' => {province=>'aceh', cities=>'Tapaktuan (Kabupaten Aceh Selatan)'},
37             '0657' => {province=>'aceh', cities=>'Bakongan (Kabupaten Aceh Selatan)'},
38             '0658' => {province=>'aceh', cities=>'Singkil (Kabupaten Aceh Singkil)'},
39             '0659' => {province=>'aceh', cities=>'Blangpidie (Kabupaten Aceh Barat Daya)'},
40              
41             '061' => {province=>'sumut', cities=>'Kota Medan - Kota Binjai - Stabat (Kabupaten Langkat)'},
42             '0620' => {province=>'sumut', cities=>'Pangkalan Brandan (Kabupaten Langkat)'},
43             '0621' => {province=>'sumut', cities=>'Kota Tebing Tinggi'},
44             '0622' => {province=>'sumut', cities=>'Kota Pematangsiantar'},
45             '0623' => {province=>'sumut', cities=>'Kisaran (Kabupaten Asahan) - Kota Tanjung Balai'},
46             '0624' => {province=>'sumut', cities=>'Rantau Prapat (Kabupaten Labuhanbatu)'},
47             '0625' => {province=>'sumut', cities=>'Parapat (Kabupaten Simalungun)'},
48             '0626' => {province=>'sumut', cities=>'Pangururan (Kabupaten Samosir)'},
49             '0627' => {province=>'sumut', cities=>'Sidikalang (Kabupaten Dairi) - Salak (Kabupaten Pakpak Bharat)'},
50             '0628' => {province=>'sumut', cities=>'Kabanjahe (Kabupaten Karo)'},
51             '0630' => {province=>'sumut', cities=>'Teluk Dalam (Kabupaten Nias Selatan)'},
52             '0631' => {province=>'sumut', cities=>'Kota Sibolga'},
53             '0636' => {province=>'sumut', cities=>'Balige (Kabupaten Toba Samosir)'},
54             '0633' => {province=>'sumut', cities=>'Tarutung (Kabupaten Tapanuli Utara)'},
55             '0634' => {province=>'sumut', cities=>'Kota Padang Sidempuan'},
56             '0635' => {province=>'sumut', cities=>'Gunung Tua (Kabupaten Padang Lawas Utara)'},
57             '0636' => {province=>'sumut', cities=>'Panyabungan (Kabupaten Mandailing Natal)'},
58             '0638' => {province=>'sumut', cities=>'Barus (Kabupaten Tapanuli Tengah)'},
59             '0639' => {province=>'sumut', cities=>'Kota Gunung Sitoli'},
60              
61             '0751' => {province=>'sumbar', cities=>'Kota Padang - Kota Pariaman'},
62             '0752' => {province=>'sumbar', cities=>'Kota Bukittinggi - Kota Padang Panjang - Kota Payakumbuh - Batusangkar (Kabupaten Tanah Datar)'},
63             '0753' => {province=>'sumbar', cities=>'Lubuk Sikaping (Kabupaten Pasaman)'},
64             '0754' => {province=>'sumbar', cities=>'Kabupaten Sijunjung'},
65             '0755' => {province=>'sumbar', cities=>'Kota Solok - Kabupaten Solok Selatan - Alahan Panjang (Kabupaten Solok)'},
66             '0756' => {province=>'sumbar', cities=>'Painan (Kabupaten Pesisir Selatan)'},
67             '0757' => {province=>'sumbar', cities=>'Balai Selasa (Kabupaten Agam)'},
68             '0759' => {province=>'sumbar', cities=>'Tuapejat (Kabupaten Kepulauan Mentawai)'},
69              
70             '0760' => {province=>'riau', cities=>'Teluk Kuantan (Kabupaten Kuantan Singingi)'},
71             '0761' => {province=>'riau', cities=>'Kota Pekanbaru - Pangkalan Kerinci (Kabupaten Pelalawan)'},
72             '0762' => {province=>'riau', cities=>'Bangkinang (Kabupaten Kampar)'},
73             '0763' => {province=>'riau', cities=>'Selatpanjang (Kabupaten Bengkalis)'},
74             '0764' => {province=>'riau', cities=>'Siak Sri Indrapura (Kabupaten Siak)'},
75             '0765' => {province=>'riau', cities=>'Kota Dumai - Duri (Kabupaten Bengkalis)'},
76             '0766' => {province=>'riau', cities=>'Bengkalis (Kabupaten Bengkalis)'},
77             '0767' => {province=>'riau', cities=>'Bagan Siapi-api (Kabupaten Rokan Hilir)'},
78             '0768' => {province=>'riau', cities=>'Tembilahan (Kabupaten Indragiri Hilir)'},
79             '0769' => {province=>'riau', cities=>'Rengat - Air Molek (Kabupaten Indragiri Hulu)'},
80              
81             '0771' => {province=>'kepriau', cities=>'Kota Tanjung Pinang'},
82             '0772' => {province=>'kepriau', cities=>'Tarempa (Kabupaten Kepulauan Anambas)'},
83             '0773' => {province=>'kepriau', cities=>'Ranai (Kabupaten Natuna)'},
84             '0776' => {province=>'kepriau', cities=>'Dabosingkep (Kabupaten Lingga)'},
85             '0777' => {province=>'kepriau', cities=>'Tanjung Balai Karimun (Kabupaten Karimun)'},
86             '0778' => {province=>'kepriau', cities=>'Kota Batam'},
87             '0779' => {province=>'kepriau', cities=>'Tanjungbatu (Kabupaten Karimun)'},
88              
89             '0740' => {province=>'jambi', cities=>'Mendahara - Muara Sabak (Kabupaten Tanjung Jabung Timur)'},
90             '0741' => {province=>'jambi', cities=>'Kota Jambi'},
91             '0742' => {province=>'jambi', cities=>'Kualatungkal (Kabupaten Tanjung Jabung Barat)'},
92             '0743' => {province=>'jambi', cities=>'Muara Bulian (Kabupaten Batanghari)'},
93             '0744' => {province=>'jambi', cities=>'Muara Tebo (Kabupaten Tebo)'},
94             '0745' => {province=>'jambi', cities=>'Sarolangun (Kabupaten Sarolangun)'},
95             '0746' => {province=>'jambi', cities=>'Bangko (Kabupaten Merangin)'},
96             '0747' => {province=>'jambi', cities=>'Muarabungo (Kabupaten Bungo)'},
97             '0748' => {province=>'jambi', cities=>'Kota Sungai Penuh'},
98              
99             '0711' => {province=>'sumsel', cities=>'Kota Palembang - Pangkalan Balai - Betung (Kabupaten Banyuasin) - Indralaya (Kabupaten Ogan Ilir)'},
100             '0712' => {province=>'sumsel', cities=>'Kayu Agung (Kabupaten Ogan Komering Ilir)'},
101             '0713' => {province=>'sumsel', cities=>'Kota Prabumulih'},
102             '0714' => {province=>'sumsel', cities=>'Sekayu (Kabupaten Musi Banyuasin)'},
103             '0730' => {province=>'sumsel', cities=>'Kota Pagar Alam'},
104             '0731' => {province=>'sumsel', cities=>'Lahat (Kabupaten Lahat)'},
105             '0733' => {province=>'sumsel', cities=>'Kota Lubuklinggau - Pendopo (Kabupaten Lahat)'},
106             '0734' => {province=>'sumsel', cities=>'Muara Enim (Kabupaten Muara Enim)'},
107             '0735' => {province=>'sumsel', cities=>'Baturaja (Kabupaten Ogan Komering Ulu)'},
108              
109             '0715' => {province=>'kbb', cities=>'Belinyu (Kabupaten Bangka)'},
110             '0716' => {province=>'kbb', cities=>'Muntok (Kabupaten Bangka Barat)'},
111             '0717' => {province=>'kbb', cities=>'Kota Pangkal Pinang - Sungailiat (Kabupaten Bangka)'},
112             '0718' => {province=>'kbb', cities=>'Koba (Kabupaten Bangka Tengah) - Toboali (Kabupaten Bangka Selatan)'},
113             '0719' => {province=>'kbb', cities=>'Manggar (Kabupaten Belitung Timur) - Tanjung Pandan (Kabupaten Belitung)'},
114              
115             '0732' => {province=>'bengkulu', cities=>'Curup (Kabupaten Rejang Lebong)'},
116             '0736' => {province=>'bengkulu', cities=>'Kota Bengkulu - Lais (Kabupaten Bengkulu Utara)'},
117             '0737' => {province=>'bengkulu', cities=>'Arga Makmur (Kabupaten Bengkulu Utara) - Mukomuko (Kabupaten Mukomuko)'},
118             '0738' => {province=>'bengkulu', cities=>'Muara Aman (Kabupaten Lebong)'},
119             '0739' => {province=>'bengkulu', cities=>'Bintuhan (Kabupaten Kaur) - Kota Manna (Kabupaten Bengkulu Selatan)'},
120              
121             '0721' => {province=>'lampung', cities=>'Kota Bandar Lampung'},
122             '0722' => {province=>'lampung', cities=>'Kota Agung (Kabupaten Tanggamus)'},
123             '0723' => {province=>'lampung', cities=>'Blambangan Umpu (Kabupaten Way Kanan)'},
124             '0724' => {province=>'lampung', cities=>'Kotabumi (Kabupaten Lampung Utara)'},
125             '0725' => {province=>'lampung', cities=>'Kota Metro'},
126             '0726' => {province=>'lampung', cities=>'Menggala (Kabupaten Tulang Bawang)'},
127             '0727' => {province=>'lampung', cities=>'Kalianda (Kabupaten Lampung Selatan)'},
128             '0728' => {province=>'lampung', cities=>'Kota Liwa (Kabupaten Lampung Barat)'},
129             '0729' => {province=>'lampung', cities=>'Pringsewu (Kabupaten Pringsewu)'},
130              
131             '021' => {province=>'dki/banten/jabar', cities=>'Kepulauan Seribu - Jakarta Barat - Jakarta Pusat - Jakarta Selatan - Jakarta Timur - Jakarta Utara/Tigaraksa (Kabupaten Tangerang) - Kota Tangerang - Kota Tangerang Selatan/Kota Bekasi - Cikarang (Kabupaten Bekasi) - Kota Depok - Cibinong (Kabupaten Bogor)'},
132              
133             '0252' => {province=>'banten', cities=>'Rangkasbitung (Kabupaten Lebak)'},
134             '0253' => {province=>'banten', cities=>'Pandeglang - Labuan (Kabupaten Pandeglang)'},
135             '0254' => {province=>'banten', cities=>'Kota Serang - Kabupaten Serang - Merak (Kota Cilegon)'},
136             '0257' => {province=>'banten', cities=>'Pasauran (Kabupaten Serang)'},
137              
138             '022' => {province=>'jabar', cities=>'Kota Bandung - Kota Cimahi - Soreang (Kabupaten Bandung) - Lembang - Ngamprah (Kabupaten Bandung Barat)'},
139             '0231' => {province=>'jabar', cities=>'Kota Cirebon - Sumber - Losari (Kabupaten Cirebon)'},
140             '0232' => {province=>'jabar', cities=>'Kabupaten Kuningan'},
141             '0233' => {province=>'jabar', cities=>'Kadipaten (Kabupaten Majalengka)'},
142             '0234' => {province=>'jabar', cities=>'Jatibarang (Kabupaten Indramayu)'},
143             '0251' => {province=>'jabar', cities=>'Kota Bogor'},
144             '0260' => {province=>'jabar', cities=>'Pamanukan (Kabupaten Subang)'},
145             '0261' => {province=>'jabar', cities=>'Kabupaten Sumedang'},
146             '0262' => {province=>'jabar', cities=>'Kabupaten Garut'},
147             '0263' => {province=>'jabar', cities=>'Kabupaten Cianjur'},
148             '0264' => {province=>'jabar', cities=>'Kabupaten Purwakarta - Cikampek)'},
149             '0265' => {province=>'jabar', cities=>'Kota Tasikmalaya - Kadipaten - Singaparna (Kabupaten Tasikmalaya) - Kota Banjar - Ciamis - Pangandaran (Kabupaten Ciamis)'},
150             '0266' => {province=>'jabar', cities=>'Kota Sukabumi - Palabuhanratu (Kabupaten Sukabumi)'},
151             '0267' => {province=>'jabar', cities=>'Kabupaten Karawang'},
152              
153             '024' => {province=>'jateng', cities=>'Semarang, Ungaran'},
154             '0271' => {province=>'jateng', cities=>'Surakarta (Solo), Kartasura, Sukoharjo, Karanganyar, Sragen'},
155             '0272' => {province=>'jateng', cities=>'Klaten'},
156             '0273' => {province=>'jateng', cities=>'Wonogiri'},
157             '0275' => {province=>'jateng', cities=>'Purworejo,Kutoarjo'},
158             '0276' => {province=>'jateng', cities=>'Boyolali'},
159             '0280' => {province=>'jateng', cities=>'Majenang, Sidareja (Kabupaten Cilacap bagian barat)'},
160             '0281' => {province=>'jateng', cities=>'Purwokerto, Banyumas, Purbalingga'},
161             '0282' => {province=>'jateng', cities=>'Cilacap (bagian timur)'},
162             '0283' => {province=>'jateng', cities=>'Tegal, Slawi, Brebes'},
163             '0284' => {province=>'jateng', cities=>'Pemalang'},
164             '0285' => {province=>'jateng', cities=>'Pekalongan, Batang (bagian barat)'},
165             '0286' => {province=>'jateng', cities=>'Banjarnegara, Wonosobo'},
166             '0287' => {province=>'jateng', cities=>'Kebumen, Gombong'},
167             '0289' => {province=>'jateng', cities=>'Bumiayu (Kabupaten Brebes bagian selatan)'},
168             '0291' => {province=>'jateng', cities=>'Demak, Jepara, Kudus'},
169             '0292' => {province=>'jateng', cities=>'Purwodadi'},
170             '0293' => {province=>'jateng', cities=>'Magelang, Mungkid, Temanggung'},
171             '0294' => {province=>'jateng', cities=>'Kendal, Kaliwungu, Weleri, Batang (bagian timur)'},
172             '0295' => {province=>'jateng', cities=>'Pati, Rembang, Lasem'},
173             '0296' => {province=>'jateng', cities=>'Blora, Cepu'},
174             '0297' => {province=>'jateng', cities=>'Karimun Jawa'},
175             '0298' => {province=>'jateng', cities=>'Salatiga, Ambarawa (Kabupaten Semarang bagian tengah dan selatan)'},
176             '0356' => {province=>'jateng', cities=>'Rembang bagian Timur (wilayah yang berbatasan dengan Tuban)'},
177              
178             '0274' => {province=>'diy', cities=>'Yogyakarta, Sleman, Wates, Bantul, Wonosari'},
179              
180             '031' => {province=>'jatim', cities=>'Surabaya, Gresik, Sidoarjo, Bangkalan'},
181             '0321' => {province=>'jatim', cities=>'Mojokerto, Jombang'},
182             '0322' => {province=>'jatim', cities=>'Lamongan, Babat'},
183             '0323' => {province=>'jatim', cities=>'Sampang'},
184             '0324' => {province=>'jatim', cities=>'Pamekasan'},
185             '0325' => {province=>'jatim', cities=>'Sangkapura (Bawean)'},
186             '0327' => {province=>'jatim', cities=>'Kepulauan Kangean, Kepulauan Masalembu'},
187             '0328' => {province=>'jatim', cities=>'Sumenep'},
188             '0331' => {province=>'jatim', cities=>'Jember'},
189             '0332' => {province=>'jatim', cities=>'Bondowoso, Sukosari, Prajekan'},
190             '0333' => {province=>'jatim', cities=>'Banyuwangi, Muncar'},
191             '0334' => {province=>'jatim', cities=>'Lumajang'},
192             '0335' => {province=>'jatim', cities=>'Probolinggo, Kraksaan'},
193             '0336' => {province=>'jatim', cities=>'Ambulu, Puger (Kabupaten Jember bagian selatan)'},
194             '0338' => {province=>'jatim', cities=>'Situbondo, Besuki'},
195             '0341' => {province=>'jatim', cities=>'Malang, Kepanjen, Batu'},
196             '0342' => {province=>'jatim', cities=>'Blitar, Wlingi'},
197             '0343' => {province=>'jatim', cities=>'Pasuruan, Pandaan, Gempol'},
198             '0351' => {province=>'jatim', cities=>'Madiun, Caruban, Magetan, Ngawi'},
199             '0352' => {province=>'jatim', cities=>'Ponorogo'},
200             '0353' => {province=>'jatim', cities=>'Bojonegoro'},
201             '0354' => {province=>'jatim', cities=>'Kediri, Pare'},
202             '0355' => {province=>'jatim', cities=>'Tulungagung, Trenggalek'},
203             '0356' => {province=>'jatim', cities=>'Tuban'},
204             '0357' => {province=>'jatim', cities=>'Pacitan'},
205             '0358' => {province=>'jatim', cities=>'Nganjuk, Kertosono'},
206              
207             '0361' => {province=>'bali', cities=>'Denpasar, Gianyar, Kuta, Tabanan, Tampaksiring, Ubud'},
208             '0362' => {province=>'bali', cities=>'Singaraja'},
209             '0363' => {province=>'bali', cities=>'Amlapura'},
210             '0365' => {province=>'bali', cities=>'Negara, Gilimanuk'},
211             '0366' => {province=>'bali', cities=>'Klungkung, Kintamani'},
212             '0368' => {province=>'bali', cities=>'Baturiti'},
213              
214             '0364' => {province=>'ntb', cities=>'Kota Mataram'},
215             '0370' => {province=>'ntb', cities=>'Mataram, Praya'},
216             '0371' => {province=>'ntb', cities=>'Sumbawa'},
217             '0372' => {province=>'ntb', cities=>'Alas, Taliwang'},
218             '0373' => {province=>'ntb', cities=>'Dompu'},
219             '0374' => {province=>'ntb', cities=>'Bima'},
220             '0376' => {province=>'ntb', cities=>'Selong'},
221              
222             '0380' => {province=>'ntt', cities=>'Kupang, Baa (Roti)'},
223             '0381' => {province=>'ntt', cities=>'Ende'},
224             '0382' => {province=>'ntt', cities=>'Maumere'},
225             '0383' => {province=>'ntt', cities=>'Larantuka'},
226             '0384' => {province=>'ntt', cities=>'Bajawa'},
227             '0385' => {province=>'ntt', cities=>'Labuhanbajo, Ruteng'},
228             '0386' => {province=>'ntt', cities=>'Kalabahi'},
229             '0387' => {province=>'ntt', cities=>'Waingapu, Waikabubak'},
230             '0388' => {province=>'ntt', cities=>'Kefamenanu, Soe'},
231             '0389' => {province=>'ntt', cities=>'Atambua'},
232              
233             '0561' => {province=>'kalbar', cities=>'Pontianak, Mempawah'},
234             '0562' => {province=>'kalbar', cities=>'Sambas, Singkawang, Bengkayang'},
235             '0563' => {province=>'kalbar', cities=>'Ngabang'},
236             '0564' => {province=>'kalbar', cities=>'Sanggau'},
237             '0565' => {province=>'kalbar', cities=>'Sintang'},
238             '0567' => {province=>'kalbar', cities=>'Putussibau'},
239             '0568' => {province=>'kalbar', cities=>'Nanga Pinoh'},
240             '0534' => {province=>'kalbar', cities=>'Ketapang'},
241              
242             '0513' => {province=>'kalteng', cities=>'Kuala Kapuas, Pulang Pisau'},
243             '0519' => {province=>'kalteng', cities=>'Muara Teweh'},
244             '0522' => {province=>'kalteng', cities=>'Ampah (Dusun Tengah, Barito Timur)'},
245             '0525' => {province=>'kalteng', cities=>'Buntok'},
246             '0526' => {province=>'kalteng', cities=>'Tamiang Layang'},
247             '0528' => {province=>'kalteng', cities=>'Purukcahu'},
248             '0531' => {province=>'kalteng', cities=>'Sampit'},
249             '0532' => {province=>'kalteng', cities=>'Pangkalan Bun, Kumai'},
250             '0534' => {province=>'kalteng', cities=>'Kendawangan'},
251             '0536' => {province=>'kalteng', cities=>'Palangkaraya, Kasongan'},
252             '0537' => {province=>'kalteng', cities=>'Kuala Kurun'},
253             '0538' => {province=>'kalteng', cities=>'Kuala Pembuang'},
254             '0539' => {province=>'kalteng', cities=>'Kuala Kuayan (Mentaya Hulu, Kotawaringin Timur)'},
255              
256             '0511' => {province=>'kalsel', cities=>'Banjarmasin, Banjarbaru, Martapura, Marabahan'},
257             '0512' => {province=>'kalsel', cities=>'Pelaihari'},
258             '0517' => {province=>'kalsel', cities=>'Kandangan, Barabai, Rantau, Negara'},
259             '0518' => {province=>'kalsel', cities=>'Kotabaru, Batulicin'},
260             '0526' => {province=>'kalsel', cities=>'Tanjung'},
261             '0527' => {province=>'kalsel', cities=>'Amuntai'},
262              
263             '0541' => {province=>'kaltim', cities=>'Samarinda, Tenggarong'},
264             '0542' => {province=>'kaltim', cities=>'Balikpapan'},
265             '0543' => {province=>'kaltim', cities=>'Tanah Grogot'},
266             '0545' => {province=>'kaltim', cities=>'Melak'},
267             '0548' => {province=>'kaltim', cities=>'Bontang'},
268             '0549' => {province=>'kaltim', cities=>'Sangatta'},
269             '0551' => {province=>'kaltim', cities=>'Tarakan'},
270             '0552' => {province=>'kaltim', cities=>'Tanjungselor'},
271             '0553' => {province=>'kaltim', cities=>'Malinau'},
272             '0554' => {province=>'kaltim', cities=>'Tanjung Redeb'},
273             '0556' => {province=>'kaltim', cities=>'Nunukan'},
274              
275             '0430' => {province=>'sulut', cities=>'Amurang'},
276             '0431' => {province=>'sulut', cities=>'Manado, Tomohon, Tondano'},
277             '0432' => {province=>'sulut', cities=>'Tahuna'},
278             '0434' => {province=>'sulut', cities=>'Kotamobagu'},
279             '0438' => {province=>'sulut', cities=>'Bitung'},
280              
281             '0435' => {province=>'gorontalo', cities=>'Gorontalo, Limboto'},
282             '0443' => {province=>'gorontalo', cities=>'Marisa'},
283              
284             '0450' => {province=>'sulteng', cities=>'Parigi'},
285             '0451' => {province=>'sulteng', cities=>'Palu'},
286             '0452' => {province=>'sulteng', cities=>'Poso'},
287             '0453' => {province=>'sulteng', cities=>'Tolitoli'},
288             '0457' => {province=>'sulteng', cities=>'Donggala'},
289             '0458' => {province=>'sulteng', cities=>'Tentena'},
290             '0461' => {province=>'sulteng', cities=>'Luwuk'},
291             '0462' => {province=>'sulteng', cities=>'Banggai'},
292             '0463' => {province=>'sulteng', cities=>'Bunta'},
293             '0464' => {province=>'sulteng', cities=>'Ampana'},
294             '0465' => {province=>'sulteng', cities=>'Kolonedale'},
295             '0455' => {province=>'sulteng', cities=>'kotaraya,moutong'},
296              
297             '0422' => {province=>'sulbar', cities=>'Majene'},
298             '0426' => {province=>'sulbar', cities=>'Mamuju'},
299             '0428' => {province=>'sulbar', cities=>'Polewali'},
300              
301             '0410' => {province=>'sulsel', cities=>'Pangkep'},
302             '0411' => {province=>'sulsel', cities=>'Makassar, Maros, Sungguminasa'},
303             '0413' => {province=>'sulsel', cities=>'Bulukumba'},
304             '0414' => {province=>'sulsel', cities=>'Bantaeng (Selayar)'},
305             '0417' => {province=>'sulsel', cities=>'Malino'},
306             '0418' => {province=>'sulsel', cities=>'Takalar'},
307             '0419' => {province=>'sulsel', cities=>'Janeponto'},
308             '0420' => {province=>'sulsel', cities=>'Enrekang'},
309             '0421' => {province=>'sulsel', cities=>'Parepare, Pinrang'},
310             '0422' => {province=>'sulsel', cities=>'Manene'},
311             '0423' => {province=>'sulsel', cities=>'Makale, Rantepao'},
312             '0427' => {province=>'sulsel', cities=>'Barru'},
313             '0428' => {province=>'sulsel', cities=>'Wonomulyo'},
314             '0471' => {province=>'sulsel', cities=>'Palopo'},
315             '0472' => {province=>'sulsel', cities=>'Pitumpanua'},
316             '0473' => {province=>'sulsel', cities=>'Masamba'},
317             '0474' => {province=>'sulsel', cities=>'Malili'},
318             '0475' => {province=>'sulsel', cities=>'Soroako'},
319             '0481' => {province=>'sulsel', cities=>'Watampone'},
320             '0482' => {province=>'sulsel', cities=>'Sinjai'},
321             '0484' => {province=>'sulsel', cities=>'Watansoppeng'},
322             '0485' => {province=>'sulsel', cities=>'Sengkang'},
323              
324             '0401' => {province=>'sultra', cities=>'Kendari'},
325             '0402' => {province=>'sultra', cities=>'Baubau'},
326             '0403' => {province=>'sultra', cities=>'Raha'},
327             '0404' => {province=>'sultra', cities=>'Wanci'},
328             '0405' => {province=>'sultra', cities=>'Kolaka'},
329             '0408' => {province=>'sultra', cities=>'Unaaha'},
330              
331             '0910' => {province=>'maluku', cities=>'Bandanaira'},
332             '0911' => {province=>'maluku', cities=>'Ambon'},
333             '0913' => {province=>'maluku', cities=>'Namlea'},
334             '0914' => {province=>'maluku', cities=>'Masohi'},
335             '0915' => {province=>'maluku', cities=>'Bula'},
336             '0916' => {province=>'maluku', cities=>'Tual'},
337             '0917' => {province=>'maluku', cities=>'Dobo'},
338             '0918' => {province=>'maluku', cities=>'Saumlaku'},
339             '0921' => {province=>'maluku', cities=>'Soasiu'},
340             '0922' => {province=>'maluku', cities=>'Jailolo'},
341             '0923' => {province=>'maluku', cities=>'Morotai'},
342             '0924' => {province=>'maluku', cities=>'Tobelo'},
343             '0927' => {province=>'maluku', cities=>'Labuha'},
344             '0929' => {province=>'maluku', cities=>'Sanana'},
345             '0931' => {province=>'maluku', cities=>'Saparua'},
346             '0901' => {province=>'maluku', cities=>'Timika, Tembagapura'},
347              
348             '0902' => {province=>'papua', cities=>'Agats (Asmat)'},
349             '0951' => {province=>'papua', cities=>'Sorong'},
350             '0952' => {province=>'papua', cities=>'Teminabuan'},
351             '0955' => {province=>'papua', cities=>'Bintuni'},
352             '0956' => {province=>'papua', cities=>'Fakfak'},
353             '0957' => {province=>'papua', cities=>'Kaimana'},
354             '0966' => {province=>'papua', cities=>'Sarmi'},
355             '0967' => {province=>'papua', cities=>'Jayapura, Abepura'},
356             '0969' => {province=>'papua', cities=>'Wamena'},
357             '0971' => {province=>'papua', cities=>'Merauke'},
358             '0975' => {province=>'papua', cities=>'Tanahmerah'},
359             '0980' => {province=>'papua', cities=>'Ransiki'},
360             '0981' => {province=>'papua', cities=>'Biak'},
361             '0983' => {province=>'papua', cities=>'Serui'},
362             '0984' => {province=>'papua', cities=>'Nabire'},
363             '0985' => {province=>'papua', cities=>'Nabire'},
364             '0986' => {province=>'papua', cities=>'Manokwari'},
365             );
366              
367             my %cell_prefixes = (
368             '0811' => {operator=>'telkomsel', product=>'halo', is_gsm=>1},
369             '0812' => {operator=>'telkomsel', product=>'halo/simpati', is_gsm=>1},
370             '0813' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
371             '0814' => {operator=>'indosat', product=>'matrix', is_gsm=>1},
372             '0815' => {operator=>'indosat', product=>'matrix/mentari', is_gsm=>1},
373             '0816' => {operator=>'indosat', product=>'matrix/mentari', is_gsm=>1},
374             '0817' => {operator=>'xl', is_gsm=>1},
375             '0818' => {operator=>'xl', is_gsm=>1},
376             '0819' => {operator=>'xl', is_gsm=>1},
377             '0821' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
378             '0822' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
379             '0823' => {operator=>'telkomsel', product=>'as', is_gsm=>1},
380             '0828' => {operator=>'sampoerna', product=>'ceria', is_gsm=>1},
381             #'08315' => {operator=>'nts', is_gsm=>1},
382             '0831' => {operator=>'axis', is_gsm=>1},
383             '0832' => {operator=>'axis', is_gsm=>1},
384             '0838' => {operator=>'axis', is_gsm=>1},
385             '0852' => {operator=>'telkomsel', product=>'as', is_gsm=>1},
386             '0853' => {operator=>'telkomsel', product=>'as', is_gsm=>1}, # fress
387             '0855' => {operator=>'indosat', product=>'matrix bright', is_gsm=>1},
388             '0856' => {operator=>'indosat', product=>'im3', is_gsm=>1},
389             '0857' => {operator=>'indosat', product=>'im3', is_gsm=>1},
390             '0858' => {operator=>'indosat', product=>'mentari', is_gsm=>1},
391             '0859' => {operator=>'xl', is_gsm=>1},
392             #'08681' => {operator=>'psn', product=>'byru', is_gsm=>0}, # satellite
393             '0868' => {operator=>'psn', product=>'byru', is_gsm=>0}, # satellite
394             '0877' => {operator=>'xl', product=>'axiata', is_gsm=>1},
395             '0878' => {operator=>'xl', product=>'axiata', is_gsm=>1},
396             '0879' => {operator=>'xl', product=>'axiata', is_gsm=>1},
397             '0881' => {operator=>'smartfren', is_cdma=>1},
398             '0882' => {operator=>'smartfren', is_cdma=>1},
399             '0883' => {operator=>'smartfren', is_cdma=>1},
400             '0884' => {operator=>'smartfren', is_cdma=>1},
401             '0885' => {operator=>'smartfren', is_cdma=>1},
402             '0886' => {operator=>'smartfren', is_cdma=>1},
403             '0887' => {operator=>'smartfren', is_cdma=>1},
404             '0888' => {operator=>'smartfren', is_cdma=>1},
405             '0889' => {operator=>'smartfren', is_cdma=>1},
406             '0896' => {operator=>'three', is_gsm=>1},
407             '0897' => {operator=>'three', is_gsm=>1},
408             '0898' => {operator=>'three', is_gsm=>1},
409             '0899' => {operator=>'three', is_gsm=>1},
410             );
411              
412             my %fwa_prefixes = (
413             30 => {operator=>'indosat', product=>'starone'},
414             32 => {operator=>'telkom', product=>'flexi'},
415             #39 is fixed telcom
416             40 => {operator=>'telkom', product=>'flexi'},
417             50 => {operator=>'telkom', product=>'flexi'},
418             60 => {operator=>'indosat', product=>'starone'},
419             62 => {operator=>'indosat', product=>'starone'},
420             68 => {operator=>'telkom', product=>'flexi'},
421             70 => {operator=>'telkom', product=>'flexi'},
422             710 => {operator=>'telkom', product=>'flexi'},
423             711 => {operator=>'telkom', product=>'flexi'},
424             712 => {operator=>'telkom', product=>'flexi'},
425             713 => {operator=>'telkom', product=>'flexi'},
426             714 => {operator=>'telkom', product=>'flexi'},
427             715 => {operator=>'telkom', product=>'flexi'},
428             716 => {operator=>'telkom', product=>'flexi'},
429             717 => {}, # land
430             718 => {}, # land
431             719 => {}, # land
432             72 => {}, # land
433             73 => {}, # land
434             74 => {}, # land
435             75 => {}, # land
436             76 => {}, # land
437             77 => {}, # land
438             78 => {}, # land
439             79 => {}, # land
440             80 => {operator=>'esia'},
441             81 => {operator=>'esia'}, # jkt
442             82 => {operator=>'esia'}, # assumed 8x
443             83 => {operator=>'esia'},
444             84 => {operator=>'esia'}, # assumed 8x
445             85 => {operator=>'esia'}, # jkt
446             86 => {operator=>'esia'}, # assumed 8x
447             87 => {operator=>'esia'}, # jkt
448             88 => {operator=>'esia'}, # assumed 8x
449             89 => {operator=>'esia'},
450             90 => {operator=>'esia'}, # assumed 9x
451             91 => {operator=>'esia'},
452             92 => {operator=>'esia'},
453             93 => {operator=>'esia'},
454             94 => {operator=>'esia'}, # assumed 9x
455             95 => {operator=>'esia'}, # assumed 9x
456             96 => {operator=>'esia'}, # assumed 9x
457             97 => {operator=>'esia'}, # assumed 9x
458             98 => {operator=>'esia'},
459             99 => {operator=>'esia'},
460             );
461              
462             our %SPEC;
463              
464             $SPEC{':package'} = {
465             v => 1.1,
466             summary => 'Parse Indonesian phone numbers',
467             };
468              
469             my $extract_args = {
470             text => {
471             summary => 'Text containing phone numbers to extract from',
472             schema => 'str*',
473             req => 1,
474             pos => 0,
475             },
476             max_numbers => {
477             schema => 'int',
478             },
479             default_area_code => {
480             summary => 'When encountering a number without area code, use this',
481             schema => ['str' => {
482             match => qr/^0\d{2,3}$/,
483             }],
484             description => <<'_',
485              
486             If you want to extract numbers that doesn't contain area code (e.g. 7123 4567),
487             you'll need to provide this.
488              
489             _
490             },
491             level => {
492             summary => 'How hard should the function extract numbers (1-9)',
493             schema => ['int' => {
494             default => 5,
495             between => [1, 9],
496             }],
497             description => <<'_',
498              
499             The higher the level, the harder this function will try finding phone numbers,
500             but the higher the risk of false positives will be. E.g. in text
501             '123456789012345' with level=5 it will not find a phone number, but with level=9
502             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
503             default level is fine.
504              
505             _
506             },
507             };
508              
509             $SPEC{extract_id_phones} = {
510             v => 1.1,
511             summary => 'Extract phone number(s) from text',
512             description => <<'_',
513              
514             Extracts phone number(s) from text. Return an array of one or more parsed phone
515             number structure (a hash). Understands the list of known area codes and cellular
516             operators, as well as other information. Understands various syntax e.g.
517             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
518             7123456/57 (2 adjacent numbers).
519              
520             Extraction algorithm is particularly targetted at classified ads text in
521             Indonesian language, but should be quite suitable for any other normal text.
522              
523             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
524             but without any other detailed information other than country code.
525              
526             _
527             args => $extract_args,
528             result_naked => 1,
529             };
530             sub extract_id_phones {
531 35     35 1 106674 my %args = @_;
532 35         107 my $text = $args{text};
533 35   100     279 my $level = $args{level} // 5;
534 35         61 my $defac = $args{default_area_code};
535              
536 35         176 $log->tracef("text = %s", $text);
537              
538 35         104 my %nums; # normalized num => {_level=>..., _order=>..., raw=>..., ...}
539              
540             # note: capital prefix means it has capturing group
541 35         40 state $_Cc_prefix_local;
542 35         42 state $_Kprefix_local;
543 35         58 state $_Cc_karea_local_ext;
544 35         49 state $_Karea_local_ext;
545 35         45 state $_Prefix_local;
546 35         50 state $_Klocal;
547 35         48 state $_Local;
548 35         38 state $_Indicator;
549 35         46 state $_sep;
550 35         38 state $_start_w;
551 35         43 state $_start_d;
552 35         34 state $_end_d;
553 35         32 state $_Adjacent;
554 35 100       107 if (!$_Prefix_local) {
555             # known prefixes
556 1         1 $_start_w = '(?:\A|\b)';
557 1         1 $_start_d = '(?:\A|(?<=\D))';
558 1         1 $_end_d = '(?:\z|(?=\D))';
559 1         194 my $_kprefix =
560             '(?:'.join("|",sort(keys %area_codes, keys %cell_prefixes)).')';
561 1         117 my $_karea = '(?:'.join("|",sort keys %area_codes).')';
562 1         9 my @_kareanz;
563 1         21 for (keys %area_codes) { s/^0//; push @_kareanz, $_ }
  306         506  
  306         358  
564 1         139 my $_kareanz = '(?:'.join("|",sort @_kareanz).')';
565             # XXX currently ignores 08681
566 1         2 my $_prefix = '(?:0[1-9](?:[0-9]){1,2})';
567 1         3 my $_prefixnz = '(?:[1-9](?:[0-9]){1,2})';
568 1         1 $_sep = '(?:\s+|\.|-)';
569 1         2 my $_cc = '(?:\+[1-9][0-9]{1,2})';
570              
571 1         3 $_Local = '(\d{5,8}|(?:\d'.$_sep.'?){4,7}\d)';
572              
573             # heuristic: we know that is FWA is 7-8 digits, there is no prefix 1
574             # (?). also (not for exact reason though, just minimizing false
575             # negatives) be stricter (no in-between seps).
576 1         2 my @_klocal;
577 1         17 for (keys %fwa_prefixes) {
578 46         43 my $l = length($_);
579 46         84 push @_klocal, sprintf("%s\\d{%d,%d}", $_, 7-$l, 8-$l);
580             }
581 1         12 $_Klocal = '(' . join("|", @_klocal, '[2-9]{5,7}'). ')';
582              
583 1         7 my $_Ext =
584             qr!((?:extension|ekstensi|ext?|ekst?)(?:\s|:|\.)*(?:\d{1,5}))!ix;
585              
586 1         1153 $_Kprefix_local = # (021) 123-4567, 021-123-4567
587             qr!(\(\s*$_kprefix\s*\)|$_kprefix) $_sep* $_Local!sx;
588 1         170 $_Prefix_local = # same as above, but w/o checking known prefixes
589             qr!(\(\s*$_prefix\s*\)|$_prefix) $_sep* $_Local!sx;
590 1         1007 $_Karea_local_ext = # (021) 123-4567 ext 102, mobile assumed has no ext
591             qr!(\(\s*$_karea\s*\)|$_karea) $_sep*
592             $_Local $_sep*
593             $_Ext!sx;
594 1         142 $_Cc_prefix_local = # (+62) 22 123-4567, 62 812 123-4567
595             qr!(\(\s*$_cc\s*\)|$_cc) $_sep*
596             (\(\s*$_prefixnz\s*\)|$_prefixnz) $_sep*
597             $_Local!sx;
598 1         610 $_Cc_karea_local_ext = # (+62) 22 123-4567 ext 1000
599             qr!(\(\s*$_cc\s*\)|$_cc) $_sep*
600             (\(\s*$_kareanz\s*\)|$_kareanz) $_sep*
601             $_Local $_sep*
602             $_Ext!sx;
603 1         6 $_Indicator = qr!(
604             menghubungi|hubungi|hub|
605             contact|kontak|mengontak|mengkontak|
606             nomor|nomer|no|num|
607             to|ke|
608             tele?pon|tilpun|tilp|te?lp|tel|tl?|
609             phone|ph|
610             handphone|h\.?p|ponsel|cellular|cell|
611             faximile|facsimile|faksimile|fax|facs|faks|f
612             )(?:\s*|\.|:)*!ix;
613 1         42 $_Adjacent = qr!(\s*/\s*\d\d?)!;
614             }
615              
616             # preprocess text: 0 1 2 3 4 5 -> 012345
617 35 100       167 if ($level >= 6) {
618             state $_remove_spaces = sub {
619 2     2   5 local $_ = shift;
620 2         15 s/\s//sg;
621 2         8 $_;
622 17         32 };
623 17         29 my $oldtext = $text;
624 17         92 $text =~ s/((?:\d\s){4,}\d)/$_remove_spaces->($1)/seg;
  2         7  
625 17 100       65 $log->tracef("Preprocess text: remove spaces: %s", $text)
626             if $oldtext ne $text;
627             }
628              
629             # preprocess text: O (letter O) as 0 and l/I/| as 1
630 35 100       113 if ($level >= 6) {
631 17         30 state $diglets = {o=>0, O=>0, l=>1, '|'=>1, I=>1, S=>5};
632 17         24 state $lets = join("", keys %$diglets);
633             state $_replace_lets = sub {
634 20     20   54 my ($lets) = @_;
635 20 100       73 $lets =~ s!(.)!defined($diglets->{$1}) ? $diglets->{$1} : $1!eg;
  240         751  
636             # when will emacs grok //? grr...
637 20         112 $lets;
638 17         23 };
639 17         23 my $oldtext = $text;
640 17         383 $text =~ s/((?:[0-9$lets](?:\s+|-|\.)?){5,})/$_replace_lets->($1)/eg;
  20         47  
641 17 100       72 $log->tracef("Preprocess text: letters->digits: %s", $text)
642             if $oldtext ne $text;
643             }
644              
645             # TODO: preprocess text: words as numbers (nol satu delapan ...)
646              
647 35         61 my $i;
648             my @r;
649              
650             # first, try to find numbers tacked after some indicator, e.g. Hub: blah,
651             # T.blah, etc.
652 35 50       111 if ($level >= 1) {
653 35         58 $i = 0; @r = ();
  35         76  
654 35         1815 while ($text =~ m!($_start_w $_Indicator $_sep*
655             $_Cc_karea_local_ext $_end_d)!xg) {
656 1         9 push @r, $1;
657 1         5 my $ind = $2;
658 1         7 my $num = _normalize($3, $4, $5, $6);
659 1   50     17 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
660             _pat=>"ind+cc+karea+local+ext"};
661 1 50       21 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
662             }
663 35         181 _remove_text(\$text, \@r);
664              
665 35         56 $i = 0; @r = ();
  35         60  
666 35         876 while ($text =~ m!($_start_w $_Indicator $_sep*
667             $_Cc_prefix_local $_end_d)!xg) {
668 2         8 push @r, $1;
669 2         7 my $ind = $2;
670 2         11 my $num = _normalize($3, $4, $5);
671 2   50     28 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
672             _pat=>"ind+cc+prefix+local"};
673 2 50       36 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
674             }
675 35         114 _remove_text(\$text, \@r);
676              
677 35         55 $i = 0; @r = ();
  35         58  
678 35         1596 while ($text =~ m!($_start_w $_Indicator $_Karea_local_ext
679             $_end_d)!xg) {
680 1         7 push @r, $1;
681 1         4 my $ind = $2;
682 1         5 my $num = _normalize(undef, $3, $4, $5);
683 1   50     17 $nums{$num} //= {_level=>1, _order=>++$i, raw=>$1,
684             _pat=>"ind+karea+local+ext"};
685 1 50       19 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
686             }
687 35         116 _remove_text(\$text, \@r);
688              
689 35         61 $i = 0; @r = ();
  35         67  
690 35         1549 while ($text =~ m!($_start_w $_Indicator $_Kprefix_local
691             $_Adjacent? $_end_d)!xg) {
692 5         19 push @r, $1;
693 5         11 my $ind = $2;
694 5         21 my $num = _normalize(undef, $3, $4);
695 5         14 my $adj = $5;
696 5   50     60 $nums{$num} //= {_level=>1, _order=>++$i, raw=>$1,
697             _pat=>"ind+kprefix+local"};
698 5 50       28 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/;
699 5         21 _add_adjacent(\%nums, $num, $adj);
700             }
701 35         116 _remove_text(\$text, \@r);
702             }
703 35 50       110 if ($level >= 2) {
704 35         59 $i = 0; @r = ();
  35         62  
705 35   100     821 while (defined($defac) &&
706             $text =~ m!($_start_w $_Indicator $_sep* $_Klocal
707             $_Adjacent? $_end_d)!xg) {
708 2         8 push @r, $1;
709 2         5 my $ind = $2;
710 2         8 my $num = _normalize(undef, $defac, $3);
711 2         7 my $adj = $4;
712 2   50     25 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
713             _pat=>"ind+klocal"};
714 2 50       13 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
715 2         8 _add_adjacent(\%nums, $num, $adj);
716             }
717 35         120 _remove_text(\$text, \@r);
718             }
719 35 50       110 if ($level >= 2) {
720 35         53 $i = 0; @r = ();
  35         67  
721 35         804 while ($text =~ m!($_start_w $_Indicator $_sep* $_Prefix_local
722             $_Adjacent? $_end_d)!xg) {
723 1         5 push @r, $1;
724 1         3 my $ind = $2;
725 1         5 my $num = _normalize(undef, $3, $4);
726 1         4 my $adj = $5;
727 1   50     16 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
728             _pat=>"ind+prefix+local"};
729 1 50       16 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
730 1         6 _add_adjacent(\%nums, $num, $adj);
731             }
732 35         116 _remove_text(\$text, \@r);
733              
734 35         48 $i = 0; @r = ();
  35         61  
735 35   100     647 while (defined($defac) &&
736             $text =~ m!($_start_w $_Indicator $_sep* $_Local
737             $_Adjacent? $_end_d)!xg) {
738 3         21 push @r, $1;
739 3         8 my $ind = $2;
740 3         12 my $num = _normalize(undef, $defac, $3);
741 3         10 my $adj = $4;
742 3   50     36 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
743             _pat=>"ind+local"};
744 3 50       20 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
745 3         11 _add_adjacent(\%nums, $num, $adj);
746             }
747 35         116 _remove_text(\$text, \@r);
748             }
749              
750             # try to find any cc+area+local numbers
751 35 50       167 if ($level >= 3) {
752 35         62 $i = 0; @r = ();
  35         60  
753 35         1182 while ($text =~ m!($_start_d $_Cc_karea_local_ext $_end_d)!xg) {
754 1         6 push @r, $1;
755 1   50     7 $nums{_normalize($2, $3, $4, $5)} //=
756             {_level=>3, _order=>++$i, raw=>$1, _pat=>"cc+karea+local+ext"};
757             }
758 35         112 _remove_text(\$text, \@r);
759              
760 35         57 $i = 0; @r = ();
  35         67  
761 35         507 while ($text =~ m!($_start_d $_Cc_prefix_local $_end_d)!xg) {
762 0         0 push @r, $1;
763 0   0     0 $nums{_normalize($2, $3, $4)} //=
764             {_level=>3, _order=>++$i, raw=>$1, _pat=>"cc+prefix+local"};
765             }
766 35         98 _remove_text(\$text, \@r);
767             }
768              
769             # try to find numbers with known area code/cell number prefixes
770 35 50       103 if ($level >= 3) {
771 35         44 $i = 0; @r = ();
  35         58  
772 35         1632 while ($text =~ m!($_start_d $_Kprefix_local $_Adjacent? $_end_d)!xg) {
773 11         48 push @r, $1;
774 11         34 my $num = _normalize(undef, $2, $3);
775 11         32 my $adj = $4;
776 11   50     120 $nums{$num} //=
777             {_level=>3, _order=>++$i, raw=>$1, _pat=>"kprefix+local"};
778 11         38 _add_adjacent(\%nums, $num, $adj);
779             }
780 35         192 _remove_text(\$text, \@r);
781             }
782              
783 35 50       119 if ($level >= 5) {
784 35         42 $i = 0; @r = ();
  35         64  
785 35   100     539 while (defined($defac) &&
786             $text =~ m!($_start_w $_Klocal
787             $_Adjacent? $_end_d)!xg) {
788 6         23 push @r, $1;
789 6         22 my $num = _normalize(undef, $defac, $2);
790 6         17 my $adj = $3;
791 6   50     77 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
792             _pat=>"klocal"};
793 6         21 _add_adjacent(\%nums, $num, $adj);
794             }
795 35         111 _remove_text(\$text, \@r);
796             }
797              
798             # try to find any area+local numbers
799 35 50       121 if ($level >= 5) {
800 35         59 $i = 0; @r = ();
  35         58  
801 35         577 while ($text =~ m!($_start_d $_Prefix_local $_Adjacent? $_end_d)!xg) {
802 1         5 push @r, $1;
803 1         5 my $num = _normalize(undef, $2, $3);
804 1         5 my $adj = $4;
805 1   50     16 $nums{$num} //=
806             {_level=>5, _order=>++$i, raw=>$1, _pat=>"prefix+local"};
807 1         10 _add_adjacent(\%nums, $num, $adj);
808             }
809 35         102 _remove_text(\$text, \@r);
810             }
811              
812             # try to find any local numbers (6-8 digit, because 5 digits are easily
813             # confused with indonesian postal code, even though they might still be used
814             # in smaller cities)
815 35 100 66     215 if ($level >= 5 && defined($defac)) {
816 19         28 $i = 0; @r = ();
  19         33  
817 19         363 while ($text =~ m!($_start_d $_Local $_Adjacent? $_end_d)!xg) {
818 5         15 push @r, $1;
819 5         15 my $num = _normalize(undef, $defac, $2);
820 5         10 my $adj = $3;
821 5   50     49 $nums{$num} //=
822             {_level=>5, _order=>++$i, raw=>$1, _pat=>"local (defac)"};
823 5         43 _add_adjacent(\%nums, $num, $adj);
824             }
825 19         53 _remove_text(\$text, \@r);
826             }
827              
828 35         141 for (keys %nums) { $nums{$_}{standard} = $_ }
  41         133  
829 35         166 $log->tracef("\\%%nums = %s", \%nums);
830              
831             # if we are told to extract only N max_numbers, use the lower level ones and
832             # the ones at the end (they are more likely to be numbers, in the case of
833             # classified ads)
834 41 50 66     134 my @nums = map { $nums{$_} } sort {
  9         89  
835 35         214 $nums{$a}{_level} <=> $nums{$b}{_level} ||
836             $nums{$b}{_order} <=> $nums{$a}{_order} ||
837             $nums{$b}{standard} cmp $nums{$a}{standard}
838             } keys %nums;
839 35 100 66     155 if (defined($args{max_numbers}) && $args{max_numbers} > 0 &&
      100        
840             @nums > $args{max_numbers}
841             ) {
842 1         4 splice @nums, $args{max_numbers};
843             }
844              
845             # sort again according to order (ascending), this is what most people expect
846 35         60 @nums = sort {$a->{_order} <=> $b->{_order}} @nums;
  8         25  
847              
848             # remove internal data
849 35         68 for my $num (@nums) {
850             #for (keys %$num) { delete $num->{$_} if /^_/ }
851 40         105 _add_info($num);
852             }
853              
854 35         194 $log->tracef("\\\@nums = %s", \@nums);
855              
856 35         246 \@nums;
857             }
858              
859             gen_modified_sub(
860             output_name => 'parse_id_phone',
861             base_name => 'extract_id_phones',
862             summary => 'Alias for extract_id_phones(..., max_numbers=>1)->[0]',
863             remove_args => [qw/max_numbers/],
864             output_code => sub {
865 1     1   2086 my %args = @_;
866 1         4 my $res = extract_id_phones(%args, max_numbers=>1);
867 1         4 $res->[0];
868             },
869             );
870              
871             sub _normalize {
872 39     39   129 my ($cc, $area, $local, $ext) = @_;
873 39   100     189 $cc //= "62";
874 39 100       86 for ($cc, $area, $local, $ext) { s/\D+//g if defined($_) }
  156         492  
875 39         171 $area =~ s/^0//;
876 39 100 66     318 "+$cc.$area.$local".(defined($ext) && length($ext) ? ".ext$ext" : "");
877             }
878              
879             sub _remove_text {
880 439     439   526 my ($textref, $strs) = @_;
881 439         570 my $oldtext = $$textref;
882 439         751 for (@$strs) {
883 39         959 $$textref =~ s/\Q$_\E//;
884             }
885 439 100       1261 $log->tracef("removed match, text = %s", $$textref)
886             if $$textref ne $oldtext;
887             }
888              
889             sub _add_adjacent {
890 34     34   77 my ($nums, $num, $adj) = @_;
891 34 100       401 return unless $adj;
892 2         11 $adj =~ s/\D//g;
893 2         8 my $first = substr($num, -length($adj));
894 2 50       12 return unless abs($first - $adj) == 1;
895 2         7 my $num2 = $num;
896 2         6 substr($num2, -length($adj)) = $adj;
897 2         14 $nums->{$num2} = clone($nums->{$num});
898 2         146 $nums->{$num2}{_order} += 0.5;
899             }
900              
901             sub _add_info {
902 40     40   58 my ($num) = @_;
903 40 50       410 my ($cc, $prefix, $local, $ext) =
904             $num->{standard} =~ /^\+(\d+)\.(\d+)\.(\d+)(?:\.ext*(\d+))?$/
905             or die "BUG: invalid standard format: $num->{standard}";
906 40         100 $prefix = "0$prefix";
907 40         91 $num->{country_code} = $cc;
908 40         85 $num->{area_code} = $prefix;
909 40         100 $num->{local_number} = $local;
910 40 100       90 $num->{ext} = $ext if defined($ext);
911              
912             # XXX country calling code -> name for other countries
913 40 100       150 $num->{country} = 'Indonesia' if $cc eq '62';
914 40 100       105 return unless $cc eq '62';
915              
916 38 100       115 if (length($local) >= 8) {
917 21         79 $local =~ /(....)(.+)/;
918 21         99 $num->{pretty} = "$prefix-$1-$2";
919             } else {
920 17         63 $local =~ /(...)(.+)/;
921 17         86 $num->{pretty} = "$prefix-$1-$2";
922             }
923              
924 38 100       114 if (my $c = $cell_prefixes{$prefix}) {
925 9         18 $num->{is_cell} = 1;
926 9 50       34 $num->{is_gsm} = $c->{is_gsm} ? 1:0;
927 9 50       28 $num->{is_cdma} = $c->{is_cdma} ? 1:0;
928 9         25 $num->{operator} = $c->{operator};
929 9         22 $num->{product} = $c->{product};
930             } else {
931 29         60 $num->{is_cell} = 0;
932             }
933              
934 38 100       127 if (my $a = $area_codes{$prefix}) {
935 27         48 $num->{is_land} = 1;
936 27         69 $num->{province} = $a->{province};
937 27         55 $num->{cities} = $a->{cities};
938 27         38 state $_fwa_prefixes;
939 27 100       67 if (!$_fwa_prefixes) {
940 1         24 $_fwa_prefixes = '(?:'.join("|", keys %fwa_prefixes).')';
941             }
942 27 100       466 if ($local =~ /^($_fwa_prefixes)/) {
943 18         65 my $fwa = $fwa_prefixes{$1};
944 18         39 $num->{is_cdma} = 1;
945 18         67 $num->{operator} = $fwa->{operator};
946 18         94 $num->{product} = $fwa->{product};
947             }
948             } else {
949 11         55 $num->{is_land} = 0;
950             }
951             }
952              
953             #$SPEC{list_id_operators} = {
954             # v => 1.1,
955             # summary => 'Return list of known phone operators',
956             # result_naked => 1,
957             #};
958             #sub list_id_operators {
959             #
960             #}
961              
962             #$SPEC{list_id_area_codes} = {
963             # v => 1.1,
964             # summary => 'Return list of known area codes in Indonesia, '.
965             # 'along with area names',
966             # result_naked => 1,
967             #};
968             #sub list_id_area_codes {
969             #}
970              
971             1;
972             # ABSTRACT: Parse Indonesian phone numbers
973              
974             __END__
975              
976             =pod
977              
978             =encoding UTF-8
979              
980             =head1 NAME
981              
982             Parse::PhoneNumber::ID - Parse Indonesian phone numbers
983              
984             =head1 VERSION
985              
986             This document describes version 0.13 of Parse::PhoneNumber::ID (from Perl distribution Parse-PhoneNumber-ID), released on 2015-01-04.
987              
988             =head1 SYNOPSIS
989              
990             use Parse::PhoneNumber::ID qw(parse_id_phone extract_id_phones);
991             use Data::Dump;
992              
993             dd parse_id_phone(text => 'Jual dalmatian 2bl lucu2x. Hub: 7123 4567',
994             default_area_code=>'022');
995              
996             Will print something like:
997              
998             { raw => 'Hub: 7123 4567',
999             pretty => '022-7123-4567',
1000             standard => '+62.22.71234567',
1001             is_cell => 1,
1002             is_gsm => 0,
1003             is_cdma => 1,
1004             operator => 'telkom',
1005             product => 'flexi',
1006             area_code => '022',
1007             province => 'jabar',
1008             cities => 'Bandung, Cimahi, ...',
1009             local_number => '71234567',
1010             country => 'Indonesia',
1011             country_code => '62',
1012             ext => undef, }
1013              
1014             To extract more than one numbers in a text:
1015              
1016             my $phones = extract_id_phones(text => 'some text containing phone number(s):'.
1017             '0812 2345 6789, +62-22-91234567');
1018             say "There are ", scalar(@$phones), "phone number(s) found in text";
1019             for (@$phones) { say $_->{pretty} }
1020              
1021             =head1 FUNCTIONS
1022              
1023              
1024             =head2 extract_id_phones(%args) -> any
1025              
1026             Extract phone number(s) from text.
1027              
1028             Extracts phone number(s) from text. Return an array of one or more parsed phone
1029             number structure (a hash). Understands the list of known area codes and cellular
1030             operators, as well as other information. Understands various syntax e.g.
1031             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
1032             7123456/57 (2 adjacent numbers).
1033              
1034             Extraction algorithm is particularly targetted at classified ads text in
1035             Indonesian language, but should be quite suitable for any other normal text.
1036              
1037             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
1038             but without any other detailed information other than country code.
1039              
1040             Arguments ('*' denotes required arguments):
1041              
1042             =over 4
1043              
1044             =item * B<default_area_code> => I<str>
1045              
1046             When encountering a number without area code, use this.
1047              
1048             If you want to extract numbers that doesn't contain area code (e.g. 7123 4567),
1049             you'll need to provide this.
1050              
1051             =item * B<level> => I<int> (default: 5)
1052              
1053             How hard should the function extract numbers (1-9).
1054              
1055             The higher the level, the harder this function will try finding phone numbers,
1056             but the higher the risk of false positives will be. E.g. in text
1057             '123456789012345' with level=5 it will not find a phone number, but with level=9
1058             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
1059             default level is fine.
1060              
1061             =item * B<max_numbers> => I<int>
1062              
1063             =item * B<text>* => I<str>
1064              
1065             Text containing phone numbers to extract from.
1066              
1067             =back
1068              
1069             Return value: (any)
1070              
1071             =head2 parse_id_phone(%args) -> any
1072              
1073             Alias for extract_id_phones(..., max_numbers=>1)->[0].
1074              
1075             Extracts phone number(s) from text. Return an array of one or more parsed phone
1076             number structure (a hash). Understands the list of known area codes and cellular
1077             operators, as well as other information. Understands various syntax e.g.
1078             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
1079             7123456/57 (2 adjacent numbers).
1080              
1081             Extraction algorithm is particularly targetted at classified ads text in
1082             Indonesian language, but should be quite suitable for any other normal text.
1083              
1084             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
1085             but without any other detailed information other than country code.
1086              
1087             Arguments ('*' denotes required arguments):
1088              
1089             =over 4
1090              
1091             =item * B<default_area_code> => I<str>
1092              
1093             When encountering a number without area code, use this.
1094              
1095             If you want to extract numbers that doesn't contain area code (e.g. 7123 4567),
1096             you'll need to provide this.
1097              
1098             =item * B<level> => I<int> (default: 5)
1099              
1100             How hard should the function extract numbers (1-9).
1101              
1102             The higher the level, the harder this function will try finding phone numbers,
1103             but the higher the risk of false positives will be. E.g. in text
1104             '123456789012345' with level=5 it will not find a phone number, but with level=9
1105             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
1106             default level is fine.
1107              
1108             =item * B<text>* => I<str>
1109              
1110             Text containing phone numbers to extract from.
1111              
1112             =back
1113              
1114             Return value: (any)
1115             =head1 SEE ALSO
1116              
1117             L<Parse::PhoneNumber>
1118              
1119             =head1 HOMEPAGE
1120              
1121             Please visit the project's homepage at L<https://metacpan.org/release/Parse-PhoneNumber-ID>.
1122              
1123             =head1 SOURCE
1124              
1125             Source repository is at L<https://github.com/sharyanto/perl-Parse-PhoneNumber-ID>.
1126              
1127             =head1 BUGS
1128              
1129             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-PhoneNumber-ID>
1130              
1131             When submitting a bug or request, please include a test-file or a
1132             patch to an existing test-file that illustrates the bug or desired
1133             feature.
1134              
1135             =head1 AUTHOR
1136              
1137             perlancar <perlancar@cpan.org>
1138              
1139             =head1 COPYRIGHT AND LICENSE
1140              
1141             This software is copyright (c) 2015 by perlancar@cpan.org.
1142              
1143             This is free software; you can redistribute it and/or modify it under
1144             the same terms as the Perl 5 programming language system itself.
1145              
1146             =cut