File Coverage

blib/lib/Parse/PhoneNumber/ID.pm
Criterion Covered Total %
statement 281 283 99.2
branch 59 78 75.6
condition 36 54 66.6
subroutine 13 13 100.0
pod 2 2 100.0
total 391 430 90.9


line stmt bran cond sub pod time code
1             package Parse::PhoneNumber::ID;
2              
3 1     1   20276 use 5.010001;
  1         3  
  1         33  
4 1     1   4 use strict;
  1         1  
  1         29  
5 1     1   4 use warnings;
  1         6  
  1         26  
6 1     1   4 use Log::Any '$log';
  1         1  
  1         7  
7              
8             require Exporter;
9             our @ISA = qw(Exporter);
10             our @EXPORT_OK = qw(extract_id_phones parse_id_phone
11             list_id_operators list_id_area_codes);
12              
13 1     1   523 use Data::Clone;
  1         1171  
  1         6388  
14              
15             our $DATE = '2015-01-03'; # DATE
16             our $VERSION = '0.12'; # VERSION
17              
18             # from: http://id.wikipedia.org/wiki/Daftar_kode_telepon_di_Indonesia
19             # last updated: 2011-03-08
20             my %area_codes = (
21             '0627' => {province=>'aceh', cities=>'Kota Subulussalam'},
22             '0629' => {province=>'aceh', cities=>'Kutacane (Kabupaten Aceh Tenggara)'},
23             '0641' => {province=>'aceh', cities=>'Kota Langsa'},
24             '0642' => {province=>'aceh', cities=>'Blang Kejeren (Kabupaten Gayo Lues)'},
25             '0643' => {province=>'aceh', cities=>'Takengon (Kabupaten Aceh Tengah)'},
26             '0644' => {province=>'aceh', cities=>'Bireuen (Kabupaten Bireuen)'},
27             '0645' => {province=>'aceh', cities=>'Kota Lhokseumawe'},
28             '0646' => {province=>'aceh', cities=>'Idi (Kabupaten Aceh Timur)'},
29             '0650' => {province=>'aceh', cities=>'Sinabang (Kabupaten Simeulue)'},
30             '0651' => {province=>'aceh', cities=>'Kota Banda Aceh - Jantho (Kabupaten Aceh Besar) - Lamno (Kabupaten Aceh Jaya)'},
31             '0652' => {province=>'aceh', cities=>'Kota Sabang'},
32             '0653' => {province=>'aceh', cities=>'Sigli (Kabupaten Pidie)'},
33             '0654' => {province=>'aceh', cities=>'Calang (Kabupaten Aceh Jaya)'},
34             '0655' => {province=>'aceh', cities=>'Meulaboh (Kabupaten Aceh Barat)'},
35             '0656' => {province=>'aceh', cities=>'Tapaktuan (Kabupaten Aceh Selatan)'},
36             '0657' => {province=>'aceh', cities=>'Bakongan (Kabupaten Aceh Selatan)'},
37             '0658' => {province=>'aceh', cities=>'Singkil (Kabupaten Aceh Singkil)'},
38             '0659' => {province=>'aceh', cities=>'Blangpidie (Kabupaten Aceh Barat Daya)'},
39              
40             '061' => {province=>'sumut', cities=>'Kota Medan - Kota Binjai - Stabat (Kabupaten Langkat)'},
41             '0620' => {province=>'sumut', cities=>'Pangkalan Brandan (Kabupaten Langkat)'},
42             '0621' => {province=>'sumut', cities=>'Kota Tebing Tinggi'},
43             '0622' => {province=>'sumut', cities=>'Kota Pematangsiantar'},
44             '0623' => {province=>'sumut', cities=>'Kisaran (Kabupaten Asahan) - Kota Tanjung Balai'},
45             '0624' => {province=>'sumut', cities=>'Rantau Prapat (Kabupaten Labuhanbatu)'},
46             '0625' => {province=>'sumut', cities=>'Parapat (Kabupaten Simalungun)'},
47             '0626' => {province=>'sumut', cities=>'Pangururan (Kabupaten Samosir)'},
48             '0627' => {province=>'sumut', cities=>'Sidikalang (Kabupaten Dairi) - Salak (Kabupaten Pakpak Bharat)'},
49             '0628' => {province=>'sumut', cities=>'Kabanjahe (Kabupaten Karo)'},
50             '0630' => {province=>'sumut', cities=>'Teluk Dalam (Kabupaten Nias Selatan)'},
51             '0631' => {province=>'sumut', cities=>'Kota Sibolga'},
52             '0632' => {province=>'sumut', cities=>'Balige (Kabupaten Toba Samosir)'},
53             '0633' => {province=>'sumut', cities=>'Tarutung (Kabupaten Tapanuli Utara)'},
54             '0634' => {province=>'sumut', cities=>'Kota Padang Sidempuan'},
55             '0635' => {province=>'sumut', cities=>'Gunung Tua (Kabupaten Padang Lawas Utara)'},
56             '0636' => {province=>'sumut', cities=>'Panyabungan (Kabupaten Mandailing Natal)'},
57             '0638' => {province=>'sumut', cities=>'Barus (Kabupaten Tapanuli Tengah)'},
58             '0639' => {province=>'sumut', cities=>'Kota Gunung Sitoli'},
59              
60             '0751' => {province=>'sumbar', cities=>'Kota Padang - Kota Pariaman'},
61             '0752' => {province=>'sumbar', cities=>'Kota Bukittinggi - Kota Padang Panjang - Kota Payakumbuh - Batusangkar (Kabupaten Tanah Datar)'},
62             '0753' => {province=>'sumbar', cities=>'Lubuk Sikaping (Kabupaten Pasaman)'},
63             '0754' => {province=>'sumbar', cities=>'Kabupaten Sijunjung'},
64             '0755' => {province=>'sumbar', cities=>'Kota Solok - Kabupaten Solok Selatan - Alahan Panjang (Kabupaten Solok)'},
65             '0756' => {province=>'sumbar', cities=>'Painan (Kabupaten Pesisir Selatan)'},
66             '0757' => {province=>'sumbar', cities=>'Balai Selasa (Kabupaten Agam)'},
67             '0759' => {province=>'sumbar', cities=>'Tuapejat (Kabupaten Kepulauan Mentawai)'},
68              
69             '0760' => {province=>'riau', cities=>'Teluk Kuantan (Kabupaten Kuantan Singingi)'},
70             '0761' => {province=>'riau', cities=>'Kota Pekanbaru - Pangkalan Kerinci (Kabupaten Pelalawan)'},
71             '0762' => {province=>'riau', cities=>'Bangkinang (Kabupaten Kampar)'},
72             '0763' => {province=>'riau', cities=>'Selatpanjang (Kabupaten Bengkalis)'},
73             '0764' => {province=>'riau', cities=>'Siak Sri Indrapura (Kabupaten Siak)'},
74             '0765' => {province=>'riau', cities=>'Kota Dumai - Duri (Kabupaten Bengkalis)'},
75             '0766' => {province=>'riau', cities=>'Bengkalis (Kabupaten Bengkalis)'},
76             '0767' => {province=>'riau', cities=>'Bagan Siapi-api (Kabupaten Rokan Hilir)'},
77             '0768' => {province=>'riau', cities=>'Tembilahan (Kabupaten Indragiri Hilir)'},
78             '0769' => {province=>'riau', cities=>'Rengat - Air Molek (Kabupaten Indragiri Hulu)'},
79              
80             '0771' => {province=>'kepriau', cities=>'Kota Tanjung Pinang'},
81             '0772' => {province=>'kepriau', cities=>'Tarempa (Kabupaten Kepulauan Anambas)'},
82             '0773' => {province=>'kepriau', cities=>'Ranai (Kabupaten Natuna)'},
83             '0776' => {province=>'kepriau', cities=>'Dabosingkep (Kabupaten Lingga)'},
84             '0777' => {province=>'kepriau', cities=>'Tanjung Balai Karimun (Kabupaten Karimun)'},
85             '0778' => {province=>'kepriau', cities=>'Kota Batam'},
86             '0779' => {province=>'kepriau', cities=>'Tanjungbatu (Kabupaten Karimun)'},
87              
88             '0740' => {province=>'jambi', cities=>'Mendahara - Muara Sabak (Kabupaten Tanjung Jabung Timur)'},
89             '0741' => {province=>'jambi', cities=>'Kota Jambi'},
90             '0742' => {province=>'jambi', cities=>'Kualatungkal (Kabupaten Tanjung Jabung Barat)'},
91             '0743' => {province=>'jambi', cities=>'Muara Bulian (Kabupaten Batanghari)'},
92             '0744' => {province=>'jambi', cities=>'Muara Tebo (Kabupaten Tebo)'},
93             '0745' => {province=>'jambi', cities=>'Sarolangun (Kabupaten Sarolangun)'},
94             '0746' => {province=>'jambi', cities=>'Bangko (Kabupaten Merangin)'},
95             '0747' => {province=>'jambi', cities=>'Muarabungo (Kabupaten Bungo)'},
96             '0748' => {province=>'jambi', cities=>'Kota Sungai Penuh'},
97              
98             '0711' => {province=>'sumsel', cities=>'Kota Palembang - Pangkalan Balai - Betung (Kabupaten Banyuasin) - Indralaya (Kabupaten Ogan Ilir)'},
99             '0712' => {province=>'sumsel', cities=>'Kayu Agung (Kabupaten Ogan Komering Ilir)'},
100             '0713' => {province=>'sumsel', cities=>'Kota Prabumulih'},
101             '0714' => {province=>'sumsel', cities=>'Sekayu (Kabupaten Musi Banyuasin)'},
102             '0730' => {province=>'sumsel', cities=>'Kota Pagar Alam'},
103             '0731' => {province=>'sumsel', cities=>'Lahat (Kabupaten Lahat)'},
104             '0733' => {province=>'sumsel', cities=>'Kota Lubuklinggau - Pendopo (Kabupaten Lahat)'},
105             '0734' => {province=>'sumsel', cities=>'Muara Enim (Kabupaten Muara Enim)'},
106             '0735' => {province=>'sumsel', cities=>'Baturaja (Kabupaten Ogan Komering Ulu)'},
107              
108             '0715' => {province=>'kbb', cities=>'Belinyu (Kabupaten Bangka)'},
109             '0716' => {province=>'kbb', cities=>'Muntok (Kabupaten Bangka Barat)'},
110             '0717' => {province=>'kbb', cities=>'Kota Pangkal Pinang - Sungailiat (Kabupaten Bangka)'},
111             '0718' => {province=>'kbb', cities=>'Koba (Kabupaten Bangka Tengah) - Toboali (Kabupaten Bangka Selatan)'},
112             '0719' => {province=>'kbb', cities=>'Manggar (Kabupaten Belitung Timur) - Tanjung Pandan (Kabupaten Belitung)'},
113              
114             '0732' => {province=>'bengkulu', cities=>'Curup (Kabupaten Rejang Lebong)'},
115             '0736' => {province=>'bengkulu', cities=>'Kota Bengkulu - Lais (Kabupaten Bengkulu Utara)'},
116             '0737' => {province=>'bengkulu', cities=>'Arga Makmur (Kabupaten Bengkulu Utara) - Mukomuko (Kabupaten Mukomuko)'},
117             '0738' => {province=>'bengkulu', cities=>'Muara Aman (Kabupaten Lebong)'},
118             '0739' => {province=>'bengkulu', cities=>'Bintuhan (Kabupaten Kaur) - Kota Manna (Kabupaten Bengkulu Selatan)'},
119              
120             '0721' => {province=>'lampung', cities=>'Kota Bandar Lampung'},
121             '0722' => {province=>'lampung', cities=>'Kota Agung (Kabupaten Tanggamus)'},
122             '0723' => {province=>'lampung', cities=>'Blambangan Umpu (Kabupaten Way Kanan)'},
123             '0724' => {province=>'lampung', cities=>'Kotabumi (Kabupaten Lampung Utara)'},
124             '0725' => {province=>'lampung', cities=>'Kota Metro'},
125             '0726' => {province=>'lampung', cities=>'Menggala (Kabupaten Tulang Bawang)'},
126             '0727' => {province=>'lampung', cities=>'Kalianda (Kabupaten Lampung Selatan)'},
127             '0728' => {province=>'lampung', cities=>'Kota Liwa (Kabupaten Lampung Barat)'},
128             '0729' => {province=>'lampung', cities=>'Pringsewu (Kabupaten Pringsewu)'},
129              
130             '021' => {province=>'dki/banten/jabar', cities=>'Kepulauan Seribu - Jakarta Barat - Jakarta Pusat - Jakarta Selatan - Jakarta Timur - Jakarta Utara/Tigaraksa (Kabupaten Tangerang) - Kota Tangerang - Kota Tangerang Selatan/Kota Bekasi - Cikarang (Kabupaten Bekasi) - Kota Depok - Cibinong (Kabupaten Bogor)'},
131              
132             '0252' => {province=>'banten', cities=>'Rangkasbitung (Kabupaten Lebak)'},
133             '0253' => {province=>'banten', cities=>'Pandeglang - Labuan (Kabupaten Pandeglang)'},
134             '0254' => {province=>'banten', cities=>'Kota Serang - Kabupaten Serang - Merak (Kota Cilegon)'},
135             '0257' => {province=>'banten', cities=>'Pasauran (Kabupaten Serang)'},
136              
137             '022' => {province=>'jabar', cities=>'Kota Bandung - Kota Cimahi - Soreang (Kabupaten Bandung) - Lembang - Ngamprah (Kabupaten Bandung Barat)'},
138             '0231' => {province=>'jabar', cities=>'Kota Cirebon - Sumber - Losari (Kabupaten Cirebon)'},
139             '0232' => {province=>'jabar', cities=>'Kabupaten Kuningan'},
140             '0233' => {province=>'jabar', cities=>'Kadipaten (Kabupaten Majalengka)'},
141             '0234' => {province=>'jabar', cities=>'Jatibarang (Kabupaten Indramayu)'},
142             '0251' => {province=>'jabar', cities=>'Kota Bogor'},
143             '0260' => {province=>'jabar', cities=>'Pamanukan (Kabupaten Subang)'},
144             '0261' => {province=>'jabar', cities=>'Kabupaten Sumedang'},
145             '0262' => {province=>'jabar', cities=>'Kabupaten Garut'},
146             '0263' => {province=>'jabar', cities=>'Kabupaten Cianjur'},
147             '0264' => {province=>'jabar', cities=>'Kabupaten Purwakarta - Cikampek'},
148             '0265' => {province=>'jabar', cities=>'Kota Tasikmalaya - Kadipaten - Singaparna (Kabupaten Tasikmalaya) - Kota Banjar - Ciamis - Pangandaran (Kabupaten Ciamis)'},
149             '0266' => {province=>'jabar', cities=>'Kota Sukabumi - Palabuhanratu (Kabupaten Sukabumi)'},
150             '0267' => {province=>'jabar', cities=>'Kabupaten Karawang'},
151              
152             '024' => {province=>'jateng', cities=>'Semarang, Ungaran'},
153             '0271' => {province=>'jateng', cities=>'Surakarta (Solo), Kartasura, Sukoharjo, Karanganyar, Sragen'},
154             '0272' => {province=>'jateng', cities=>'Klaten'},
155             '0273' => {province=>'jateng', cities=>'Wonogiri'},
156             '0275' => {province=>'jateng', cities=>'Purworejo,Kutoarjo'},
157             '0276' => {province=>'jateng', cities=>'Boyolali'},
158             '0280' => {province=>'jateng', cities=>'Majenang, Sidareja (Kabupaten Cilacap bagian barat)'},
159             '0281' => {province=>'jateng', cities=>'Purwokerto, Banyumas, Purbalingga'},
160             '0282' => {province=>'jateng', cities=>'Cilacap (bagian timur)'},
161             '0283' => {province=>'jateng', cities=>'Tegal, Slawi, Brebes'},
162             '0284' => {province=>'jateng', cities=>'Pemalang'},
163             '0285' => {province=>'jateng', cities=>'Pekalongan, Batang (bagian barat)'},
164             '0286' => {province=>'jateng', cities=>'Banjarnegara, Wonosobo'},
165             '0287' => {province=>'jateng', cities=>'Kebumen, Gombong'},
166             '0289' => {province=>'jateng', cities=>'Bumiayu (Kabupaten Brebes bagian selatan)'},
167             '0291' => {province=>'jateng', cities=>'Demak, Jepara, Kudus'},
168             '0292' => {province=>'jateng', cities=>'Purwodadi'},
169             '0293' => {province=>'jateng', cities=>'Magelang, Mungkid, Temanggung'},
170             '0294' => {province=>'jateng', cities=>'Kendal, Kaliwungu, Weleri, Batang (bagian timur)'},
171             '0295' => {province=>'jateng', cities=>'Pati, Rembang, Lasem'},
172             '0296' => {province=>'jateng', cities=>'Blora, Cepu'},
173             '0297' => {province=>'jateng', cities=>'Karimun Jawa'},
174             '0298' => {province=>'jateng', cities=>'Salatiga, Ambarawa (Kabupaten Semarang bagian tengah dan selatan)'},
175             '0356' => {province=>'jateng', cities=>'Rembang bagian Timur (wilayah yang berbatasan dengan Tuban)'},
176              
177             '0274' => {province=>'diy', cities=>'Yogyakarta, Sleman, Wates, Bantul, Wonosari'},
178              
179             '031' => {province=>'jatim', cities=>'Surabaya, Gresik, Sidoarjo, Bangkalan'},
180             '0321' => {province=>'jatim', cities=>'Mojokerto, Jombang'},
181             '0322' => {province=>'jatim', cities=>'Lamongan, Babat'},
182             '0323' => {province=>'jatim', cities=>'Sampang'},
183             '0324' => {province=>'jatim', cities=>'Pamekasan'},
184             '0325' => {province=>'jatim', cities=>'Sangkapura (Bawean)'},
185             '0327' => {province=>'jatim', cities=>'Kepulauan Kangean, Kepulauan Masalembu'},
186             '0328' => {province=>'jatim', cities=>'Sumenep'},
187             '0331' => {province=>'jatim', cities=>'Jember'},
188             '0332' => {province=>'jatim', cities=>'Bondowoso, Sukosari, Prajekan'},
189             '0333' => {province=>'jatim', cities=>'Banyuwangi, Muncar'},
190             '0334' => {province=>'jatim', cities=>'Lumajang'},
191             '0335' => {province=>'jatim', cities=>'Probolinggo, Kraksaan'},
192             '0336' => {province=>'jatim', cities=>'Ambulu, Puger (Kabupaten Jember bagian selatan)'},
193             '0338' => {province=>'jatim', cities=>'Situbondo, Besuki'},
194             '0341' => {province=>'jatim', cities=>'Malang, Kepanjen, Batu'},
195             '0342' => {province=>'jatim', cities=>'Blitar, Wlingi'},
196             '0343' => {province=>'jatim', cities=>'Pasuruan, Pandaan, Gempol'},
197             '0351' => {province=>'jatim', cities=>'Madiun, Caruban, Magetan, Ngawi'},
198             '0352' => {province=>'jatim', cities=>'Ponorogo'},
199             '0353' => {province=>'jatim', cities=>'Bojonegoro'},
200             '0354' => {province=>'jatim', cities=>'Kediri, Pare'},
201             '0355' => {province=>'jatim', cities=>'Tulungagung, Trenggalek'},
202             '0356' => {province=>'jatim', cities=>'Tuban'},
203             '0357' => {province=>'jatim', cities=>'Pacitan'},
204             '0358' => {province=>'jatim', cities=>'Nganjuk, Kertosono'},
205              
206             '0361' => {province=>'bali', cities=>'Denpasar, Gianyar, Kuta, Tabanan, Tampaksiring, Ubud'},
207             '0362' => {province=>'bali', cities=>'Singaraja'},
208             '0363' => {province=>'bali', cities=>'Amlapura'},
209             '0365' => {province=>'bali', cities=>'Negara, Gilimanuk'},
210             '0366' => {province=>'bali', cities=>'Klungkung, Kintamani'},
211             '0368' => {province=>'bali', cities=>'Baturiti'},
212              
213             '0364' => {province=>'ntb', cities=>'Kota Mataram'},
214             '0370' => {province=>'ntb', cities=>'Mataram, Praya'},
215             '0371' => {province=>'ntb', cities=>'Sumbawa'},
216             '0372' => {province=>'ntb', cities=>'Alas, Taliwang'},
217             '0373' => {province=>'ntb', cities=>'Dompu'},
218             '0374' => {province=>'ntb', cities=>'Bima'},
219             '0376' => {province=>'ntb', cities=>'Selong'},
220              
221             '0380' => {province=>'ntt', cities=>'Kupang, Baa (Roti)'},
222             '0381' => {province=>'ntt', cities=>'Ende'},
223             '0382' => {province=>'ntt', cities=>'Maumere'},
224             '0383' => {province=>'ntt', cities=>'Larantuka'},
225             '0384' => {province=>'ntt', cities=>'Bajawa'},
226             '0385' => {province=>'ntt', cities=>'Labuhanbajo, Ruteng'},
227             '0386' => {province=>'ntt', cities=>'Kalabahi'},
228             '0387' => {province=>'ntt', cities=>'Waingapu, Waikabubak'},
229             '0388' => {province=>'ntt', cities=>'Kefamenanu, Soe'},
230             '0389' => {province=>'ntt', cities=>'Atambua'},
231              
232             '0561' => {province=>'kalbar', cities=>'Pontianak, Mempawah'},
233             '0562' => {province=>'kalbar', cities=>'Sambas, Singkawang, Bengkayang'},
234             '0563' => {province=>'kalbar', cities=>'Ngabang'},
235             '0564' => {province=>'kalbar', cities=>'Sanggau'},
236             '0565' => {province=>'kalbar', cities=>'Sintang'},
237             '0567' => {province=>'kalbar', cities=>'Putussibau'},
238             '0568' => {province=>'kalbar', cities=>'Nanga Pinoh'},
239             '0534' => {province=>'kalbar', cities=>'Ketapang'},
240              
241             '0513' => {province=>'kalteng', cities=>'Kuala Kapuas, Pulang Pisau'},
242             '0519' => {province=>'kalteng', cities=>'Muara Teweh'},
243             '0522' => {province=>'kalteng', cities=>'Ampah (Dusun Tengah, Barito Timur)'},
244             '0525' => {province=>'kalteng', cities=>'Buntok'},
245             '0526' => {province=>'kalteng', cities=>'Tamiang Layang'},
246             '0528' => {province=>'kalteng', cities=>'Purukcahu'},
247             '0531' => {province=>'kalteng', cities=>'Sampit'},
248             '0532' => {province=>'kalteng', cities=>'Pangkalan Bun, Kumai'},
249             '0534' => {province=>'kalteng', cities=>'Kendawangan'},
250             '0536' => {province=>'kalteng', cities=>'Palangkaraya, Kasongan'},
251             '0537' => {province=>'kalteng', cities=>'Kuala Kurun'},
252             '0538' => {province=>'kalteng', cities=>'Kuala Pembuang'},
253             '0539' => {province=>'kalteng', cities=>'Kuala Kuayan (Mentaya Hulu, Kotawaringin Timur)'},
254              
255             '0511' => {province=>'kalsel', cities=>'Banjarmasin, Banjarbaru, Martapura, Marabahan'},
256             '0512' => {province=>'kalsel', cities=>'Pelaihari'},
257             '0517' => {province=>'kalsel', cities=>'Kandangan, Barabai, Rantau, Negara'},
258             '0518' => {province=>'kalsel', cities=>'Kotabaru, Batulicin'},
259             '0526' => {province=>'kalsel', cities=>'Tanjung'},
260             '0527' => {province=>'kalsel', cities=>'Amuntai'},
261              
262             '0541' => {province=>'kaltim', cities=>'Samarinda, Tenggarong'},
263             '0542' => {province=>'kaltim', cities=>'Balikpapan'},
264             '0543' => {province=>'kaltim', cities=>'Tanah Grogot'},
265             '0545' => {province=>'kaltim', cities=>'Melak'},
266             '0548' => {province=>'kaltim', cities=>'Bontang'},
267             '0549' => {province=>'kaltim', cities=>'Sangatta'},
268             '0551' => {province=>'kaltim', cities=>'Tarakan'},
269             '0552' => {province=>'kaltim', cities=>'Tanjungselor'},
270             '0553' => {province=>'kaltim', cities=>'Malinau'},
271             '0554' => {province=>'kaltim', cities=>'Tanjung Redeb'},
272             '0556' => {province=>'kaltim', cities=>'Nunukan'},
273              
274             '0430' => {province=>'sulut', cities=>'Amurang'},
275             '0431' => {province=>'sulut', cities=>'Manado, Tomohon, Tondano'},
276             '0432' => {province=>'sulut', cities=>'Tahuna'},
277             '0434' => {province=>'sulut', cities=>'Kotamobagu'},
278             '0438' => {province=>'sulut', cities=>'Bitung'},
279              
280             '0435' => {province=>'gorontalo', cities=>'Gorontalo, Limboto'},
281             '0443' => {province=>'gorontalo', cities=>'Marisa'},
282              
283             '0450' => {province=>'sulteng', cities=>'Parigi'},
284             '0451' => {province=>'sulteng', cities=>'Palu'},
285             '0452' => {province=>'sulteng', cities=>'Poso'},
286             '0453' => {province=>'sulteng', cities=>'Tolitoli'},
287             '0457' => {province=>'sulteng', cities=>'Donggala'},
288             '0458' => {province=>'sulteng', cities=>'Tentena'},
289             '0461' => {province=>'sulteng', cities=>'Luwuk'},
290             '0462' => {province=>'sulteng', cities=>'Banggai'},
291             '0463' => {province=>'sulteng', cities=>'Bunta'},
292             '0464' => {province=>'sulteng', cities=>'Ampana'},
293             '0465' => {province=>'sulteng', cities=>'Kolonedale'},
294             '0455' => {province=>'sulteng', cities=>'kotaraya,moutong'},
295              
296             '0422' => {province=>'sulbar', cities=>'Majene'},
297             '0426' => {province=>'sulbar', cities=>'Mamuju'},
298             '0428' => {province=>'sulbar', cities=>'Polewali'},
299              
300             '0410' => {province=>'sulsel', cities=>'Pangkep'},
301             '0411' => {province=>'sulsel', cities=>'Makassar, Maros, Sungguminasa'},
302             '0413' => {province=>'sulsel', cities=>'Bulukumba'},
303             '0414' => {province=>'sulsel', cities=>'Bantaeng (Selayar)'},
304             '0417' => {province=>'sulsel', cities=>'Malino'},
305             '0418' => {province=>'sulsel', cities=>'Takalar'},
306             '0419' => {province=>'sulsel', cities=>'Janeponto'},
307             '0420' => {province=>'sulsel', cities=>'Enrekang'},
308             '0421' => {province=>'sulsel', cities=>'Parepare, Pinrang'},
309             '0422' => {province=>'sulsel', cities=>'Manene'},
310             '0423' => {province=>'sulsel', cities=>'Makale, Rantepao'},
311             '0427' => {province=>'sulsel', cities=>'Barru'},
312             '0428' => {province=>'sulsel', cities=>'Wonomulyo'},
313             '0471' => {province=>'sulsel', cities=>'Palopo'},
314             '0472' => {province=>'sulsel', cities=>'Pitumpanua'},
315             '0473' => {province=>'sulsel', cities=>'Masamba'},
316             '0474' => {province=>'sulsel', cities=>'Malili'},
317             '0475' => {province=>'sulsel', cities=>'Soroako'},
318             '0481' => {province=>'sulsel', cities=>'Watampone'},
319             '0482' => {province=>'sulsel', cities=>'Sinjai'},
320             '0484' => {province=>'sulsel', cities=>'Watansoppeng'},
321             '0485' => {province=>'sulsel', cities=>'Sengkang'},
322              
323             '0401' => {province=>'sultra', cities=>'Kendari'},
324             '0402' => {province=>'sultra', cities=>'Baubau'},
325             '0403' => {province=>'sultra', cities=>'Raha'},
326             '0404' => {province=>'sultra', cities=>'Wanci'},
327             '0405' => {province=>'sultra', cities=>'Kolaka'},
328             '0408' => {province=>'sultra', cities=>'Unaaha'},
329              
330             '0910' => {province=>'maluku', cities=>'Bandanaira'},
331             '0911' => {province=>'maluku', cities=>'Ambon'},
332             '0913' => {province=>'maluku', cities=>'Namlea'},
333             '0914' => {province=>'maluku', cities=>'Masohi'},
334             '0915' => {province=>'maluku', cities=>'Bula'},
335             '0916' => {province=>'maluku', cities=>'Tual'},
336             '0917' => {province=>'maluku', cities=>'Dobo'},
337             '0918' => {province=>'maluku', cities=>'Saumlaku'},
338             '0921' => {province=>'maluku', cities=>'Soasiu'},
339             '0922' => {province=>'maluku', cities=>'Jailolo'},
340             '0923' => {province=>'maluku', cities=>'Morotai'},
341             '0924' => {province=>'maluku', cities=>'Tobelo'},
342             '0927' => {province=>'maluku', cities=>'Labuha'},
343             '0929' => {province=>'maluku', cities=>'Sanana'},
344             '0931' => {province=>'maluku', cities=>'Saparua'},
345             '0901' => {province=>'maluku', cities=>'Timika, Tembagapura'},
346              
347             '0902' => {province=>'papua', cities=>'Agats (Asmat)'},
348             '0951' => {province=>'papua', cities=>'Sorong'},
349             '0952' => {province=>'papua', cities=>'Teminabuan'},
350             '0955' => {province=>'papua', cities=>'Bintuni'},
351             '0956' => {province=>'papua', cities=>'Fakfak'},
352             '0957' => {province=>'papua', cities=>'Kaimana'},
353             '0966' => {province=>'papua', cities=>'Sarmi'},
354             '0967' => {province=>'papua', cities=>'Jayapura, Abepura'},
355             '0969' => {province=>'papua', cities=>'Wamena'},
356             '0971' => {province=>'papua', cities=>'Merauke'},
357             '0975' => {province=>'papua', cities=>'Tanahmerah'},
358             '0980' => {province=>'papua', cities=>'Ransiki'},
359             '0981' => {province=>'papua', cities=>'Biak'},
360             '0983' => {province=>'papua', cities=>'Serui'},
361             '0984' => {province=>'papua', cities=>'Nabire'},
362             '0985' => {province=>'papua', cities=>'Nabire'},
363             '0986' => {province=>'papua', cities=>'Manokwari'},
364             );
365              
366             my %cell_prefixes = (
367             '0811' => {operator=>'telkomsel', product=>'halo', is_gsm=>1},
368             '0812' => {operator=>'telkomsel', product=>'halo/simpati', is_gsm=>1},
369             '0813' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
370             '0814' => {operator=>'indosat', product=>'matrix', is_gsm=>1},
371             '0815' => {operator=>'indosat', product=>'matrix/mentari', is_gsm=>1},
372             '0816' => {operator=>'indosat', product=>'matrix/mentari', is_gsm=>1},
373             '0817' => {operator=>'xl', is_gsm=>1},
374             '0818' => {operator=>'xl', is_gsm=>1},
375             '0819' => {operator=>'xl', is_gsm=>1},
376             '0821' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
377             '0822' => {operator=>'telkomsel', product=>'simpati', is_gsm=>1},
378             '0823' => {operator=>'telkomsel', product=>'as', is_gsm=>1},
379             '0828' => {operator=>'sampoerna', product=>'ceria', is_gsm=>1},
380             #'08315' => {operator=>'nts', is_gsm=>1},
381             '0831' => {operator=>'axis', is_gsm=>1},
382             '0832' => {operator=>'axis', is_gsm=>1},
383             '0838' => {operator=>'axis', is_gsm=>1},
384             '0852' => {operator=>'telkomsel', product=>'as', is_gsm=>1},
385             '0853' => {operator=>'telkomsel', product=>'as', is_gsm=>1}, # fress
386             '0855' => {operator=>'indosat', product=>'matrix bright', is_gsm=>1},
387             '0856' => {operator=>'indosat', product=>'im3', is_gsm=>1},
388             '0857' => {operator=>'indosat', product=>'im3', is_gsm=>1},
389             '0858' => {operator=>'indosat', product=>'mentari', is_gsm=>1},
390             '0859' => {operator=>'xl', is_gsm=>1},
391             #'08681' => {operator=>'psn', product=>'byru', is_gsm=>0}, # satellite
392             '0868' => {operator=>'psn', product=>'byru', is_gsm=>0}, # satellite
393             '0877' => {operator=>'xl', product=>'axiata', is_gsm=>1},
394             '0878' => {operator=>'xl', product=>'axiata', is_gsm=>1},
395             '0879' => {operator=>'xl', product=>'axiata', is_gsm=>1},
396             '0881' => {operator=>'smartfren', is_cdma=>1},
397             '0882' => {operator=>'smartfren', is_cdma=>1},
398             '0883' => {operator=>'smartfren', is_cdma=>1},
399             '0884' => {operator=>'smartfren', is_cdma=>1},
400             '0885' => {operator=>'smartfren', is_cdma=>1},
401             '0886' => {operator=>'smartfren', is_cdma=>1},
402             '0887' => {operator=>'smartfren', is_cdma=>1},
403             '0888' => {operator=>'smartfren', is_cdma=>1},
404             '0889' => {operator=>'smartfren', is_cdma=>1},
405             '0896' => {operator=>'three', is_gsm=>1},
406             '0897' => {operator=>'three', is_gsm=>1},
407             '0898' => {operator=>'three', is_gsm=>1},
408             '0899' => {operator=>'three', is_gsm=>1},
409             );
410              
411             my %fwa_prefixes = (
412             30 => {operator=>'indosat', product=>'starone'},
413             32 => {operator=>'telkom', product=>'flexi'},
414             #39 is fixed telcom
415             40 => {operator=>'telkom', product=>'flexi'},
416             50 => {operator=>'telkom', product=>'flexi'},
417             60 => {operator=>'indosat', product=>'starone'},
418             62 => {operator=>'indosat', product=>'starone'},
419             68 => {operator=>'telkom', product=>'flexi'},
420             70 => {operator=>'telkom', product=>'flexi'},
421             710 => {operator=>'telkom', product=>'flexi'},
422             711 => {operator=>'telkom', product=>'flexi'},
423             712 => {operator=>'telkom', product=>'flexi'},
424             713 => {operator=>'telkom', product=>'flexi'},
425             714 => {operator=>'telkom', product=>'flexi'},
426             715 => {operator=>'telkom', product=>'flexi'},
427             716 => {operator=>'telkom', product=>'flexi'},
428             717 => {}, # land
429             718 => {}, # land
430             719 => {}, # land
431             72 => {}, # land
432             73 => {}, # land
433             74 => {}, # land
434             75 => {}, # land
435             76 => {}, # land
436             77 => {}, # land
437             78 => {}, # land
438             79 => {}, # land
439             80 => {operator=>'esia'},
440             81 => {operator=>'esia'}, # jkt
441             82 => {operator=>'esia'}, # assumed 8x
442             83 => {operator=>'esia'},
443             84 => {operator=>'esia'}, # assumed 8x
444             85 => {operator=>'esia'}, # jkt
445             86 => {operator=>'esia'}, # assumed 8x
446             87 => {operator=>'esia'}, # jkt
447             88 => {operator=>'esia'}, # assumed 8x
448             89 => {operator=>'esia'},
449             90 => {operator=>'esia'}, # assumed 9x
450             91 => {operator=>'esia'},
451             92 => {operator=>'esia'},
452             93 => {operator=>'esia'},
453             94 => {operator=>'esia'}, # assumed 9x
454             95 => {operator=>'esia'}, # assumed 9x
455             96 => {operator=>'esia'}, # assumed 9x
456             97 => {operator=>'esia'}, # assumed 9x
457             98 => {operator=>'esia'},
458             99 => {operator=>'esia'},
459             );
460              
461             our %SPEC;
462              
463             $SPEC{':package'} = {
464             v => 1.1,
465             summary => 'Parse Indonesian phone numbers',
466             };
467              
468             my $extract_args = {
469             text => {
470             summary => 'Text containing phone numbers to extract from',
471             schema => 'str*',
472             req => 1,
473             pos => 0,
474             },
475             max_numbers => {
476             schema => 'int',
477             },
478             default_area_code => {
479             summary => 'When encountering a number without area code, use this',
480             schema => ['str' => {
481             match => qr/^0\d{2,3}$/,
482             }],
483             description => <<'_',
484              
485             If you want to extract numbers that don't contain an area code (e.g. 7123 4567),
486             you'll need to provide this.
487              
488             _
489             },
490             level => {
491             summary => 'How hard should the function extract numbers (1-9)',
492             schema => ['int' => {
493             default => 5,
494             between => [1, 9],
495             }],
496             description => <<'_',
497              
498             The higher the level, the harder this function will try finding phone numbers,
499             but the higher the risk of false positives will be. E.g. in text
500             '123456789012345' with level=5 it will not find a phone number, but with level=9
501             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
502             default level is fine.
503              
504             _
505             },
506             };
507              
508             $SPEC{extract_id_phones} = {
509             v => 1.1,
510             summary => 'Extract phone number(s) from text',
511             description => <<'_',
512              
513             Extracts phone number(s) from text. Returns an array of one or more parsed phone
514             number structures (hashes). Understands the list of known area codes and cellular
515             operators, as well as other information. Understands various syntax e.g.
516             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
517             7123456/57 (2 adjacent numbers).
518              
519             Extraction algorithm is particularly targeted at classified ads text in
520             Indonesian language, but should be quite suitable for any other normal text.
521              
522             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
523             but without any other detailed information other than country code.
524              
525             _
526             args => $extract_args,
527             result_naked => 1,
528             };
529             sub extract_id_phones {
530 35     35 1 84752 my %args = @_;
531 35         66 my $text = $args{text};
532 35   100     136 my $level = $args{level} // 5;
533 35         58 my $defac = $args{default_area_code};
534              
535 35         108 $log->tracef("text = %s", $text);
536              
537 35         71 my %nums; # normalized num => {_level=>..., _order=>..., raw=>..., ...}
538              
539             # note: capital prefix means it has capturing group
540 35         35 state $_Cc_prefix_local;
541 35         33 state $_Kprefix_local;
542 35         40 state $_Cc_karea_local_ext;
543 35         24 state $_Karea_local_ext;
544 35         31 state $_Prefix_local;
545 35         37 state $_Klocal;
546 35         35 state $_Local;
547 35         26 state $_Indicator;
548 35         27 state $_sep;
549 35         35 state $_start_w;
550 35         28 state $_start_d;
551 35         26 state $_end_d;
552 35         30 state $_Adjacent;
553 35 100       79 if (!$_Prefix_local) {
554             # known prefixes
555 1         1 $_start_w = '(?:\A|\b)';
556 1         1 $_start_d = '(?:\A|(?<=\D))';
557 1         1 $_end_d = '(?:\z|(?=\D))';
558 1         242 my $_kprefix =
559             '(?:'.join("|",sort(keys %area_codes, keys %cell_prefixes)).')';
560 1         174 my $_karea = '(?:'.join("|",sort keys %area_codes).')';
561 1         16 my @_kareanz;
562 1         38 for (keys %area_codes) { s/^0//; push @_kareanz, $_ }
  306         448  
  306         354  
563 1         159 my $_kareanz = '(?:'.join("|",sort @_kareanz).')';
564             # XXX currently ignores 08681
565 1         2 my $_prefix = '(?:0[1-9](?:[0-9]){1,2})';
566 1         2 my $_prefixnz = '(?:[1-9](?:[0-9]){1,2})';
567 1         3 $_sep = '(?:\s+|\.|-)';
568 1         1 my $_cc = '(?:\+[1-9][0-9]{1,2})';
569              
570 1         4 $_Local = '(\d{5,8}|(?:\d'.$_sep.'?){4,7}\d)';
571              
572             # heuristic: we know that an FWA number is 7-8 digits and that there is no
573             # prefix 1 (?). also (not for an exact reason though, just minimizing false
574             # negatives) be stricter (no in-between seps).
575 1         1 my @_klocal;
576 1         19 for (keys %fwa_prefixes) {
577 46         40 my $l = length($_);
578 46         102 push @_klocal, sprintf("%s\\d{%d,%d}", $_, 7-$l, 8-$l);
579             }
580 1         9 $_Klocal = '(' . join("|", @_klocal, '[2-9]{5,7}'). ')';
581              
582 1         5 my $_Ext =
583             qr!((?:extension|ekstensi|ext?|ekst?)(?:\s|:|\.)*(?:\d{1,5}))!ix;
584              
585 1         790 $_Kprefix_local = # (021) 123-4567, 021-123-4567
586             qr!(\(\s*$_kprefix\s*\)|$_kprefix) $_sep* $_Local!sx;
587 1         135 $_Prefix_local = # same as above, but w/o checking known prefixes
588             qr!(\(\s*$_prefix\s*\)|$_prefix) $_sep* $_Local!sx;
589 1         694 $_Karea_local_ext = # (021) 123-4567 ext 102, mobile assumed has no ext
590             qr!(\(\s*$_karea\s*\)|$_karea) $_sep*
591             $_Local $_sep*
592             $_Ext!sx;
593 1         89 $_Cc_prefix_local = # (+62) 22 123-4567, 62 812 123-4567
594             qr!(\(\s*$_cc\s*\)|$_cc) $_sep*
595             (\(\s*$_prefixnz\s*\)|$_prefixnz) $_sep*
596             $_Local!sx;
597 1         481 $_Cc_karea_local_ext = # (+62) 22 123-4567 ext 1000
598             qr!(\(\s*$_cc\s*\)|$_cc) $_sep*
599             (\(\s*$_kareanz\s*\)|$_kareanz) $_sep*
600             $_Local $_sep*
601             $_Ext!sx;
602 1         5 $_Indicator = qr!(
603             menghubungi|hubungi|hub|
604             contact|kontak|mengontak|mengkontak|
605             nomor|nomer|no|num|
606             to|ke|
607             tele?pon|tilpun|tilp|te?lp|tel|tl?|
608             phone|ph|
609             handphone|h\.?p|ponsel|cellular|cell|
610             faximile|facsimile|faksimile|fax|facs|faks|f
611             )(?:\s*|\.|:)*!ix;
612 1         24 $_Adjacent = qr!(\s*/\s*\d\d?)!;
613             }
614              
615             # preprocess text: 0 1 2 3 4 5 -> 012345
616 35 100       93 if ($level >= 6) {
617             state $_remove_spaces = sub {
618 2     2   6 local $_ = shift;
619 2         14 s/\s//sg;
620 2         7 $_;
621 17         28 };
622 17         23 my $oldtext = $text;
623 17         74 $text =~ s/((?:\d\s){4,}\d)/$_remove_spaces->($1)/seg;
  2         6  
624 17 100       53 $log->tracef("Preprocess text: remove spaces: %s", $text)
625             if $oldtext ne $text;
626             }
627              
628             # preprocess text: O (letter O) as 0 and l/I/| as 1
629 35 100       121 if ($level >= 6) {
630 17         22 state $diglets = {o=>0, O=>0, l=>1, '|'=>1, I=>1, S=>5};
631 17         23 state $lets = join("", keys %$diglets);
632             state $_replace_lets = sub {
633 20     20   47 my ($lets) = @_;
634 20 100       56 $lets =~ s!(.)!defined($diglets->{$1}) ? $diglets->{$1} : $1!eg;
  240         582  
635             # when will emacs grok //? grr...
636 20         77 $lets;
637 17         18 };
638 17         18 my $oldtext = $text;
639 17         268 $text =~ s/((?:[0-9$lets](?:\s+|-|\.)?){5,})/$_replace_lets->($1)/eg;
  20         48  
640 17 100       55 $log->tracef("Preprocess text: letters->digits: %s", $text)
641             if $oldtext ne $text;
642             }
643              
644             # TODO: preprocess text: words as numbers (nol satu delapan ...)
645              
646 35         42 my $i;
647             my @r;
648              
649             # first, try to find numbers tacked after some indicator, e.g. Hub: blah,
650             # T.blah, etc.
651 35 50       77 if ($level >= 1) {
652 35         31 $i = 0; @r = ();
  35         63  
653 35         1209 while ($text =~ m!($_start_w $_Indicator $_sep*
654             $_Cc_karea_local_ext $_end_d)!xg) {
655 1         4 push @r, $1;
656 1         2 my $ind = $2;
657 1         4 my $num = _normalize($3, $4, $5, $6);
658 1   50     9 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
659             _pat=>"ind+cc+karea+local+ext"};
660 1 50       11 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
661             }
662 35         101 _remove_text(\$text, \@r);
663              
664 35         47 $i = 0; @r = ();
  35         43  
665 35         674 while ($text =~ m!($_start_w $_Indicator $_sep*
666             $_Cc_prefix_local $_end_d)!xg) {
667 2         39 push @r, $1;
668 2         4 my $ind = $2;
669 2         5 my $num = _normalize($3, $4, $5);
670 2   50     22 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
671             _pat=>"ind+cc+prefix+local"};
672 2 50       23 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
673             }
674 35         79 _remove_text(\$text, \@r);
675              
676 35         42 $i = 0; @r = ();
  35         38  
677 35         1071 while ($text =~ m!($_start_w $_Indicator $_Karea_local_ext
678             $_end_d)!xg) {
679 1         3 push @r, $1;
680 1         3 my $ind = $2;
681 1         3 my $num = _normalize(undef, $3, $4, $5);
682 1   50     9 $nums{$num} //= {_level=>1, _order=>++$i, raw=>$1,
683             _pat=>"ind+karea+local+ext"};
684 1 50       14 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
685             }
686 35         80 _remove_text(\$text, \@r);
687              
688 35         73 $i = 0; @r = ();
  35         45  
689 35         1124 while ($text =~ m!($_start_w $_Indicator $_Kprefix_local
690             $_Adjacent? $_end_d)!xg) {
691 5         16 push @r, $1;
692 5         8 my $ind = $2;
693 5         16 my $num = _normalize(undef, $3, $4);
694 5         11 my $adj = $5;
695 5   50     43 $nums{$num} //= {_level=>1, _order=>++$i, raw=>$1,
696             _pat=>"ind+kprefix+local"};
697 5 50       19 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/;
698 5         15 _add_adjacent(\%nums, $num, $adj);
699             }
700 35         73 _remove_text(\$text, \@r);
701             }
702 35 50       90 if ($level >= 2) {
703 35         36 $i = 0; @r = ();
  35         47  
704 35   100     597 while (defined($defac) &&
705             $text =~ m!($_start_w $_Indicator $_sep* $_Klocal
706             $_Adjacent? $_end_d)!xg) {
707 2         6 push @r, $1;
708 2         5 my $ind = $2;
709 2         5 my $num = _normalize(undef, $defac, $3);
710 2         4 my $adj = $4;
711 2   50     19 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
712             _pat=>"ind+klocal"};
713 2 50       10 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
714 2         5 _add_adjacent(\%nums, $num, $adj);
715             }
716 35         79 _remove_text(\$text, \@r);
717             }
718 35 50       74 if ($level >= 2) {
719 35         39 $i = 0; @r = ();
  35         45  
720 35         520 while ($text =~ m!($_start_w $_Indicator $_sep* $_Prefix_local
721             $_Adjacent? $_end_d)!xg) {
722 1         5 push @r, $1;
723 1         3 my $ind = $2;
724 1         5 my $num = _normalize(undef, $3, $4);
725 1         4 my $adj = $5;
726 1   50     13 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
727             _pat=>"ind+prefix+local"};
728 1 50       15 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
729 1         4 _add_adjacent(\%nums, $num, $adj);
730             }
731 35         75 _remove_text(\$text, \@r);
732              
733 35         44 $i = 0; @r = ();
  35         39  
734 35   100     517 while (defined($defac) &&
735             $text =~ m!($_start_w $_Indicator $_sep* $_Local
736             $_Adjacent? $_end_d)!xg) {
737 3         16 push @r, $1;
738 3         7 my $ind = $2;
739 3         9 my $num = _normalize(undef, $defac, $3);
740 3         10 my $adj = $4;
741 3   50     33 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
742             _pat=>"ind+local"};
743 3 50       21 $nums{$num}{is_fax} = 1 if $ind =~ /fax|faks|\bf\b/i;
744 3         11 _add_adjacent(\%nums, $num, $adj);
745             }
746 35         65 _remove_text(\$text, \@r);
747             }
748              
749             # try to find any cc+area+local numbers
750 35 50       88 if ($level >= 3) {
751 35         33 $i = 0; @r = ();
  35         38  
752 35         856 while ($text =~ m!($_start_d $_Cc_karea_local_ext $_end_d)!xg) {
753 1         6 push @r, $1;
754 1   50     6 $nums{_normalize($2, $3, $4, $5)} //=
755             {_level=>3, _order=>++$i, raw=>$1, _pat=>"cc+karea+local+ext"};
756             }
757 35         71 _remove_text(\$text, \@r);
758              
759 35         35 $i = 0; @r = ();
  35         44  
760 35         329 while ($text =~ m!($_start_d $_Cc_prefix_local $_end_d)!xg) {
761 0         0 push @r, $1;
762 0   0     0 $nums{_normalize($2, $3, $4)} //=
763             {_level=>3, _order=>++$i, raw=>$1, _pat=>"cc+prefix+local"};
764             }
765 35         79 _remove_text(\$text, \@r);
766             }
767              
768             # try to find numbers with known area code/cell number prefixes
769 35 50       68 if ($level >= 3) {
770 35         40 $i = 0; @r = ();
  35         40  
771 35         1184 while ($text =~ m!($_start_d $_Kprefix_local $_Adjacent? $_end_d)!xg) {
772 11         34 push @r, $1;
773 11         29 my $num = _normalize(undef, $2, $3);
774 11         24 my $adj = $4;
775 11   50     92 $nums{$num} //=
776             {_level=>3, _order=>++$i, raw=>$1, _pat=>"kprefix+local"};
777 11         25 _add_adjacent(\%nums, $num, $adj);
778             }
779 35         138 _remove_text(\$text, \@r);
780             }
781              
782 35 50       88 if ($level >= 5) {
783 35         36 $i = 0; @r = ();
  35         48  
784 35   100     371 while (defined($defac) &&
785             $text =~ m!($_start_w $_Klocal
786             $_Adjacent? $_end_d)!xg) {
787 6         18 push @r, $1;
788 6         14 my $num = _normalize(undef, $defac, $2);
789 6         11 my $adj = $3;
790 6   50     50 $nums{$num} //= {_level=>2, _order=>++$i, raw=>$1,
791             _pat=>"klocal"};
792 6         20 _add_adjacent(\%nums, $num, $adj);
793             }
794 35         68 _remove_text(\$text, \@r);
795             }
796              
797             # try to find any area+local numbers
798 35 50       90 if ($level >= 5) {
799 35         32 $i = 0; @r = ();
  35         44  
800 35         303 while ($text =~ m!($_start_d $_Prefix_local $_Adjacent? $_end_d)!xg) {
801 1         5 push @r, $1;
802 1         4 my $num = _normalize(undef, $2, $3);
803 1         4 my $adj = $4;
804 1   50     14 $nums{$num} //=
805             {_level=>5, _order=>++$i, raw=>$1, _pat=>"prefix+local"};
806 1         4 _add_adjacent(\%nums, $num, $adj);
807             }
808 35         66 _remove_text(\$text, \@r);
809             }
810              
811             # try to find any local numbers (6-8 digit, because 5 digits are easily
812             # confused with indonesian postal code, even though they might still be used
813             # in smaller cities)
814 35 100 66     155 if ($level >= 5 && defined($defac)) {
815 19         25 $i = 0; @r = ();
  19         23  
816 19         228 while ($text =~ m!($_start_d $_Local $_Adjacent? $_end_d)!xg) {
817 5         13 push @r, $1;
818 5         9 my $num = _normalize(undef, $defac, $2);
819 5         9 my $adj = $3;
820 5   50     41 $nums{$num} //=
821             {_level=>5, _order=>++$i, raw=>$1, _pat=>"local (defac)"};
822 5         11 _add_adjacent(\%nums, $num, $adj);
823             }
824 19         39 _remove_text(\$text, \@r);
825             }
826              
827 35         101 for (keys %nums) { $nums{$_}{standard} = $_ }
  41         106  
828 35         104 $log->tracef("\\%%nums = %s", \%nums);
829              
830             # if we are told to extract only N max_numbers, use the lower level ones and
831             # the ones at the end (they are more likely to be numbers, in the case of
832             # classified ads)
833 41 50 66     99 my @nums = map { $nums{$_} } sort {
  9         56  
834 35         144 $nums{$a}{_level} <=> $nums{$b}{_level} ||
835             $nums{$b}{_order} <=> $nums{$a}{_order} ||
836             $nums{$b}{standard} cmp $nums{$a}{standard}
837             } keys %nums;
838 35 100 66     118 if (defined($args{max_numbers}) && $args{max_numbers} > 0 &&
      100        
839             @nums > $args{max_numbers}
840             ) {
841 1         2 splice @nums, $args{max_numbers};
842             }
843              
844             # sort again according to order (ascending), this is what most people expect
845 35         50 @nums = sort {$a->{_order} <=> $b->{_order}} @nums;
  8         20  
846              
847             # remove internal data
848 35         45 for my $num (@nums) {
849             #for (keys %$num) { delete $num->{$_} if /^_/ }
850 40         74 _add_info($num);
851             }
852              
853 35         101 $log->tracef("\\\@nums = %s", \@nums);
854              
855 35         185 \@nums;
856             }
857              
858             my $parse_args = clone($extract_args);
859             delete $parse_args->{max_numbers};
860             $SPEC{parse_id_phone} = {
861             v => 1.1,
862             summary => 'Alias for extract_id_phones(..., max_numbers=>1)->[0]',
863             args => $parse_args,
864             result_naked => 1,
865             };
866             sub parse_id_phone {
867 1     1 1 1973 my %args = @_;
868 1         4 my $res = extract_id_phones(%args, max_numbers=>1);
869 1         4 $res->[0];
870             }
871              
872             sub _normalize {
873 39     39   87 my ($cc, $area, $local, $ext) = @_;
874 39   100     142 $cc //= "62";
875 39 100       65 for ($cc, $area, $local, $ext) { s/\D+//g if defined($_) }
  156         373  
876 39         123 $area =~ s/^0//;
877 39 100 66     213 "+$cc.$area.$local".(defined($ext) && length($ext) ? ".ext$ext" : "");
878             }
879              
880             sub _remove_text {
881 439     439   418 my ($textref, $strs) = @_;
882 439         430 my $oldtext = $$textref;
883 439         551 for (@$strs) {
884 39         558 $$textref =~ s/\Q$_\E//;
885             }
886 439 100       1078 $log->tracef("removed match, text = %s", $$textref)
887             if $$textref ne $oldtext;
888             }
889              
890             sub _add_adjacent {
891 34     34   52 my ($nums, $num, $adj) = @_;
892 34 100       294 return unless $adj;
893 2         5 $adj =~ s/\D//g;
894 2         6 my $first = substr($num, -length($adj));
895 2 50       8 return unless abs($first - $adj) == 1;
896 2         1 my $num2 = $num;
897 2         5 substr($num2, -length($adj)) = $adj;
898 2         18 $nums->{$num2} = clone($nums->{$num});
899 2         19 $nums->{$num2}{_order} += 0.5;
900             }
901              
902             sub _add_info {
903 40     40   44 my ($num) = @_;
904 40 50       294 my ($cc, $prefix, $local, $ext) =
905             $num->{standard} =~ /^\+(\d+)\.(\d+)\.(\d+)(?:\.ext*(\d+))?$/
906             or die "BUG: invalid standard format: $num->{standard}";
907 40         61 $prefix = "0$prefix";
908 40         103 $num->{country_code} = $cc;
909 40         58 $num->{area_code} = $prefix;
910 40         74 $num->{local_number} = $local;
911 40 100       80 $num->{ext} = $ext if defined($ext);
912              
913             # XXX country calling code -> name for other countries
914 40 100       102 $num->{country} = 'Indonesia' if $cc eq '62';
915 40 100       83 return unless $cc eq '62';
916              
917 38 100       70 if (length($local) >= 8) {
918 21         58 $local =~ /(....)(.+)/;
919 21         74 $num->{pretty} = "$prefix-$1-$2";
920             } else {
921 17         47 $local =~ /(...)(.+)/;
922 17         62 $num->{pretty} = "$prefix-$1-$2";
923             }
924              
925 38 100       86 if (my $c = $cell_prefixes{$prefix}) {
926 9         12 $num->{is_cell} = 1;
927 9 50       29 $num->{is_gsm} = $c->{is_gsm} ? 1:0;
928 9 50       23 $num->{is_cdma} = $c->{is_cdma} ? 1:0;
929 9         16 $num->{operator} = $c->{operator};
930 9         16 $num->{product} = $c->{product};
931             } else {
932 29         47 $num->{is_cell} = 0;
933             }
934              
935 38 100       87 if (my $a = $area_codes{$prefix}) {
936 27         30 $num->{is_land} = 1;
937 27         50 $num->{province} = $a->{province};
938 27         40 $num->{cities} = $a->{cities};
939 27         19 state $_fwa_prefixes;
940 27 100       44 if (!$_fwa_prefixes) {
941 1         26 $_fwa_prefixes = '(?:'.join("|", keys %fwa_prefixes).')';
942             }
943 27 100       256 if ($local =~ /^($_fwa_prefixes)/) {
944 18         36 my $fwa = $fwa_prefixes{$1};
945 18         20 $num->{is_cdma} = 1;
946 18         51 $num->{operator} = $fwa->{operator};
947 18         53 $num->{product} = $fwa->{product};
948             }
949             } else {
950 11         42 $num->{is_land} = 0;
951             }
952             }
953              
954             #$SPEC{list_id_operators} = {
955             # v => 1.1,
956             # summary => 'Return list of known phone operators',
957             # result_naked => 1,
958             #};
959             #sub list_id_operators {
960             #
961             #}
962              
963             #$SPEC{list_id_area_codes} = {
964             # v => 1.1,
965             # summary => 'Return list of known area codes in Indonesia, '.
966             # 'along with area names',
967             # result_naked => 1,
968             #};
969             #sub list_id_area_codes {
970             #}
971              
972             1;
973             # ABSTRACT: Parse Indonesian phone numbers
974              
975             __END__
976              
977             =pod
978              
979             =encoding UTF-8
980              
981             =head1 NAME
982              
983             Parse::PhoneNumber::ID - Parse Indonesian phone numbers
984              
985             =head1 VERSION
986              
987             This document describes version 0.12 of Parse::PhoneNumber::ID (from Perl distribution Parse-PhoneNumber-ID), released on 2015-01-03.
988              
989             =head1 SYNOPSIS
990              
991             use Parse::PhoneNumber::ID qw(parse_id_phone extract_id_phones);
992             use Data::Dump;
993              
994             dd parse_id_phone(text => 'Jual dalmatian 2bl lucu2x. Hub: 7123 4567',
995             default_area_code=>'022');
996              
997             Will print something like:
998              
999             { raw => 'Hub: 7123 4567',
1000             pretty => '022-7123-4567',
1001             standard => '+62.22.71234567',
1002             is_cell => 1,
1003             is_gsm => 0,
1004             is_cdma => 1,
1005             operator => 'telkom',
1006             product => 'flexi',
1007             area_code => '022',
1008             province => 'jabar',
1009             cities => 'Bandung, Cimahi, ...',
1010             local_number => '71234567',
1011             country => 'Indonesia',
1012             country_code => '62',
1013             ext => undef, }
1014              
1015             To extract more than one number from a text:
1016              
1017             my $phones = extract_id_phones(text => 'some text containing phone number(s):'.
1018             '0812 2345 6789, +62-22-91234567');
1019             say "There are ", scalar(@$phones), " phone number(s) found in text";
1020             for (@$phones) { say $_->{pretty} }
1021              
1022             =head1 FUNCTIONS
1023              
1024              
1025             =head2 extract_id_phones(%args) -> any
1026              
1027             Extract phone number(s) from text.
1028              
1029             Extracts phone number(s) from text. Returns an array of one or more parsed phone
1030             number structures (hashes). Understands the list of known area codes and cellular
1031             operators, as well as other information. Understands various syntax e.g.
1032             +62.22.1234567, (022) 123-4567, 022-123-4567 ext 102, and even things like
1033             7123456/57 (2 adjacent numbers).
1034              
1035             Extraction algorithm is particularly targeted at classified ads text in
1036             Indonesian language, but should be quite suitable for any other normal text.
1037              
1038             Non-Indonesian phone numbers (e.g. +65 12 3456 7890) will still be extracted,
1039             but without any other detailed information other than country code.
1040              
1041             Arguments ('*' denotes required arguments):
1042              
1043             =over 4
1044              
1045             =item * B<default_area_code> => I<str>
1046              
1047             When encountering a number without area code, use this.
1048              
1049             If you want to extract numbers that don't contain an area code (e.g. 7123 4567),
1050             you'll need to provide this.
1051              
1052             =item * B<level> => I<int> (default: 5)
1053              
1054             How hard should the function extract numbers (1-9).
1055              
1056             The higher the level, the harder this function will try finding phone numbers,
1057             but the higher the risk of false positives will be. E.g. in text
1058             '123456789012345' with level=5 it will not find a phone number, but with level=9
1059             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
1060             default level is fine.
1061              
1062             =item * B<max_numbers> => I<int>
1063              
1064             =item * B<text>* => I<str>
1065              
1066             Text containing phone numbers to extract from.
1067              
1068             =back
1069              
1070             Return value: (any)
1071              
1072             =head2 parse_id_phone(%args) -> any
1073              
1074             Alias for extract_id_phones(..., max_numbers=>1)->[0].
1075              
1076             Arguments ('*' denotes required arguments):
1077              
1078             =over 4
1079              
1080             =item * B<default_area_code> => I<str>
1081              
1082             When encountering a number without area code, use this.
1083              
1084             If you want to extract numbers that don't contain an area code (e.g. 7123 4567),
1085             you'll need to provide this.
1086              
1087             =item * B<level> => I<int> (default: 5)
1088              
1089             How hard should the function extract numbers (1-9).
1090              
1091             The higher the level, the harder this function will try finding phone numbers,
1092             but the higher the risk of false positives will be. E.g. in text
1093             '123456789012345' with level=5 it will not find a phone number, but with level=9
1094             it might assume, e.g. 1234567890 to be a phone number. Normally leaving level at
1095             default level is fine.
1096              
1097             =item * B<text>* => I<str>
1098              
1099             Text containing phone numbers to extract from.
1100              
1101             =back
1102              
1103             Return value: (any)
1104             =head1 SEE ALSO
1105              
1106             L<Parse::PhoneNumber>
1107              
1108             =head1 HOMEPAGE
1109              
1110             Please visit the project's homepage at L<https://metacpan.org/release/Parse-PhoneNumber-ID>.
1111              
1112             =head1 SOURCE
1113              
1114             Source repository is at L<https://github.com/sharyanto/perl-Parse-PhoneNumber-ID>.
1115              
1116             =head1 BUGS
1117              
1118             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-PhoneNumber-ID>
1119              
1120             When submitting a bug or request, please include a test-file or a
1121             patch to an existing test-file that illustrates the bug or desired
1122             feature.
1123              
1124             =head1 AUTHOR
1125              
1126             perlancar <perlancar@cpan.org>
1127              
1128             =head1 COPYRIGHT AND LICENSE
1129              
1130             This software is copyright (c) 2015 by perlancar@cpan.org.
1131              
1132             This is free software; you can redistribute it and/or modify it under
1133             the same terms as the Perl 5 programming language system itself.
1134              
1135             =cut