| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::MultiPhone::de; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
11110
|
use 5.006; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
45
|
|
|
4
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
37
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
39
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
3194
|
use POSIX qw(setlocale LC_COLLATE LC_CTYPE); |
|
|
1
|
|
|
|
|
39541
|
|
|
|
1
|
|
|
|
|
10
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
1494
|
use base qw(Text::MultiPhone); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
223
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use constant VOWELS => [qw(a e i o u y ä ü ö)]; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
224
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = do { my @r = (q$Revision: 1.1.1.1 $ =~ /\d+/g); sprintf " %d." . "%02d" x $#r, @r }; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub pre_split { |
|
17
|
2
|
|
|
2
|
0
|
3
|
my ($self, $word) = @_; |
|
18
|
2
|
|
|
|
|
26
|
my $orgLocale = setlocale(LC_CTYPE); |
|
19
|
2
|
|
|
|
|
305
|
setlocale(LC_CTYPE, 'de_DE'); |
|
20
|
1
|
|
|
1
|
|
1776
|
use locale; |
|
|
1
|
|
|
|
|
289
|
|
|
|
1
|
|
|
|
|
6
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
2
|
|
|
|
|
7
|
$word = lc($word); |
|
23
|
2
|
|
|
|
|
5
|
$word =~ s/ß/s/; |
|
24
|
2
|
|
|
|
|
5
|
$word =~ s/qu/q/g; # q is always alone |
|
25
|
2
|
|
|
|
|
4
|
$word =~ s/sch/ch/g; # ch == sch |
|
26
|
2
|
|
|
|
|
5
|
$word =~ s/sc[^h]/ch/g; # usual typo for sch |
|
27
|
2
|
|
|
|
|
4
|
$word =~ s/sh/ch/g; # usual typo for sch |
|
28
|
2
|
|
|
|
|
3
|
$word =~ s/ck/k/g; |
|
29
|
2
|
|
|
|
|
4
|
$word =~ s/ie/i/g; |
|
30
|
2
|
|
|
|
|
9
|
$word =~ s/ph/f/g; |
|
31
|
2
|
|
|
|
|
4
|
$word =~ s/pf/f/g; |
|
32
|
2
|
|
|
|
|
10
|
$word =~ s/(\w)\1/$1/g; # removing double characters |
|
33
|
2
|
|
|
|
|
9
|
$word =~ s/(\w{2})\1/$1/g; # removing double pairs as stst in "selbstständig" |
|
34
|
|
|
|
|
|
|
|
|
35
|
1
|
|
|
1
|
|
253
|
no locale; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
90
|
|
|
36
|
2
|
|
|
|
|
13
|
setlocale(LC_CTYPE, $orgLocale); |
|
37
|
2
|
|
|
|
|
7
|
return $word |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub process_bits { |
|
41
|
2
|
|
|
2
|
0
|
5
|
my ($self, @words) = @_; |
|
42
|
|
|
|
|
|
|
|
|
43
|
2
|
|
|
|
|
3
|
my @results; |
|
44
|
2
|
|
|
|
|
4
|
foreach my $word (@words) { |
|
45
|
2
|
50
|
|
|
|
6
|
next unless defined $word; |
|
46
|
2
|
|
|
|
|
2
|
my $partNo = 0; |
|
47
|
2
|
|
|
|
|
5
|
foreach my $part (@$word) { |
|
48
|
16
|
50
|
|
|
|
30
|
next unless defined $part; |
|
49
|
16
|
|
|
|
|
15
|
$partNo++; |
|
50
|
16
|
|
|
|
|
15
|
my $sequence = ${ $part }[0]; |
|
|
16
|
|
|
|
|
28
|
|
|
51
|
16
|
100
|
|
|
|
37
|
next unless $sequence; |
|
52
|
12
|
|
|
|
|
11
|
my @splits; |
|
53
|
|
|
|
|
|
|
|
|
54
|
12
|
50
|
33
|
|
|
216
|
if ($sequence =~ /v/) { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# v sounds like v or f |
|
56
|
0
|
|
|
|
|
0
|
(my $subst = $sequence) =~ s/v/f/; |
|
57
|
0
|
|
|
|
|
0
|
push @splits, $subst; |
|
58
|
0
|
|
|
|
|
0
|
($subst = $sequence) =~ s/v/w/; |
|
59
|
0
|
|
|
|
|
0
|
push @splits, $subst; |
|
60
|
|
|
|
|
|
|
} elsif ($sequence =~ /y/) { |
|
61
|
|
|
|
|
|
|
# y sounds like ü (= u,i), i, j |
|
62
|
0
|
|
|
|
|
0
|
(my $subst = $sequence) =~ s/y/i/; |
|
63
|
0
|
|
|
|
|
0
|
push @splits, $subst; |
|
64
|
0
|
|
|
|
|
0
|
($subst = $sequence) =~ s/y/j/; |
|
65
|
0
|
|
|
|
|
0
|
push @splits, $subst; |
|
66
|
0
|
|
|
|
|
0
|
($subst = $sequence) =~ s/y/u/; |
|
67
|
0
|
|
|
|
|
0
|
push @splits, $subst; |
|
68
|
|
|
|
|
|
|
} elsif ($sequence eq 'ü' or $sequence eq 'ue') { |
|
69
|
|
|
|
|
|
|
# ü sounds like u,i |
|
70
|
0
|
|
|
|
|
0
|
push @splits, 'u'; |
|
71
|
0
|
|
|
|
|
0
|
push @splits, 'i'; |
|
72
|
|
|
|
|
|
|
} elsif ($sequence eq 'ä' or $sequence eq 'ae') { |
|
73
|
|
|
|
|
|
|
# ä sounds like a,e |
|
74
|
0
|
|
|
|
|
0
|
push @splits, 'a'; |
|
75
|
0
|
|
|
|
|
0
|
push @splits, 'e'; |
|
76
|
|
|
|
|
|
|
} elsif ($sequence eq 'ö' or $sequence eq 'oe') { |
|
77
|
|
|
|
|
|
|
# ö sounds like o,e |
|
78
|
0
|
|
|
|
|
0
|
push @splits, 'o'; |
|
79
|
0
|
|
|
|
|
0
|
push @splits, 'e'; |
|
80
|
|
|
|
|
|
|
} elsif ($sequence eq 'ai') { |
|
81
|
0
|
|
|
|
|
0
|
push @splits, 'ei'; |
|
82
|
|
|
|
|
|
|
} elsif ($sequence eq 'oi') { |
|
83
|
0
|
|
|
|
|
0
|
push @splits, 'eu'; |
|
84
|
|
|
|
|
|
|
} elsif ($sequence eq 'c') { |
|
85
|
0
|
|
|
|
|
0
|
push @splits, 'z'; |
|
86
|
|
|
|
|
|
|
} elsif ($partNo > 1 and $sequence =~ /^h/) { |
|
87
|
|
|
|
|
|
|
# ignore silent h after vowel (lengthening the vowel |
|
88
|
0
|
|
|
|
|
0
|
(my $subst = $sequence) =~ s/^h//; |
|
89
|
0
|
|
|
|
|
0
|
push @splits, $sequence; |
|
90
|
|
|
|
|
|
|
} else { |
|
91
|
12
|
|
|
|
|
23
|
push @splits, $sequence; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
|
|
94
|
12
|
|
|
|
|
40
|
@$part = @splits; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
} |
|
97
|
2
|
|
|
|
|
9
|
return @words; |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub post_join { |
|
101
|
2
|
|
|
2
|
0
|
5
|
my ($self, @words) = @_; |
|
102
|
|
|
|
|
|
|
# nothing to do here |
|
103
|
2
|
|
|
|
|
7
|
return @words; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
1; |