File Coverage

blib/lib/Lingua/FI/Transcribe.pm
Criterion Covered Total %
statement 24 24 100.0
branch 2 4 50.0
condition n/a
subroutine 5 5 100.0
pod 1 1 100.0
total 32 34 94.1


line stmt bran cond sub pod time code
1             package Lingua::FI::Transcribe;
2              
3 1     1   749 use strict;
  1         2  
  1         43  
4              
5 1     1   6 use vars qw($VERSION);
  1         2  
  1         68  
6              
7             $VERSION = 0.03;
8              
9 1     1   1051 use Lingua::FI::Hyphenate qw(tavuta);
  1         546  
  1         460  
10              
# English(@texts)
#
# Class method: Lingua::FI::Transcribe->English("sauna").
#
# Transcribes each argument into a rough "English" spelling.  Every run
# of letters is handed to tavuta() (imported from Lingua::FI::Hyphenate),
# each piece it returns is transcribed letter by letter via the table
# below, and the pieces are rejoined with "-".  Characters outside the
# letter class (spaces, digits, punctuation) are passed through as-is.
#
# The table only has lowercase keys, so uppercase letters are matched
# as part of a word but are not themselves transcribed.
#
# Returns the list of transcriptions in list context, and the
# transcription of the first argument in scalar context.  The caller's
# arguments are not modified.
sub English {
    shift;    # drop the class

    # Finnish letter (or letter pair) => approximate English spelling.
    my %english_for = (
        'a'  => 'ah',
        'aa' => 'ahh',
        'ai' => 'igh',
        'au' => 'ow',
        'b'  => 'b',
        'c'  => 'k',
        'd'  => 'd',
        'e'  => 'eh',
        'ee' => 'ehh',
        'ei' => 'ey',
        'f'  => 'f',
        'g'  => 'g',
        'h'  => 'hh',
        'i'  => 'ee',
        'j'  => 'y',
        'k'  => 'k',
        'l'  => 'l',
        'm'  => 'm',
        'n'  => 'n',
        'ng' => 'nng',
        'nk' => 'ng',
        'o'  => 'aw',
        'oi' => 'oy',
        'oo' => 'aww',
        'ou' => 'ow',
        'p'  => 'p',
        'q'  => 'q',
        'r'  => 'rr',
        's'  => 's',
        't'  => 't',
        'u'  => 'oo',
        'v'  => 'v',
        'w'  => 'v',
        'x'  => 'ks',
        'y'  => 'ew',
        # Long vowel, parallel to 'aa', 'ee', 'oo' and 'öö'.  This key
        # used to be a second 'y', which silently replaced the short
        # form above and left "yy" without an entry of its own.
        'yy' => 'eww',
        'z'  => 'ts',
        'å'  => 'aw',
        'ä'  => 'a',
        'ö'  => 'ur',
        'öö' => 'urr',
    );

    # Longest keys first, so that pairs such as 'aa' or 'ng' are tried
    # before their single-letter prefixes in the alternation.
    my $letters = join("|",
        sort { length($b) <=> length($a) || $a cmp $b } keys %english_for);

    # Split one run of letters with tavuta(), transcribe every piece,
    # and glue the pieces back together with hyphens.
    my $transcribe = sub {
        my @pieces = tavuta($_[0]);
        for my $piece (@pieces) {
            $piece =~ s/($letters)/$english_for{$1}/g;
        }
        return join("-", @pieces);
    };

    my @transcribed;

    for my $text (@_) {
        # Substitute on a copy: the elements of @_ alias the caller's
        # variables and must stay untouched.
        (my $copy = $text) =~ s/([aeiouyäåöAEIOUYÅÄÖbcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ]+)/$transcribe->($1)/eg;
        push @transcribed, $copy;
    }

    return wantarray ? @transcribed : $transcribed[0];
}
76              
77             =pod
78              
79             =head1 NAME
80              
81             Lingua::FI::Transcribe - Finnish transcription
82              
83             =head1 SYNOPSIS
84              
85             use Lingua::FI::Transcribe;
86              
87             print Lingua::FI::Transcribe->English("sauna"), "\n";
88             print Lingua::FI::Transcribe->English("sisu"), "\n";
89             print Lingua::FI::Transcribe->English("olut"), "\n";
90              
91             print Lingua::FI::Transcribe->English("jarkko hietaniemi"), "\n";
92              
93             # The results being
94              
95             sow-nah
96             see-soo
97             aw-loot
98             yahrrk-kaw hheeeh-tah-neeeh-mee
99              
100             =head1 DESCRIPTION
101              
102             With this module you can get a rough approximation of Finnish
103             pronunciation by I<transcribing> Finnish into something
104             (awful mess, usually) that sounds somewhat similar to Finnish
105             if read aloud (with a straight face). In addition to transcribing
106             the sounds the module also hyphenates the word so that you get more
107             hints as to the correct rhythm. (The stress is always on the first
108             syllable.)
109              
110             However, currently only transcription into English is implemented.
111             Contributions from speakers of other languages gladly accepted.
112              
113             One more time: the approximation is very rough. I disclaim
114             any responsibility if after ordering a beer in a Finnish pub
115             the bartender looks at you funny and hands you an umbrella.
116              
117             =head2 About the English transcription
118              
119             Note that the transcription of Finnish to "English" is very rough:
120             it is basically a very simple substitution of one or more letters of
121             Finnish to one or more letters of "English". The highly irregular
122             pronunciation of English doesn't help things. The vowels are the
123             hardest part to get right. In principle the basic vowels
124              
125             a e i o u
126              
127             are simple: just use the simple vowel sounds you can find
128             in the English words
129              
130             pun pet pit pot put
131              
132             but consider how "pun" and "put" have different vowels, and when
133             Finnish diphthongs like "au" are introduced, the above simple rule
134             breaks down horribly. (That particular Finnish diphthong is
135             pronounced like the English "ow" in "how", in case you are
136             wondering.)
137              
138             =head1 ABOUT FINNISH
139              
140             Finnish is a highly phonemic and phonetic language-- what this means
141             is that the correlation between graphemes/letters and phonemes/sounds
142             is really strong: all you can see you can hear, all you can hear you
143             can see. One letter corresponds to one sound, and there are no silent
144             letters. Since Finnish is a natural language, this is of course an
145             oversimplification, there are nuances and exceptions to the above
146             ideal. More information about Finnish pronunciation can be found from
147              
148             http://www.cs.tut.fi/~jkorpela/finnish.pronunciation.html
149              
150             and sound examples from
151              
152             http://www.helsinki-hs.net/thisishelsinki/kieli.html
153              
154             =head1 LIMITATIONS
155              
156             Only English transcription has been implemented.
157              
158             Only lowercase letters are transcribed.
159              
160             Only Latin-1 (ISO 8859-1) is supported as the encoding.
161              
162             =head1 AUTHOR
163              
164             Jarkko Hietaniemi
165              
166             =head1 COPYRIGHT AND LICENSE
167              
168             Copyright 2001 Jarkko Hietaniemi
169              
170             This library is free software; you can redistribute it and/or modify
171             it under the same terms as Perl itself.
172              
173             =cut
174              
175             1;