File Coverage

blib/lib/Lingua/EN/Syllable.pm
Criterion Covered Total %
statement 25 25 100.0
branch 10 12 83.3
condition n/a
subroutine 4 4 100.0
pod 0 1 0.0
total 39 42 92.8


line stmt bran cond sub pod time code
1             package Lingua::EN::Syllable;
2             $Lingua::EN::Syllable::VERSION = '0.30';
3             # ABSTRACT: count the number of syllables in English words
4              
5 1     1   451 use 5.006;
  1         2  
6 1     1   3 use strict;
  1         1  
  1         14  
7 1     1   2 use warnings;
  1         3  
  1         262  
8              
9             # note that this is not infallible. it does fail for some percentage of
10             # words (10% seems a good guess)... so it's useful for approximation, but
11             # don't use this for running your nuclear reactor...
12              
13             require Exporter;
14              
15             our @ISA = qw/ Exporter /;
16             our @EXPORT = qw/ syllable /;
17             our @EXPORT_OK = qw/ @AddSyl @SubSyl /;
18             our @AddSyl;
19             our @SubSyl;
20              
21             # basic algortithm:
22             # each vowel-group indicates a syllable, except for:
23             # final (silent) e
24             # 'ia' ind two syl
25              
26             # @AddSyl and @SubSyl list regexps to massage the basic count.
27             # Each match from @AddSyl adds 1 to the basic count, each @SubSyl match -1
28             # Keep in mind that when the regexps are checked, any final 'e' will have
29             # been removed, and all '\'' will have been removed.
30              
31             @SubSyl = (
32             'cial',
33             'tia',
34             'cius',
35             'cious',
36             'giu', # belgium!
37             'ion',
38             'iou',
39             'sia$',
40             '.ely$', # absolutely! (but not ely!)
41             '[^td]ed$', # accused is 2, but executed is 4
42             );
43             @AddSyl = (
44             'ia',
45             'riet',
46             'dien',
47             'iu',
48             'io',
49             'ii',
50             'microor',
51             '[aeiouym]bl$', # -Vble, plus -mble
52             '[aeiou]{3}', # agreeable
53             '^mc',
54             'ism$', # -ism
55             'isms$', # -isms
56             '([^aeiouy])\1l$', # middle twiddle battle bottle, etc.
57             '[^l]lien', # alien, salient [1]
58             '^coa[dglx].', # [2]
59             '[^gq]ua[^auieo]', # i think this fixes more than it breaks
60             'dnt$', # couldn't
61             );
62              
63             # (comments refer to titan's /usr/dict/words)
64             # [1] alien, salient, but not lien or ebbullient...
65             # (those are the only 2 exceptions i found, there may be others)
66             # [2] exception for 7 words:
67             # coadjutor coagulable coagulate coalesce coalescent coalition coaxial
68              
69             #----------------------------------------
70             sub syllable {
71 29     29 0 7053 my $word = shift;
72 29         23 my(@scrugg,$syl);
73              
74 29         41 $word =~ tr/A-Z/a-z/;
75 29 50       57 return 2 if $word eq 'w';
76 29 100       38 return 1 if length($word) == 1;
77 27         28 $word =~ s/\'//g; # fold contractions. not very effective.
78 27         26 $word =~ s/e$//;
79 27         97 @scrugg = split(/[^aeiouy]+/, $word); # '-' should perhaps be added?
80 27 100       44 shift(@scrugg) unless ($scrugg[0]);
81 27         19 $syl = 0;
82             # special cases
83 27         38 foreach (@SubSyl) {
84 270 100       1354 $syl-- if $word=~/$_/;
85             }
86 27         33 foreach (@AddSyl) {
87 459 100       2552 $syl++ if $word=~/$_/;
88             }
89             # count vowel groupings
90 27         28 $syl += scalar(@scrugg);
91 27 50       46 $syl=1 if $syl==0; # got no vowels? ("the", "crwth")
92 27         55 return $syl;
93             }
94             # syllable
95              
96              
97             1;
98             __END__