| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::Stem::Patch::PL; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 59185 | use v5.8.1; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 47 |  | 
| 4 | 1 |  |  | 1 |  | 6 | use utf8; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 8 |  | 
| 5 | 1 |  |  | 1 |  | 22 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 6 | 1 |  |  | 1 |  | 4 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 41 |  | 
| 7 | 1 |  |  | 1 |  | 804 | use parent 'Exporter'; | 
|  | 1 |  |  |  |  | 329 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | our $VERSION   = '0.04'; | 
| 10 |  |  |  |  |  |  | our @EXPORT_OK = qw( stem stem_pl ); | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | *stem_pl = \&stem; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | sub stem { | 
| 15 | 1 |  |  | 1 | 0 | 7 | my $word = lc shift; | 
|  | 1 |  |  | 77 |  | 1 |  | 
|  | 1 |  |  |  |  | 15 |  | 
|  | 77 |  |  |  |  | 451 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 | 77 |  |  |  |  | 24841 | $word = remove_noun($word); | 
| 18 | 77 |  |  |  |  | 147 | $word = remove_diminutive($word); | 
| 19 | 77 |  |  |  |  | 136 | $word = remove_adjective($word); | 
| 20 | 77 |  |  |  |  | 123 | $word = remove_verb($word); | 
| 21 | 77 |  |  |  |  | 142 | $word = remove_adverb($word); | 
| 22 | 77 |  |  |  |  | 136 | $word = remove_plural($word); | 
| 23 | 77 |  |  |  |  | 121 | $word = remove_other($word); | 
| 24 |  |  |  |  |  |  |  | 
| 25 | 77 |  |  |  |  | 1316 | return $word; | 
| 26 |  |  |  |  |  |  | } | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | sub remove_noun { | 
| 29 | 77 |  |  | 77 | 0 | 103 | my ($word) = @_; | 
| 30 | 77 |  |  |  |  | 122 | my $length = length $word; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 77 | 100 |  |  |  | 184 | if ($length > 6) { | 
| 33 | 37 | 100 |  |  |  | 230 | return $word if $word =~ s{ (?: | 
| 34 |  |  |  |  |  |  | tach | 
| 35 |  |  |  |  |  |  | | acj[aąi]      # -acja -acją -acji | 
| 36 |  |  |  |  |  |  | | [ae]ni[eu]    # -anie -aniu -enie -eniu | 
| 37 |  |  |  |  |  |  | | (?<= ty ) ka  # -tyka → -ty | 
| 38 |  |  |  |  |  |  | ) $}{}x; | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 68 | 100 |  |  |  | 139 | if ($length > 5) { | 
| 42 | 53 | 100 |  |  |  | 267 | return $word if $word =~ s{ (?: | 
| 43 |  |  |  |  |  |  | ach | ami | ce | ta | 
| 44 |  |  |  |  |  |  | | [cn]i[au]        # -cia -ciu -nia -niu | 
| 45 |  |  |  |  |  |  | | (?<= c ) j[aąi]  # -cja -cją -cji → -c | 
| 46 |  |  |  |  |  |  | ) $}{}x; | 
| 47 |  |  |  |  |  |  | } | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 56 |  |  |  |  | 120 | return $word; | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | sub remove_diminutive { | 
| 53 | 77 |  |  | 77 | 0 | 100 | my ($word) = @_; | 
| 54 | 77 |  |  |  |  | 99 | my $length = length $word; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 77 | 100 |  |  |  | 141 | if ($length > 6) { | 
| 57 | 21 | 100 |  |  |  | 111 | return $word if $word =~ s{ (?: | 
| 58 |  |  |  |  |  |  | (?: [aiu]s | [ei]c ) zek  # -aszek -eczek -iczek -iszek -uszek | 
| 59 |  |  |  |  |  |  | | (?<= e[jnr] ) ek        # -ejek -enek -erek → -ej -en -er | 
| 60 |  |  |  |  |  |  | ) $}{}x; | 
| 61 |  |  |  |  |  |  | } | 
| 62 |  |  |  |  |  |  |  | 
| 63 | 71 | 100 |  |  |  | 138 | if ($length > 4) { | 
| 64 | 55 | 100 |  |  |  | 157 | return $word if $word =~ s{ | 
| 65 |  |  |  |  |  |  | [ae]k  # -ak -ek | 
| 66 |  |  |  |  |  |  | $}{}x; | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 67 |  |  |  |  | 148 | return $word; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | sub remove_adjective { | 
| 73 | 77 |  |  | 77 | 0 | 91 | my ($word) = @_; | 
| 74 | 77 |  |  |  |  | 101 | my $length = length $word; | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 77 | 100 |  |  |  | 152 | if ($length > 7) { | 
| 77 | 10 | 100 |  |  |  | 45 | return $1 if $word =~ m{^ | 
| 78 |  |  |  |  |  |  | naj                  # naj- | 
| 79 |  |  |  |  |  |  | ( .+ )               # $1 | 
| 80 |  |  |  |  |  |  | sz (?: [ey] | ych )  # -sze -szy -szych | 
| 81 |  |  |  |  |  |  | $}x; | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  |  | 
| 84 | 74 | 100 |  |  |  | 138 | if ($length > 6) { | 
| 85 | 11 | 100 |  |  |  | 40 | return $word | 
| 86 |  |  |  |  |  |  | if $word =~ s{ czny $}{}x; | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  |  | 
| 89 | 73 | 100 |  |  |  | 117 | if ($length > 5) { | 
| 90 | 31 | 100 |  |  |  | 155 | return $word if $word =~ s{ (?: | 
| 91 |  |  |  |  |  |  | ego | ej | ych | 
| 92 |  |  |  |  |  |  | | ow[aey]  # -owa -owe -owy | 
| 93 |  |  |  |  |  |  | ) $}{}x; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 67 |  |  |  |  | 137 | return $word; | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | sub remove_verb { | 
| 100 | 77 |  |  | 77 | 0 | 89 | my ($word) = @_; | 
| 101 | 77 |  |  |  |  | 95 | my $length = length $word; | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 77 | 100 |  |  |  | 155 | if ($length > 5) { | 
| 104 | 25 | 100 |  |  |  | 129 | return $word if $word =~ s{ (?: | 
| 105 |  |  |  |  |  |  | bym | cie | łem | 
| 106 |  |  |  |  |  |  | | [ae] (?: my | sz | ść )  # -amy -emy -asz -esz -aść -eść | 
| 107 |  |  |  |  |  |  | ) $}{}x; | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 68 | 100 |  |  |  | 117 | if ($length > 3) { | 
| 111 | 54 | 100 |  |  |  | 277 | return $word if $word =~ s{ (?: | 
| 112 |  |  |  |  |  |  | ąc | 
| 113 |  |  |  |  |  |  | | a[ćmł]                     # -ać -am -ał | 
| 114 |  |  |  |  |  |  | | e[ćm]                      # -eć -em | 
| 115 |  |  |  |  |  |  | | i[ćł]                      # -ić -ił | 
| 116 |  |  |  |  |  |  | | (?<= a    ) j              # -aj                 → -a | 
| 117 |  |  |  |  |  |  | | (?<= [ae] ) (?: sz | ść )  # -asz -aść -esz -eść → -a -e | 
| 118 |  |  |  |  |  |  | ) $}{}x; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  |  | 
| 121 | 55 |  |  |  |  | 105 | return $word; | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | sub remove_adverb { | 
| 125 | 77 |  |  | 77 | 0 | 87 | my ($word) = @_; | 
| 126 | 77 |  |  |  |  | 107 | my $length = length $word; | 
| 127 |  |  |  |  |  |  |  | 
| 128 | 77 | 100 |  |  |  | 154 | if ($length > 4) { | 
| 129 | 29 | 100 |  |  |  | 113 | return $word if $word =~ s{ (?: | 
| 130 |  |  |  |  |  |  | (?<= r    ) ze  # -rze      → -r | 
| 131 |  |  |  |  |  |  | | (?<= [nw] ) ie  # -nie -wie → -n -w | 
| 132 |  |  |  |  |  |  | ) $}{}x; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 74 |  |  |  |  | 135 | return $word; | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | sub remove_plural { | 
| 139 | 77 |  |  | 77 | 0 | 83 | my ($word) = @_; | 
| 140 | 77 |  |  |  |  | 93 | my $length = length $word; | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 77 | 100 |  |  |  | 139 | if ($length > 4) { | 
| 143 | 26 | 100 |  |  |  | 109 | return $word if $word =~ s{ (?: | 
| 144 |  |  |  |  |  |  | ami | om | ów | 
| 145 |  |  |  |  |  |  | ) $}{}x; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 75 |  |  |  |  | 156 | return $word; | 
| 149 |  |  |  |  |  |  | } | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub remove_other { | 
| 152 | 77 |  |  | 77 | 0 | 91 | my ($word) = @_; | 
| 153 | 77 |  |  |  |  | 106 | my $length = length $word; | 
| 154 |  |  |  |  |  |  |  | 
| 155 | 77 | 100 |  |  |  | 159 | if ($length > 4) { | 
| 156 | 25 | 100 |  |  |  | 132 | return $word if $word =~ s{ (?: | 
| 157 |  |  |  |  |  |  | [aąęiłuy] | 
| 158 |  |  |  |  |  |  | | i[ae]  # -ia -ie | 
| 159 |  |  |  |  |  |  | ) $}{}x; | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  |  | 
| 162 | 67 |  |  |  |  | 117 | return $word; | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | 1; | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | __END__ |