| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*- Mode: Perl -*- |
|
2
|
|
|
|
|
|
|
# Regel.pm -- |
|
3
|
|
|
|
|
|
|
# Author : Ulrich Pfeifer |
|
4
|
|
|
|
|
|
|
# Created On : Thu Feb 1 09:10:48 1996 |
|
5
|
|
|
|
|
|
|
# Last Modified By: Ulrich Pfeifer |
|
6
|
|
|
|
|
|
|
# Last Modified On: Sun Apr 3 12:11:51 2005 |
|
7
|
|
|
|
|
|
|
# Language : Perl |
|
8
|
|
|
|
|
|
|
# Update Count : 73 |
|
9
|
|
|
|
|
|
|
# Status : Unknown, Use with caution! |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package Text::German::Regel; |
|
12
|
2
|
|
|
2
|
|
10
|
use Text::German::Util; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
2008
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Debug verbosity used by reduce1(): any true value prints each rule that
# matched, a value greater than 1 additionally prints every rule tried.
$debug = 0;

# Rule table, indexed by (numeric) rule number.  Each slot holds a reference
# to a list of rule records; see the field legend at the end of this file.
@REGEL = (); # -w

# Fill @REGEL from the colon-separated records in this file's __DATA__
# section.  A record looks like
#
#   rule:next_rule:field1:field2:new_class_bits:match_class_bits:rest...
#
# and is stored as
#
#   [ next_rule, field1, field2,
#     bit_to_int(new_class_bits), bit_to_int(match_class_bits), rest... ]
#
# so that the indices agree with the legend at the end of the file
# (0 = follow-up rule ... 5 = match pattern, 6 = 'ge' prefix flag,
# 7 = characters to remove, 8 = string to append).
#
# bit_to_int() is not defined in this file; presumably it is imported from
# Text::German::Util and converts a bit string such as "01001" into an
# integer -- TODO confirm.
{
    local ($_);

    # The readline on DATA is essential: an empty `while ()` condition is
    # always true and would spin forever without consuming any record.
    while (<DATA>) {
        chomp;
        my ($regel, $next_rule, $field1, $field2,
            $new_class_bits, $match_class_bits, @rest) = split(/:/, $_);

        # Skip blank lines and anything without a rule number.  Note that a
        # rule number written as "000" is a true string and is kept.
        next unless $regel;

        push(@{ $REGEL[$regel] },
             [ $next_rule,
               $field1,
               $field2,
               bit_to_int($new_class_bits),
               bit_to_int($match_class_bits),
               @rest ]);
    }
    close DATA;
}
|
33
|
|
|
|
|
|
|
# reduce($v, $s, $e)
#
# Repeatedly applies reduce1() to a word given as three pieces:
#   $v - prefix part (reduce1 may strip a leading "ge" from it),
#   $s - the part the rule patterns are matched against (presumably the
#        stem -- naming inferred, confirm against callers),
#   $e - the ending, looked up via Text::German::Endung inside reduce1.
#
# $s is first padded to at least three characters by moving characters over
# from the front of $e.  Then reduce1() is tried; while it fails, one more
# character is moved from $e to $s and it is tried again, until $e is used up.
#
# Returns the list ($v, $s, $e) produced by reduce1() on success.  On failure
# it returns whatever the last reduce1() call returned (a one-element list
# holding undef), and a one-element (undef) list if the whole word is shorter
# than three characters.
sub reduce {
    my ($v, $s, $e) = @_;

    # Deliberately `return undef` rather than a bare `return`: list-context
    # callers have always received a one-element list here.
    return undef if length($s . $e) < 3;

    while (length($s) < 3) {
        $s .= substr($e, 0, 1);
        $e  = substr($e, 1);
    }

    while (1) {
        my @tmp = reduce1($v, $s, $e);

        # Success is a multi-element result.  Counting elements explicitly
        # (instead of testing $#tmp for truth) keeps an empty list from
        # being mistaken for a successful reduction.
        return @tmp if @tmp > 1;

        # Nothing left to shift over: hand the failure result to the caller.
        return @tmp if !$e;

        $s .= substr($e, 0, 1);
        $e  = substr($e, 1);
    }
}
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# reduce1($v, $s, $e)
#
# Performs one reduction attempt on a word given as prefix $v, match part $s
# and ending $e.
#
#  1. Looks up the rule number for the ending with
#     Text::German::Endung::regel($e).  While that returns undef and $e is
#     not yet used up, one character is moved from the front of $e to the
#     end of $s and the lookup is repeated.
#  2. Fetches the word class for the (possibly shortened) ending with
#     Text::German::Endung::wort_klasse($e).
#  3. Walks the records in $REGEL[rule].  For the first record whose pattern
#     (field 5) matches the end of $s, it removes field 7 characters from the
#     end of $s, appends field 8, optionally strips a leading "ge" from $v
#     (field 6), and then continues with the follow-up rule in field 0
#     unless that is '000'.
#
# Returns ($v, $s, $e) if at least one record matched.  Otherwise returns
# undef -- on purpose via `return undef`, so that in list context the caller
# gets a ONE-element list: reduce() in this file tells success from failure
# by the number of elements returned.
#
# Reads the package variables @REGEL and $debug; debug output goes to the
# currently selected output handle via print.
sub reduce1 {
    my ($v, $s, $e) = @_;
    my $fr;                # current rule number
    my $fc;                # current word class
    my $did_match = 0;     # number of records applied so far

    # Find a rule for the ending, shortening the ending from the left until
    # one is known or nothing is left.
    while (1) {
        $fr = Text::German::Endung::regel($e); # || '001'; # ???
        last if defined $fr;
        last unless $e;
        $s .= substr($e, 0, 1);
        $e  = substr($e, 1);
    }
    return undef unless $fr;
    $fc = Text::German::Endung::wort_klasse($e);

  RULESET:
    while (defined $REGEL[$fr]) {
        # Lexical loop variable: the original iterated over the package
        # global $r, which nothing else in this file reads.
        for my $r (@{ $REGEL[$fr] }) {
            # allowed wordclasses
            # NOTE(review): bitwise OR makes this guard true whenever either
            # operand is non-zero; a class-membership test would normally use
            # `&`.  Left unchanged because switching the operator changes
            # which rules fire -- confirm against the rule data first.
            next unless $r->[4] | $fc;

            # Expand the pattern shorthands: the first '+' stands for any
            # consonant, the first '%' for any vowel (including the Latin-1
            # umlauts \344 \366 \374).  Only the first occurrence of each
            # is replaced.
            my $match = $r->[5];
            $match =~ s/\+/[bcdfghjklmnpqrstvwxyz]/;
            $match =~ s/\%/[aeiou\344\366\374]/;

            print "\tREGEL: $fr:", (join ':', @{$r}), "\t($s,$match)\n"
                if $debug > 1;

            next unless $s =~ /$match$/;

            $did_match++;
            print "\tREGEL: $fr:", (join ':', @{$r}), "\t$s => "
                if $debug;

            $s  = (substr($s, 0, length($s) - $r->[7])) if $r->[7];
            $s .= $r->[8] if $r->[8];
            print "$s\n" if $debug;

            # vorsilbe 'ge' kann entfallen?  (may the 'ge' prefix be dropped?)
            $v =~ s/^ge// if $r->[6];

            $fr = $r->[0];      # follow-up rule
            $fc = $r->[3];      # ??? (new word class, per the field legend)

            # A follow-up rule other than '000' restarts the scan with that
            # rule's records; '000' ends the chain.
            next RULESET if $fr ne '000';
            last;
        }
        last;
    }

    return ($v, $s, $e) if $did_match;
    return undef;
}
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
1; |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# regel |
|
116
|
|
|
|
|
|
|
# 0 Folgeregel |
|
117
|
|
|
|
|
|
|
# 1 # zeichen entfernen |
|
118
|
|
|
|
|
|
|
# 2 |
|
119
|
|
|
|
|
|
|
# 3 new wc |
|
120
|
|
|
|
|
|
|
# 4 KLASSE fuer match |
|
121
|
|
|
|
|
|
|
# 5 MATCH |
|
122
|
|
|
|
|
|
|
# 6 vorsilbe ge |
|
123
|
|
|
|
|
|
|
# 7 #chars to remove |
|
124
|
|
|
|
|
|
|
# 8 string to append |
|
125
|
|
|
|
|
|
|
# 017:000:2:te:01001:01001:önn:1:3:ann |
|
126
|
|
|
|
|
|
|
__DATA__ |