File Coverage

blib/lib/Lingua/DE/Wortschatz.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             #$Id: Wortschatz.pm 1151 2008-10-05 20:57:26Z schroeer $
2              
3             package Lingua::DE::Wortschatz;
4              
5 1     1   19968 use strict;
  1         2  
  1         30  
6 1     1   354 use SOAP::Lite;# +trace=>'all';
  0            
  0            
7             use HTML::Entities;
8             use Text::Autoformat;
9             use Exporter 'import';
10             use Data::Dumper;
11             $Data::Dumper::Indent=1;
12              
13             our @EXPORT_OK = qw(use_service help);
14             our %EXPORT_TAGS = (all => [@EXPORT_OK]);
15            
16             our $VERSION = "1.27";
17              
18             my $BASE_URL = 'http://anonymous:anonymous@pcai055.informatik.uni-leipzig.de:8100/axis/services/';
19             my $LIMIT = 10;
20             my $MINSIG = 1;
21              
22             #description of available services and necessary corpus, in and out parameters
23             my %services=( # service_name => [ 'corpus', [ 'inparam<=default>', .. ], [ 'outparam', .. ] )
24             ServiceOverview => [ 'webservice', ['Name='], ['Name','Id','Status','Description','AuthorizationLevel','InputFields'] ],
25             Cooccurrences => [ 'de', ['Wort',"Mindestsignifikanz=$MINSIG","Limit=$LIMIT"], ['Wort','Kookkurrenz','Signifikanz'] ],
26             Baseform => [ 'de', ['Wort'], ['Grundform','Wortart'] ],
27             Sentences => [ 'de', ['Wort',"Limit=$LIMIT"], ['Satz'] ],
28             RightNeighbours => [ 'de', ['Wort',"Limit=$LIMIT"], ['Wort','Nachbar','Signifikanz'] ],
29             LeftNeighbours => [ 'de', ['Wort',"Limit=$LIMIT"], ['Nachbar','Wort','Signifikanz'] ],
30             Frequencies => [ 'de', ['Wort'], ['Anzahl','Frequenzklasse'] ],
31             Synonyms => [ 'de', ['Wort',"Limit=$LIMIT"], ['Synonym'] ],
32             Thesaurus => [ 'de', ['Wort',"Limit=$LIMIT"], ['Synonym'] ],
33             Wordforms => [ 'de', ['Word',"Limit=$LIMIT"], ['Form'] ], #find the trap that wortschatz.u-l guys have hidden here
34             Similarity => [ 'de', ['Wort',"Limit=$LIMIT"], ['Wort','Verwandter','Signifikanz'] ],
35             LeftCollocationFinder
36             => [ 'de', ['Wort','Wortart',"Limit=$LIMIT"], ['Kollokation','Wortart','Wort'] ],
37             RightCollocationFinder
38             => [ 'de', ['Wort','Wortart',"Limit=$LIMIT"], ['Wort','Kollokation','Wortart'] ],
39             Sachgebiet => [ 'de', ['Wort'], ['Sachgebiet'] ],
40             Kreuzwortraetsel
41             => [ 'de', ['Wort','Wortlaenge',"Limit=$LIMIT"], ['Wort'] ],
42             );
43              
44             sub use_service {
45             #returns a Lingua::DE::Wortschatz::Result object
46              
47             #get input parameters and set defaults or return undef if service unknown or insufficent parameters
48             my $service=parse_servicename(shift) || return undef;
49             my %params;
50             for (@{$services{$service}->[1]}) {
51             if (/([^=]+)=(.*)/) { $params{$1}=shift || $2}
52             else { $params{$_}=shift || return undef }
53             }
54             my $corpus=$services{$service}->[0];
55             my @resultnames=@{$services{$service}->[2]};
56              
57             # perform the soap query
58             my $soap = SOAP::Lite->proxy($BASE_URL.$service);
59             my $result=$soap->execute(make_params($corpus,\%params));
60             #print Dumper($result);
61             #print $soap->execute(make_params($corpus,\%params))->{_context}->{_transport}->{_proxy}->{_http_response}->{_content};
62             die "SOAP has returned an error (".$result->faultcode."):\n".$result->faultstring if ($result->fault);
63            
64             # bring results into shape
65             # wortschatz has two different kind of return types; kind of scalar and list
66             my @res=$result->valueof('//result/'.((@resultnames > 1) ? '*' : 'dataVectors').'/*');
67             my $resobj=Lingua::DE::Wortschatz::Result->new($service,@resultnames);
68             $resobj->add(splice @res,0,@resultnames) while (@res);
69             return $resobj;
70             }
71              
72             sub help {
73             my $cmd=shift || 'list';
74             $cmd = parse_servicename($cmd) || 'list' unless ($cmd =~ /(list)|(full)/);
75             my @so=use_service('ServiceOverview')->hashrefs();
76             my $help=($cmd =~ /(list)|(full)/) ? "Available services:\n" : "";
77             for my $so (@so) {
78             my $sn=$so->{Name};
79             if ($services{$sn} && (($sn =~ /^$cmd/) || ($cmd =~ /(list)|(full)/))) {
80             $help.=sprintf "* %s %s %s %s\n",$sn,@{$services{$sn}->[1]},"","","";
81             unless ($cmd eq 'list') {
82             my $t="\n ".decode_entities($so->{Description})."\n\n";
83             $help.=autoformat($t,{all=>1})."\n";
84             }
85             }
86             }
87             return $help;
88             }
89              
90             sub make_params {
91             # create the soap request parameters by hand
92             # not the idea of soap, but everything else failed (see manpage)
93             my ($corpus,$params)=@_;
94             my $ns='http://datatypes.webservice.wortschatz.uni-leipzig.de';
95             my $xml="$corpus";
96             my $num=1;
97             for (keys %$params) {
98             $xml.=q ().
99             qq($_).
100             qq($$params{$_}).
101             q ();
102             $num++;
103             }
104             $xml.='';
105             SOAP::Data->type(xml=>$xml);
106             }
107              
108             sub parse_servicename {
109             my ($service)=grep(/^$_[0]/,keys %services);
110             return $service;
111             }
112              
113             package Lingua::DE::Wortschatz::Result;
114             use strict;
115              
116             our $VERSION = $Lingua::DE::Wortschatz::VERSION;
117              
118             sub new {
119             my ($proto,$service,@names) = @_;
120             my $class = ref($proto) || $proto;
121             return bless { service => $service, names => \@names, data => [] }, $class;
122             }
123              
124             sub add {
125             my ($self,@values)=@_;
126             push(@{$self->{data}},\@values);
127             }
128              
129             sub dump {
130             my $self=shift;
131             print "Service ",$self->service,"\n\n";
132             my @lengths;
133             for my $row ($self->data,$self->{names}) {
134             for (0..$#$row) {
135             my $l=length($row->[$_]);
136             $lengths[$_]=$l unless ($lengths[$_] && ($lengths[$_] > $l));
137             }
138             }
139             my $form=(join " ",map {'%-'.$_.'s'} @lengths)."\n";
140             printf $form,$self->names;
141             printf $form, map {"-"x$_} @lengths;
142             printf $form,@$_ for ($self->data);
143             }
144              
145             sub service { shift->{service} }
146            
147             sub names { @{shift->{names}} }
148            
149             sub data { @{shift->{data}} }
150            
151             sub hashrefs {
152             my $self=shift;
153             my @res=();
154             for (@{$self->{data}}) {
155             my %hash;
156             @hash{@{$self->{names}}}=@$_;
157             push(@res,\%hash);
158             }
159             return @res;
160             }
161              
162             1;
163              
164             __END__