File Coverage

blib/lib/Lingua/Wordnet/Analysis.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package Lingua::Wordnet::Analysis;
2              
3 1     1   1244 use strict;
  1         2  
  1         40  
4 1     1   859 use Lingua::Wordnet;
  0            
  0            
5             use vars qw($VERSION);
6              
7             $VERSION = '0.74';
8              
9             =head1 NAME
10              
11             Lingua::Wordnet::Analysis - Perl extension for high-level processing of Wordnet databases.
12              
13             =head1 SYNOPSIS
14              
15             use Lingua::Wordnet::Analysis;
16              
17             $analysis = new Lingua::Wordnet::Analysis;
18              
19             # How many articles of clothing have 'tongues'?
20             $tongue = $wn->lookup_synset("tongue","n",2);
21             @articles = $analysis->search($clothes,$tongue,"all_meronyms");
22            
23             # Are there any parts, of any kinds, of any shoes, made of glass?
24             @shoe_types = $analysis->traverse("hyponyms",$shoes);
25             $count = $analysis->search(@shoe_types,$glass,"stuff_meronyms");
26              
27             # Compute the intersection of two lists of synsets
28             @array1 = $shoes->all_holonyms;
29             @intersect = $analysis->intersection
30             (\@{$shoes->attributes},\@{$socks->attributes});
31              
32             # Generate a list of the inherited comp_meronyms for "apple"
33             @apple_hypernyms = $analysis->traverse("hypernyms",$apple);
34             @apple_parts = $analysis->traverse("comp_meronyms",@apple_hypernyms);
35            
36              
37             =head1 DESCRIPTION
38              
39             Lingua::Wordnet::Analysis supplies high-level functions for analysis of word relationships. Most of these functions process and return potentially large amounts of data, so only use them if you "know what you are doing."
40              
41             These functions could have been put into Lingua::Wordnet::Synset objects, but I wanted to keep those limited to core functionality. Besides, many of these functions have unproven usefulness.
42              
43             =head1 Lingua::Wordnet::Analysis functions
44              
45             =item $analysis->match(SYNSET,ARRAY)
46              
47             Finds any occurance of SYNSET in the synset list ARRAY and the list's pointers. Returns a positive value if a match is found. match() does not traverse.
48              
49              
50             =item $analysis->search(SYNSET1,SYNSET2,POINTER)
51              
52             Searches all pointers of type POINTER in SYNSET1 for SYNSET2. search() is recursive, and will traverse all depths. Returns the number of matches.
53            
54              
55             =item $analysis->traverse(POINTER,SYNSET)
56              
57             Traverses all pointer types of POINTER in SYNSET and returns a list of all synsets found in the tree.
58              
59              
60             =item $analysis->coordinates(SYNSET)
61              
62             Returns a list of the coordinate sisters of SYNSET.
63              
64              
65             =item $analysis->union(LIST)
66              
67             Returns a list of synsets which is the union of synsets LIST. The union consists of synsets which occur in any lists. This is useful, for example, for determining all the holonyms for two or more synsets.
68              
69              
70             =item $analysis->intersection(ref LIST)
71              
72             Returns a list of synsets of the intersection of ARRAY1 list of synsets with ARRAY2 list of synsets. The intersection consists of synsets which occur in both lists. This is useful, for example, to determine which meronyms are shared by two synsets:
73              
74             @synsets = $analysis->intersection
75             (\@{$synset1->all_meronyms},\@{$synset2->all_meronyms});
76              
77             =item $analysis->distance(SYNSET1,SYNSET2,POINTER)
78              
79             Returns an integer value representing the distance in pointers between SYNSET1 and SYNSET2 using POINTER as the search path.
80              
81             =head1 EXAMPLES
82              
83             To print out an inherited meronym list, use traverse():
84              
85             $orange = $wn->lookup_synset("orange","n",1);
86             @orange_hypernyms = $analysis->traverse("hypernyms",$orange);
87             foreach ($analysis->traverse("all_meronyms",@orange_hypernyms)) {
88             print $_->words, "\n";
89             }
90              
91             Note that the inherited meronyms will not contain the direct meronyms of
92             $orange.
93              
94              
95             =head1 BUGS/TODO
96              
97             There is tons that could go in this module ... submissions are welcome!
98              
99             Lots of cleanup.
100              
101             Need to add a search_path function that will return a path to a match as a linked list or hash of hashes.
102              
103             Some might want inherited meronym/holonym trees.
104              
105             Please send bugs and suggestions/requests to dbrian@brians.org. Development
106             on this module is active as of Winter 2000.
107              
108             =head1 AUTHOR
109              
110             Dan Brian
111              
112             =head1 SEE ALSO
113              
114             Lingua::Wordnet.
115              
116             =cut
117              
118             sub new {
119             my $class = shift;
120             my $self = {};
121             bless $self, $class;
122             return $self;
123             }
124              
125             sub match {
126             my $self = shift;
127             my $synset = shift;
128             my @synsets = @_;
129             my $match = 0;
130             foreach (@synsets) {
131             if ($_->{offset} eq $synset->{offset}) {
132             $match++;
133             }
134             }
135             return $match;
136             }
137              
138             sub distance {
139             my $self = shift;
140             my $synset1 = shift;
141             my $matching = shift;
142             my $ptrtype = shift;
143             my @synsets = ( );
144             my $findit;
145             $findit = sub {
146             my $synset = shift;
147             my $matching = shift;
148             my $pointer = shift;
149             my $count = shift;
150             $count++;
151             my @synsets1 = ( );
152             my @list = ( );
153             my $found = 0;
154             eval("\@list = \$synset->$pointer");
155             die ($@) if ($@);
156             foreach (@list) {
157             if ($_->{offset} eq $matching->{offset}) {
158             return (1,$count);
159             }
160             ($found,$count) = &{$findit}($_,$matching,$pointer,$count);
161             if ($found) { return (1,$count); }
162             }
163             return (0,$count);
164             };
165             my ($found,$count) = &{$findit}($synset1,$matching,$ptrtype,0);
166             if ($found) { return $count; }
167             else { return 0; }
168             }
169              
170              
171             sub global_distance {
172             my $self = shift;
173              
174             my $syns1 = shift;
175             my $syns2 = shift;
176              
177             my $maxdist = shift;
178             my $dist = shift;
179             my $checked = shift || {};
180              
181             my $parentpathes = shift;
182              
183             unless (defined $dist) {
184             if ($#{$syns1} < $#{$syns2}) {
185             # print "Swapping syns1 and syns2\n";
186             my $tmp = $syns1;
187             $syns1 = $syns2;
188             $syns2 = $tmp;
189             }
190             }
191              
192             unless ($parentpathes) {
193             $parentpathes = [ (map { $_->offset } @{$syns2}) ];
194             }
195             # print "Checked " . scalar keys( %{$checked} ) . " so far.\n";
196             # print "Now checking: " . scalar @{$syns2} . " syns agains " . scalar @{$syns1} . "\n";
197             if ($maxdist && $dist && $dist > $maxdist) { return (undef, "Max. distance ($maxdist) reached\n"); }
198             my @around;
199             my @pathes;
200             my $i = 0;
201             foreach my $syn2 (@{$syns2}) {
202              
203             $checked->{$syn2->offset} = 1;
204             foreach my $syn1(@{$syns1}) {
205             # compare syn1 and syn2
206             if ($self->equals($syn1, $syn2)) {
207             return ( ( $dist || 0 ), ($parentpathes->[$i] || ""));
208             }
209             # =comment (this was slower)
210             # if (my $hyper = is_hypernym($syn1, $syn2)) {
211             # return ($dist) ? ($dist+1, $parentpathes->[$i] . " : " . $hyper->offset . "|hyper") : (1, $parentpathes->[$i] . " : " . $hyper->offset . "|hyper");
212             # }
213             # if (my $hypo = is_hyponym($syn1, $syn2)) {
214             # return ($dist) ? ($dist+1, $parentpathes->[$i] . " : " . $hypo->offset . "|hypo") : (1, $parentpathes->[$i] . " : " . $hypo->offset . "|hypo");
215             # }
216             # if (my $mero = is_meronym($syn1, $syn2)) {
217             # return ($dist) ? ($dist+1, $parentpathes->[$i] . " : " . $mero->offset . "|mero") : (1, $parentpathes->[$i] . " : " . $mero->offset . "|mero");
218             # }
219             # if (my $holo = is_holonym($syn1, $syn2)) {
220             # return ($dist) ? ($dist+1, $parentpathes->[$i] . " : " . $holo->offset . "|holo") : (1, $parentpathes->[$i] . " : " . $holo->offset . "|holo");
221             # }
222             # =cut
223             }
224             # get surrounding synsets of $syn2
225             my ($ar, $pa) = $self->get_surroundings($syn2, $checked, $parentpathes->[$i]);
226             push @pathes, @{$pa};
227             push @around, @{$ar};
228             $i++;
229             }
230              
231             return $self->global_distance($syns1, \@around, $maxdist, (($dist) ? $dist+1 : 1), $checked, \@pathes);
232             }
233              
234              
235             sub equals {
236             my $self = shift;
237             my $syn1 = shift;
238             my $syn2 = shift;
239              
240             return 1 if ($syn1->offset eq $syn2->offset);
241             return 0;
242             }
243              
244             sub is_hypernym {
245             my $self = shift;
246             my $syn1 = shift;
247             my $syn2 = shift;
248              
249             foreach ($syn2->hypernyms) {
250             if ($self->equals($syn1, $_)) {
251             # print "MATCH: $syn2 is a HYPERNYM of $syn1\n";
252             return $_;
253             }
254             }
255             return 0;
256             }
257              
258             sub is_hyponym {
259             my $self = shift;
260             my $syn1 = shift;
261             my $syn2 = shift;
262              
263             foreach ($syn2->hyponyms) {
264             if ($self->equals($syn1, $_)) {
265             # print "MATCH: $syn2 is a HYPONYM of $syn1\n";
266             return $_;
267             }
268             }
269             return 0;
270             }
271              
272             sub is_meronym {
273             my $self = shift;
274             my $syn1 = shift;
275             my $syn2 = shift;
276              
277             foreach ($syn2->all_meronyms) {
278             if ($self->equals($syn1, $_)) {
279             # print "MATCH: $syn2 is a MERONYM of $syn1\n";
280             return $_;
281             }
282             }
283             return 0;
284             }
285              
286              
287             sub is_holonym {
288             my $self = shift;
289             my $syn1 = shift;
290             my $syn2 = shift;
291             foreach ($syn2->all_holonyms) {
292             if ($self->equals($syn1, $_)) {
293             # print "MATCH: $syn2 is a HOLONYM of $syn1\n";
294             return $_;
295             }
296             }
297             return 0;
298             }
299              
300              
301             sub get_surroundings {
302             my $self = shift;
303             my $syn = shift;
304             my $checked = shift;
305             my $parentpath = shift || "";
306             my @around;
307             my @pathes;
308             foreach ($syn->hypernyms) {
309             unless ($checked->{$_->offset}) {
310             push @pathes, join (" : ", $parentpath, $_->offset . "|hyper");
311             push @around,$_;
312             }
313             }
314             foreach ($syn->hyponyms) {
315             unless ($checked->{$_->offset}) {
316             push @pathes, join (" : ", $parentpath, $_->offset . "|hypo");
317             push @around,$_;
318             }
319             }
320             foreach ($syn->all_meronyms) {
321             unless ($checked->{$_->offset}) {
322             push @pathes, join (" : ", $parentpath, $_->offset . "|mero");
323             push @around,$_;
324             }
325             }
326             foreach ($syn->all_holonyms) {
327             unless ($checked->{$_->offset}) {
328             push @pathes, join (" : ", $parentpath, $_->offset . "|holo");
329             push @around,$_;
330             }
331             }
332             return (\@around, \@pathes);
333             }
334              
335             sub search {
336             my $self = shift;
337             my $synset1 = shift;
338             my $matching = shift;
339             my $ptrtype = shift;
340             my $lastsynset;
341             my @synsets;
342             my $matchit;
343             $matchit = sub {
344             my $synset = shift;
345             my @list;
346             eval("\@list = \$synset->$ptrtype");
347             die ($@) if ($@);
348             foreach (@list) {
349             if ($_->{offset} eq $matching->{offset}) {
350             push (@synsets,$lastsynset);
351             }
352             $lastsynset = $_;
353             &{$matchit}($_);
354             }
355             };
356             &{$matchit}($synset1);
357             @synsets;
358             }
359              
360             sub traverse {
361             my $self = shift;
362             my $ptrtype = shift;
363             my @synsets = ( );
364             my %hash;
365             my $traverseit;
366             $traverseit = sub {
367             my $synset = shift;
368             my $pointer = shift;
369             my @synsets1 = ( );
370             my @list = ( );
371             eval("\@list = \$synset->$pointer");
372             die ($@) if ($@);
373             foreach (@list) {
374             unless (exists $hash{$_}) {
375             push @synsets1, $_;
376             push @synsets1, &{$traverseit}($_,$pointer);
377             $hash{$_->{offset}} = "";
378             }
379             }
380             @synsets1;
381             };
382             foreach (@_) {
383             push @synsets, &{$traverseit}($_,$ptrtype);
384             }
385             @synsets;
386             }
387              
388             sub coordinates {
389             my $self = shift;
390             my $synset = shift;
391             return ($synset->hypernyms)[0]->hyponyms;
392             }
393              
394             sub union {
395             my $self = shift;
396             my @synsets;
397             my %union = ( );
398             foreach (@_) {
399             @union{$_->{offset}} = $_;
400             }
401             foreach (keys %union) {
402             push(@synsets,$union{$_});
403             }
404             return @synsets;
405             }
406              
407             sub intersection {
408             my $self = shift;
409             my ($i,$sizei) = (0, scalar @{$_[0]});
410             my ($j,$sizej);
411             my $set;
412             for ($j = 1; $j < scalar @_; $j++) {
413             $sizej = scalar @{$_[$j]};
414             ($i,$sizei) = ($j,$sizej) if ($sizej < $sizei);
415             }
416             my @intersection;
417             my $array = splice @_, $i, 1;
418             my %valuehash;
419             foreach (@$array) { push @intersection,
420             $_->{offset}; $valuehash{$_->{offset}} = $_; }
421             while ($set = shift) {
422             my $newlist;
423             foreach (@$set) {
424             my @offsets;
425             push @offsets, $_->{offset};
426             $newlist->{$_->{offset}} = "";
427             }
428             @intersection = grep { exists $newlist->{$_} } @intersection;
429             }
430             my @synsets;
431             foreach (@intersection) { push @synsets, $valuehash{$_} }
432             return @synsets;
433             }
434              
435             sub close {
436             my $self = shift;
437             $self->DESTROY();
438             }
439              
440             sub DESTROY {
441             my $self = shift;
442             undef $self;
443             }
444              
445             1;
446             __END__