File Coverage

blib/lib/Lingua/Wordnet.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Lingua::Wordnet;
2              
3             require 5.005;
4 2     2   962 use strict;
  2         4  
  2         75  
5 2     2   12 use warnings;
  2         3  
  2         81  
6              
7 2     2   12 use vars qw($VERSION @ISA @EXPORT_OK @EXPORT $DICTDIR $DELIM $SUBDELIM);
  2         6  
  2         221  
8 2     2   9284 use DB_File;
  0            
  0            
9              
10             require Exporter;
11              
12             @ISA = qw(Exporter);
13             @EXPORT_OK = ( );
14             @EXPORT = qw( );
15             $VERSION = '0.74';
16             $DICTDIR = '/usr/local/WordNet-2.0/lingua-wordnet/';
17             $DELIM = '||';
18             $SUBDELIM = '|';
19              
20             =head1 NAME
21              
22             Lingua::Wordnet - Perl extension for accessing and manipulating Wordnet databases.
23              
24             =head1 SYNOPSIS
25              
26             use Lingua::Wordnet;
27             use Lingua::Wordnet::Analysis;
28              
29             $wn->unlock();
30             $synset = $wn->lookup_synset("canary","n",4);
31             $synset2 = $wn->lookup_synset("small","a",1);
32             $synset->add_attributes($synset2);
33             $synset->write();
34             print $synset, "\n";
35             $wn->close();
36              
37             =head1 DESCRIPTION
38              
39             Wordnet is a lexical reference system inspired by current psycholinguitics theories of human lexical memory. This module allows access to the Wordnet lexicon from Perl applications, as well as manipulation and extension of the lexicon. Lingua::Wordnet::Analysis provides numerous high-level extensions to the system.
40              
41             Version 0.1 was a complete rewrite of the module in pure Perl, whereas the old module embedded the Wordnet C API functions. In order to use the module, the database files must first be converted to Berkeley DB files using the 'scripts/convertdb.pl' file. Why did I do that?
42              
43             - The Wordnet API consists mostly of searching and text manipulation functions, something Perl is, um .. well suited for.
44              
45             - Data retrieval is more fast with the hash lookup than with the binary searches
46              
47             - Converting the databases allows optional manipulation of the data, including adding and editing synsets, as well as extension of the system to allow for more pointer types (including noun attributes and 'functions').
48              
49             - Developers can use the Wordnet databases without needing to compile the Wordnet API and browsers, allowing Wordnet to run on any Perl/Berkeley DB-capable platform (the database files are still needed for the conversion, of course)
50              
51             - A pure Perl implementation allows easier debugging and modification for people who want to experiment or alter the processing.
52              
53              
54             With that said, there are actually two modules. Lingua::Wordnet impersonates the basic Wordnet API functions for searching and retrieving data, as well as adding, editing, and deleting synsets. Lingua::Wordnet::Analysis brings the interface up a level, allowing commands like "is 'yellow' an attribute of any 'birds'", and taking care of the recursive analysis.
55              
56              
57             =head1 Lingua::Wordnet functions
58              
59             =over 4
60              
61             =item $wn = new Lingua::Wordnet( [DATA_DIR] );
62              
63             Creates and assigns a new object of class Lingua::Wordnet. DATA_DIR is optional, and indicates the location of the index and data files.
64              
65              
66             =item $wn->unlock()
67              
68             Allows files to be written to when data is added/edited/deleted.
69              
70              
71             =item $wn->lock()
72              
73             Locks files to prohibit write permissions (default).
74              
75              
76             =item $wn->grep(TEXT)
77              
78             Returns an array of compound words matching TEXT.
79              
80              
81             =item @synsets = $wn->lookup_synset( TEXT, POS [,NUMBER] )
82              
83             Assigns a list of synset objects (Lingua::Wordnet::Synset) matching TEXT within POS, where POS is 'n', 'v', 'a', 's' or 'r'. Without NUMBER, lookup_synset() will return all matches in POS. NUMBER is the sequential order of the desired synset within POS.
84              
85              
86             =item $synset = $wn->lookup_synset_offset(SYNSET_OFFSET)
87              
88             Assigns a synset object SYNSET_OFFSET.
89              
90              
91             =item $synset = $wn->new_synset(WORD,POS);
92              
93             Creates a new (empty) synset entry in the database. Both WORD and POS are required. An offset will be assigned when write() is called.
94              
95             =back
96              
97             =head1 Lingua::Wordnet::Synset functions
98              
99             =over 4
100              
101             =item @words = $synset->words([TEXT ..)]
102              
103             Retrieves or sets the list of words for this synset. add_words() should be used
104             if you are only adding an entry, rather than setting all entries. Each word is
105             in the format: TEXT%SENSE, where TEXT is the word, and SENSE is the sense number for the word. If SENSE is not supplied when assigning words to a synset, Lingua::Wordnet will assign the appropriate sense numbers to the words when $synset->write() is called (since they must be unique). In this case, the word list should consist only of the word text, without the '%'. The new words will be written to the data and index files.
106              
107              
108             =item $wn->familiarity(WORD, POS [, POLY_CNT])
109              
110             Returns an integer of the familiarity/polysemy count for WORD in POS. Given a third value POLY_CNT, sets the polysemy count for WORD in POS. In Lingua::Wordnet, this is a value which must be updated by the user, and is not automatically modified. This makes it useful for recording familiarity or frequency counts outside of the Wordnet lexicons. Note that polysemy within Lingua::Wordnet can be identified for a given word by counting the synsets returned by lookup_synset().
111              
112              
113             =item $wn->morph(WORD, POS)
114              
115             Returns an array containing the base form(s) of WORD in POS as found in the Wordnet morph files. The synset_lookup() functions performs morphological conversion automatically, so a call to morph() is not required. Changes: This fuynction now returns an array, because on WORD may have more than one base form.
116              
117              
118             =item $synset->overview()
119              
120             Returns the terms and gloss for the synset in a format for printing. This method is also used to overload a print performed on the synset. Note that this is different from the "overview" parameter of the 'wn' executable, since it only returns information about the current synset.
121              
122              
123             =item $synset->write()
124              
125             Writes any changes made to $synset to the database and updates all affected synset data and indexes. If $synset passes out of scope before write() is called, the changes are lost.
126              
127              
128             All of following functions retrieve data in synsets. Each has two corresponding
129             functions which can be called by prepending 'add_' or 'delete_' before the
130             function name. These functions accept a synset object or objects as input. Unless noted otherwise in the following functions, any returned data is a synset object or array of synset objects. See below for examples usages.
131              
132             $synset->antonyms()
133             $synset->add_antonyms($synset2[, ...])
134             $synset->delete_antonyms($synset2[, ...])
135              
136             Returns, adds, or deletes antonyms for $synset. WARNING: When adding/deleting synset pointers to Wordnet, it is important to add pointer entries to the corresponding synset in order to maintain database accuracy. Earlier versions of this module planned to automate this function, however, they have been abandoned in favor of having control over database writes with the 'write()' function, and are now considered functionality which belongs outside of the module. Thus, your program must implement the functionality to, in the above examples, add an antonym entry to '$synset' for '$synset2', in addition to adding an antonym entry to '$synset2' for '$synset'.
137              
138              
139             =item $synset->hypernyms()
140              
141             Returns hypernyms for $synset.
142              
143             =item $synset->hyponyms()
144              
145             Returns hyponyms for $synset.
146              
147             =item $synset->entailment()
148              
149             Returns verb entailment pointers.
150              
151             =item $synset->synonyms()
152              
153             Returns a list of words within $synset.
154              
155              
156             =item $synset->comp_meronyms()
157              
158             Returns component-object meronyms for $synset.
159              
160              
161             =item $synset->member_meronyms()
162              
163             Returns member-collection meronyms for $synset.
164              
165              
166             =item $synset->stuff_meronyms()
167              
168             Returns stuff-object meronyms for $synset (a.k.a. substance-object).
169              
170              
171             =item $synset->portion_meronyms()
172              
173             Returns portion-mass meronyms for $synset.
174              
175              
176             =item $synset->feature_meronyms()
177              
178             Returns feature-activity meronyms for $synset.
179              
180              
181             =item $synset->place_meronym()
182              
183             Returns place-area meronyms for $synset.
184              
185              
186             =item $synset->phase_meronym()
187              
188             Sets or returns phase-process meronyms for $synset.
189              
190              
191             =item $synset->all_meronyms()
192              
193             Returns an array of synset objects for all meronyms types of $synset.
194              
195              
196             =item $synset->all_holonyms()
197              
198             Returns an array of synset objects for all holonyms of $synset.
199              
200              
201             The following seven functions mirror the above functionality for holonyms, and accordingly have corresponding add_ and delete_ functions which update any set values to the corresponding meronym pointers:
202              
203             =item $synset->comp_holonyms()
204              
205             =item $synset->member_holonyms()
206              
207             =item $synset->stuff_holonyms()
208              
209             =item $synset->portion_holonyms()
210              
211             =item $synset->feature_holonyms()
212              
213             =item $synset->place_holonyms()
214              
215             =item $synset->phase_holonyms()
216              
217              
218             =item $synset->gloss([TEXT])
219              
220             Returns the glass for $synset. If TEXT is present, the gloss for $synset will
221             be assigned that value.
222              
223              
224             =item $synset->attributes()
225              
226             Returns a list of synset objects of attribute pointers for $synset.
227              
228              
229             =item $synset->functions()
230              
231             Returns a list of synset objects of function pointers for $synset.
232              
233              
234             =item $synset->causes()
235              
236             Returns the 'cause to' pointers for verbs.
237              
238              
239             =item $synset->pertainyms()
240              
241             Returns the 'pertains to' pointers for adj and adv.
242              
243              
244             =item $synset->frames()
245              
246             Returns a text array of verb frames for $synset. The add_frames() and delete_frames() functions accept only integers corresponding to the frames. The list of frames can be edited in Wordnet.pm directly, but probably shouldn't be.
247              
248              
249             =item $synset->lex_info([INT])
250              
251             Returns a string containing lexicographer file information. The optional INT assigns the lexicographer file information, and should correspond to the file list in Wordnet.pm.
252              
253             =item $synset->offset()
254              
255             Returns the synset offset of $synset.
256              
257             =back
258              
259             =head1 EXAMPLES
260              
261             Extensive examples can be found in the 'scripts/' directory; here I will
262             summarize the basic functionality. There are also some examples in the pod
263             documentation for Lingua::Wordnet::Analysis.
264              
265             This will display a hypernym tree for $synset:
266              
267             my $synset = $wn->lookup_synset_offset("00333350%n");
268             while ($synset = ($synset->hypernyms)[0]) {
269             $i++;
270             print " "x$i, "->", $synset->words, "\n";
271             }
272              
273             Outputting the following for synset "baseball":
274              
275             -> field_game%0
276             -> outdoor_game%0
277             -> athletic_game%0
278             -> sport%0athletics%0
279             -> diversion%0recreation%0
280             -> activity%0
281             -> act%0human_action%0human_activity%0
282              
283             The example below will create a synset object and print a list of the hyponyms
284             for that object:
285            
286             use Lingua::Wordnet;
287             my $wn = new Lingua::Wordnet;
288             my $synset = $wn->lookup_synset("baseball","n",1);
289             print "The following are kinds of baseball games:\n";
290             foreach $bb_synset ($synset->hyponyms) {
291             my $words;
292             foreach $word ($bb_synset->words) {
293             $word =~ s/\%\d+$//; $word =~ s/\_/ /g;
294             $words .= "$word, ";
295             }
296             $words =~ s/\,\s*$//;
297             print " $words\n";
298             }
299             $wn->close();
300              
301              
302             This will output:
303              
304              
305             The following are kinds of baseball games:
306             professional baseball
307             hardball
308             perfect game
309             no-hit game, no-hitter
310             one-hitter, 1-hitter
311             two-hitter, 2-hitter
312             three-hitter, 3-hitter
313             four-hitter, 4-hitter
314             five-hitter, 5-hitter
315             softball, softball game
316             rounders
317             stickball, stickball game
318              
319              
320             And an assignment example. This will create a new synset and add it to the kinds of baseball games. We unlock the Wordnet files to enable changes to the database:
321              
322              
323             use Lingua::Wordnet;
324             my $wn = new Lingua::Wordnet;
325             $wn->unlock();
326             my $synset = $wn->lookup_synset("baseball","n",1);
327             my $newsynset = $wn->new_synset("fooball","n");
328             $newsynset->gloss("A baseball game in which a foo is used.");
329             $synset->add_hyponym($newsynset);
330             $wn->close();
331              
332              
333             Remember, proceeded most synset functions with "add" will append the supplied data to the corresponding field, rather than replacing its value.
334              
335              
336             We could add an attribute 'fun' to "fooball" thus (not necessarily recommended pointer, but it will suffice for an example):
337              
338              
339             $fun_synset = $wn->lookup_synset("fun","adj",1);
340             $newsynset->add_attributes($fun_synset);
341              
342              
343             See the Lingua::Wordnet::Analysis documentation for examples to retrieving and searching entire trees and inheritance functions.
344              
345             =head1 BUGS/TODO
346              
347             Please send bugs and suggestions/requests to dbrian@brians.org. Development on this module is active as of Spring 2001.
348              
349             Clean up code, put references where beneficial.
350              
351             =head1 AUTHOR
352              
353             Dan Brian
354              
355             =head1 SEE ALSO
356              
357             Lingua::Wordnet::Analysis.
358              
359             =cut
360              
361             sub new {
362             my $class = shift;
363             my ($datapath) = shift || $DICTDIR;
364             my $self = {};
365             bless $self, $class;
366             $self->{lock} = 1;
367             $self->{indexobj} = tie %{$self->{indexhash}},
368             "DB_File", "$datapath/lingua_wordnet.index",
369             O_RDWR, 0666, $DB_BTREE or
370             die "Unable to load $datapath/lingua_wordnet.index: $!";
371             $self->{dataobj} = tie %{$self->{datahash}},
372             "DB_File", "$datapath/lingua_wordnet.data",
373             O_RDWR, 0666, $DB_BTREE or
374             die "Unable to load $datapath/lingua_wordnet.data: $!";
375             $self->{morphobj} = tie %{$self->{morphhash}},
376             "DB_File", "$datapath/lingua_wordnet.morph",
377             O_RDWR, 0666, $DB_BTREE or
378             die "Unable to load $datapath/lingua_wordnet.morph: $!";
379             return $self;
380             }
381              
382             sub lock {
383             my $self = shift;
384             $self->{lock} = 1;
385             }
386              
387             sub unlock {
388             my $self = shift;
389             $self->{lock} = 0;
390             }
391              
392             sub DESTROY {
393             my $self = shift;
394             untie $self->{dicthash};
395             untie $self->{indexhash};
396             untie $self->{morphhash};
397             $self = {};
398             }
399              
400             sub close {
401             my $self = shift;
402             $self->DESTROY();
403             }
404              
405             sub familiarity {
406             my $self = shift;
407             my ($word,$pos) = @_;
408             if ($self->{indexhash}->{"$word\%$pos"}) {
409             return (split(/\Q$DELIM/,$self->{indexhash}->{"$word\%$pos"}))[0];
410             } else {
411             return undef;
412             }
413             }
414              
415             sub lookup_synset {
416             my $self = shift;
417             my ($word,$pos,$num) = @_;
418             if (!exists($self->{indexhash}->{"$word\%$pos"})) {
419             #print "Morphing $word\n";
420             my @morphed = $self->morph($word,$pos);
421             my @synsets;
422             foreach (@morphed) {
423             next if ($_ eq $word);
424             #print "M: $_\n";
425             push @synsets, $self->lookup_synset($_, $pos);
426             }
427             if ($num) {
428             return $synsets[$num-1];
429             } else {
430             return @synsets;
431             }
432             }
433             if ($pos && $num) {
434             my ($poly,$offsets) = split(/\Q$DELIM/,$self->{indexhash}->{"$word\%$pos"});
435             my $offset = (split(/\Q$SUBDELIM/,$offsets))[$num-1] . "\%$pos";
436             return Lingua::Wordnet::Synset->new(\$self,$offset,$pos);
437             } else {
438             # print qq|$word is in index: $self->{indexhash}->{"$word\%$pos"}\n|;
439             my ($poly,$offsets) = split(/\Q$DELIM/,$self->{indexhash}->{"$word\%$pos"});
440             my @offsets = (split(/\Q$SUBDELIM/,$offsets));
441             my @synsets;
442             foreach (@offsets) {
443             push(@synsets,Lingua::Wordnet::Synset->new(\$self,"$_\%$pos",$pos));
444             }
445             return @synsets;
446             }
447             }
448              
449             sub lookup_synset_offset {
450             my $self = shift;
451             my $offset = shift;
452             return Lingua::Wordnet::Synset->new(\$self,$offset);
453             }
454              
455             sub morph {
456             my $self = shift;
457             my $word = shift;
458             my $pos = shift;
459             my $morphed = $self->{morphhash}->{"$word\%$pos"};
460             return unless $morphed;
461             my @morphed;
462             if ($morphed =~ /\Q$DELIM/) {
463             @morphed = split(/\Q$DELIM/, $morphed);
464             } else {
465             @morphed = ($morphed);
466             }
467             return wantarray ? @morphed : $morphed[0] || undef;
468             }
469              
470             sub reverse_morph {
471             my $self = shift;
472             my $word = shift;
473             my $pos = shift;
474             my %rev_hash = reverse %{$self->{morphhash}};
475             if ($rev_hash{"$word"}) {
476             return $rev_hash{"$word"};
477             } else {
478             # this takes care of multiple baseforms of a word
479             my @words = map { $rev_hash{$_} } grep { /^$word\Q$DELIM/ ||
480             /\Q$DELIM\E$word\Q$DELIM\E/ ||
481             /\Q$DELIM\E$word$/ } keys %rev_hash;
482              
483             if ($#words > 0) {
484             print STDERR "Warning Something is wrong: $word has more the one reverse morphed meanings\n";
485             print STDERR "\t" . join(",", @words);
486             }
487             return $words[0];
488             }
489             }
490              
491             sub grep {
492             my $self = shift;
493             my $key = shift;
494             if (!defined $key) { return; }
495             my $origkey = $key;
496             my $st = 0;
497             my $value = 0;
498             my @words;
499             for ($st = $self->{indexobj}->seq($key, $value, R_CURSOR);
500             $key =~ /^$origkey/i && $st == 0;
501             $st = $self->{indexobj}->seq($key, $value, R_NEXT) )
502             { push(@words, $key); }
503             return @words;
504             }
505              
506             sub new_synset {
507             my $self = shift;
508             my ($word,$pos) = @_;
509             my $sense = 0;
510             # assign a sense number
511             if (exists($self->{indexhash}->{"$word\%$pos"})) {
512             my ($poly,$offsets) = split(/\Q$DELIM/,$self->{indexhash}->{"$word\%$pos"});
513             my @offsets = (split(/\Q$SUBDELIM/,$offsets));
514             $sense = scalar(@offsets);
515             }
516             #$word = $word . "\%$sense";
517             return Lingua::Wordnet::Synset->new(\$self,'',$pos,$word);
518             }
519              
520             my @lexfiles = (
521             "adj.all",
522             "adj.pert",
523             "adv.all",
524             "noun.Tops",
525             "noun.act",
526             "noun.animal",
527             "noun.artifact",
528             "noun.attribute",
529             "noun.body",
530             "noun.cognition",
531             "noun.communication",
532             "noun.event",
533             "noun.feeling",
534             "noun.food",
535             "noun.group",
536             "noun.location",
537             "noun.motive",
538             "noun.object",
539             "noun.person",
540             "noun.phenomenon",
541             "noun.plant",
542             "noun.possession",
543             "noun.process",
544             "noun.quantity",
545             "noun.relation",
546             "noun.shape",
547             "noun.state",
548             "noun.substance",
549             "noun.time",
550             "verb.body",
551             "verb.change",
552             "verb.cognition",
553             "verb.communication",
554             "verb.competition",
555             "verb.consumption",
556             "verb.contact",
557             "verb.creation",
558             "verb.emotion",
559             "verb.motion",
560             "verb.perception",
561             "verb.possession",
562             "verb.social",
563             "verb.stative",
564             "verb.weather",
565             "adj.ppl"
566             );
567              
568             my @vrbsents = (
569             "",
570             "Something ----s",
571             "Somebody ----s",
572             "It is ----ing",
573             "Something is ----ing PP",
574             "Something ----s something Adjective/Noun",
575             "Something ----s Adjective/Noun",
576             "Somebody ----s Adjective",
577             "Somebody ----s something",
578             "Somebody ----s somebody",
579             "Something ----s somebody",
580             "Something ----s something",
581             "Something ----s to somebody",
582             "Somebody ----s on something",
583             "Somebody ----s somebody something",
584             "Somebody ----s something to somebody",
585             "Somebody ----s something from somebody",
586             "Somebody ----s somebody with something",
587             "Somebody ----s somebody of something",
588             "Somebody ----s something on somebody",
589             "Somebody ----s somebody PP",
590             "Somebody ----s something PP",
591             "Somebody ----s PP",
592             "Somebody's (body part) ----s",
593             "Somebody ----s somebody to INFINITIVE",
594             "Somebody ----s somebody INFINITIVE",
595             "Somebody ----s that CLAUSE",
596             "Somebody ----s to somebody",
597             "Somebody ----s to INFINITIVE",
598             "Somebody ----s whether INFINITIVE",
599             "Somebody ----s somebody into V-ing something",
600             "Somebody ----s something with something",
601             "Somebody ----s INFINITIVE",
602             "Somebody ----s VERB-ing",
603             "It ----s that CLAUSE",
604             "Something ----s INFINITIVE",
605             );
606              
607             package Lingua::Wordnet::Synset;
608              
609             use overload qw("") => \&overview;
610              
611             sub new {
612             my $class = shift;
613             my ($wn,$offset,$pos,$word) = @_;
614             my $self = {};
615             my $data;
616             bless $self, $class;
617             $self->{wn} = $wn;
618             if ($offset) {
619             if (!exists(${$self->{wn}}->{datahash}->{$offset})) {
620             die "Synset $offset not found.";
621             }
622             $data = ${$self->{wn}}->{datahash}->{$offset};
623             $self->{offset} = $offset;
624             $self->{pos} = substr($offset,length($offset)-1);
625             ($self->{filenum},$self->{words},$self->{ptrs},$self->{frames},
626             $self->{gloss}) = split(/\Q$Lingua::Wordnet::DELIM/,$data);
627             } else {
628             $self->{offset} = "1\%$pos";
629             $self->{pos} = $pos;
630             $self->{words} = $word;
631             $self->{filenum} = "";
632             $self->{ptrs} = "";
633             $self->{frames} = "";
634             $self->{gloss} = "";
635             }
636             #lex_filenum||words||pointers||frames||gloss
637             return $self;
638             }
639              
640             sub overview {
641             my $self = shift;
642             my @wordlist = $self->words;
643             my $words = join ", ", @wordlist;
644             $words =~ s/\%\d//g;
645             $words =~ s/\_/ /g;
646             return "$words -- (" . $self->gloss . ")";
647             }
648              
649             # write
650             sub write {
651             my $self = shift;
652             my $strippedoffset = "";
653             if (${$self->{wn}}->{lock} == 1) {
654             die "write() not allowed while Wordnet object is locked."
655             } else {
656             # ${$self->{wn}}->{datahash}->{"dgb3"} = 1;
657             # check if this is a new synset, give it an offset if so
658             if ($self->{offset} =~ /^1\%(\w)$/) {
659             ${$self->{wn}}->{datahash}->{offsetcount}++;
660             $strippedoffset = ${$self->{wn}}->{datahash}->{offsetcount};
661             $self->{offset} = ${$self->{wn}}->{datahash}->{offsetcount} . "\%$1";
662             } else {
663             $strippedoffset = $self->{offset};
664             $strippedoffset =~ s/\%(\w)$//;
665             }
666             # write the data entry
667             ${$self->{wn}}->{datahash}->{$self->{offset}} =
668             $self->{filenum} . "$Lingua::Wordnet::DELIM" . $self->{words} . "$Lingua::Wordnet::DELIM" .
669             $self->{ptrs} . "$Lingua::Wordnet::DELIM" . $self->{frames} . "$Lingua::Wordnet::DELIM" .
670             $self->{gloss};
671             # write the index entries
672             my $word;
673             foreach $word ($self->words) {
674             my $iword = $word . "\%" . $self->{pos};
675             if (exists(${$self->{wn}}->{indexhash}->{$iword})) {
676             # check if synset is already here
677             unless (${$self->{wn}}->{indexhash}->{$iword} =~
678             /$strippedoffset/) {
679             ${$self->{wn}}->{indexhash}->{$iword} .= "$Lingua::Wordnet::SUBDELIM" .
680             $strippedoffset;
681             }
682             } else {
683             ${$self->{wn}}->{indexhash}->{$iword} = "1$Lingua::Wordnet::DELIM" . $strippedoffset;
684             }
685             }
686             }
687             return $self->{offset};
688             }
689              
690             # offset
691             sub offset {
692             my $self = shift;
693             return $self->{offset};
694             }
695              
696             # pos
697             sub pos {
698             my $self = shift;
699             return $self->{pos};
700             }
701              
702             # words
703             sub words {
704             my $self = shift;
705             my @newwords = shift;
706             my @wordlist;
707             if (@newwords > 0) {
708             @wordlist = @newwords;
709             }
710             @wordlist = split(/\Q$Lingua::Wordnet::SUBDELIM/,$self->{words});
711             return @wordlist;
712             }
713             sub add_words {
714             my $self = shift;
715             my @newwords = shift;
716             if (@newwords == 0) { return; }
717             my @wordlist = split(/\Q$Lingua::Wordnet::SUBDELIM/,$self->{words});
718             push (@wordlist, @newwords);
719             $self->{words} = join("$Lingua::Wordnet::SUBDELIM",@wordlist);
720             }
721             sub delete_words {
722             my $self = shift;
723             my @delwords = shift;
724             my $word;
725             if (@delwords == 0) { return; }
726             my @wordlist = split(/\Q$Lingua::Wordnet::SUBDELIM/,$self->{words});
727             my @retwords;
728             foreach $word (@wordlist) {
729             unless (grep {$word} @delwords) {
730             push(@retwords,$word);
731             }
732             }
733             $self->{words} = join("$Lingua::Wordnet::SUBDELIM",@retwords);
734             }
735              
736             sub synonyms {
737             my $self = shift;
738              
739             my @syns;
740             my @words = $self->words;
741             return wantarray ? @words : \@words;
742             }
743              
744             # standard synset functions
745              
746             sub synset_pointers {
747             my ($self,$ptr) = @_;
748             my @synsets = ();
749             foreach (split(/\Q$Lingua::Wordnet::SUBDELIM/,$self->{ptrs})) {
750             /^$ptr\w*\s(\d+)\%(\w)\s(\d{4})/ && do {
751             push(@synsets,Lingua::Wordnet::Synset->new($self->{wn},"$1\%$2"));
752             };
753             }
754             return @synsets;
755             }
756             sub add_synset_pointers {
757             my ($self,$ptr,@synsets) = @_;
758             my $synset;
759             foreach $synset (@synsets) {
760             if ($self->{ptrs} =~ /^$ptr\s$synset->{offset}/) {
761              
762             } else {
763             $self->{ptrs} .= "$Lingua::Wordnet::SUBDELIM$ptr $synset->{offset} 0000";
764             }
765             }
766             }
767              
768             sub delete_synset_pointers {
769             my ($self,$ptr,@synsets) = @_;
770             my $synset;
771             my $delim = "$Lingua::Wordnet::SUBDELIM";
772             foreach $synset (@synsets) {
773             $self->{ptrs} =~ s/(?:\Q$delim\E)*$ptr\s$synset->{offset}\s\d{4}//g;
774             }
775             $self->{ptrs} =~ s/^\Q$Lingua::Wordnet::SUBDELIM//;
776             }
777              
778             # antonyms: pointers are '!' (consistent)
779             sub antonyms {
780             my $self = shift;
781             return synset_pointers($self,"!");
782             }
783             sub add_antonyms {
784             my $self = shift;
785             my @add_synsets = @_;
786             add_synset_pointers($self,"!",@add_synsets);
787             }
788             sub delete_antonyms {
789             my $self = shift;
790             my @delete_synsets = @_;
791             delete_synset_pointers($self,"!",@delete_synsets);
792             }
793              
794             # hypernyms: pointers are '@' (consistent)
795             sub hypernyms {
796             my $self = shift;
797             return synset_pointers($self,"\@");
798             }
799             sub add_hypernyms {
800             my $self = shift;
801             my @add_synsets = @_;
802             add_synset_pointers($self,"\@",@add_synsets);
803             }
804             sub delete_hypernyms {
805             my $self = shift;
806             my @delete_synsets = @_;
807             delete_synset_pointers($self,"\@",@delete_synsets);
808             }
809              
810             # entailment: pointers are '*' (consistent)
811             sub entailment {
812             my $self = shift;
813             return synset_pointers($self,'\*');
814             }
815             sub add_entailment {
816             my $self = shift;
817             my @add_synsets = @_;
818             add_synset_pointers($self,"\\*",@add_synsets);
819             }
820             sub delete_entailment {
821             my $self = shift;
822             my @delete_synsets = @_;
823             delete_synset_pointers($self,'\*',@delete_synsets);
824             }
825              
826             # hyponyms: pointers are '~' (consistent)
827             sub hyponyms {
828             my $self = shift;
829             return synset_pointers($self,"~");
830             }
831             sub add_hyponyms {
832             my $self = shift;
833             my @add_synsets = @_;
834             add_synset_pointers($self,"~",@add_synsets);
835             }
836             sub delete_hyponyms {
837             my $self = shift;
838             my @delete_synsets = @_;
839             delete_synset_pointers($self,"~",@delete_synsets);
840             }
841              
842              
843             # In case you're wondering why meronyms use "%" pointers
844             # while holonyms use "#" pointers, even though wninput.5 says
845             # that "#" is a meronym and "%" is a holonym ... I don't know.
846             # But the API uses IS..PTR as "#" and HAS..PTR as "%". If you
847             # understand this, please let me know, because I haven't
848             # figured it out. Logically is seems backwards to me. I think
849             # it has to do with the "IS" vs. "HAS" language behind the
850             # pointers. *shrug*
851              
852             # meroynyms
853             sub all_meronyms {
854             my $self = shift;
855             return synset_pointers($self,"\%");
856             }
857              
858             # member meronym: pointers are '%m' (consistent)
859             sub member_meronyms {
860             my $self = shift;
861             return synset_pointers($self,"\%m");
862             }
863             sub add_member_meronyms {
864             my $self = shift;
865             my @add_synsets = @_;
866             add_synset_pointers($self,"\%m",@add_synsets);
867             }
868             sub delete_member_meronyms {
869             my $self = shift;
870             my @delete_synsets = @_;
871             delete_synset_pointers($self,"\%m",@delete_synsets);
872             }
873              
874             # stuff meronym: pointers are '%s' (consistent)
875             sub stuff_meronyms {
876             my $self = shift;
877             return synset_pointers($self,"\%s");
878             }
879             sub add_stuff_meronyms {
880             my $self = shift;
881             my @add_synsets = @_;
882             add_synset_pointers($self,"\%s",@add_synsets);
883             }
884             sub delete_stuff_meronyms {
885             my $self = shift;
886             my @delete_synsets = @_;
887             delete_synset_pointers($self,"\%s",@delete_synsets);
888             }
889              
890             # portion meronym: pointers are '%o' (new)
891             sub portion_meronyms {
892             my $self = shift;
893             return synset_pointers($self,"\%o");
894             }
895             sub add_portion_meronyms {
896             my $self = shift;
897             my @add_synsets = @_;
898             add_synset_pointers($self,"\%o",@add_synsets);
899             }
900             sub delete_portion_meronyms {
901             my $self = shift;
902             my @delete_synsets = @_;
903             delete_synset_pointers($self,"\%o",@delete_synsets);
904             }
905              
906             # component meronym: pointers are '%p' (consistent - part)
907             sub comp_meronyms {
908             my $self = shift;
909             return synset_pointers($self,"\%p");
910             }
911             sub add_comp_meronyms {
912             my $self = shift;
913             my @add_synsets = @_;
914             add_synset_pointers($self,"\%p",@add_synsets);
915             }
916             sub delete_comp_meronyms {
917             my $self = shift;
918             my @delete_synsets = @_;
919             delete_synset_pointers($self,"\%p",@delete_synsets);
920             }
921              
922             # feature meronym: pointers are '%f' (new)
923             sub feature_meronyms {
924             my $self = shift;
925             return synset_pointers($self,"\%f");
926             }
927             sub add_feature_meronyms {
928             my $self = shift;
929             my @add_synsets = @_;
930             add_synset_pointers($self,"\%f",@add_synsets);
931             }
932             sub delete_feature_meronyms {
933             my $self = shift;
934             my @delete_synsets = @_;
935             delete_synset_pointers($self,"\%f",@delete_synsets);
936             }
937              
938             # phase meronym: pointers are '%a' (new)
939             sub phase_meronyms {
940             my $self = shift;
941             return synset_pointers($self,"\%a");
942             }
943             sub add_phase_meronyms {
944             my $self = shift;
945             my @add_synsets = @_;
946             add_synset_pointers($self,"\%a",@add_synsets);
947             }
948             sub delete_phase_meronyms {
949             my $self = shift;
950             my @delete_synsets = @_;
951             delete_synset_pointers($self,"\%a",@delete_synsets);
952             }
953              
954             # place meronym: pointers are '%l' (new)
955             sub place_meronyms {
956             my $self = shift;
957             return synset_pointers($self,"\%l");
958             }
959             sub add_place_meronyms {
960             my $self = shift;
961             my @add_synsets = @_;
962             add_synset_pointers($self,"\%l",@add_synsets);
963             }
964             sub delete_place_meronyms {
965             my $self = shift;
966             my @delete_synsets = @_;
967             delete_synset_pointers($self,"\%l",@delete_synsets);
968             }
969              
970              
971             # holonyms
972             sub all_holonyms {
973             my $self = shift;
974             return synset_pointers($self,"#");
975             }
976              
977             # member holonym: pointers are '#m' (consistent)
978             sub member_holonyms {
979             my $self = shift;
980             return synset_pointers($self,"#m");
981             }
982             sub add_member_holonyms {
983             my $self = shift;
984             my @add_synsets = @_;
985             add_synset_pointers($self,"#m",@add_synsets);
986             }
987             sub delete_member_holonyms {
988             my $self = shift;
989             my @delete_synsets = @_;
990             delete_synset_pointers($self,"#m",@delete_synsets);
991             }
992              
993             # stuff holonym: pointers are '#s' (consistent)
994             sub stuff_holonyms {
995             my $self = shift;
996             return synset_pointers($self,"#s");
997             }
998             sub add_stuff_holonyms {
999             my $self = shift;
1000             my @add_synsets = @_;
1001             add_synset_pointers($self,"#s",@add_synsets);
1002             }
1003             sub delete_stuff_holonyms {
1004             my $self = shift;
1005             my @delete_synsets = @_;
1006             delete_synset_pointers($self,"#s",@delete_synsets);
1007             }
1008              
1009             # portion holonym: pointers are '#o' (new)
1010             sub portion_holonyms {
1011             my $self = shift;
1012             return synset_pointers($self,"#o");
1013             }
1014             sub add_portion_holonym {
1015             my $self = shift;
1016             my @add_synsets = @_;
1017             add_synset_pointers($self,"#o",@add_synsets);
1018             }
1019             sub delete_portion_holonym {
1020             my $self = shift;
1021             my @delete_synsets = @_;
1022             delete_synset_pointers($self,"#o",@delete_synsets);
1023             }
1024              
1025             # component holonym: pointers are '#p' (consistent - part)
1026             sub comp_holonyms {
1027             my $self = shift;
1028             return synset_pointers($self,"#p");
1029             }
1030             sub add_comp_holonym {
1031             my $self = shift;
1032             my @add_synsets = @_;
1033             add_synset_pointers($self,"#p",@add_synsets);
1034             }
1035             sub delete_comp_holonym {
1036             my $self = shift;
1037             my @delete_synsets = @_;
1038             delete_synset_pointers($self,"#p",@delete_synsets);
1039             }
1040              
1041             # feature holonym: pointers are '#f' (new)
1042             sub feature_holonyms {
1043             my $self = shift;
1044             return synset_pointers($self,"#f");
1045             }
1046             sub add_feature_holonym {
1047             my $self = shift;
1048             my @add_synsets = @_;
1049             add_synset_pointers($self,"#f",@add_synsets);
1050             }
1051             sub delete_feature_holonym {
1052             my $self = shift;
1053             my @delete_synsets = @_;
1054             delete_synset_pointers($self,"#f",@delete_synsets);
1055             }
1056              
1057             # phase holonym: pointers are '#a' (new)
1058             sub phase_holonyms {
1059             my $self = shift;
1060             return synset_pointers($self,"#a");
1061             }
1062             sub add_phase_holonym {
1063             my $self = shift;
1064             my @add_synsets = @_;
1065             add_synset_pointers($self,"#a",@add_synsets);
1066             }
1067             sub delete_phase_holonym {
1068             my $self = shift;
1069             my @delete_synsets = @_;
1070             delete_synset_pointers($self,"#a",@delete_synsets);
1071             }
1072              
1073             # place holonym: pointers are '#l' (new)
1074             sub place_holonyms {
1075             my $self = shift;
1076             return synset_pointers($self,"#l");
1077             }
1078             sub add_place_holonym {
1079             my $self = shift;
1080             my @add_synsets = @_;
1081             add_synset_pointers($self,"#l",@add_synsets);
1082             }
1083             sub delete_place_holonym {
1084             my $self = shift;
1085             my @delete_synsets = @_;
1086             delete_synset_pointers($self,"#l",@delete_synsets);
1087             }
1088              
1089             # cause: pointers are '>' (consistent)
1090             sub causes {
1091             my $self = shift;
1092             return synset_pointers($self,"\>");
1093             }
1094             sub add_causes {
1095             my $self = shift;
1096             my @add_synsets = @_;
1097             add_synset_pointers($self,"\>",@add_synsets);
1098             }
1099             sub delete_causes {
1100             my $self = shift;
1101             my @delete_synsets = @_;
1102             delete_synset_pointers($self,"\>",@delete_synsets);
1103             }
1104              
1105             # verb group: pointers are '$' (consistent)
1106             sub verb_group {
1107             my $self = shift;
1108             return synset_pointers($self,'\$');
1109             }
1110             sub add_verb_group {
1111             my $self = shift;
1112             my @add_synsets = @_;
1113             add_synset_pointers($self,"\$",@add_synsets);
1114             }
1115             sub delete_verb_group {
1116             my $self = shift;
1117             my @delete_synsets = @_;
1118             delete_synset_pointers($self,'\$',@delete_synsets);
1119             }
1120              
1121             # similar to: pointers are '&' (consistent)
1122             sub similar_to {
1123             my $self = shift;
1124             return synset_pointers($self,'\&');
1125             }
1126             sub add_similar_to {
1127             my $self = shift;
1128             my @add_synsets = @_;
1129             add_synset_pointers($self,"\&",@add_synsets);
1130             }
1131             sub delete_similar_to {
1132             my $self = shift;
1133             my @delete_synsets = @_;
1134             delete_synset_pointers($self,'\&',@delete_synsets);
1135             }
1136              
1137             # participle of verb: pointers are '<' (consistent)
1138             sub participles {
1139             my $self = shift;
1140             return synset_pointers($self,"\<");
1141             }
1142             sub add_participles {
1143             my $self = shift;
1144             my @add_synsets = @_;
1145             add_synset_pointers($self,"\<",@add_synsets);
1146             }
1147             sub delete_participles {
1148             my $self = shift;
1149             my @delete_synsets = @_;
1150             delete_synset_pointers($self,"\<",@delete_synsets);
1151             }
1152              
1153             # pertainym (pertains to noun): pointers are '\' (consistent)
1154             sub pertainyms {
1155             my $self = shift;
1156             return synset_pointers($self,"\\");
1157             }
1158             sub add_pertainyms {
1159             my $self = shift;
1160             my @add_synsets = @_;
1161             add_synset_pointers($self,"\\",@add_synsets);
1162             }
1163             sub delete_pertainyms {
1164             my $self = shift;
1165             my @delete_synsets = @_;
1166             delete_synset_pointers($self,"\\",@delete_synsets);
1167             }
1168              
1169             # attribute: pointers are '=' (consistent, but now also nouns)
1170             sub attributes {
1171             my $self = shift;
1172             return synset_pointers($self,"\=");
1173             }
1174             sub add_attributes {
1175             my $self = shift;
1176             my @add_synsets = @_;
1177             add_synset_pointers($self,"\=",@add_synsets);
1178             }
1179             sub delete_attributes {
1180             my $self = shift;
1181             my @delete_synsets = @_;
1182             delete_synset_pointers($self,"\=",@delete_synsets);
1183             }
1184              
1185             # derived from adjective: pointers are '\' (consistent)
1186             sub derived_from_adj {
1187             my $self = shift;
1188             return synset_pointers($self,"\\");
1189             }
1190             sub add_derived_from_adj {
1191             my $self = shift;
1192             my @add_synsets = @_;
1193             add_synset_pointers($self,"\\",@add_synsets);
1194             }
1195             sub delete_derived_from_adj {
1196             my $self = shift;
1197             my @delete_synsets = @_;
1198             delete_synset_pointers($self,"\\",@delete_synsets);
1199             }
1200              
1201             # also see: pointers are '^' (consistent)
1202             sub also_see {
1203             my $self = shift;
1204             return synset_pointers($self,'\^');
1205             }
1206             sub add_see_also {
1207             my $self = shift;
1208             my @add_synsets = @_;
1209             add_synset_pointers($self,"^",@add_synsets);
1210             }
1211             sub delete_see_also {
1212             my $self = shift;
1213             my @delete_synsets = @_;
1214             delete_synset_pointers($self,'\^',@delete_synsets);
1215             }
1216              
1217             # function: pointers are '+' (new)
1218             # This could be expanded to include objects for the function:
1219             # + verb_synset_offset pos src/trg [noun_synset_offset frame wordnum]
1220             # Where the typical pointer syntax is followed by a list of [] enclosed
1221             # pointers to noun objects of the verb function, a frame number, and a
1222             # word number of the word in that frame which corresponds to the noun.
1223             # But not now. :)
1224              
1225             #sub functions {
1226             # my $self = shift;
1227             # return synset_pointers($self,'\+');
1228             #}
1229             #sub add_functions {
1230             # my $self = shift;
1231             # my @add_synsets = @_;
1232             # add_synset_pointers($self,"\+",@add_synsets);
1233             #}
1234             #sub delete_functions {
1235             # my $self = shift;
1236             # my @delete_synsets = @_;
1237             # delete_synset_pointers($self,'\+',@delete_synsets);
1238             #}
1239              
1240              
1241             # in WordNet 2.0 the '+' - pointer is used for lexical links for
1242             # derivational morphology (new)
1243             #
1244             # From CHANGES:
1245             # Lexical links for derivational morphology are represented as
1246             # "+". Note that in verson 1.7.1, some links between nouns and
1247             # verbs were present in the lexicographer files, but not in the
1248             # database files. In the 1.7.1 lexicographer files, these
1249             # links were represented by the "+" character, followed by a
1250             # letter from "a" to "x". The letter is no longer present, and
1251             # all derivational links are indicated simply by the "+"
1252             # pointer type. Also note that the "+" character also
1253             # precedes the list of verb frames in the verb data file. The
1254             # verb frame list always follows the pointer list, therefore
1255             # the "+" character used for the different purposes are
1256             # distinguishable.
1257              
1258             sub derivedforms {
1259             my $self = shift;
1260             return synset_pointers($self,'\+');
1261             }
1262             sub add_derivedforms {
1263             my $self = shift;
1264             my @add_synsets = @_;
1265             add_synset_pointers($self,"\+",@add_synsets);
1266             }
1267             sub delete_derivedforms {
1268             my $self = shift;
1269             my @delete_synsets = @_;
1270             delete_synset_pointers($self,'\+',@delete_synsets);
1271             }
1272              
1273             # Domains and Domain Terms (new in WN 2.0)
1274             #
1275             # From the CHANGES:
1276             # Some synsets have been organized into topical domains.
1277             # Domains are always noun synset, however synsets from every
1278             # syntacic category may be classified. Each domain is further
1279             # classified as a CATEGORY, REGION, or USAGE. A pointer from a
1280             # domain term synset to its topic is represented by the ";"
1281             # pointer character, followed by "c", "r", or "u", indicating
1282             # the type of domain. The converse relation, from a domain to
1283             # the synsets in that domain, is represented by the "-" pointer
1284             # character, also followed by "c", "r", or "u".
1285              
1286             sub all_domains {
1287             my $self = shift;
1288             return synset_pointers($self,";");
1289             }
1290              
1291             sub category_domains {
1292             my $self = shift;
1293             return synset_pointers($self,";c");
1294             }
1295             sub add_category_domains {
1296             my $self = shift;
1297             my @add_synsets = @_;
1298             add_synset_pointers($self,";c",@add_synsets);
1299             }
1300             sub delete_category_domains {
1301             my $self = shift;
1302             my @delete_synsets = @_;
1303             delete_synset_pointers($self,';c',@delete_synsets);
1304             }
1305              
1306             sub region_domains {
1307             my $self = shift;
1308             return synset_pointers($self,";r");
1309             }
1310             sub add_region_domains {
1311             my $self = shift;
1312             my @add_synsets = @_;
1313             add_synset_pointers($self,";r",@add_synsets);
1314             }
1315             sub delete_region_domains {
1316             my $self = shift;
1317             my @delete_synsets = @_;
1318             delete_synset_pointers($self,';r',@delete_synsets);
1319             }
1320              
1321             sub usage_domains {
1322             my $self = shift;
1323             return synset_pointers($self,";u");
1324             }
1325             sub add_usage_domains {
1326             my $self = shift;
1327             my @add_synsets = @_;
1328             add_synset_pointers($self,";u",@add_synsets);
1329             }
1330             sub delete_usage_domains {
1331             my $self = shift;
1332             my @delete_synsets = @_;
1333             delete_synset_pointers($self,';u',@delete_synsets);
1334             }
1335              
1336             # domain terms
1337             sub all_domainterms {
1338             my $self = shift;
1339             return synset_pointers($self,"-");
1340             }
1341             sub category_domainterms {
1342             my $self = shift;
1343             return synset_pointers($self,"-c");
1344             }
1345             sub add_category_domainterms {
1346             my $self = shift;
1347             my @add_synsets = @_;
1348             add_synset_pointers($self,"-c",@add_synsets);
1349             }
1350             sub delete_category_domainterms {
1351             my $self = shift;
1352             my @delete_synsets = @_;
1353             delete_synset_pointers($self,'-c',@delete_synsets);
1354             }
1355              
1356             sub region_domainterms {
1357             my $self = shift;
1358             return synset_pointers($self,"-r");
1359             }
1360             sub add_region_domainterms {
1361             my $self = shift;
1362             my @add_synsets = @_;
1363             add_synset_pointers($self,"-r",@add_synsets);
1364             }
1365             sub delete_region_domainterms {
1366             my $self = shift;
1367             my @delete_synsets = @_;
1368             delete_synset_pointers($self,'-r',@delete_synsets);
1369             }
1370              
1371             sub usage_domainterms {
1372             my $self = shift;
1373             return synset_pointers($self,"-u");
1374             }
1375             sub add_usage_domainterms {
1376             my $self = shift;
1377             my @add_synsets = @_;
1378             add_synset_pointers($self,"-u",@add_synsets);
1379             }
1380             sub delete_usage_domainterms {
1381             my $self = shift;
1382             my @delete_synsets = @_;
1383             delete_synset_pointers($self,'-u',@delete_synsets);
1384             }
1385              
1386             sub lex_info {
1387             my $self = shift;
1388             return $lexfiles[$self->{filenum}];
1389             }
1390              
1391             sub frames {
1392             my $self = shift;
1393             my @frames = split(/\Q$Lingua::Wordnet::SUBDELIM/,$self->{frames});
1394             my @frametext;
1395             my $frame;
1396             foreach $frame (@frames) {
1397             my ($fnum,$wnum) = split(/ /,$frame);
1398             if ($wnum > 0) {
1399             my $wordtext = " (" . (split(/\Q$Lingua::Wordnet::SUBDELIM/,$self->{words}))[$wnum] . ")";
1400             push(@frametext,$vrbsents[$fnum] . $wordtext);
1401             } else {
1402             push(@frametext,$vrbsents[$fnum]);
1403             }
1404             }
1405             return @frametext;
1406             }
1407              
1408             sub gloss {
1409             my $self = shift;
1410             my $gloss = shift;
1411             if ($gloss) {
1412             $self->{gloss} = $gloss;
1413             }
1414             return $self->{gloss};
1415             }
1416              
1417             sub DESTROY {
1418             my $self = shift;
1419             $self = {};
1420             }
1421              
1422             1;
1423             __END__