File Coverage

blib/lib/Data/Trie.pm
Criterion Covered Total %
statement 41 57 71.9
branch 10 14 71.4
condition n/a
subroutine 5 7 71.4
pod 0 5 0.0
total 56 83 67.4


line stmt bran cond sub pod time code
1             $VERSION = '0.01';
2              
3 1     1   9031 use strict;
  1         3  
  1         703  
4              
5             package Data::Trie;
6              
7             #creates a new Trie node and initializes its value and daughters to zero
8             sub new {
9 23     23 0 57 my $self = {};
10 23         46 my $class = shift;
11             #does this node terminate a word?
12 23         78 $self->{value} = 0;
13             #is this node terminate a prefix of further words?
14 23         45 $self->{daughters} = {};
15 23         87 bless $self, $class;
16             }
17              
18             #returns all words in the trie
19             sub getAll {
20 0     0 0 0 my $self = shift;
21             #keeps track of the path in the trie up to this point
22 0         0 my $path = "";
23             #calls a recursive routine to check the path
24 0         0 return $self->_getAllRecurse($path);
25             }
26              
27             #recursive routine to collect all words, called by getAll()
28             #DON'T CALL THIS DIRECTLY; use getAll() instead
29             sub _getAllRecurse {
30 0     0   0 my $self = shift;
31 0         0 my $path = shift;
32 0         0 my $daughters = $self->{daughters};
33             #the set of words to return
34 0         0 my @result = ();
35             #return the current path if the current node terminates a word
36 0 0       0 if ($self->{value}) {
37 0         0 push @result, $path;
38             }
39 0         0 my @keys = keys %$daughters;
40             #check all daughter nodes recursively adding their results to current ones
41 0         0 foreach my $letter (@keys) {
42 0         0 my $newpath = $path . $letter;
43 0         0 my @letterresult = $daughters->{$letter}->_getAllRecurse($newpath);
44 0         0 push @result, @letterresult;
45             }
46 0         0 return @result;
47             }
48              
49             #adds a word to the tree by recursively checking each letter of the word and
50             #adding nodes as needed.
51             sub add {
52 27     27 0 35 my $self = shift;
53 27         40 my $str = shift;
54             #data can be added or not
55 27         30 my $data = shift;
56             #separates first letter from the rest
57 27         39 my $first = substr $str, 0, 1;
58 27         32 my $rest = substr $str, 1;
59 27         42 my $daughters = $self->{daughters};
60             #checks if there is a node for the first letter
61 27 100       61 if (not exists $daughters->{$first}) {
62             #adds a node if necessary
63 22         43 $daughters->{$first} = Data::Trie->new;
64             }
65 27         39 my $daughter = $daughters->{$first};
66             #is the word only one letter long?
67 27 100       54 if (length $rest > 0) {
68             #recurse on the remaining letters
69 23         64 $daughter->add($rest, $data);
70             } else {
71             #set the value to 1 and store the data
72 4         6 $daughter->{value} = 1;
73 4         8 $daughter->{data} = $data;
74             }
75 27         46 return 1;
76             }
77              
78             #removes a word from the trie (does NOT prune nodes)
79             sub remove {
80 8     8 0 70 my $self = shift;
81 8         10 my $str = shift;
82             #splits the word into first letter and rest
83 8         14 my $first = substr $str, 0, 1;
84 8         13 my $rest = substr $str, 1;
85 8         12 my $daughters = $self->{daughters};
86 8 50       15 if (exists $daughters->{$first}) {
87 8         12 my $daughter = $daughters->{$first};
88 8 100       14 if (length $rest == 0) {
89 1         2 $daughter->{value} = 0;
90             } else {
91 7         24 $daughter->remove($rest);
92             }
93             }
94 8         13 return $str;
95             }
96              
97             #looks up a word in the trie
98             sub lookup {
99 6     6 0 10 my $self = shift;
100 6         10 my $str = shift;
101             #splits the word into first letter and rest
102 6         14 my $first = substr $str, 0, 1;
103 6         8 my $rest = substr $str, 1;
104 6         11 my $daughters = $self->{daughters};
105             #checks if the first letter matches a daughter
106 6 50       18 if (not exists $daughters->{$first}) {
    100          
107             #if not, lookup fails
108 0         0 return 0;
109             #if it does match, recurse on remaining letters
110             } elsif (length $rest == 0) {
111 1         7 return ($daughters->{$first}->{value}, $daughters->{$first}->{data});
112             } else {
113 5         29 return $daughters->{$first}->lookup($rest);
114             }
115             }
116              
117             1;
118              
119             =head1 NAME
120              
121             Data::Trie - An implementation of a letter trie
122              
123             =head1 SYNOPSIS
124              
125             use Data::Trie;
126             $t = Data::Trie->new;
127             $t->add('orange', 'kind of fruit');
128             ($result, $data) = $t->lookup->('orange');
129             $t->remove('orange');
130             $t->getAll;
131              
132             =head1 DESCRIPTION
133              
134             This module implements a letter trie data structure. This is a linked set of
135             nodes representing a set of words. Starting from the root, each letter of an
136             included word is a daughter node of the trie. Hence, if a word is in the trie,
137             there will be a path from root to leaf for that word. If a word is not in the
138             trie, there will be no such path.
139              
140             This structure allows for a relatively compact representation of a set of words.
141             This particular implementation allows each word to be stored alone or with some
142             associated data item.
143              
144             Note that the C method does I prune nodes and thus a C can
145             only grow in size.
146              
147             =head1 COMPARE
148              
149             This implementation differs from L in that C checks for a
150             match, rather than checking for whether the current string is a prefix.
151              
152             =head1 VERSION
153              
154             0.01
155              
156             =head1 AUTHOR
157              
158             Michael Hammond, I
159              
160             =cut
161