File Coverage

blib/lib/Data/Trie.pm

Criterion	Covered	Total	%
statement	41	57	71.9
branch	10	14	71.4
condition			n/a
subroutine	5	7	71.4
pod	0	5	0.0
total	56	83	67.4

line	stmt	bran	sub	pod	time	code
1						$VERSION = '0.01';
2
3	1		1		9031	use strict;
	1				3
	1				703
4
5						package Data::Trie;
6
7						#creates a new Trie node and initializes its value and daughters to zero
8						sub new {
9	23		23	0	57	my $self = {};
10	23				46	my $class = shift;
11						#does this node terminate a word?
12	23				78	$self->{value} = 0;
13						#is this node terminate a prefix of further words?
14	23				45	$self->{daughters} = {};
15	23				87	bless $self, $class;
16						}
17
18						#returns all words in the trie
19						sub getAll {
20	0		0	0	0	my $self = shift;
21						#keeps track of the path in the trie up to this point
22	0				0	my $path = "";
23						#calls a recursive routine to check the path
24	0				0	return $self->_getAllRecurse($path);
25						}
26
27						#recursive routine to collect all words, called by getAll()
28						#DON'T CALL THIS DIRECTLY; use getAll() instead
29						sub _getAllRecurse {
30	0		0		0	my $self = shift;
31	0				0	my $path = shift;
32	0				0	my $daughters = $self->{daughters};
33						#the set of words to return
34	0				0	my @result = ();
35						#return the current path if the current node terminates a word
36	0	0			0	if ($self->{value}) {
37	0				0	push @result, $path;
38						}
39	0				0	my @keys = keys %$daughters;
40						#check all daughter nodes recursively adding their results to current ones
41	0				0	foreach my $letter (@keys) {
42	0				0	my $newpath = $path . $letter;
43	0				0	my @letterresult = $daughters->{$letter}->_getAllRecurse($newpath);
44	0				0	push @result, @letterresult;
45						}
46	0				0	return @result;
47						}
48
49						#adds a word to the tree by recursively checking each letter of the word and
50						#adding nodes as needed.
51						sub add {
52	27		27	0	35	my $self = shift;
53	27				40	my $str = shift;
54						#data can be added or not
55	27				30	my $data = shift;
56						#separates first letter from the rest
57	27				39	my $first = substr $str, 0, 1;
58	27				32	my $rest = substr $str, 1;
59	27				42	my $daughters = $self->{daughters};
60						#checks if there is a node for the first letter
61	27	100			61	if (not exists $daughters->{$first}) {
62						#adds a node if necessary
63	22				43	$daughters->{$first} = Data::Trie->new;
64						}
65	27				39	my $daughter = $daughters->{$first};
66						#is the word only one letter long?
67	27	100			54	if (length $rest > 0) {
68						#recurse on the remaining letters
69	23				64	$daughter->add($rest, $data);
70						} else {
71						#set the value to 1 and store the data
72	4				6	$daughter->{value} = 1;
73	4				8	$daughter->{data} = $data;
74						}
75	27				46	return 1;
76						}
77
78						#removes a word from the trie (does NOT prune nodes)
79						sub remove {
80	8		8	0	70	my $self = shift;
81	8				10	my $str = shift;
82						#splits the word into first letter and rest
83	8				14	my $first = substr $str, 0, 1;
84	8				13	my $rest = substr $str, 1;
85	8				12	my $daughters = $self->{daughters};
86	8	50			15	if (exists $daughters->{$first}) {
87	8				12	my $daughter = $daughters->{$first};
88	8	100			14	if (length $rest == 0) {
89	1				2	$daughter->{value} = 0;
90						} else {
91	7				24	$daughter->remove($rest);
92						}
93						}
94	8				13	return $str;
95						}
96
97						#looks up a word in the trie
98						sub lookup {
99	6		6	0	10	my $self = shift;
100	6				10	my $str = shift;
101						#splits the word into first letter and rest
102	6				14	my $first = substr $str, 0, 1;
103	6				8	my $rest = substr $str, 1;
104	6				11	my $daughters = $self->{daughters};
105						#checks if the first letter matches a daughter
106	6	50			18	if (not exists $daughters->{$first}) {
		100
107						#if not, lookup fails
108	0				0	return 0;
109						#if it does match, recurse on remaining letters
110						} elsif (length $rest == 0) {
111	1				7	return ($daughters->{$first}->{value}, $daughters->{$first}->{data});
112						} else {
113	5				29	return $daughters->{$first}->lookup($rest);
114						}
115						}
116
117						1;
118
119						=head1 NAME
120
121						Data::Trie - An implementation of a letter trie
122
123						=head1 SYNOPSIS
124
125						use Data::Trie;
126						$t = Data::Trie->new;
127						$t->add('orange', 'kind of fruit');
128						($result, $data) = $t->lookup->('orange');
129						$t->remove('orange');
130						$t->getAll;
131
132						=head1 DESCRIPTION
133
134						This module implements a letter trie data structure. This is a linked set of
135						nodes representing a set of words. Starting from the root, each letter of an
136						included word is a daughter node of the trie. Hence, if a word is in the trie,
137						there will be a path from root to leaf for that word. If a word is not in the
138						trie, there will be no such path.
139
140						This structure allows for a relatively compact representation of a set of words.
141						This particular implementation allows each word to be stored alone or with some
142						associated data item.
143
144						Note that the C method does I prune nodes and thus a C can
145						only grow in size.
146
147						=head1 COMPARE
148
149						This implementation differs from L in that C checks for a
150						match, rather than checking for whether the current string is a prefix.
151
152						=head1 VERSION
153
154						0.01
155
156						=head1 AUTHOR
157
158						Michael Hammond, I
159
160						=cut
161