File Coverage

blib/lib/Tree/FP.pm
Criterion Covered Total %
statement 364 386 94.3
branch 91 102 89.2
condition 36 42 85.7
subroutine 50 52 96.1
pod 8 15 53.3
total 549 597 91.9


line stmt bran cond sub pod time code
1             package Tree::FP;
2             # Developer : Martin Paczynski
3             # Copyright (c) 2003 Martin Paczynski. All rights reserved.
4             # This package is free software and is provided "as is" without express or
5             # implied warranty. It may be used, redistributed and/or modified under the
6             # same terms as Perl itself.
7              
8              
9             $VERSION = '0.04';
10             # Whenever version number is increased, check for version in code comments. e.g. if current version is 0.5 going to 0.6,
11             # search for 'v 0.5' within this document
12              
13 1     1   11354 use Exporter;
  1         2  
  1         69  
14             @ISA=(Exporter);
15             @EXPORT=qw();
16             @EXPORT_OK=qw(combinations);
17 1     1   5 use strict;
  1         2  
  1         56  
18 1     1   880 use POSIX;
  1         7033  
  1         6  
19              
20              
21              
22             # FP-Tree Constructor
23             # Note: default support and confidence level is 10%, they can be set using the setter methods (see below)
24             # Parameters:
25             # List of items to be stored in header table, in descending order of frequency in the transactional DB being mined.
26             # Example:
27             # Given the following
28             # Item | Count
29             # ------------
30             # itm1 | 2
31             # itm2 | 4
32             # itm3 | 3
33             # itm4 | 5
34             # itm5 | 3
35             # The code for creating a new FP-Tree would be
36             # $fptree = Tree::FP->new(itm4,itm2,itm3,itm5,itm1);
37             # Returns
38             # A Tree::FP if successful, undef otherwise.
39             sub new
40             {
41 29     29 1 33 my $class = shift;
42            
43 29 100       57 unless(@_)
44             {
45 8         13 return undef;
46             }
47            
48 21         17 my %header_table;
49             my @lookup;
50 21         24 my $count = 0;
51            
52            
53             # Create header nodes from each of the items passed
54 21         39 while(my $item_name = shift)
55             {
56             # If a new FP_Tree_header_node cannot be constructed, the FP_Tree cannot be constructed correctly.
57 45 50       97 unless($header_table{$item_name} = FP_Tree_header_node->new($item_name,++$count))
58             {
59 0         0 return undef;
60             }
61            
62 45         120 $lookup[$count]=$item_name;
63             }
64            
65 21         53 my $self = {
66             header_table => \%header_table,
67             reverse_lookup => \@lookup,
68             root => FP_Tree_node->new, # The root is a standard FP-Tree node except its item field is blank
69             lowest_rank => $count, # Rank of the lowest ranking item in header table
70             patterns => {}, # Hash (ref) that will contain all patterns of the tree
71             max_pattern_len => 0, # The longest pattern found in the tree (initially zero)
72             support => 0.1, # Percent support..
73             confidence => 0.1, # and confidence are expressed as decimal values
74             total_transactions => 0, # Total transactions loaded into the FP-Tree
75             err => '', # Error string
76             };
77            
78 21         60 bless $self, $class;
79             }
80            
81             # Function resets the tree to be allow it to be remined at a different support level
82             sub reset_tree
83             {
84 8     8 0 50 my $self = shift;
85            
86             # Delete all patterns
87 8         15 $self->{patterns} = {};
88            
89 8         34 my @rlook = @{$self->{reverse_lookup}};
  8         26  
90            
91 8         20 for(my $c = $#rlook ; $c > 0; $c--)
92             {
93 40 100       99 if($self->{header_table}->{$rlook[$c]}->{sibling})
94             {
95 35         73 $self->{header_table}->{$rlook[$c]}->{sibling}->reset_used;
96             }
97             }
98            
99 8         17 1;
100             }
101            
102             # Return FP-Tree pattern hash ref.
103             sub patterns
104             {
105 0     0 0 0 my $self = shift;
106 0         0 $self->{patterns};
107             }
108            
109             # Return the root of the FP-Tree
110             sub root
111             {
112 151     151 0 157 my $self = shift;
113 151         315 $self->{root};
114             }
115            
116             # Insert new transaction into FP-Tree
117             # Parameters
118             # List items that appeared in transaction
119             # The items need not be sorted in any way as the function sorts them before further processing.
120             # Also, the list can contain duplicates, as these will be removed before further processing.
121             # If function returns 0, something went wrong with during insertion, check 'FP-Tree'->err for error message.
122             sub insert_tree
123             {
124 80     80 1 84 my $self = shift;
125              
126             # If no items passed, nothing to insert.
127 80 100       154 if($#_<0)
128             {
129 1         7 $self->{err} = "'insert_tree' called with null transaction.";
130 1         4 return 0;
131             }
132            
133             # Strip duplicates from input.
134 79         120 my %unique = map {$_,1} @_;
  135         295  
135 79         170 my @items = keys %unique;
136              
137             # For each item, see if it was found in header table. If not, FP-Tree not constructed correctly OR
138             # there was an error with items being passed to function. In either case, exit.
139 79         167 for(my $a=0; $a <= $#items; $a++)
140             {
141 131 100       128 unless(grep {$_ eq $items[$a]} (keys %{$self->{header_table}}))
  422         934  
  131         276  
142             {
143 2         7 $self->{err} = "Item '" . $items[$a] . "' not found in Header Table.";
144 2         9 return 0;
145             }
146             }
147              
148             # Sort items by L order.
149 77         131 @items = sort {$self->{header_table}->{$a}->rank <=> $self->{header_table}->{$b}->rank} @items;
  62         135  
150              
151            
152             # Call _tree_insert on the root node of FP_Tree. It should return a positive integer
153             # if insertion was successful.
154 77 50       133 if(_tree_insert($self,$self->root,@items))
155             {
156             # Increment the number of transactions.
157 77         131 $self->{total_transactions}++;
158 77         366 return 1;
159             }
160             else
161             {
162 0         0 return 0;
163             }
164             }
165            
166            
167             sub _tree_insert
168             {
169 206     206   202 my $self = shift;
170 206         193 my $node = shift;
171            
172 206         236 my $item = shift;
173            
174 206 100       322 unless($item)
175             {
176 77         230 return 0;
177             }
178            
179 129         101 my $next_node;
180            
181             # If the current node has a child with label $item...
182 129 100       198 if($next_node = $node->child_exists($item))
183             {
184             #... Then increment the count of this child node.
185 68         104 $next_node->inc_count;
186             }
187             else
188             {
189             #... Otherwise:
190            
191             # If node already has children, then increase the number of paths
192 61 100       142 if($node->children)
193             {
194 15         26 $node->inc_num_path;
195             }
196            
197             # Set $next_node to be the new child node with label $item
198 61         111 $next_node = $node->add_child($item);
199            
200             # Starting at the header table node with label $item, find the node whose sibling is null
201 61         90 my $sib_ptr = $self->{header_table}->{$item};
202 61         130 while(ref $sib_ptr->{sibling})
203             {
204 24         47 $sib_ptr = $sib_ptr->{sibling};
205             }
206             # Set the pointer to the sibling of this node to be the $next_node.
207 61         76 $sib_ptr->{sibling} = $next_node;
208             }
209            
210             # Increase the count of $item field in header table.
211 129         293 $self->{header_table}->{$item}->inc_count;
212            
213             # Recursively call _tree_insert, until all items have been added to the FP-Tree.
214 129         196 1 + _tree_insert($self,$next_node,@_);
215             }
216            
217            
218             # Given a set and a subset of this set, function returns the complement subset.
219             # Parameters:
220             # 1. Array ref constituting a set
221             # 2. Array ref constituting subset
222             # Return:
223             # Array with complement subset of set (1.)
224             sub complement
225             {
226 43     43 0 37 my %all = map {$_,1} @{+shift};
  80         166  
  43         250  
227 43         49 grep {!$all{$_}} @{+shift};
  144         381  
  43         58  
228             }
229            
230             # Returns confidence set for FP-Tree.
231             sub confidence
232             {
233 2     2 1 3 my $self = shift;
234 2         7 $self->{confidence};
235             }
236              
237             # Returns support set for FP-Tree.
238             sub support
239             {
240 3     3 1 4 my $self = shift;
241 3         12 $self->{support};
242             }
243              
244             # Sets confidence of FP-Tree.
245             # Parameters:
246             # 1. Decimal corresponding to appropriate confidence level (e.g. 0.1 for 10%, 0.01 for 1%)
247             sub set_confidence
248             {
249 5     5 1 9 my $self = shift;
250 5         5 my $confidence = shift;
251            
252 5 100       16 if($confidence <= 0)
    100          
253             {
254 2         9 $self->{err} = "Confidence must be a positive value [ $confidence ].";
255 2         6 return 0;
256             }
257             elsif($confidence > 1)
258             {
259 1         6 $self->{err} = "Confidence cannot exceed 100% (expressed as a decimal) [ $confidence ].";
260 1         4 return 0;
261             }
262            
263 2         8 $self->{confidence} = $confidence;
264             }
265              
266             # Sets support of FP-Tree.
267             # Parameters:
268             # 1. Decimal corresponding to appropriate support level (e.g. 0.1 for 10%, 0.01 for 1%)
269             sub set_support
270             {
271 6     6 1 10 my $self = shift;
272 6         7 my $support = shift;
273            
274 6 100       19 if($support <= 0)
    100          
275             {
276 2         22 $self->{err} = "Support must be a positive value [ $support ].";
277 2         8 return 0;
278             }
279             elsif($support > 1)
280             {
281 1         5 $self->{err} = "Support cannot exceed 100% (expressed as a decimal) [ $support ].";
282 1         4 return 0;
283             }
284            
285 3         17 $self->{support} = $support;
286             }
287              
288             ####
289             # Returns number of paths in the tree
290             sub num_path
291             {
292 24     24 0 25 my $self = shift;
293 24         34 $self->root->num_path;
294             }
295            
296             # Returns latest error message for FP-Tree.
297             sub err
298             {
299 12     12 1 31 my $self = shift;
300 12         56 $self->{err}
301             }
302            
303            
304             # Function mines associate rules of FP-Tree for given support (and confidence) level.
305             # Parameters:
306             # None
307             # Function returns array of FP_Tree_association_rules.
308             # Each FP_Tree_association_rule contains the following methods, returning corresponding values
309             # left
310             # right
311             # support
312             # confidence
313             # 'left' and 'right' correspond to the left and right side of association rule.
314             # Example: If ham and cheese then bread
315             # ham and cheese are the left side, and bread is the right side.
316             # Both 'left' and 'right' are refs to arrays containing individual item labels.
317             sub association_rules
318             {
319 6     6 1 8 my $self = shift;
320            
321 6         6 my @patterns;
322             my @freq_patterns;
323            
324 6         45 $self->reset_tree;
325            
326             # First call sub _fp_growth, to extract the frequent patterns. This is a slight modification of FPGROWTH algorithm.
327 6 100       15 unless(@patterns = $self->_fp_growth)
328             {
329 3         27 return ();
330             }
331            
332 3         10 for(@patterns)
333             {
334 87         85 @{$_->{pattern}} = sort {$self->{header_table}->{$a}->rank <=> $self->{header_table}->{$b}->rank} @{$_->{pattern}};
  87         183  
  119         260  
  87         214  
335            
336 87 100       133 if($self->{max_pattern_len} < scalar(@{$_->{pattern}}))
  87         187  
337             {
338 2         3 @freq_patterns = ();
339 2         3 $self->{max_pattern_len} = scalar(@{$_->{pattern}});
  2         4  
340             }
341              
342 87         88 my $key = join '~', @{$_->{pattern}};
  87         146  
343            
344 87 100       215 unless($self->{patterns}->{$key})
345             {
346 45 100       50 if($self->{max_pattern_len} == scalar(@{$_->{pattern}}))
  45         103  
347             {
348 5         9 push @freq_patterns, $_;
349             }
350 45         122 $self->{patterns}->{$key} = $_;
351             }
352             }
353            
354 3         7 my @association_rules;
355            
356             # Loop through all the MLFPs
357 3         10 for(my $c=0; $c <= $#freq_patterns; $c++)
358             {
359            
360             # Populate array with all combinations of MLFP
361 5         6 my @all_combos = @{&combinations(@{$freq_patterns[$c]->{pattern}})};
  5         5  
  5         14  
362            
363             # Convert base pattern to a string
364 5         11 my $base_pat_str = join '~', @{$freq_patterns[$c]->{pattern}};
  5         15  
365              
366             # Get the support count for the base pattern
367 5         12 my $support_count = $self->{patterns}->{$base_pat_str}->{count};
368            
369             # Convert support count to %
370 5         11 my $support = $support_count/$self->{total_transactions};
371            
372             # For each sub pattern...
373 5         74 for(my $d=0;$d <= $#all_combos; $d++)
374             {
375             # Get the complement
376 43         93 my @compliment_arr = &complement($all_combos[$d],$freq_patterns[$c]->{pattern});
377            
378             # If complement empty, this is the base pattern so go to next sub pattern.
379 43 100       84 unless(@compliment_arr)
380             {
381 5         25 next;
382             }
383            
384             # Convert the complement array into string
385 38         40 my $left_str = join '~', @{$all_combos[$d]};
  38         62  
386            
387             # Compute confidence for association
388 38         87 my $confidence = $support_count / $self->{patterns}->{$left_str}->{count};
389              
390             # Push new association rule onto @association_rules.
391 38         94 push @association_rules, FP_Tree_association_rule->new($all_combos[$d], \@compliment_arr, $support, $confidence);
392             }
393             }
394              
395             # Sort association rules in descending of confidence. (For those that have not see an sort of this sort it is
396             # a Schwartzian Transformation named after Randal L. Schwartz. Substantial savings in computational time.)
397 3         11 @association_rules = map $_->[0], sort {$b->[1] <=> $a->[1]} map [$_, $_->confidence], @association_rules;
  93         193  
398             }
399              
400              
401            
402             # Function uses modified FPGROWTH algorithm to find Maximal Length Frequent Patterns (MLFPs)
403             # Parameters:
404             # 1. Min. support count (optional)
405             # Returns an array of hash refs, with 'pattern' field being a array ref containing items in pattern and 'count' field being
406             # the support count for the pattern. Returns empty array on failure.
407             # Note: This method is never called directly, only by indirectly from the asssociation_rule method or recursively from itself.
408             sub _fp_growth
409             {
410 25     25   26 my $self = shift;
411             # If support count is not provided, calculate it and round up to a full transaction (i.e. not a float).
412 25 100       73 my $support_count = $_[0]?$_[0]:POSIX::ceil $self->{total_transactions}*$self->{support};
413            
414             # If support is set so low that support count rounds to zero on the system, exit.
415 25 100       42 unless($support_count)
416             {
417 1         2 $self->{err} = "Support count equals zero. FP Tree not fully loaded or support level set too low.";
418 1         4 return ();
419             }
420            
421            
422 24         21 my @all_items;
423             my @all_combos;
424 24         30 my $check_count = 0;
425            
426             # If the number of paths in the tree is equal to one, simply find all the combinations (and their support count) of the
427             # pattern encoded by the tree.
428             # This is the base case
429 24 100       39 if($self->num_path == 1)
430             {
431 15         15 my @rlook = @{$self->{reverse_lookup}};
  15         48  
432 15         17 my @patterns;
433            
434             # Used to check that header table is accurate, see below
435 15         14 my $order_check = 0;
436            
437 15         30 for(my $c = $#rlook ; $c > 0; $c--)
438             {
439             # Unless the support count in the header node increases (non-strictly) as rank goes up, something is is incorrect
440             # either FP-Tree not created correctly or not fully loaded.
441 25 50       54 if($self->{header_table}->{$rlook[$c]}->count < $order_check)
442             {
443 0         0 $self->{err} = "Frequency table not accurate. [$check_count " . $self->{header_table}->{$rlook[$c]}->count . "]";
444 0         0 return ();
445             }
446              
447 25         46 $order_check = $self->{header_table}->{$rlook[$c]}->count;
448              
449             # If the support count of an item is below the min support count, no patterns it creates can meet criteria
450 25 100       50 if($self->{header_table}->{$rlook[$c]}->count < $support_count)
451             {
452             # check that each header node is actually initialized
453 4 50       8 unless($self->{header_table}->{$rlook[$c]}->count)
454             {
455 0         0 $self->{err} = "Header table node '" . $rlook[$c] . "' has no count.";
456 0         0 return ();
457             }
458             # and if it is, the simply go to next item
459 4         10 next;
460             }
461            
462 21         39 my $item_ptr = $self->{header_table}->{$rlook[$c]}->{sibling};
463            
464             # Traverse the tree sideways, examining each node with particular 'item name'
465 21         34 while($item_ptr)
466             {
467 21         35 my $l_count = $item_ptr->adj_count;
468 21         34 my @l_pattern = ($item_ptr->item_name);
469            
470             # If the node is already used up (i.e. has been 'read' as much as 'written', then proceed to the next sibling
471 21 100       36 unless($l_count)
472             {
473 3         4 $item_ptr = $item_ptr->{sibling};
474 3         11 next;
475             }
476            
477             # Otherwise, go up the tree, getting ancestor 'item names' until hitting the root
478 18         24 my $parent_ptr = $item_ptr;
479 18         36 while($parent_ptr = $parent_ptr->{parent})
480             {
481 25 100       36 if($parent_ptr->item_name)
482             {
483 7         10 $parent_ptr->inc_used($l_count);
484 7         12 push @l_pattern, $parent_ptr->item_name;
485             }
486             }
487            
488             # add the pattern created to the @patterns
489 18         50 push @patterns, {
490             pattern => \@l_pattern,
491             count => $l_count
492             };
493             # go to next sibling
494 18         63 $item_ptr = $item_ptr->{sibling};
495             }
496            
497             }
498            
499             # If patterns array is empty, then something may have gone wrong (although not necessarily)
500 15 50       25 unless(scalar(@patterns))
501             {
502 0         0 $self->{err} = "No patterns generated. FP Tree may not be fully loaded or support set too high.";
503             }
504             # return the patterns
505 15         46 return @patterns;
506             }
507            
508            
509             # If more than one path
510            
511 9         11 my @lookup = @{$self->{reverse_lookup}};
  9         32  
512            
513 9         21 while(my $key = pop @lookup)
514             {
515 32 100       71 unless($check_count <= $self->{header_table}->{$key}->count)
516             {
517 1         7 $self->{err} = "Frequency table not accurate. [$check_count " . $self->{header_table}->{$key}->count . "]";
518 1         21 return ();
519             }
520            
521 31         62 $check_count = $self->{header_table}->{$key}->count;
522            
523 31 100       76 if($self->{header_table}->{$key}->count < $support_count)
524             {
525             # check that each header node is actually initialized
526 5 50       10 unless($self->{header_table}->{$key}->count)
527             {
528 0         0 $self->{err} = "Header table node '$key' has no count.";
529 0         0 return ();
530             }
531 5         12 next;
532             }
533            
534             # Because thing may have gotten read, get the adjusted count of the item name
535 26         53 my $adj_count = $self->{header_table}->{$key}->adj_count;
536            
537             # get the prefixes for this item name
538 26         56 my @prefixes = $self->get_prefixes($self->{header_table}->{$key}->item_name);
539              
540             # Create a temporary local frequency table for the item names found in the prefixes
541 26         50 my %loc_freq_table = ();
542 26         57 for(my $a=0; $a <= $#prefixes; $a++)
543             {
544 36         31 for(@{$prefixes[$a]->{prefix}})
  36         73  
545             {
546 57         154 $loc_freq_table{$_} += $prefixes[$a]->{support};
547             }
548             }
549            
550             # Get the item name found in the prefixes in order
551 26         62 my @ordered = sort { $loc_freq_table{$b} <=> $loc_freq_table{$a} } keys %loc_freq_table;
  21         40  
552             # And create a new conditional FP-Tree
553 26         64 my $conditional_tree = Tree::FP->new(@ordered);
554            
555             # If there where prefixes yet there is no conditional tree, something is wrong so exit
556 26 50 66     88 if(@ordered && !$conditional_tree)
557             {
558 0         0 $self->{err} = "Conditional FP-Tree could not be created.";
559 0         0 return ();
560             }
561            
562             # Otherwise, go through the prefixes and load them into the conditional FP-Tree
563 26         63 for(my $a=0; $a <= $#prefixes; $a++)
564             {
565 36         91 for(my $b=0; $b < $prefixes[$a]->{support}; $b++)
566             {
567 59         56 $conditional_tree->insert_tree(@{$prefixes[$a]->{prefix}});
  59         128  
568             }
569             }
570            
571 26         28 my @conditional_patterns;
572             my @cond_patterns_plus_item;
573            
574             # If there is a conditional FP-Tree, then...
575 26 100       41 if($conditional_tree)
576             {
577             # Get the patterns by calling fp_growth on it (this is the recursive step
578 19         45 @conditional_patterns = $conditional_tree->_fp_growth($support_count);
579            
580             # If no patterns then set the error of this tree to the error of the conditional tree
581             #### Check logic flow
582 19 50       41 unless(@conditional_patterns)
583             {
584 0         0 $self->{err} = $conditional_tree->err;
585 0         0 next;
586             }
587            
588             # Add this item name to each of the conditional patterns
589 19         44 for(my $d=0; $d <= $#conditional_patterns; $d++)
590             {
591             # Get all the combinations of a given related pattern
592 44         42 my @related_patterns = &combinations(@{$conditional_patterns[$d]->{pattern}});
  44         85  
593             # Then for each combination, append this item
594 44         97 for(my $e=0; $e <= $#related_patterns; $e++)
595             {
596 44         36 my @l_arr = @{$related_patterns[$e]};
  44         69  
597            
598 44         93 for(my $f = 0; $f <= $#l_arr; $f++)
599             {
600 100         96 push @cond_patterns_plus_item, {pattern => [$key, @{$l_arr[$f]}], count => $conditional_patterns[$d]->{count}};
  100         515  
601             }
602             }
603            
604 44         45 push @{$conditional_patterns[$d]->{pattern}}, $key;
  44         179  
605             }
606             # Push all this onto the
607 19         30 push @conditional_patterns, @cond_patterns_plus_item;
608 19         67 push @conditional_patterns, {pattern=>[$key], count=>$self->{header_table}->{$key}->count};
609             }
610             else
611             {
612 7         25 @conditional_patterns = ({pattern=>[$key], count=>$self->{header_table}->{$key}->count});
613             }
614            
615 26         235 push @all_combos, @conditional_patterns;
616             }
617            
618             # If no patterns or combinations of patterns where formed, then set the error code BUT do nothing else
619             # since this might be a deep recursion and patterns may exist higher up
620 8 100       19 unless(scalar(@all_combos))
621             {
622 1         4 $self->{err} = "No patterns with minimum support of " . $self->support * 100 . "% found.";
623             }
624 8         38 return @all_combos;
625             }
626              
627              
628             # Function retrieves all patterns in the FP-Tree that have 'item name' as their suffix.
629             # Parameters:
630             # 1. 'item name'
631             # Returns:
632             # Hash reference containing all patterns generated from 'item name' if successful, undefined or empty if something went wrong.
633             sub get_prefixes
634             {
635 26     26 0 26 my $self = shift;
636 26         23 my $item_name = shift;
637            
638             # If no item name provided, then nothing to do
639 26 50       42 unless($item_name)
640             {
641 0         0 return undef;
642             }
643            
644            
645             # Get the pointer to the first node in the FP-Tree with label #item name# from the header table.
646 26         44 my $item_ptr = $self->{header_table}->{$item_name}->{sibling};
647            
648            
649 26         26 my @conditional_tree_load;
650            
651             # While the item pointer continues to point to a valid FP-Tree node..
652 26         61 while($item_ptr)
653             {
654             # Create an array of items, beginning with the current item, followed by the prefix
655             # OF THIS NODE. In other words, only get the items between this node and the root node.
656 50         85 my @combo = (
657             $item_ptr->get_prefix($self->root) # This is the get_prefix method of the FP_Tree_node object, see below for usage.
658             );
659 50 100       108 if(@combo)
660             {
661 36         69 push @conditional_tree_load, {prefix => \@combo, support => $item_ptr->count};
662             }
663            
664             # Set the item pointer to be the next node of the name item name
665 50         121 $item_ptr = $item_ptr->{sibling};
666             }
667            
668 26         54 return @conditional_tree_load;
669              
670             }
671              
672            
673             # Function finds all combinations of a given pattern.
674             # Parameters
675             # 1. The first item of the pattern
676             # 2. Array containing the rest of the items
677             # Returns
678             # Array ref where each element is itself an array ref representing the pattern generated.
679             sub combinations
680             {
681             # By shifting the first element off the input array, we guarantee that the function will eventually exit
682 134     134 0 136 my $first = shift;
683            
684             # If nothing got shifted off, that means nothing left so return an empty array ref
685 134 100       204 unless($first)
686             {
687 49         95 return [];
688             }
689            
690 85         149 my @new_combos = ([$first]);
691            
692             # This is the recursive step. Get all the combinations of what remains of the pattern array passed to the funciton.
693 85         82 my @found_combos = @{&combinations(@_)};
  85         135  
694            
695             # Push these found combos onto the new combo array
696 85         104 push @new_combos, @found_combos;
697            
698             # Then for each of the elements of the found combos, push a new array ref onto the new combos array that starts off
699             # with the first element, and then has the sub pattern after it.
700 85         115 foreach (@found_combos)
701             {
702 58         145 push @new_combos, [$first,@$_];
703             }
704              
705             # Return a ref to the new combos array
706 85         189 return \@new_combos;
707             }
708            
709            
710             # The following is an FP_Tree_node object, the main building block of FP-Trees (also see FP_Tree_header_node below).
711             {
712             package FP_Tree_node;
713            
714             # Node constructor.
715             # Parameters
716             # 1. Name of the item [optional, but only if constructing a root node]
717             # 2. Parent, an FP_Tree_node [optional, but only if constructing a root node]
718             sub new
719             {
720 87     87   105 my $class = shift;
721 87         95 my $item_name = shift;
722 87         83 my $parent = shift;
723            
724             # For now (v 0.04) only check that if an item name is passed that the node also has a parent.
725 87 100 100     258 if($item_name && !$parent)
726             {
727 1         4 return undef;
728             }
729            
730 86         511 my $self = {
731             item_name => $item_name,
732             parent => $parent,
733             sibling => undef, # This is a pointer to the next FP_Tree_node with the same item name label.
734             child_nodes => {}, # Hash ref to all child nodes of this node.
735             count => 1, # Number of times this node has been traversed. Creation counts as one traversal.
736             num_path => 1, #### Number of paths
737             used => 0, # Number of times this node has been read.
738             err => '' # Stores any error messages related to this node.
739             };
740 86         398 bless $self, $class;
741             }
742            
743             # Returns name of the item
744             sub item_name
745             {
746 121     121   121 my $self = shift;
747 121         305 $self->{item_name};
748             }
749            
750             # Function gets prefix of current node. In other words, it gets the item name labels of all nodes above it
751             # in the same branch of the FP-Tree.
752             # Parameters:
753             # 1. Root FP_Tree_node
754             # Returns:
755             # Array containing all the item names, in the order they were encountered.
756             sub get_prefix
757             {
758 55     55   61 my $self = shift;
759 55         98 my $root = shift;
760            
761             # Only check to make sure that self and root are not the same. For now (v 0.04) assume that only FP_Tree_nodes
762             # are going to be passed.
763 55 100       107 if($self == $root)
764             {
765 1         2 $self->{err} = "'get_prefix' called on self or incorrect root provided";
766 1         14 return 0;
767             }
768            
769             # Get the adjusted count for this node.
770 54         106 my $count = $self->adj_count;
771             # Increase one's own count by this amount.
772 54         91 $self->inc_used($count);
773            
774 54         52 my @pattern;
775             # Set the next node pointer to the parent of the current node.
776 54         67 my $parent_node = $self->{parent};
777             # While a parent is not the root of the tree...
778 54         127 while($parent_node != $root)
779             {
780             # Push the parent's item name onto the pattern array.
781 58         107 push @pattern, $parent_node->item_name;
782             # Increment the parent's used count by the adjusted count of the node get_prefix was called on.
783 58 50       96 unless($parent_node->inc_used($count))
784             {
785             # Unlikely but if somehow parent is not a FP_Tree_node or if somebody hacked together a cyclic tree
786 0         0 $self->{err} = "Error occured while attempting to increment count of ancestor of " . $self->item_name . " [" . $parent_node->item_name . "]";
787 0         0 return 0;
788             }
789             # Set the next node pointer to its parent
790 58         134 $parent_node = $parent_node->{parent};
791             }
792            
793 54 100       101 unless(@pattern)
794             {
795 17         26 $self->{err} = 'No pattern generated';
796             }
797            
798             # Return the pattern array;
799 54         139 return @pattern;
800             }
801            
802             # Method resets the 'used' property of the node to zero, and calls itself on the sibling of the node (if one exists).
803             sub reset_used
804             {
805 1     1   7995 no warnings;
  1         1  
  1         1178  
806 71     71   69 my $self = shift;
807 71         86 $self->{used} = 0;
808            
809 71 100       177 if($self->{sibling})
810             {
811 36         64 $self->{sibling}->reset_used;
812             }
813             }
814            
815            
816             # Method returns the adjusted count of the node. This is the number of times node was traversed minus the number of times
817             # it has been read (since last reset).
818             sub adj_count
819             {
820 127     127   126 my $self = shift;
821 127         192 $self->count - $self->used;
822             }
823            
824             # Method returns the number of times a node has been read.
825             sub used
826             {
827 127     127   135 my $self = shift;
828 127         290 $self->{used};
829             }
830            
831             # Method increments the number of times that a node has been read.
832             # Parameters:
833             # 1. Positive integer reflecting number of times node read.
834             # Returns:
835             # 1 if increment was successful, and 0 if the read count exceeded the traversal count.
836             sub inc_used
837             {
838 120     120   115 my $self = shift;
839 120         110 my $by = shift;
840 120 100 66     575 if($by && $by > 0)
841             {
842 105         138 $self->{used} += $by;
843             }
844            
845 120 50       241 if($self->{used} > $self->{count})
846             {
847 0         0 $self->{err} = "Node read more times than written [FP_Tree_node : " . $self->item_name," " . $self->{used} . " " . $self->{count} . "]";
848 0         0 return 0;
849             }
850            
851 120         183 return 1;
852             }
853            
854             # Method returns number of children a node has
855             sub children
856             {
857 63     63   61 my $self = shift;
858 63         49 scalar(keys %{$self->{child_nodes}});
  63         195  
859             }
860            
861             # Method used to determine whether node has a child by a particular name
862             # Parameters:
863             # 1. Name of child looked for (string corresponding to item name)
864             # Returns
865             # The child if one is found, or undef if not.
866             sub child_exists
867             {
868 193     193   190 my $self = shift;
869 193         180 my $looking_for = shift;
870            
871 193         543 $self->{child_nodes}->{$looking_for};
872             }
873            
874             # Method adds child to a node.
875             # Parameters:
876             # 1. Item name (string).
877             # Returns:
878             # New child node or undef if: 1. no name specified, 2. a child with that name already exists (do not want to overwrite
879             # children under any circumstances, or 3. FP_Tree_node creation was unsuccessful.
880             sub add_child
881             {
882 63     63   68 my $self = shift;
883 63         56 my $child_name = shift;
884              
885 63 100 66     161 unless($child_name && !$self->child_exists($child_name))
  1         4  
886             {return undef;}
887            
888 62         126 $self->{child_nodes}->{$child_name} = FP_Tree_node->new($child_name,$self);
889             }
890            
891             # Returns the sibling of the node
892             sub sibling
893             {
894 0     0   0 my $self = shift;
895 0         0 $self->{sibling};
896             }
897            
898             # Sets the sibling of the node
899             # Parameters:
900             # 1. An FP_Tree_node
901             # Returns:
902             # 1 if successful, 0 otherwise
903             sub set_sibling
904             {
905 2     2   3 my $self = shift;
906 2         2 my $sibling = shift;
907            
908 2 100       4 if($sibling->item_name eq $self->item_name)
909             {
910 1         2 $self->{sibling} = $sibling;
911 1         9 return 1;
912             }
913            
914 1         2 $self->{err} = "Sibling 'item name' label not the same as own item name [ sib: " . $sibling->item_name . ", self: " . $self->item_name . " ]";
915 1         3 return 0;
916             }
917            
918             # Returns node's count
919             sub count
920             {
921 165     165   155 my $self = shift;
922 165         335 $self->{count};
923             }
924            
925             # Increments the node's count.
926             # Parameters:
927             # None
928             # Returns
929             # Newly adjusted count
930             sub inc_count
931             {
932 72     72   75 my $self = shift;
933 72         98 ++$self->{count};
934             }
935            
936            
937             #####
938             # Returns the number of paths starting at this node
939             sub num_path
940             {
941 24     24   24 my $self = shift;
942 24         59 $self->{num_path};
943             }
944             #####
945             # Increases number of paths starting at node, propigates upward to root (technically, does not check for root, but last ancestor).
946             sub inc_num_path
947             {
948 27     27   26 my $self = shift;
949 27         26 my $affected_count = 1;
950            
951 27         25 $self->{num_path}++;
952 27 100       53 if($self->{parent})
953             {
954 12         21 $affected_count += $self->{parent}->inc_num_path;
955             }
956            
957 27         32 $affected_count;
958             }
959            
960             # Returns the node's error string
961             sub err
962             {
963 6     6   8 my $self = shift;
964 6         22 $self->{err};
965             }
966             }
967              
968              
969             # The following is an FP_Tree_header_node object, a component of FP-Trees.
970             {
971             package FP_Tree_header_node;
972            
973             # Node constructor
974             # Parameters
975             # 1. Item name (string)
976             # 2. Rank (positive integer, reflecting relative rank, higher number, lower rank)
977             # Returns
978             # an FP_Tree_header_node or undef if incorrect parameters were supplied.
979             sub new
980             {
981 50     50   59 my $class = shift;
982 50         45 my $item_name = shift;
983 50         48 my $rank = shift;
984            
985 50 100 100     268 unless($item_name && $rank && $rank > 0)
      100        
986             {
987 4         16 return undef;
988             }
989            
990            
991 46         162 my $self = {
992             item_name => $item_name,
993             sibling => undef, # Pointer to the first FP_Tree_node with label #item name# in the FP-Tree.
994             rank => $rank,
995             count => 0, # Total number of transactions that included #item name#
996             err => ''
997             };
998            
999 46         193 bless $self, $class;
1000             }
1001            
1002             # Returns the rank of the node
1003             sub rank
1004             {
1005 364     364   403 my $self = shift;
1006 364         774 $self->{rank};
1007             }
1008            
1009             # Returns the item name label of the node
1010             sub item_name
1011             {
1012 31     31   43 my $self = shift;
1013 31         74 $self->{item_name};
1014             }
1015            
1016             # Returns error
1017             sub err
1018             {
1019 1     1   2 my $self = shift;
1020 1         4 $self->{err};
1021             }
1022            
1023             # Returns the pointer to the first FP_Tree_node with label #item name# in the FP-Tree.
1024             sub sibling
1025             {
1026 1     1   2 my $self = shift;
1027 1         11 $self->{sibling};
1028             }
1029            
1030             # Sets the sibling of the node
1031             # Parameters:
1032             # 1. An FP_Tree_node
1033             # Returns:
1034             # 1 if successful, 0 otherwise
1035             sub set_sibling
1036             {
1037 2     2   2 my $self = shift;
1038 2         3 my $sibling = shift;
1039            
1040 2 100       5 if($sibling->item_name eq $self->item_name)
1041             {
1042 1         2 $self->{sibling} = $sibling;
1043 1         3 return 1;
1044             }
1045            
1046 1         5 $self->{err} = "Sibling 'item name' label not the same as own item name [ sib: " . $sibling->item_name . ", self: " . $self->item_name . " ]";
1047 1         4 return 0;
1048             }
1049            
1050             # Increments the node's count.
1051             # Parameters:
1052             # None
1053             # Returns
1054             # Newly adjusted count
1055             sub inc_count
1056             {
1057 129     129   128 my $self = shift;
1058 129         153 ++$self->{count};
1059             }
1060            
1061             # Returns node's count
1062             sub count
1063             {
1064 205     205   209 my $self = shift;
1065 205         463 $self->{count};
1066             }
1067            
1068             sub adj_count
1069             {
1070 26     26   27 my $self = shift;
1071 26         24 my $adj_count = 0;
1072            
1073 26         24 my $item_ptr = $self;
1074            
1075 26         54 while($item_ptr = $item_ptr->{sibling})
1076             {
1077 50         77 $adj_count += $item_ptr->adj_count;
1078             }
1079 26         35 $adj_count;
1080            
1081             }
1082             }
1083              
1084             # The following is an FP_Tree_association_rule object used for creating/storing association rules generated from FPGROWTH
1085             # Note: no part of an FP_Tree_association_rule object can be changed after it is created.
1086             {
1087             package FP_Tree_association_rule;
1088            
1089             # Constructor of FP_Tree_association_rule
1090             # Parameters:
1091             # 1. Left side of association rule (a pattern array ref)
1092             # 2. Right side of association rule (a pattern array ref)
1093             # 3. Support of association rule (percentage, given as a decimal)
1094             # 4. Confidence of assocciation rule (percentage, given as a decimal)
1095             # Returns
1096             # A new FP_Tree_association_rule if successful, undef if all parameters were not passed correctly
1097             sub new
1098             {
1099 52     52   158 my $class = shift;
1100 52         56 my $left = shift;
1101 52         45 my $right = shift;
1102 52         53 my $support = shift;
1103 52         46 my $confidence = shift;
1104            
1105 52 100 100     526 unless($left && ref $left && $right && ref $right && $support && $confidence)
      100        
      66        
      100        
      100        
1106             {
1107 10         41 return undef;
1108             }
1109            
1110 42 100 66     304 if($support <= 0 || $support > 1 || $confidence <= 0 || $confidence > 1)
      66        
      100        
1111             {
1112 3         12 return undef;
1113             }
1114            
1115 39         130 my $self = {
1116             left => $left,
1117             right => $right,
1118             support => $support,
1119             confidence => $confidence
1120             };
1121            
1122 39         162 bless $self, $class;
1123             }
1124            
1125             # Returns left side of association rule (a pattern array ref)
1126             sub left
1127             {
1128 1     1   2 my $self = shift;
1129 1         2 @{$self->{left}};
  1         4  
1130             }
1131            
1132             # Returns right side of association rule (a pattern array ref)
1133             sub right
1134             {
1135 1     1   2 my $self = shift;
1136 1         1 @{$self->{right}};
  1         4  
1137             }
1138            
1139             # Returns support of association rule (percentage, given as a decimal)
1140             sub support
1141             {
1142 3     3   12 my $self = shift;
1143 3         9 $self->{support};
1144             }
1145            
1146             # Returns confidence of assocciation rule (percentage, given as a decimal)
1147             sub confidence
1148             {
1149 51     51   407 my $self = shift;
1150 51         140 $self->{confidence};
1151             }
1152            
1153             }
1154              
1155             1;
1156              
1157             __END__