File Coverage

blib/lib/Data/Mining/Apriori.pm
Criterion Covered Total %
statement 22 291 7.5
branch 0 130 0.0
condition 1 45 2.2
subroutine 7 17 41.1
pod 7 11 63.6
total 37 494 7.4


line stmt bran cond sub pod time code
1             package Data::Mining::Apriori;
2            
3 1     1   69037 use 5.010001;
  1         4  
4 1     1   7 use strict;
  1         2  
  1         21  
5 1     1   4 use warnings;
  1         2  
  1         48  
6 1     1   491 use List::PowerSet qw(powerset);
  1         530  
  1         56  
7 1     1   532 use Algorithm::Combinatorics qw(variations);
  1         3572  
  1         70  
8 1     1   648 no if $] >= 5.017011, warnings => 'experimental::smartmatch';
  1         14  
  1         6  
9            
10             our $VERSION = 0.18;
11            
12             my$self;
13            
14             $" = ', ';
15             $| = 1;
16             $SIG{'INT'} = \&stop;
17            
18             sub new{
19 1     1 1 3962 my $type = shift;
20 1   33     8 my $class = ref($type)||$type;
21 1         18 $self = {
22             totalTransactions => 0,
23             metrics => {
24             minSupport => 0.01,
25             minConfidence => 0.10,
26             minLift => undef,
27             minLeverage => undef,
28             minConviction => undef,
29             minCoverage => undef,
30             minCorrelation => undef,
31             minCosine => undef,
32             minLaplace => undef,
33             minJaccard => undef
34             },
35             precision => 3,
36             output => undef,
37             pathOutputFiles => "",
38             messages => undef,
39             keyItemsDescription => undef,
40             keyItemsTransactions => undef,
41             limitRules => undef,
42             limitSubsets => undef,
43             numberSubsets => 0,
44             frequentItemset => [],
45             associationRules => undef,
46             implications => undef,
47             largeItemsetLength => 2,
48             rule => 0
49             };
50 1         3 bless($self,$class);
51 1         3 return $self;
52             }
53            
54             sub validate_data{
55             (defined $self->{keyItemsDescription})
56 0 0   0 0   or die('Error: $apriori->{keyItemsDescription} is not defined!');
57             (defined $self->{keyItemsTransactions})
58 0 0         or die('Error: $apriori->{keyItemsTransactions} is not defined!');
59             (defined $self->{metrics}{minSupport})
60 0 0         or die('Error: $apriori->{metrics}{minSupport} is not defined!');
61             (defined $self->{metrics}{minConfidence})
62 0 0         or die('Error: $apriori->{metrics}{minConfidence} is not defined!');
63             }
64            
65             sub insert_key_items_transaction{
66 0 0   0 1   (scalar(@_)==2)
67             or die('Error: $apriori->insert_key_items_transaction(\@items) missing parameter key items to array reference!');
68 0 0         (ref($_[1]) eq "ARRAY")
69             or die('Error: $apriori->insert_key_items_transaction(\@items) parameter key items is not an array reference!');
70 0           my@items=sort(@{$_[1]});
  0            
71 0           my $itemsets=powerset(@items);
72 0           foreach my$itemset(@{$itemsets}){
  0            
73 0           $self->{keyItemsTransactions}{"@{$itemset}"}++;
  0            
74             }
75 0           $self->{totalTransactions}++;
76             }
77            
78             sub input_data_file{
79 0 0   0 1   (scalar(@_)==3)
80             or die('Error: $apriori->input_data_file("datafile.txt",",") missing parameters path to data file and/or item separator!');
81 0           my$file=$_[1];
82 0           my$separator=$_[2];
83 0 0         (-e $file)
84             or die("Error: the file \"$file\" does not exists!");
85 0 0         (-r $file)
86             or die("Error: the file \"$file\" is not readable!");
87 0 0         (-T $file)
88             or die("Error: the file \"$file\" is not a text file!");
89 0 0         open(FILE,"<$file")
90             or die("Error: $!");
91 0           while(my$line=){
92 0           $line=~s/\r\n|\n//;
93 0           my@items=split($separator,$line);
94 0 0         if(scalar(@items)>=2){
95 0           $self->insert_key_items_transaction(\@items);
96             }
97             }
98 0           close(FILE);
99             }
100            
101             sub quantity_possible_rules{
102 0     0 1   $self->validate_data;
103 0           return ((3**scalar(keys(%{$self->{keyItemsDescription}})))-(2**(scalar(keys(%{$self->{keyItemsDescription}}))+1))+1);
  0            
  0            
104             }
105            
106             sub generate_rules{
107 0     0 1   $self->validate_data;
108 0 0         if($self->{messages}){
109 0           print "\n${\scalar(keys(%{$self->{keyItemsDescription}}))} items, ${\$self->quantity_possible_rules} possible rules";
  0            
  0            
  0            
110             }
111 0           my @largeItemsetLengthOne = grep{(($self->{keyItemsTransactions}{$_}/$self->{totalTransactions})*100)>=$self->{metrics}{minSupport}}keys(%{$self->{keyItemsDescription}});
  0            
  0            
112 0           $self->association_rules(\@largeItemsetLengthOne);
113             }
114            
115             sub association_rules{
116 0     0 1   my @largeItemset = @{$_[1]};
  0            
117 0           my @variations = variations(\@largeItemset,$self->{largeItemsetLength});
118 0           my @frequentItemset;
119 0 0         if($self->{messages}){
120 0           print "\nLarge itemset of length $self->{largeItemsetLength}, ${\scalar(@largeItemset)} items ";
  0            
121 0           print "\nProcessing ...";
122             }
123             VARIATIONS:
124 0           foreach my$variation(@variations){
125 0           my@candidateItemset=@{$variation};
  0            
126 0           my@antecedent;
127             my@consequent;
128 0           for(my$antecedentLength=0;$antecedentLength<$#candidateItemset;$antecedentLength++){
129 0           push@antecedent,$candidateItemset[$antecedentLength];
130 0           @consequent=();
131 0           for(my$consequentLength=($antecedentLength+1);$consequentLength<=$#candidateItemset;$consequentLength++){
132 0           push@consequent,$candidateItemset[$consequentLength];
133             }
134 0           @antecedent=sort@antecedent;
135 0           @consequent=sort@consequent;
136 0 0         next if("@consequent"~~@{$self->{implications}{"@antecedent"}});
  0            
137 0 0 0       last VARIATIONS if(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets});
138 0           $self->{numberSubsets}++;
139 0           push @{$self->{implications}{"@antecedent"}},"@consequent";
  0            
140 0           my@implication;
141 0           push@implication,@antecedent,@consequent;
142 0           @implication=sort(@implication);
143 0 0         next unless $self->{keyItemsTransactions}{"@antecedent"};
144 0           my$supportAntecedent=($self->{keyItemsTransactions}{"@antecedent"}/$self->{totalTransactions});
145 0 0         next unless $self->{keyItemsTransactions}{"@implication"};
146 0           my$supportConsequent=($self->{keyItemsTransactions}{"@implication"}/$self->{totalTransactions});
147 0           my $support = $supportConsequent;
148 0 0         next if($support < $self->{metrics}{minSupport});
149 0           my $confidence = ($supportConsequent/$supportAntecedent);
150 0 0 0       next if(defined $self->{metrics}{minConfidence} && $confidence < $self->{metrics}{minConfidence});
151 0           my $lift = ($support/($supportAntecedent*$supportConsequent));
152 0 0 0       next if(defined $self->{metrics}{minLift} && $lift < $self->{metrics}{minLift});
153 0           my $leverage = ($support-($supportAntecedent*$supportConsequent));
154 0 0 0       next if(defined $self->{metrics}{minLeverage} && $leverage < $self->{metrics}{minLeverage});
155 0 0         my $conviction = ((1-$supportConsequent)==0)?"NaN":((1-$confidence)==0)?"NaN":((1-$supportConsequent)/(1-$confidence));
    0          
156 0 0 0       next if(defined $self->{metrics}{minConviction} && $conviction < $self->{metrics}{minConviction});
157 0           my $coverage = $supportAntecedent;
158 0 0 0       next if(defined $self->{metrics}{minCoverage} && $coverage < $self->{metrics}{minCoverage});
159 0           my $correlation = (($support-($supportAntecedent*$supportConsequent))/sqrt($supportAntecedent*(1-$supportAntecedent)*$supportConsequent*(1-$supportConsequent)));
160 0 0 0       next if(defined $self->{metrics}{minCorrelation} && $correlation < $self->{metrics}{minCorrelation});
161 0           my $cosine = ($support/sqrt($supportAntecedent*$supportConsequent));
162 0 0 0       next if(defined $self->{metrics}{minCosine} && $cosine < $self->{metrics}{minCosine});
163 0           my $laplace = (($support+1)/($supportAntecedent+2));
164 0 0 0       next if(defined $self->{metrics}{minLaplace} && $laplace < $self->{metrics}{minLaplace});
165 0           my $jaccard = ($support/($supportAntecedent+$supportConsequent-$support));
166 0 0 0       next if(defined $self->{metrics}{minJaccard} && $jaccard < $self->{metrics}{minJaccard});
167 0           $self->{rule}++;
168 0           $support = sprintf("%.$self->{precision}f", $support);
169 0           $confidence = sprintf("%.$self->{precision}f", $confidence);
170 0           $lift = sprintf("%.$self->{precision}f", $lift);
171 0           $leverage = sprintf("%.$self->{precision}f", $leverage);
172 0 0         $conviction = sprintf("%.$self->{precision}f", $conviction)if($conviction ne "NaN");
173 0           $coverage = sprintf("%.$self->{precision}f", $coverage);
174 0           $correlation = sprintf("%.$self->{precision}f", $correlation);
175 0           $cosine = sprintf("%.$self->{precision}f", $cosine);
176 0           $laplace = sprintf("%.$self->{precision}f", $laplace);
177 0           $jaccard = sprintf("%.$self->{precision}f", $jaccard);
178             $self->{associationRules}{$self->{rule}} = {
179 0           implication => "{ @antecedent } => { @consequent }",
180             support => $support,
181             confidence => $confidence,
182             lift => $lift,
183             leverage => $leverage,
184             conviction => $conviction,
185             coverage => $coverage,
186             correlation => $correlation,
187             cosine => $cosine,
188             laplace => $laplace,
189             jaccard => $jaccard,
190             items => [@antecedent, @consequent]
191             };
192 0           my@items=grep{!($_~~@frequentItemset)}@implication;
  0            
193 0           push@frequentItemset,@items;
194 0 0 0       last VARIATIONS if(defined $self->{limitRules} && $self->{rule} == $self->{limitRules});
195             }
196             }
197 0 0         if($self->{messages}){
198 0           print "\nFrequent itemset: { @frequentItemset }, ${\scalar(@frequentItemset)} items ";
  0            
199             }
200 0 0         if(defined $self->{associationRules}){
201 0           @{$self->{frequentItemset}}=@frequentItemset;
  0            
202 0           $self->output;
203             }
204             return if((defined $self->{limitRules} && $self->{rule} == $self->{limitRules})
205 0 0 0       ||(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets}));
      0        
      0        
206 0 0         if(scalar(@frequentItemset)>=($self->{largeItemsetLength}+1)){
207 0           $self->{largeItemsetLength}++;
208 0           $self->{associationRules} = undef;
209 0           $self->association_rules(\@frequentItemset);
210             }
211             }
212            
213             sub stop{
214 0 0   0 0   if($self->{messages}){
215 0           print "\nStopping ...";
216 0 0         $self->output if $self->{associationRules};
217 0           print "\nExit? (Y/N): ";
218 0           my $answer = ;
219 0           chomp($answer);
220 0 0         if($answer =~ /^y$/i){
221 0           exit;
222             }
223             else{
224 0           print "Processing ...";
225             }
226             }
227             else{
228 0 0         $self->output if $self->{associationRules};
229 0           exit;
230             }
231             }
232            
233             sub output{
234 0 0   0 1   if($self->{output}){
235 0 0         if($self->{output}==1){
    0          
236 0           $self->file;
237             }
238             elsif($self->{output}==2){
239 0           $self->excel;
240             }
241             }
242             }
243            
244             sub file{
245 0 0   0 0   if($self->{messages}){
246 0           print "\nExporting to file $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt ...";
247             }
248 0 0         open(FILE,">$self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt")
249             or die("\nError: $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt $!");
250 0           print FILE "Rules\tSupport\tConfidence";
251 0           my@headings=('Lift', 'Leverage', 'Conviction', 'Coverage', 'Correlation', 'Cosine', 'Laplace', 'Jaccard');
252 0           my@metrics;
253 0           foreach my$metric(@headings){
254 0 0         push@metrics,$metric if defined $self->{metrics}{"min$metric"};
255             }
256 0           foreach my$metric(@metrics){
257 0           print FILE "\t$metric";
258             }
259 0           print FILE "\n";
260 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
261 0           $self->{associationRules}{$rule}{support}=~s/\./,/;
262 0           $self->{associationRules}{$rule}{confidence}=~s/\./,/;
263 0           foreach my$metric(@metrics){
264 0           $self->{associationRules}{$rule}{lc$metric}=~s/\./,/;
265             }
266 0           print FILE "R$rule\t$self->{associationRules}{$rule}{support}\t$self->{associationRules}{$rule}{confidence}";
267 0           foreach my$metric(@metrics){
268 0           print FILE "\t$self->{associationRules}{$rule}{${\lc$metric}}";
  0            
269             }
270 0           print FILE "\n";
271             }
272 0           print FILE "\n";
273 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
274 0           print FILE "Rule R$rule: $self->{associationRules}{$rule}{implication}\n";
275 0           print FILE "Support: $self->{associationRules}{$rule}{support}\n";
276 0           print FILE "Confidence: $self->{associationRules}{$rule}{confidence}\n";
277 0           foreach my$metric(@metrics){
278 0           print FILE "$metric: $self->{associationRules}{$rule}{${\lc$metric}}\n";
  0            
279             }
280 0           print FILE "Items:\n";
281 0           foreach my$item(@{$self->{associationRules}{$rule}{items}}){
  0            
282 0           print FILE "$item $self->{keyItemsDescription}{$item}\n";
283             }
284 0           print FILE "\n";
285             }
286 0           print FILE "Frequent itemset: { @{$self->{frequentItemset}} }\n";
  0            
287 0           print FILE "Items:\n";
288 0           foreach my$item(@{$self->{frequentItemset}}){
  0            
289 0           print FILE "$item $self->{keyItemsDescription}{$item}\n";
290             }
291 0           close(FILE);
292             }
293            
294             sub excel{
295 0     0 0   require Excel::Writer::XLSX;
296 0 0         if($self->{messages}){
297 0           print "\nExporting to excel $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx ...";
298             }
299 0 0         my $workbook = Excel::Writer::XLSX->new("$self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx")
300             or die("\nError: $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx $!");
301 0           my $worksheet = $workbook->add_worksheet();
302 0           my $bold = $workbook->add_format(bold => 1);
303 0           my $headings = ['Rules', 'Support', 'Confidence'];
304 0           my@metrics=('Lift', 'Leverage', 'Conviction', 'Coverage', 'Correlation', 'Cosine', 'Laplace', 'Jaccard');
305 0           foreach my$metric(@metrics){
306 0 0         push@{$headings},$metric if defined $self->{metrics}{"min$metric"};
  0            
307             }
308 0           @metrics=@{$headings}[3..$#{$headings}];
  0            
  0            
309 0           my(@rules,@support,@confidence,@lift,@leverage,@conviction,@coverage,@correlation,@cosine,@laplace,@jaccard);
310 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
311 0           push @rules,"R$rule";
312 0           push @support,$self->{associationRules}{$rule}{support};
313 0           push @confidence,$self->{associationRules}{$rule}{confidence};
314 0 0         push @lift,$self->{associationRules}{$rule}{lift} if defined $self->{metrics}{minLift};
315 0 0         push @leverage,$self->{associationRules}{$rule}{leverage} if defined $self->{metrics}{minLeverage};
316 0 0         push @conviction,$self->{associationRules}{$rule}{conviction} if defined $self->{metrics}{minConviction};
317 0 0         push @coverage,$self->{associationRules}{$rule}{coverage} if defined $self->{metrics}{minCoverage};
318 0 0         push @correlation,$self->{associationRules}{$rule}{correlation} if defined $self->{metrics}{minCorrelation};
319 0 0         push @cosine,$self->{associationRules}{$rule}{cosine} if defined $self->{metrics}{minCosine};
320 0 0         push @laplace,$self->{associationRules}{$rule}{laplace} if defined $self->{metrics}{minLaplace};
321 0 0         push @jaccard,$self->{associationRules}{$rule}{jaccard} if defined $self->{metrics}{minJaccard};
322             }
323 0           my$line=(scalar(@rules)+1);
324 0           my@data=(\@rules,\@support,\@confidence);
325 0 0         push @data,\@lift if defined $self->{metrics}{minLift};
326 0 0         push @data,\@leverage if defined $self->{metrics}{minLeverage};
327 0 0         push @data,\@conviction if defined $self->{metrics}{minConviction};
328 0 0         push @data,\@coverage if defined $self->{metrics}{minCoverage};
329 0 0         push @data,\@correlation if defined $self->{metrics}{minCorrelation};
330 0 0         push @data,\@cosine if defined $self->{metrics}{minCosine};
331 0 0         push @data,\@laplace if defined $self->{metrics}{minLaplace};
332 0 0         push @data,\@jaccard if defined $self->{metrics}{minJaccard};
333 0           $worksheet->write('A1', $headings, $bold);
334 0           $worksheet->write('A2', \@data);
335 0           my$chart=$workbook->add_chart(type =>'column', embedded=>1);
336 0           my@columns=('B'..'M');
337 0           my$i=0;
338 0           $chart->add_series(
339             name => 'Support',
340             categories => '=Sheet1!$A$2:$A$'.$line,
341             values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
342             );
343 0           $i++;
344 0           $chart->add_series(
345             name => 'Confidence',
346             categories => '=Sheet1!$A$2:$A$'.$line,
347             values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
348             );
349 0           foreach my$metric(@metrics){
350 0           $i++;
351 0           $chart->add_series(
352             name => $metric,
353             categories => '=Sheet1!$A$2:$A$'.$line,
354             values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
355             );
356             }
357 0           $worksheet->insert_chart($columns[($i+2)].'2', $chart);
358 0           $line+=2;
359 0           my $urlFormat = $workbook->add_format(
360             color => 'blue',
361             );
362 0           my$url=2;
363 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
364 0           $worksheet->write_url("A$line","internal:Sheet1!A$url",$urlFormat,"<");
365 0           $line++;
366 0           $worksheet->write_url("A$url","internal:Sheet1!A$line",$urlFormat,"R$rule");
367 0           $worksheet->write("A$line","Rule R$rule: $self->{associationRules}{$rule}{implication}");
368 0           $line++;
369 0           $worksheet->write("A$line","Support: $self->{associationRules}{$rule}{support}");
370 0           $line++;
371 0           $worksheet->write("A$line","Confidence: $self->{associationRules}{$rule}{confidence}");
372 0           $line++;
373 0           foreach my$metric(@metrics){
374 0           $worksheet->write("A$line","$metric: $self->{associationRules}{$rule}{${\lc$metric}}");
  0            
375 0           $line++;
376             }
377 0           $worksheet->write("A$line","Items:");
378 0           $line++;
379 0           foreach my$item(@{$self->{associationRules}{$rule}{items}}){
  0            
380 0           $worksheet->write("A$line","$item $self->{keyItemsDescription}{$item}");
381 0           $line++;
382             }
383 0           $line++;
384 0           $url++;
385             }
386 0           $worksheet->write("A$line","Frequent itemset: { @{$self->{frequentItemset}} }");
  0            
387 0           $line++;
388 0           $worksheet->write("A$line","Items:");
389 0           $line++;
390 0           foreach my$item(@{$self->{frequentItemset}}){
  0            
391 0           $worksheet->write("A$line","$item $self->{keyItemsDescription}{$item}");
392 0           $line++;
393             }
394 0           $workbook->close;
395             }
396            
397             return 1;
398             __END__