File Coverage

blib/lib/Data/Mining/Apriori.pm
Criterion Covered Total %
statement 19 284 6.6
branch 0 124 0.0
condition 1 45 2.2
subroutine 6 16 37.5
pod 7 11 63.6
total 33 480 6.8


line stmt bran cond sub pod time code
1             package Data::Mining::Apriori;
2            
3 1     1   13152 use 5.010001;
  1         3  
4 1     1   3 use strict;
  1         1  
  1         22  
5 1     1   2 use warnings;
  1         7  
  1         22  
6 1     1   441 use Algorithm::Combinatorics qw(subsets variations);
  1         2457  
  1         61  
7 1     1   480 no if $] >= 5.017011, warnings => 'experimental::smartmatch';
  1         7  
  1         4  
8            
9             our $VERSION = 0.16;
10            
11             my$self;
12            
13             $" = ', ';
14             $| = 1;
15             $SIG{'INT'} = \&stop;
16            
17             sub new{
18 1     1 1 2452 my $type = shift;
19 1   33     6 my $class = ref($type)||$type;
20 1         13 $self = {
21             totalTransactions => 0,
22             metrics => {
23             minSupport => 0.01,
24             minConfidence => 0.10,
25             minLift => undef,
26             minLeverage => undef,
27             minConviction => undef,
28             minCoverage => undef,
29             minCorrelation => undef,
30             minCosine => undef,
31             minLaplace => undef,
32             minJaccard => undef
33             },
34             output => undef,
35             pathOutputFiles => "",
36             messages => undef,
37             keyItemsDescription => undef,
38             keyItemsTransactions => undef,
39             limitRules => undef,
40             limitSubsets => undef,
41             numberSubsets => 0,
42             frequentItemset => [],
43             associationRules => undef,
44             implications => undef,
45             largeItemsetLength => 2,
46             rule => 0
47             };
48 1         2 bless($self,$class);
49 1         1 return $self;
50             }
51            
52             sub validate_data{
53             (defined $self->{keyItemsDescription})
54 0 0   0 0   or die('Error: $apriori->{keyItemsDescription} is not defined!');
55             (defined $self->{keyItemsTransactions})
56 0 0         or die('Error: $apriori->{keyItemsTransactions} is not defined!');
57             (defined $self->{metrics}{minSupport})
58 0 0         or die('Error: $apriori->{metrics}{minSupport} is not defined!');
59             (defined $self->{metrics}{minConfidence})
60 0 0         or die('Error: $apriori->{metrics}{minConfidence} is not defined!');
61             }
62            
63             sub insert_key_items_transaction{
64 0 0   0 1   (scalar(@_)==2)
65             or die('Error: $apriori->insert_key_items_transaction(\@items) missing parameter key items to array reference!');
66 0 0         (ref($_[1]) eq "ARRAY")
67             or die('Error: $apriori->insert_key_items_transaction(\@items) parameter key items is not an array reference!');
68 0           my@items=sort(@{$_[1]});
  0            
69 0           my @itemsets=subsets(\@items);
70 0           foreach my$itemset(@itemsets){
71 0           $self->{keyItemsTransactions}{"@{$itemset}"}++;
  0            
72             }
73 0           $self->{totalTransactions}++;
74             }
75            
76             sub input_data_file{
77 0 0   0 1   (scalar(@_)==3)
78             or die('Error: $apriori->input_data_file("datafile.txt",",") missing parameters path to data file and/or item separator!');
79 0           my$file=$_[1];
80 0           my$separator=$_[2];
81 0 0         open(FILE,"<$file")
82             or die("Error: $!");
83 0           while(my$line=){
84 0           $line=~s/\r\n|\n//;
85 0           my@items=split($separator,$line);
86 0 0         if(scalar(@items)>=2){
87 0           $self->insert_key_items_transaction(\@items);
88             }
89             }
90 0           close(FILE);
91             }
92            
93             sub quantity_possible_rules{
94 0     0 1   $self->validate_data;
95 0           return ((3**scalar(keys(%{$self->{keyItemsDescription}})))-(2**(scalar(keys(%{$self->{keyItemsDescription}}))+1))+1);
  0            
  0            
96             }
97            
98             sub generate_rules{
99 0     0 1   $self->validate_data;
100 0 0         if($self->{messages}){
101 0           print "\n${\scalar(keys(%{$self->{keyItemsDescription}}))} items, ${\$self->quantity_possible_rules} possible rules";
  0            
  0            
  0            
102             }
103 0           my @largeItemsetLengthOne = grep{(($self->{keyItemsTransactions}{$_}/$self->{totalTransactions})*100)>=$self->{metrics}{minSupport}}keys(%{$self->{keyItemsDescription}});
  0            
  0            
104 0           $self->association_rules(\@largeItemsetLengthOne);
105             }
106            
107             sub association_rules{
108 0     0 1   my @largeItemset = @{$_[1]};
  0            
109 0           my @variations = variations(\@largeItemset,$self->{largeItemsetLength});
110 0           my @frequentItemset;
111 0 0         if($self->{messages}){
112 0           print "\nLarge itemset of length $self->{largeItemsetLength}, ${\scalar(@largeItemset)} items ";
  0            
113 0           print "\nProcessing... ";
114             }
115             VARIATIONS:
116 0           foreach my$variation(@variations){
117 0           my@candidateItemset=@{$variation};
  0            
118 0           my@antecedent;
119             my@consequent;
120 0           for(my$antecedentLength=0;$antecedentLength<$#candidateItemset;$antecedentLength++){
121 0           push@antecedent,$candidateItemset[$antecedentLength];
122 0           @consequent=();
123 0           for(my$consequentLength=($antecedentLength+1);$consequentLength<=$#candidateItemset;$consequentLength++){
124 0           push@consequent,$candidateItemset[$consequentLength];
125             }
126 0           @antecedent=sort@antecedent;
127 0           @consequent=sort@consequent;
128 0 0         next if("@consequent"~~@{$self->{implications}{"@antecedent"}});
  0            
129 0 0 0       last VARIATIONS if(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets});
130 0           $self->{numberSubsets}++;
131 0           push @{$self->{implications}{"@antecedent"}},"@consequent";
  0            
132 0           my@implication;
133 0           push@implication,@antecedent,@consequent;
134 0           @implication=sort(@implication);
135 0 0         next unless $self->{keyItemsTransactions}{"@antecedent"};
136 0           my$supportAntecedent=($self->{keyItemsTransactions}{"@antecedent"}/$self->{totalTransactions});
137 0 0         next unless $self->{keyItemsTransactions}{"@implication"};
138 0           my$supportConsequent=($self->{keyItemsTransactions}{"@implication"}/$self->{totalTransactions});
139 0           my $support = $supportConsequent;
140 0 0         next if($support < $self->{metrics}{minSupport});
141 0           my $confidence = ($supportConsequent/$supportAntecedent);
142 0 0 0       next if(defined $self->{metrics}{minConfidence} && $confidence < $self->{metrics}{minConfidence});
143 0           my $lift = ($support/($supportAntecedent*$supportConsequent));
144 0 0 0       next if(defined $self->{metrics}{minLift} && $lift < $self->{metrics}{minLift});
145 0           my $leverage = ($support-($supportAntecedent*$supportConsequent));
146 0 0 0       next if(defined $self->{metrics}{minLeverage} && $leverage < $self->{metrics}{minLeverage});
147 0 0         my $conviction = ((1-$supportConsequent)==0)?"NaN":((1-$confidence)==0)?"NaN":((1-$supportConsequent)/(1-$confidence));
    0          
148 0 0 0       next if(defined $self->{metrics}{minConviction} && $conviction < $self->{metrics}{minConviction});
149 0           my $coverage = $supportAntecedent;
150 0 0 0       next if(defined $self->{metrics}{minCoverage} && $coverage < $self->{metrics}{minCoverage});
151 0           my $correlation = (($support-($supportAntecedent*$supportConsequent))/sqrt($supportAntecedent*(1-$supportAntecedent)*$supportConsequent*(1-$supportConsequent)));
152 0 0 0       next if(defined $self->{metrics}{minCorrelation} && $correlation < $self->{metrics}{minCorrelation});
153 0           my $cosine = ($support/sqrt($supportAntecedent*$supportConsequent));
154 0 0 0       next if(defined $self->{metrics}{minCosine} && $cosine < $self->{metrics}{minCosine});
155 0           my $laplace = (($support+1)/($supportAntecedent+2));
156 0 0 0       next if(defined $self->{metrics}{minLaplace} && $laplace < $self->{metrics}{minLaplace});
157 0           my $jaccard = ($support/($supportAntecedent+$supportConsequent-$support));
158 0 0 0       next if(defined $self->{metrics}{minJaccard} && $jaccard < $self->{metrics}{minJaccard});
159 0           $self->{rule}++;
160 0           $support = sprintf("%.4f", $support);
161 0           $confidence = sprintf("%.4f", $confidence);
162 0           $lift = sprintf("%.4f", $lift);
163 0           $leverage = sprintf("%.4f", $leverage);
164 0 0         $conviction = sprintf("%.4f", $conviction)if($conviction ne "NaN");
165 0           $coverage = sprintf("%.4f", $coverage);
166 0           $correlation = sprintf("%.4f", $correlation);
167 0           $cosine = sprintf("%.4f", $cosine);
168 0           $laplace = sprintf("%.4f", $laplace);
169 0           $jaccard = sprintf("%.4f", $jaccard);
170             $self->{associationRules}{$self->{rule}} = {
171 0           implication => "{ @antecedent } => { @consequent }",
172             support => $support,
173             confidence => $confidence,
174             lift => $lift,
175             leverage => $leverage,
176             conviction => $conviction,
177             coverage => $coverage,
178             correlation => $correlation,
179             cosine => $cosine,
180             laplace => $laplace,
181             jaccard => $jaccard,
182             items => [@antecedent, @consequent]
183             };
184 0           my@items=grep{!($_~~@frequentItemset)}@implication;
  0            
185 0           push@frequentItemset,@items;
186 0 0 0       last VARIATIONS if(defined $self->{limitRules} && $self->{rule} == $self->{limitRules});
187             }
188             }
189 0 0         if($self->{messages}){
190 0           print "\nFrequent itemset: { @frequentItemset }, ${\scalar(@frequentItemset)} items ";
  0            
191             }
192 0 0         if(defined $self->{associationRules}){
193 0           @{$self->{frequentItemset}}=@frequentItemset;
  0            
194 0           $self->output;
195             }
196             return if((defined $self->{limitRules} && $self->{rule} == $self->{limitRules})
197 0 0 0       ||(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets}));
      0        
      0        
198 0 0         if(scalar(@frequentItemset)>=($self->{largeItemsetLength}+1)){
199 0           $self->{largeItemsetLength}++;
200 0           $self->{associationRules} = undef;
201 0           $self->association_rules(\@frequentItemset);
202             }
203             }
204            
205             sub stop{
206 0 0   0 0   if($self->{messages}){
207 0           print "\nStopping... ";
208 0 0         $self->output if $self->{associationRules};
209 0           print "\nExit? (Y/N): ";
210 0           my $answer = ;
211 0           chomp($answer);
212 0 0         if($answer =~ /^y$/i){
213 0           exit;
214             }
215             else{
216 0           print "Processing... ";
217             }
218             }
219             else{
220 0 0         $self->output if $self->{associationRules};
221 0           exit;
222             }
223             }
224            
225             sub output{
226 0 0   0 1   if($self->{output}){
227 0 0         if($self->{output}==1){
    0          
228 0           $self->file;
229             }
230             elsif($self->{output}==2){
231 0           $self->excel;
232             }
233             }
234             }
235            
236             sub file{
237 0 0   0 0   if($self->{messages}){
238 0           print "\nExporting to file $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt... ";
239             }
240 0 0         open(FILE,">$self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt")
241             or die("\nError: $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt $!");
242 0           print FILE "Rules\tSupport\tConfidence";
243 0           my@headings=('Lift', 'Leverage', 'Conviction', 'Coverage', 'Correlation', 'Cosine', 'Laplace', 'Jaccard');
244 0           my@metrics;
245 0           foreach my$metric(@headings){
246 0 0         push@metrics,$metric if defined $self->{metrics}{"min$metric"};
247             }
248 0           foreach my$metric(@metrics){
249 0           print FILE "\t$metric";
250             }
251 0           print FILE "\n";
252 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
253 0           $self->{associationRules}{$rule}{support}=~s/\./,/;
254 0           $self->{associationRules}{$rule}{confidence}=~s/\./,/;
255 0           foreach my$metric(@metrics){
256 0           $self->{associationRules}{$rule}{lc$metric}=~s/\./,/;
257             }
258 0           print FILE "R$rule\t$self->{associationRules}{$rule}{support}\t$self->{associationRules}{$rule}{confidence}";
259 0           foreach my$metric(@metrics){
260 0           print FILE "\t$self->{associationRules}{$rule}{${\lc$metric}}";
  0            
261             }
262 0           print FILE "\n";
263             }
264 0           print FILE "\n";
265 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
266 0           print FILE "Rule R$rule: $self->{associationRules}{$rule}{implication}\n";
267 0           print FILE "Support: $self->{associationRules}{$rule}{support}\n";
268 0           print FILE "Confidence: $self->{associationRules}{$rule}{confidence}\n";
269 0           foreach my$metric(@metrics){
270 0           print FILE "$metric: $self->{associationRules}{$rule}{${\lc$metric}}\n";
  0            
271             }
272 0           print FILE "Items:\n";
273 0           foreach my$item(@{$self->{associationRules}{$rule}{items}}){
  0            
274 0           print FILE "$item $self->{keyItemsDescription}{$item}\n";
275             }
276 0           print FILE "\n";
277             }
278 0           print FILE "Frequent itemset: { @{$self->{frequentItemset}} }\n";
  0            
279 0           print FILE "Items:\n";
280 0           foreach my$item(@{$self->{frequentItemset}}){
  0            
281 0           print FILE "$item $self->{keyItemsDescription}{$item}\n";
282             }
283 0           close(FILE);
284             }
285            
286             sub excel{
287 0     0 0   require Excel::Writer::XLSX;
288 0 0         if($self->{messages}){
289 0           print "\nExporting to excel $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx... ";
290             }
291 0 0         my $workbook = Excel::Writer::XLSX->new("$self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx")
292             or die("\nError: $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx $!");
293 0           my $worksheet = $workbook->add_worksheet();
294 0           my $bold = $workbook->add_format(bold => 1);
295 0           my $headings = ['Rules', 'Support', 'Confidence'];
296 0           my@metrics=('Lift', 'Leverage', 'Conviction', 'Coverage', 'Correlation', 'Cosine', 'Laplace', 'Jaccard');
297 0           foreach my$metric(@metrics){
298 0 0         push@{$headings},$metric if defined $self->{metrics}{"min$metric"};
  0            
299             }
300 0           @metrics=@{$headings}[3..$#{$headings}];
  0            
  0            
301 0           my(@rules,@support,@confidence,@lift,@leverage,@conviction,@coverage,@correlation,@cosine,@laplace,@jaccard);
302 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
303 0           push @rules,"R$rule";
304 0           push @support,$self->{associationRules}{$rule}{support};
305 0           push @confidence,$self->{associationRules}{$rule}{confidence};
306 0 0         push @lift,$self->{associationRules}{$rule}{lift} if defined $self->{metrics}{minLift};
307 0 0         push @leverage,$self->{associationRules}{$rule}{leverage} if defined $self->{metrics}{minLeverage};
308 0 0         push @conviction,$self->{associationRules}{$rule}{conviction} if defined $self->{metrics}{minConviction};
309 0 0         push @coverage,$self->{associationRules}{$rule}{coverage} if defined $self->{metrics}{minCoverage};
310 0 0         push @correlation,$self->{associationRules}{$rule}{correlation} if defined $self->{metrics}{minCorrelation};
311 0 0         push @cosine,$self->{associationRules}{$rule}{cosine} if defined $self->{metrics}{minCosine};
312 0 0         push @laplace,$self->{associationRules}{$rule}{laplace} if defined $self->{metrics}{minLaplace};
313 0 0         push @jaccard,$self->{associationRules}{$rule}{jaccard} if defined $self->{metrics}{minJaccard};
314             }
315 0           my$line=(scalar(@rules)+1);
316 0           my@data=(\@rules,\@support,\@confidence);
317 0 0         push @data,\@lift if defined $self->{metrics}{minLift};
318 0 0         push @data,\@leverage if defined $self->{metrics}{minLeverage};
319 0 0         push @data,\@conviction if defined $self->{metrics}{minConviction};
320 0 0         push @data,\@coverage if defined $self->{metrics}{minCoverage};
321 0 0         push @data,\@correlation if defined $self->{metrics}{minCorrelation};
322 0 0         push @data,\@cosine if defined $self->{metrics}{minCosine};
323 0 0         push @data,\@laplace if defined $self->{metrics}{minLaplace};
324 0 0         push @data,\@jaccard if defined $self->{metrics}{minJaccard};
325 0           $worksheet->write('A1', $headings, $bold);
326 0           $worksheet->write('A2', \@data);
327 0           my$chart=$workbook->add_chart(type =>'column', embedded=>1);
328 0           my@columns=('B'..'M');
329 0           my$i=0;
330 0           $chart->add_series(
331             name => 'Support',
332             categories => '=Sheet1!$A$2:$A$'.$line,
333             values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
334             );
335 0           $i++;
336 0           $chart->add_series(
337             name => 'Confidence',
338             categories => '=Sheet1!$A$2:$A$'.$line,
339             values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
340             );
341 0           foreach my$metric(@metrics){
342 0           $i++;
343 0           $chart->add_series(
344             name => $metric,
345             categories => '=Sheet1!$A$2:$A$'.$line,
346             values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
347             );
348             }
349 0           $worksheet->insert_chart($columns[($i+2)].'2', $chart);
350 0           $line+=2;
351 0           my $urlFormat = $workbook->add_format(
352             color => 'blue',
353             );
354 0           my$url=2;
355 0           foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
  0            
  0            
356 0           $worksheet->write_url("A$line","internal:Sheet1!A$url",$urlFormat,"<");
357 0           $line++;
358 0           $worksheet->write_url("A$url","internal:Sheet1!A$line",$urlFormat,"R$rule");
359 0           $worksheet->write("A$line","Rule R$rule: $self->{associationRules}{$rule}{implication}");
360 0           $line++;
361 0           $worksheet->write("A$line","Support: $self->{associationRules}{$rule}{support}");
362 0           $line++;
363 0           $worksheet->write("A$line","Confidence: $self->{associationRules}{$rule}{confidence}");
364 0           $line++;
365 0           foreach my$metric(@metrics){
366 0           $worksheet->write("A$line","$metric: $self->{associationRules}{$rule}{${\lc$metric}}");
  0            
367 0           $line++;
368             }
369 0           $worksheet->write("A$line","Items:");
370 0           $line++;
371 0           foreach my$item(@{$self->{associationRules}{$rule}{items}}){
  0            
372 0           $worksheet->write("A$line","$item $self->{keyItemsDescription}{$item}");
373 0           $line++;
374             }
375 0           $line++;
376 0           $url++;
377             }
378 0           $worksheet->write("A$line","Frequent itemset: { @{$self->{frequentItemset}} }");
  0            
379 0           $line++;
380 0           $worksheet->write("A$line","Items:");
381 0           $line++;
382 0           foreach my$item(@{$self->{frequentItemset}}){
  0            
383 0           $worksheet->write("A$line","$item $self->{keyItemsDescription}{$item}");
384 0           $line++;
385             }
386 0           $workbook->close;
387             }
388            
389             return 1;
390             __END__