File Coverage

blib/lib/NanoB2B/NER/Modelman.pm
Criterion Covered Total %
statement 12 63 19.0
branch 0 16 0.0
condition n/a
subroutine 4 7 57.1
pod 0 2 0.0
total 16 88 18.1


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             # NanoB2B-NER::NER::Modelman
3             #
4             # Turns the ARFF Train files into models and loads models with ARFF Test files
5             # Version 1.0
6             #
7             # Program by Milk
8              
9             package NanoB2B::NER::Modelman;
10              
11 1     1   7 use NanoB2B::UniversalRoutines;
  1         2  
  1         27  
12 1     1   5 use File::Path qw(make_path); #makes sub directories
  1         2  
  1         41  
13 1     1   13 use strict;
  1         7  
  1         21  
14 1     1   5 use warnings;
  1         3  
  1         642  
15              
16             #### GLOBAL VARIABLES ####
17              
18             #option variables
19             my $program_dir;
20             my $classifier = "weka.classifiers.bayes.NaiveBayes";
21             my $weka_size = "Xmx4G";
22             my @features;
23             my $buckets = 10;
24             my $debug = 0;
25              
26              
27             #hardcoded for now; can be made configurable later
28             my $C_val = 0.25;
29             my $M_val = 2;
30              
31              
32             #universal subroutines object
33             my %uniParams = ();
34             my $uniSub;
35              
36              
37             #### A CIVILIAN IS SAVED ####
38              
39             # construction method to create a new Modelman object
40             # input : $directory <-- the name of the directory for the files
41             # $features <-- the set of features to run on [e.g. omtpcs]
42             # \$type <-- the weka algorithm to run the set on [e.g. weka.classifiers.functions.SMO]
43             # \$weka_size <-- the size for the memory allocation in the weka parameter [e.g. -Xmx6G]
44             # \$buckets <-- the number of buckets used for the k-fold cross validation
45             # \$debug <-- the debug setting handed to NanoB2B::UniversalRoutines [e.g. 0]
46             # output : $self <-- an instance of the Modelman object
47             sub new {
48             #grab class and parameters
49 0     0 0   my $self = {};
50 0           my $class = shift;
51 0 0         return undef if(ref $class);
52 0           my $params = shift;
53              
54             #bless this object
55 0           bless $self, $class;
56 0           $self->_init($params);
57              
58             #retrieve parameters for universal-routines
59 0           $uniParams{'debug'} = $debug;
60 0           $uniSub = NanoB2B::UniversalRoutines->new(\%uniParams);
61              
62             #return the object
63 0           return $self;
64             }
65             # method to initialize the NanoB2B::NER::Modelman object.
66             # input : $parameters <- reference to a hash
67             # output:
68             sub _init {
69 0     0     my $self = shift;
70 0           my $params = shift;
71              
72 0 0         $params = {} if(!defined $params);
73              
74             # get some of the parameters
75 0           my $diroption = $params->{'directory'};
76 0           my $ftsoption = $params->{'features'};
77 0           my $bucketsNumoption = $params->{'buckets'};
78 0           my $typeoption = $params->{'type'};
79 0           my $sizeoption = $params->{'weka_size'};
80 0           my $debugoption = $params->{'debug'};
81              
82             #set the global variables
83 0 0         if(defined $debugoption){$debug = $debugoption;}
  0            
84 0 0         if(defined $diroption){$program_dir = $diroption;}
  0            
85 0 0         if(defined $bucketsNumoption){$buckets = $bucketsNumoption;}
  0            
86 0 0         if(defined $ftsoption){@features = split(' ', $ftsoption); }
  0            
87 0 0         if(defined $typeoption){$classifier = $typeoption};
  0            
88 0 0         if(defined $sizeoption){$weka_size = $sizeoption};
  0            
89             }
90              
91              
92             ############### I'M AN EVERYDAY AVERAGE MODELMAN ################
93              
94             # runs the arff files through weka to export models
95             # input : $name <-- the name of the file to run through weka - model maker
96             # output: (model files)
97             sub make_model_file{
98 0     0 0   my $self = shift;
99 0           my $name = shift;
100              
101 0           $name = lc($name);
102              
103             #split them up by sets
104 0           my @sets = ();
105 0           my $item = "_";
106 0           foreach my $fs (@features){
107 0           my $abbrev = substr($fs, 0, 1); #add to abbreviations for the name
108 0           $item .= $abbrev;
109 0           push(@sets, $item);
110             }
111              
112             #get the ending part of the classifier for the weka dir name
113 0           my @b = split(/\./, $classifier);
114 0           my $weka_dir = $b[$#b];
115              
116             #run each set through weka to build the models
117 0           foreach my $set(@sets){
118             #set up the new folder
119 0           my $direct = "$program_dir/_MODELS/$weka_dir/$name" . "_MODEL_DATA/$set";
120 0           make_path($direct);
121              
122             #prep the train file path and the model output path for each bucket
123 0           for(my $a = 1; $a <= $buckets; $a++){
124 0           $| = 1;
125 0           $uniSub->printColorDebug("cyan", ("\r" . "$name - $set -- $a"));
126 0           my $TRAIN = "$program_dir/_ARFF/$name" . "_ARFF/$set/_train/$name" . "_train-$a.arff";
127 0           my $WEK = $direct . "/$name" . "_model_$a";
128              
129             #run weka-modelling and output
130 0           system "java $weka_size $classifier -C $C_val -t $TRAIN -d $direct";
131             }
132 0           $uniSub->printDebug("\n");
133             }
134             }
135              
136              
137              
138             1;