| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Algorithm::LibLinear::DataSet; |
|
2
|
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
16816
|
use 5.014; |
|
|
5
|
|
|
|
|
13
|
|
|
|
5
|
|
|
|
|
163
|
|
|
4
|
5
|
|
|
5
|
|
1748
|
use Algorithm::LibLinear::Types; |
|
|
5
|
|
|
|
|
11
|
|
|
|
5
|
|
|
|
|
166
|
|
|
5
|
5
|
|
|
5
|
|
23
|
use Carp qw//; |
|
|
5
|
|
|
|
|
7
|
|
|
|
5
|
|
|
|
|
83
|
|
|
6
|
5
|
|
|
5
|
|
2989
|
use List::MoreUtils qw/none/; |
|
|
5
|
|
|
|
|
4733
|
|
|
|
5
|
|
|
|
|
450
|
|
|
7
|
5
|
|
|
5
|
|
2257
|
use Smart::Args; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Constructor. Takes one named argument, data_set, which Smart::Args checks
# against 'ArrayRef[Algorithm::LibLinear::LabeledData]', and stores that
# reference (not a copy) under the data_set key of the new object.
sub new {
    args
        my $class => 'ClassName',
        my $data_set => 'ArrayRef[Algorithm::LibLinear::LabeledData]';

    my %state = (data_set => $data_set);
    return bless \%state, $class;
}
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Class method: builds a data set from LIBSVM/LIBLINEAR formatted input.
#
# Named arguments (all optional, but at least one must be defined):
#   fh       - an already opened file handle; takes precedence when defined.
#   filename - path of a file to open for reading.
#   string   - text to read through an in-memory file handle.
#
# Croaks with 'No source specified.' when none is defined, and with $! when
# the source cannot be opened.
sub load {
    args
        my $class => 'ClassName',
        my $fh => +{ isa => 'FileHandle', optional => 1, },
        my $filename => +{ isa => 'Str', optional => 1, },
        my $string => +{ isa => 'Str', optional => 1, };

    Carp::croak('No source specified.')
        unless defined $fh or defined $filename or defined $string;

    my $source = $fh;
    unless (defined $source) {
        # A file name wins over a string; a scalar reference makes open()
        # read from the string in memory.
        my $target = defined $filename ? $filename : \$string;
        open my $input, '<', $target or Carp::croak($!);
        $source = $input;
    }

    return $class->new(data_set => $class->parse_input_file($source));
}
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Appends one labeled datum to this data set, modifying it in place.
#
# Named arguments (checked by Smart::Args):
#   data - a value satisfying the 'Algorithm::LibLinear::LabeledData' type.
sub add_data {
    args
        my $self,
        my $data => 'Algorithm::LibLinear::LabeledData';

    # This package defines no data_set() method, so the earlier
    # $self->data_set call could not be resolved at run time. as_arrayref
    # is the accessor for the underlying array.
    push @{ $self->as_arrayref }, $data;
}
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Returns the array reference stored under the object's data_set key.
# The reference itself is returned, not a copy.
sub as_arrayref {
    my ($self) = @_;
    return $self->{data_set};
}
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Converts the data set into an Algorithm::LibLinear::Problem.
#
# Named arguments:
#   bias - a number, -1.0 when omitted; passed through unchanged.
#
# Labels and feature vectors are collected in data-set order and handed to
# the Problem constructor as two parallel array references.
sub as_problem {
    args
        my $self,
        my $bias => +{ isa => 'Num', default => -1.0, };

    my $entries = $self->as_arrayref;
    my @features = map { $_->{feature} } @$entries;
    my @labels = map { $_->{label} } @$entries;

    return Algorithm::LibLinear::Problem->new(\@labels, \@features, $bias);
}
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Serializes the data set: one line per entry, made of the label followed by
# "index:value" pairs in ascending numeric index order, separated by single
# spaces and terminated by a newline. An empty data set yields ''.
sub as_string {
    args
        my $self;

    my @lines = map {
        my $entry = $_;
        my $vector = $entry->{feature};
        my @pairs =
            map { "$_:$vector->{$_}" } sort { $a <=> $b } keys %$vector;
        join(' ', $entry->{label}, @pairs) . "\n";
    } @{ $self->as_arrayref };

    return join '', @lines;
}
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Parses LIBSVM/LIBLINEAR formatted text from an open file handle.
#
# Positional arguments (checked by Smart::Args' args_pos):
#   $class  - the class the method is invoked on.
#   $source - file handle to read lines from.
#
# Every non-blank line is read as "<label> <index>:<value> ...". Returns an
# ArrayRef of HashRefs, each with the keys "feature" (a HashRef mapping index
# to value) and "label"; label, indices and values are numified with "+= 0".
sub parse_input_file {
    args_pos
        my $class => 'ClassName',
        my $source => 'FileHandle';

    my @data_set;
    while (defined(my $line = <$source>)) {
        chomp $line;

        # A blank or whitespace-only line (typically a trailing newline)
        # carries no datum. Without this guard it would be recorded as a
        # bogus entry with label 0 and no features.
        next unless $line =~ /\S/;

        # split ' ' discards leading whitespace, so an indented line does
        # not produce an empty first field that is mistaken for the label.
        my ($label, @pairs) = split ' ', $line;
        $label += 0;

        my %feature;
        for my $pair (@pairs) {
            my ($index, $value) = split /:/, $pair;
            $index += 0;
            $value += 0;
            $feature{$index} = $value;
        }
        push @data_set, +{ feature => \%feature, label => $label, };
    }
    return \@data_set;
}
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Returns the number of entries currently held in the data set.
sub size {
    my ($self) = @_;
    return scalar @{ $self->as_arrayref };
}
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
1; |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
__DATA__ |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head1 NAME |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Algorithm::LibLinear::DataSet |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
use Algorithm::LibLinear::DataSet; |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [ |
|
108
|
|
|
|
|
|
|
+{ feature => +{ 1 => 0.708333, 2 => 1, 3 => 1, ... }, label => 1, }, |
|
109
|
|
|
|
|
|
|
+{ feature => +{ 1 => 0.583333, 2 => -1, 3 => 0.333333, ... }, label => -1, }, |
|
110
|
|
|
|
|
|
|
+{ feature => +{ 1 => 0.166667, 2 => 1, 3 => -0.333333, ... }, label => 1, }, |
|
111
|
|
|
|
|
|
|
... |
|
112
|
|
|
|
|
|
|
]); |
|
113
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA); |
|
114
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->load(filename => 'liblinear_file'); |
|
115
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->load(string => "+1 1:0.70833 ..."); |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
say $data_set->size; |
|
118
|
|
|
|
|
|
|
say $data_set->as_string; # '1 1:0.70833 2:1 3:1 ...' |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
__DATA__ |
|
121
|
|
|
|
|
|
|
+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1 |
|
122
|
|
|
|
|
|
|
-1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1 |
|
123
|
|
|
|
|
|
|
+1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1 |
|
124
|
|
|
|
|
|
|
... |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This class represents a set of feature vectors with gold answers. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 METHODS |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head2 new(data_set => \@data_set) |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Constructor. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
C<data_set> is an ArrayRef of HashRefs, each of which has 2 keys: C<feature> and C<label>. |
|
137
|
|
|
|
|
|
|
The value of C<feature> is a HashRef which represents a (sparse) feature vector. Its key is an index and corresponding value is a real number. The indices must be >= 1. |
|
138
|
|
|
|
|
|
|
The value of C<label> is an integer: the class label to which the feature vector belongs. |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head2 load(fh => \*FH | filename => $path | string => $string) |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Class method. Loads a data set in LIBSVM/LIBLINEAR format from a file handle, a file name, or a string. |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 as_string |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Dumps the data set as LIBSVM/LIBLINEAR format data. |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 size |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Returns the number of data entries in the set. |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |