line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
package Text::Document;
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
$Text::Document::VERSION = '1.05';
|
5
|
|
|
|
|
|
|
|
6
|
5
|
|
|
5
|
|
1642
|
use strict;
|
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
203
|
|
7
|
|
|
|
|
|
|
|
8
|
5
|
|
|
5
|
|
58
|
use v5.6.0;
|
|
5
|
|
|
|
|
15
|
|
|
5
|
|
|
|
|
632
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @FIELDS = qw( lowercase );
|
11
|
|
|
|
|
|
|
our $COMPRESS_AVAILABLE;
|
12
|
|
|
|
|
|
|
our @KEYS_FOR_NEW = qw( compress lowercase );
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
BEGIN {
|
15
|
5
|
|
|
5
|
|
355
|
eval "use Compress::Zlib;";
|
|
5
|
|
|
5
|
|
6855
|
|
|
5
|
|
|
|
|
653533
|
|
|
5
|
|
|
|
|
1727
|
|
16
|
5
|
50
|
|
|
|
33
|
if( $@ ){
|
17
|
0
|
|
|
|
|
0
|
$COMPRESS_AVAILABLE = undef;
|
18
|
|
|
|
|
|
|
} else {
|
19
|
5
|
|
|
|
|
12616
|
$COMPRESS_AVAILABLE = 1;
|
20
|
|
|
|
|
|
|
}
|
21
|
|
|
|
|
|
|
}
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub new
|
25
|
|
|
|
|
|
|
{
|
26
|
7
|
|
|
7
|
1
|
151
|
my $class = shift;
|
27
|
7
|
|
|
|
|
14
|
my %self = @_;
|
28
|
7
|
|
|
|
|
26
|
my $self = {
|
29
|
|
|
|
|
|
|
lowercase => 1,
|
30
|
|
|
|
|
|
|
compress => 1,
|
31
|
|
|
|
|
|
|
terms => {},
|
32
|
|
|
|
|
|
|
};
|
33
|
7
|
|
|
|
|
18
|
foreach my $k ( @KEYS_FOR_NEW ){
|
34
|
14
|
50
|
|
|
|
38
|
defined( $self{$k} )
|
35
|
|
|
|
|
|
|
and ($self->{$k} = $self{$k});
|
36
|
|
|
|
|
|
|
}
|
37
|
|
|
|
|
|
|
|
38
|
7
|
|
|
|
|
16
|
bless $self, $class;
|
39
|
7
|
|
|
|
|
22
|
return $self;
|
40
|
|
|
|
|
|
|
}
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub AddContent
|
43
|
|
|
|
|
|
|
{
|
44
|
7
|
|
|
7
|
1
|
36
|
my $self = shift;
|
45
|
7
|
|
|
|
|
15
|
my ($text) = @_;
|
46
|
|
|
|
|
|
|
# clear frequency cache
|
47
|
7
|
50
|
|
|
|
25
|
$self->{freqs} and delete $self->{freqs};
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# parse text fragment
|
50
|
7
|
|
|
|
|
24
|
my @terms = $self->ScanV( $text );
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# update word count
|
53
|
7
|
|
|
|
|
11
|
foreach my $w (@terms){
|
54
|
26
|
|
|
|
|
59
|
$self->{terms}->{$w} ++;
|
55
|
|
|
|
|
|
|
}
|
56
|
7
|
|
|
|
|
12
|
undef $self->{WeightedEuclideanNorm};
|
57
|
7
|
|
|
|
|
15
|
undef $self->{EuclideanNorm};
|
58
|
7
|
|
|
|
|
19
|
return scalar @terms;
|
59
|
|
|
|
|
|
|
}
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# number of occurrences of a given term
|
62
|
|
|
|
|
|
|
sub Occurrences
|
63
|
|
|
|
|
|
|
{
|
64
|
0
|
|
|
0
|
1
|
0
|
my $self = shift;
|
65
|
0
|
|
|
|
|
0
|
my ($term) = @_;
|
66
|
|
|
|
|
|
|
|
67
|
0
|
|
|
|
|
0
|
return $self->{terms}->{$term};
|
68
|
|
|
|
|
|
|
}
|
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
sub ScanV
|
71
|
|
|
|
|
|
|
{
|
72
|
7
|
|
|
7
|
1
|
10
|
my $self = shift;
|
73
|
7
|
|
|
|
|
9
|
my ($text) = @_;
|
74
|
7
|
|
|
|
|
49
|
my @words = split( /[^a-zA-Z0-9]+/, $text );
|
75
|
7
|
|
|
|
|
66
|
@words = grep( /.+/, @words );
|
76
|
7
|
50
|
|
|
|
21
|
if( $self->{lowercase} ){
|
77
|
7
|
|
|
|
|
44
|
return map( lc($_), @words );
|
78
|
|
|
|
|
|
|
} else {
|
79
|
0
|
|
|
|
|
0
|
return @words;
|
80
|
|
|
|
|
|
|
}
|
81
|
|
|
|
|
|
|
}
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub KeywordFrequency
|
84
|
|
|
|
|
|
|
{
|
85
|
0
|
|
|
0
|
1
|
0
|
my $self = shift;
|
86
|
|
|
|
|
|
|
|
87
|
0
|
0
|
|
|
|
0
|
return $self->{freqs} if $self->{freqs};
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# all the distinct terms in the doc
|
90
|
0
|
|
|
|
|
0
|
my @terms = $self->Terms();
|
91
|
|
|
|
|
|
|
# total number of terms
|
92
|
0
|
|
|
|
|
0
|
my $sum = 0;
|
93
|
0
|
|
|
|
|
0
|
foreach my $t (@terms) { $sum += $self->{terms}->{$t}; }
|
|
0
|
|
|
|
|
0
|
|
94
|
|
|
|
|
|
|
# if zero, frequency is not defined
|
95
|
0
|
0
|
|
|
|
0
|
($sum > 0) or return undef;
|
96
|
|
|
|
|
|
|
# list of [term,frequency] pairs
|
97
|
0
|
|
|
|
|
0
|
my @freqs = map( [$_, $self->{terms}->{$_}/$sum ] , @terms );
|
98
|
|
|
|
|
|
|
# sort by ascending frequency
|
99
|
0
|
|
|
|
|
0
|
@freqs = sort { $a->[1] <=> $b->[1] } @freqs;
|
|
0
|
|
|
|
|
0
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# return reference to result
|
102
|
0
|
|
|
|
|
0
|
return $self->{freqs} = \@freqs;
|
103
|
|
|
|
|
|
|
}
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# all distinct term names
|
106
|
|
|
|
|
|
|
sub Terms
|
107
|
|
|
|
|
|
|
{
|
108
|
0
|
|
|
0
|
1
|
0
|
my $self = shift;
|
109
|
0
|
|
|
|
|
0
|
return keys %{$self->{terms}};
|
|
0
|
|
|
|
|
0
|
|
110
|
|
|
|
|
|
|
}
|
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# number of common terms divided by total number of terms
|
113
|
|
|
|
|
|
|
sub CommonTermsRatio
|
114
|
|
|
|
|
|
|
{
|
115
|
0
|
|
|
0
|
0
|
0
|
my $self = shift;
|
116
|
0
|
|
|
|
|
0
|
my ($other) = @_;
|
117
|
0
|
|
|
|
|
0
|
my @terms = $self->Terms();
|
118
|
0
|
|
|
|
|
0
|
my %terms;
|
119
|
0
|
|
|
|
|
0
|
@terms{@terms} = 1 .. @terms;
|
120
|
0
|
|
|
|
|
0
|
my @oTerms = $other->Terms();
|
121
|
0
|
|
|
|
|
0
|
my (%union);
|
122
|
0
|
|
|
|
|
0
|
@union{@terms} = 1 .. @terms;
|
123
|
0
|
|
|
|
|
0
|
@union{@oTerms} = 1 .. @oTerms;
|
124
|
0
|
0
|
|
|
|
0
|
my @intersection = map( ( $terms{$_} ? 1 : () ), @oTerms );
|
125
|
0
|
|
|
|
|
0
|
my $unionCardinality = scalar( keys %union );
|
126
|
0
|
0
|
|
|
|
0
|
($unionCardinality > 0) or return undef;
|
127
|
0
|
|
|
|
|
0
|
return scalar(@intersection) / $unionCardinality;
|
128
|
|
|
|
|
|
|
}
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub PureASCII
|
131
|
|
|
|
|
|
|
{
|
132
|
0
|
|
|
0
|
0
|
0
|
my $self = shift;
|
133
|
0
|
|
|
|
|
0
|
$self->{compress} = 1;
|
134
|
|
|
|
|
|
|
}
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub WriteToString
|
137
|
|
|
|
|
|
|
{
|
138
|
1
|
|
|
1
|
1
|
9
|
my $self = shift;
|
139
|
|
|
|
|
|
|
|
140
|
1
|
|
|
|
|
2
|
my $block = join( ',', %{$self->{terms}} );
|
|
1
|
|
|
|
|
8
|
|
141
|
1
|
|
|
|
|
3
|
my $compressed = undef;
|
142
|
1
|
50
|
33
|
|
|
11
|
if( $COMPRESS_AVAILABLE && $self->{compress} ){
|
143
|
1
|
|
|
|
|
5
|
$block = Compress::Zlib::compress( $block );
|
144
|
|
|
|
|
|
|
# $block = compress( $block );
|
145
|
1
|
|
|
|
|
496
|
$compressed = 1;
|
146
|
|
|
|
|
|
|
}
|
147
|
1
|
50
|
|
|
|
14
|
my $header =
|
148
|
|
|
|
|
|
|
'p='
|
149
|
|
|
|
|
|
|
. __PACKAGE__
|
150
|
|
|
|
|
|
|
. ' v='
|
151
|
|
|
|
|
|
|
. $Text::Document::VERSION
|
152
|
|
|
|
|
|
|
. ' l='
|
153
|
|
|
|
|
|
|
. length( $block )
|
154
|
|
|
|
|
|
|
. ' compress='
|
155
|
|
|
|
|
|
|
. ($compressed?'1':'0')
|
156
|
|
|
|
|
|
|
. ' '
|
157
|
|
|
|
|
|
|
. join( ' ', map( "$_=$self->{$_}", @FIELDS))
|
158
|
|
|
|
|
|
|
. "\n";
|
159
|
|
|
|
|
|
|
|
160
|
1
|
|
|
|
|
20
|
my $str = $header . $block;
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# add 8-char hex-encoded 4-byte checksum at the end of data
|
163
|
1
|
|
|
|
|
15
|
return $str . sprintf( '%08x', unpack( '%32C*', $str ) );
|
164
|
|
|
|
|
|
|
}
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub NewFromString
|
167
|
|
|
|
|
|
|
{
|
168
|
1
|
|
|
1
|
1
|
5
|
my ($str) = @_;
|
169
|
|
|
|
|
|
|
|
170
|
1
|
|
|
|
|
2
|
my $self = {};
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# verify checksum
|
173
|
|
|
|
|
|
|
# try to be compatible with version 1.03
|
174
|
1
|
|
|
|
|
3
|
my $stored_checksum = unpack( 'N', substr( $str, -4 ));
|
175
|
1
|
|
|
|
|
2
|
my $data_payload = substr( $str, 0, -4 );
|
176
|
1
|
|
|
|
|
3
|
my $computed_checksum = unpack( '%32C*', $data_payload );
|
177
|
|
|
|
|
|
|
|
178
|
1
|
50
|
|
|
|
5
|
if( $stored_checksum != $computed_checksum ){
|
179
|
1
|
|
|
|
|
4
|
$stored_checksum = hex( substr( $str, -8 ));
|
180
|
1
|
|
|
|
|
2
|
$data_payload = substr( $str, 0, -8 );
|
181
|
1
|
|
|
|
|
2
|
$computed_checksum = unpack( '%32C*', $data_payload );
|
182
|
|
|
|
|
|
|
}
|
183
|
|
|
|
|
|
|
|
184
|
1
|
50
|
|
|
|
4
|
if( $stored_checksum != $computed_checksum ){
|
185
|
0
|
|
|
|
|
0
|
die( __PACKAGE__ . '::NewFromString : '
|
186
|
|
|
|
|
|
|
. 'checksum test failed '
|
187
|
|
|
|
|
|
|
. $stored_checksum
|
188
|
|
|
|
|
|
|
. ' != '
|
189
|
|
|
|
|
|
|
. $computed_checksum
|
190
|
|
|
|
|
|
|
);
|
191
|
|
|
|
|
|
|
}
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# split data in header and block
|
194
|
1
|
|
|
|
|
4
|
my ($header,$block) = split( /\n/, $data_payload, 2 );
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# parse header line
|
197
|
1
|
|
|
|
|
11
|
my %header = split( /[ =]+/, $header );
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# check that the reading package is the same as the one that wrote
|
200
|
1
|
50
|
|
|
|
5
|
if( $header{p} ne __PACKAGE__ ){
|
201
|
0
|
|
|
|
|
0
|
die( __PACKAGE__ . '::NewFromString : '
|
202
|
|
|
|
|
|
|
. "file was not written by "
|
203
|
|
|
|
|
|
|
. __PACKAGE__
|
204
|
|
|
|
|
|
|
);
|
205
|
|
|
|
|
|
|
}
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# version must be identical
|
208
|
1
|
50
|
|
|
|
7
|
if( $header{v} > $Text::Document::VERSION ){
|
209
|
0
|
|
|
|
|
0
|
die( __PACKAGE__ . '::NewFromString : '
|
210
|
|
|
|
|
|
|
. "Current version is $Text::Document::VERSION"
|
211
|
|
|
|
|
|
|
. " and the file version is $header{v}"
|
212
|
|
|
|
|
|
|
);
|
213
|
|
|
|
|
|
|
}
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# size of block must match
|
216
|
1
|
50
|
|
|
|
4
|
if( $header{l} != length( $block ) ){
|
217
|
0
|
|
|
|
|
0
|
die( __PACKAGE__ . '::NewFromString : '
|
218
|
|
|
|
|
|
|
. "data size is "
|
219
|
|
|
|
|
|
|
. length( $block )
|
220
|
|
|
|
|
|
|
. "instead of $header{l} "
|
221
|
|
|
|
|
|
|
);
|
222
|
|
|
|
|
|
|
}
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# compressed?
|
225
|
1
|
50
|
33
|
|
|
6
|
if( $header{compress} and not($COMPRESS_AVAILABLE) ){
|
226
|
0
|
|
|
|
|
0
|
die( __PACKAGE__ . '::NewFromString : '
|
227
|
|
|
|
|
|
|
. 'header indicates that data is compressed, '
|
228
|
|
|
|
|
|
|
. 'but Compress::Zlib is not available'
|
229
|
|
|
|
|
|
|
);
|
230
|
|
|
|
|
|
|
}
|
231
|
|
|
|
|
|
|
|
232
|
1
|
50
|
|
|
|
3
|
if( $header{compress} ){
|
233
|
1
|
|
|
|
|
4
|
$block = Compress::Zlib::uncompress( $block );
|
234
|
|
|
|
|
|
|
# $block = uncompress( $block );
|
235
|
|
|
|
|
|
|
}
|
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
|
238
|
1
|
|
|
|
|
91
|
@{$self}{@FIELDS} = @header{ @FIELDS };
|
|
1
|
|
|
|
|
3
|
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# retrieve terms and recurrence count
|
241
|
1
|
|
|
|
|
3
|
%{$self->{terms}} = split( /,/, $block );
|
|
1
|
|
|
|
|
6
|
|
242
|
|
|
|
|
|
|
|
243
|
1
|
|
|
|
|
3
|
bless $self, $header{p};
|
244
|
|
|
|
|
|
|
|
245
|
1
|
|
|
|
|
4
|
return $self;
|
246
|
|
|
|
|
|
|
}
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub JaccardSimilarity
|
249
|
|
|
|
|
|
|
{
|
250
|
8
|
|
|
8
|
1
|
41
|
my $self = shift;
|
251
|
8
|
|
|
|
|
13
|
my ($e) = @_;
|
252
|
|
|
|
|
|
|
|
253
|
8
|
|
|
|
|
56
|
my @inter = map(
|
254
|
|
|
|
|
|
|
( $self->{terms}->{$_} ? $_ : () ),
|
255
|
8
|
100
|
|
|
|
10
|
keys %{$e->{terms}}
|
256
|
|
|
|
|
|
|
);
|
257
|
8
|
|
|
|
|
12
|
my %union = %{$self->{terms}};
|
|
8
|
|
|
|
|
34
|
|
258
|
8
|
|
|
|
|
13
|
my @keyse = keys %{$e->{terms}};
|
|
8
|
|
|
|
|
26
|
|
259
|
8
|
|
|
|
|
24
|
@union{@keyse} = @keyse;
|
260
|
8
|
50
|
|
|
|
25
|
if( (my $unionSize = scalar keys %union) > 0 ){
|
261
|
8
|
|
|
|
|
47
|
return scalar(@inter) / $unionSize;
|
262
|
|
|
|
|
|
|
} else {
|
263
|
0
|
|
|
|
|
0
|
return undef;
|
264
|
|
|
|
|
|
|
}
|
265
|
|
|
|
|
|
|
}
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub CosineSimilarity
|
268
|
|
|
|
|
|
|
{
|
269
|
3
|
|
|
3
|
1
|
20
|
my $self = shift;
|
270
|
3
|
|
|
|
|
5
|
my ($e) = @_;
|
271
|
|
|
|
|
|
|
|
272
|
3
|
|
|
|
|
7
|
my ($Dv,$Ev) = ($self->{terms}, $e->{terms});
|
273
|
3
|
|
|
|
|
5
|
my %union = %{$self->{terms}};
|
|
3
|
|
|
|
|
14
|
|
274
|
3
|
|
|
|
|
5
|
my @keyse = keys %{$e->{terms}};
|
|
3
|
|
|
|
|
11
|
|
275
|
3
|
|
|
|
|
9
|
@union{@keyse} = @keyse;
|
276
|
3
|
|
|
|
|
4
|
my $dotProduct = 0.0;
|
277
|
3
|
100
|
|
|
|
45
|
map( $dotProduct +=
|
|
|
100
|
|
|
|
|
|
278
|
|
|
|
|
|
|
(defined($Dv->{$_}) ? $Dv->{$_} : 0.0)
|
279
|
|
|
|
|
|
|
* (defined($Ev->{$_}) ? $Ev->{$_} : 0.0 ),
|
280
|
|
|
|
|
|
|
keys %union
|
281
|
|
|
|
|
|
|
);
|
282
|
|
|
|
|
|
|
|
283
|
3
|
|
|
|
|
9
|
my $nD = $self->EuclideanNorm();
|
284
|
3
|
|
|
|
|
7
|
my $nE = $e->EuclideanNorm();
|
285
|
|
|
|
|
|
|
|
286
|
3
|
50
|
33
|
|
|
18
|
if( ($nD==0) || ($nE==0) ){
|
287
|
0
|
|
|
|
|
0
|
return undef;
|
288
|
|
|
|
|
|
|
} else {
|
289
|
3
|
|
|
|
|
33
|
return $dotProduct / $nD / $nE;
|
290
|
|
|
|
|
|
|
}
|
291
|
|
|
|
|
|
|
}
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub EuclideanNorm
|
294
|
|
|
|
|
|
|
{
|
295
|
6
|
|
|
6
|
0
|
8
|
my $self = shift;
|
296
|
6
|
100
|
|
|
|
22
|
defined( $self->{EuclideanNorm} ) and return $self->{EuclideanNorm};
|
297
|
3
|
|
|
|
|
4
|
my $sum = 0.0;
|
298
|
3
|
|
|
|
|
4
|
map( $sum += $_*$_, values %{$self->{terms}} );
|
|
3
|
|
|
|
|
11
|
|
299
|
3
|
|
|
|
|
16
|
return ($self->{EuclideanNorm} = sqrt( $sum ));
|
300
|
|
|
|
|
|
|
}
|
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# this is rather rough
|
303
|
|
|
|
|
|
|
sub WeightedCosineSimilarity
|
304
|
|
|
|
|
|
|
{
|
305
|
0
|
|
|
0
|
1
|
|
my $self = shift;
|
306
|
0
|
|
|
|
|
|
my ($e,$weightFunction,$rock) = @_;
|
307
|
|
|
|
|
|
|
|
308
|
0
|
|
|
|
|
|
my ($Dv,$Ev) = ($self->{terms}, $e->{terms});
|
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# compute union
|
311
|
0
|
|
|
|
|
|
my %union = %{$self->{terms}};
|
|
0
|
|
|
|
|
|
|
312
|
0
|
|
|
|
|
|
my @keyse = keys %{$e->{terms}};
|
|
0
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
|
@union{@keyse} = @keyse;
|
314
|
0
|
|
|
|
|
|
my @allkeys = keys %union;
|
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# weighted D
|
317
|
0
|
|
|
|
|
|
my @Dw = map(( defined( $Dv->{$_} )?
|
318
|
0
|
0
|
|
|
|
|
&{$weightFunction}( $rock, $_ )*$Dv->{$_} : 0.0 ),
|
319
|
|
|
|
|
|
|
@allkeys
|
320
|
|
|
|
|
|
|
);
|
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# weighted E
|
323
|
0
|
|
|
|
|
|
my @Ew = map(( defined( $Ev->{$_} )?
|
324
|
0
|
0
|
|
|
|
|
&{$weightFunction}( $rock, $_ )*$Ev->{$_} : 0.0 ),
|
325
|
|
|
|
|
|
|
@allkeys
|
326
|
|
|
|
|
|
|
);
|
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
# dot product of D and E
|
329
|
0
|
|
|
|
|
|
my $dotProduct = 0.0;
|
330
|
0
|
|
|
|
|
|
map( $dotProduct += $Dw[$_] * $Ew[$_] , 0..$#Dw );
|
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# norm of D
|
333
|
0
|
|
|
|
|
|
my $nD = 0.0;
|
334
|
0
|
|
|
|
|
|
map( $nD += $Dw[$_] * $Dw[$_] , 0..$#Dw );
|
335
|
0
|
|
|
|
|
|
$nD = sqrt( $nD );
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# norm of E
|
338
|
0
|
|
|
|
|
|
my $nE = 0.0;
|
339
|
0
|
|
|
|
|
|
map( $nE += $Ew[$_] * $Ew[$_] , 0..$#Ew );
|
340
|
0
|
|
|
|
|
|
$nE = sqrt( $nE );
|
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# dot product scaled by norm
|
343
|
0
|
0
|
0
|
|
|
|
if( ($nD==0) || ($nE==0) ){
|
344
|
0
|
|
|
|
|
|
return undef;
|
345
|
|
|
|
|
|
|
} else {
|
346
|
0
|
|
|
|
|
|
return $dotProduct / $nD / $nE;
|
347
|
|
|
|
|
|
|
}
|
348
|
|
|
|
|
|
|
}
|
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
1;
|
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
__END__
|