File Coverage

blib/lib/Bio/Gonzales/Util/Math/kNN/via/Distances.pm

Criterion	Covered	Total	%
statement	51	52	98.0
branch	4	4	100.0
condition			n/a
subroutine	11	11	100.0
pod	0	1	0.0
total	66	68	97.0

line	stmt	bran	sub	pod	time	code
1						package Bio::Gonzales::Util::Math::kNN::via::Distances;
2
3	1		1		112375	use Mouse;
	1				28775
	1				6
4
5	1		1		406	use warnings;
	1				2
	1				41
6	1		1		7	use strict;
	1				2
	1				25
7	1		1		5	use List::Util qw/max min/;
	1				2
	1				101
8	1		1		631	use List::MoreUtils qw/indexes/;
	1				14311
	1				7
9	1		1		1134	use Data::Dumper;
	1				3
	1				59
10
11	1		1		19	use 5.010;
	1				4
12						our $VERSION = '0.083'; # VERSION
13
14						=head1 NAME
15
16						Bio::Gonzales::Util::Math::kNN::via::Distances - Calculate kNN clusterings from already calculated distances
17
18						=head1 SYNOPSIS
19
20						use Bio::Gonzales::Util::Math::kNN::via::Distances;
21
22						my $k = Bio::Gonzales::Util::Math::kNN::via::Distances->new(
23						distances => [ [1], [ 2, 3 ], ... ],
24						groups => [ 'group1 row1', 'group row2', undef, 'group row4' ]
25						);
26						my $result = $k->calc(1);
27
28						=head1 DESCRIPTION
29
30						=head1 METHODS
31
32						=head2 Bio::Gonzales::Util::Math::kNN::via::Distances->new(...)
33
34						=over 4
35
36						=item distances
37
38						Distances in lower triangular form (array of arrays).
39
40						=item groups
41
42						Groups in array, undef for "test" rows.
43
44						=back
45
46						=cut
47
48						#array of arrays (distances in lower triangular matrix form)
49						has distances => ( is => 'rw', required => 1 );
50						#array of groupnames with undef for test
51						has groups => ( is => 'rw', required => 1 );
52
53						sub calc {
54	1		1	0	266	my ( $self, $k ) = @_;
55
56	1		4		7	my @training_idx = indexes {$_} @{ $self->groups };
	4				8
	1				10
57	1				10	say STDERR "Training idx: " . Dumper( \@training_idx );
58
59	1				292	my @lies_in_group;
60	1				4	for ( my $i = 0; $i < @{ $self->distances }; $i++ ) {
	5				20
61	4				11	my $d = $self->distances->[$i];
62
63						# here we have a training 'row', so skip it
64	4	100			11	if ( $self->groups->[$i] ) {
65	2				4	$lies_in_group[$i] = undef;
66	2				5	next;
67						}
68
69						# get index of $k distances from training set
70						my @k_nearest
71	2				24	= ( sort { $self->_distance_between( $a, $i ) <=> $self->_distance_between( $b, $i ) } @training_idx )
	2				8
72						[ 0 .. ( $k - 1 ) ];
73	2				13	say STDERR "$i -> " . Dumper( \@k_nearest );
74
75	2				429	$lies_in_group[$i] = $self->_vote( \@k_nearest );
76						}
77	1				9	return \@lies_in_group;
78						}
79
80						sub _distance_between {
81	4		4		28	my ( $self, $i, $a ) = @_;
82
83	4				8	my $dist;
84	4	100			19	if ( $a > $i ) {
85	1				3	$dist = $self->distances->[$a][$i];
86						} else {
87	3				11	$dist = $self->distances->[$i][$a];
88						}
89
90	4				838	say STDERR "($i, $a) => $dist";
91	4				34	return $dist;
92						}
93
94						sub _vote {
95	2		2		6	my ( $self, $k_nearest ) = @_;
96
97	2				4	my %votes;
98	2				5	for my $idx ( @{$k_nearest} ) {
	2				5
99	2				10	$votes{ $self->groups->[$idx] }++;
100						}
101
102	2				10	my $group = ( sort { $votes{$b} <=> $votes{$a} } keys %votes )[0];
	0				0
103
104	2				10	return $group;
105						}
106
107						1;
108						__END__
109
110						=head1 SEE ALSO
111
112						=head1 AUTHOR
113
114						jw bargsten, C<< <joachim.bargsten at wur.nl> >>
115
116						=cut