File Coverage

blib/lib/Set/Similarity.pm

Criterion	Covered	Total	%
statement	50	50	100.0
branch	20	20	100.0
condition	11	12	100.0
subroutine	13	13	100.0
pod	9	9	100.0
total	103	104	100.0

line	stmt	bran	cond	sub	pod	time	code
1							package Set::Similarity;
2
3	5			5		2598	use strict;
	5					9
	5					162
4	5			5		22	use warnings;
	5					7
	5					238
5
6							our $VERSION = '0.026';
7
8	5			5		35	use Carp 'croak';
	5					15
	5					3356
9
10							sub new {
11	7			7	1	1381	my $class = shift;
12
13							# uncoverable condition false
14	7	100	66			50	bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class \|\| $class;
	2	100				13
15							}
16
17							sub similarity {
18	104			104	1	3314	my ($self, $any1, $any2, $width) = @_;
19
20	104					212	return $self->from_tokens(
21							$self->_any($any1,$width),
22							$self->_any($any2,$width)
23							);
24							}
25
26							sub _any {
27	222			222		266	my ($self, $any, $width) = @_;
28
29	222	100				581	if (ref($any) eq 'ARRAY') {
		100
		100
30	58					166	return $any;
31							}
32							elsif (ref($any) eq 'HASH') {
33	13					32	return [grep { $any->{$_} } keys %$any];
	15					43
34							}
35							elsif (ref($any)) {
36	1					14	return [];
37							}
38							else {
39	150					248	return [$self->ngrams($any,$width)];
40							}
41							}
42
43							sub ngrams {
44	161			161	1	161	my ($self, $word, $width) = @_;
45
46	161	100	100			757	$width = 1 unless ($width && $width =~ m/^[1-9][0-9]*$/x);
47	161		100			299	$word \|\|= '';
48
49	161	100				331	return ($word) unless ($width <= length($word));
50
51	142					256	return map {substr $word,$_,$width;} (0..length($word)-$width);
	546					1158
52							}
53
54							sub from_tokens {
55	108			108	1	742	my ($self, $tokens1, $tokens2) = @_;
56
57	108	100	100			306	return 1 if (!(scalar @$tokens1 \|\| scalar @$tokens2));
58	102	100	100			397	return 0 unless (scalar @$tokens1 && scalar @$tokens2 );
59
60	90					170	return $self->from_sets(
61							[$self->uniq($tokens1)],
62							[$self->uniq($tokens2)],
63							);
64							}
65
66	1			1	1	330	sub from_sets { croak 'Method "from_sets" not implemented in subclass' }
67
68							sub intersection {
69	90			90	1	84	my %uniq;
70	90					75	@uniq{@{$_[1]}} = ();
	90					177
71	90					83	scalar grep { exists $uniq{$_} } @{$_[2]};
	255					628
	90					145
72							}
73
74							sub uniq {
75	180			180	1	151	my %uniq;
76	180					154	@uniq{@{$_[1]}} = ();
	180					478
77	180					846	return keys %uniq;
78							}
79
80							sub combined_length {
81	49			49	1	45	scalar(@{$_[1]}) + scalar(@{$_[2]});
	49					57
	49					193
82							}
83
84							sub min {
85	20					20	(scalar(@{$_[1]}) < scalar(@{$_[2]}))
	20					33
86	20	100		20	1	18	? scalar(@{$_[1]}) : scalar(@{$_[2]});
	2					10
	18					95
87							}
88
89							1;
90
91							__END__