File Coverage

blib/lib/Set/Similarity.pm

Criterion	Covered	Total	%
statement	50	50	100.0
branch	20	20	100.0
condition	11	12	100.0
subroutine	13	13	100.0
pod	9	9	100.0
total	103	104	100.0

line	stmt	bran	cond	sub	pod	time	code
1							package Set::Similarity;
2
3	5			5		2466	use strict;
	5					8
	5					141
4	5			5		19	use warnings;
	5					7
	5					227
5
6							our $VERSION = '0.025';
7
8	5			5		31	use Carp 'croak';
	5					6
	5					3363
9
10							sub new {
11	7			7	1	1703	my $class = shift;
12
13							# uncoverable condition false
14	7	100	66			56	bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class \|\| $class;
	2	100				11
15							}
16
17							sub similarity {
18	104			104	1	4386	my ($self, $any1, $any2, $width) = @_;
19
20	104					211	return $self->from_tokens(
21							$self->_any($any1,$width),
22							$self->_any($any2,$width)
23							);
24							}
25
26							sub _any {
27	222			222		245	my ($self, $any, $width) = @_;
28
29	222	100				562	if (ref($any) eq 'ARRAY') {
		100
		100
30	58					114	return $any;
31							}
32							elsif (ref($any) eq 'HASH') {
33	13					32	return [grep { $any->{$_} } keys %$any];
	15					44
34							}
35							elsif (ref($any)) {
36	1					4	return [];
37							}
38							else {
39	150					218	return [$self->ngrams($any,$width)];
40							}
41							}
42
43							sub ngrams {
44	161			161	1	158	my ($self, $word, $width) = @_;
45
46	161	100	100			732	$width = 1 unless ($width && $width =~ m/^[1-9][0-9]*$/x);
47	161		100			302	$word \|\|= '';
48
49	161	100				335	return ($word) unless ($width <= length($word));
50
51	142					253	return map {substr $word,$_,$width;} (0..length($word)-$width);
	546					1070
52							}
53
54							sub from_tokens {
55	108			108	1	988	my ($self, $tokens1, $tokens2) = @_;
56
57	108	100	100			259	return 1 if (!(scalar @$tokens1 \|\| scalar @$tokens2));
58	102	100	100			352	return 0 unless (scalar @$tokens1 && scalar @$tokens2 );
59
60	90					150	return $self->from_sets(
61							[$self->uniq($tokens1)],
62							[$self->uniq($tokens2)],
63							);
64							}
65
66	1			1	1	437	sub from_sets { croak 'Method "from_sets" not implemented in subclass' }
67
68							sub intersection {
69	90			90	1	63	my %uniq;
70	90					76	@uniq{@{$_[1]}} = ();
	90					148
71	90					103	scalar grep { exists $uniq{$_} } @{$_[2]};
	255					577
	90					151
72							}
73
74							sub uniq {
75	180			180	1	145	my %uniq;
76	180					150	@uniq{@{$_[1]}} = ();
	180					461
77	180					695	return keys %uniq;
78							}
79
80							sub combined_length {
81	49			49	1	41	scalar(@{$_[1]}) + scalar(@{$_[2]});
	49					69
	49					188
82							}
83
84							sub min {
85	20					18	(scalar(@{$_[1]}) < scalar(@{$_[2]}))
	20					26
86	20	100		20	1	18	? scalar(@{$_[1]}) : scalar(@{$_[2]});
	2					10
	18					88
87							}
88
89							1;
90
91							__END__