File Coverage

blib/lib/KinoSearch1/Search/Similarity.pm
Criterion Covered Total %
statement 45 45 100.0
branch 7 10 70.0
condition n/a
subroutine 14 14 100.0
pod 0 3 0.0
total 66 72 91.6


line stmt bran cond sub pod time code
1             package KinoSearch1::Search::Similarity;
2 34     34   37782 use strict;
  34         72  
  34         1235  
3 34     34   181 use warnings;
  34         66  
  34         886  
4 34     34   952 use KinoSearch1::Util::ToolSet;
  34         85  
  34         4753  
5 34     34   209 use base qw( KinoSearch1::Util::CClass );
  34         106  
  34         6354  
6              
7 34     34   551 BEGIN { __PACKAGE__->init_instance_vars(); }
8              
9             # See _float_to_byte.
10             *encode_norm = *_float_to_byte;
11             *decode_norm = *_byte_to_float;
12              
13             # Calculate the Inverse Document Frequency for one or more Terms in a given
14             # collection (the Searcher represents the collection).
15             #
16             # If multiple Terms are supplied, their idfs are summed.
17             sub idf {
18 559     559 0 1022 my ( $self, $term_or_terms, $searcher ) = @_;
19 559         1841 my $max_doc = $searcher->max_doc;
20 559 100       2134 my $terms
21             = ref $term_or_terms eq 'ARRAY' ? $term_or_terms : [$term_or_terms];
22              
23 559 50       1561 return 1 unless $max_doc; # guard against log of zero error
24              
25             # accumulate IDF
26 559         922 my $idf = 0;
27 559         1253 for my $term (@$terms) {
28 639         2240 my $doc_freq = $searcher->doc_freq($term);
29 639         2060 $idf += 1 + log( $max_doc / ( 1 + $searcher->doc_freq($term) ) );
30             }
31 559         2650 return $idf;
32             }
33              
34             # Normalize a Query's weight so that it is comparable to other Queries.
35             sub query_norm {
36 283     283 0 533 my ( $self, $sum_of_squared_weights ) = @_;
37 283 100       970 return 0 if ( $sum_of_squared_weights == 0 ); # guard against div by zero
38 281         1100 return ( 1 / sqrt($sum_of_squared_weights) );
39             }
40              
41             # KLUDGE -- see comment at STORABLE_thaw.
42             sub STORABLE_freeze {
43 3     3 0 226 my ( $self, $cloning ) = @_;
44 3 50       12 return if $cloning;
45 3         177 return "1";
46             }
47              
48             package KinoSearch1::Search::TitleSimilarity;
49 34     34   238 use strict;
  34         75  
  34         1035  
50 34     34   223 use warnings;
  34         73  
  34         1035  
51 34     34   202 use KinoSearch1::Util::ToolSet;
  34         74  
  34         5064  
52 34     34   194 use base qw( KinoSearch1::Search::Similarity );
  34         2762  
  34         5437  
53              
54             sub new {
55 1     1   24 my $self = shift->SUPER::new(@_);
56 1         18 $self->_use_title_tf;
57 1         4 return $self;
58             }
59              
60             sub lengthnorm {
61 2 50   2   6 return 0 unless $_[1];
62 2         15 return 1 / sqrt( $_[1] );
63             }
64              
65             1;
66              
67             __END__