| line |
true |
false |
branch |
|
40
|
0 |
2 |
if _check_for_illegal_params(@params) == 0 |
|
62
|
0 |
2 |
exists $args{'break_camelcased_and_underscored'} ? : |
|
|
0 |
2 |
exists $args{'use_idf_filter'} ? : |
|
|
0 |
2 |
exists $args{'relevancy_threshold'} ? : |
|
95
|
0 |
2 |
unless $self->{'_corpus_directory'} |
|
97
|
0 |
2 |
if $self->{'_debug'} |
|
99
|
0 |
2 |
if $self->{'_stop_words_file'} |
|
100
|
0 |
2 |
if ($self->{'_debug'}) |
|
105
|
0 |
2 |
if ($self->{'_save_model_on_disk'}) |
|
110
|
0 |
0 |
unless tie %{$$self{'_vocab_hist_on_disk'};}, 'SDBM_File', $self->{'_corpus_vocab_db'}, 66, 416 |
|
120
|
0 |
2 |
if $self->{'_debug'} |
|
133
|
0 |
0 |
unless keys %{$$self{'_vocab_hist'};} |
|
144
|
0 |
0 |
unless keys %{$$self{'_vocab_hist'};} |
|
152
|
0 |
0 |
unless keys %{$$self{'_vocab_hist'};} |
|
153
|
0 |
0 |
unless open OUT, "> $file" |
|
165
|
0 |
0 |
unless keys %{$$self{'_vocab_idf_hist'};} |
|
167
|
0 |
0 |
unless $self->{'_idf_filter_option'} |
|
197
|
0 |
2 |
if ($self->{'_save_model_on_disk'}) |
|
201
|
0 |
0 |
unless $self->{'_doc_vectors_db'} and $self->{'_normalized_doc_vecs_db'} |
|
205
|
0 |
0 |
if ($@) |
|
211
|
0 |
0 |
if ($@) |
|
220
|
0 |
0 |
unless keys %{$$self{'_corpus_doc_vectors'};} |
|
234
|
0 |
0 |
unless keys %{$$self{'_normalized_doc_vecs'};} |
|
235
|
0 |
0 |
unless ($self->{'_idf_filter_option'}) |
|
258
|
0 |
0 |
unless contained_in($doc1, @all_files) |
|
259
|
0 |
0 |
unless contained_in($doc2, @all_files) |
|
280
|
0 |
0 |
unless contained_in($doc1, @all_files) |
|
281
|
0 |
0 |
unless contained_in($doc2, @all_files) |
|
305
|
1 |
0 |
if ($self->{'_break_camelcased_and_underscored'}) { } |
|
309
|
0 |
0 |
$1 ? : |
|
310
|
7 |
1 |
$1 ? : |
|
|
0 |
1 |
$self->{'_case_sensitive'} ? : |
|
316
|
0 |
1 |
if $self->{'_debug'} |
|
317
|
1 |
0 |
if ($self->{'_idf_filter_option'}) { } |
|
320
|
0 |
1 |
unless scalar keys %{$$self{'_vocab_hist'};} and scalar keys %{$$self{'_normalized_doc_vecs'};} |
|
324
|
0 |
0 |
unless scalar keys %{$$self{'_vocab_hist'};} and scalar keys %{$$self{'_corpus_doc_vectors'};} |
|
330
|
0 |
7 |
if ($self->{'_case_sensitive'}) { } |
|
331
|
0 |
0 |
if exists $self->{'_vocab_hist'}{$_} |
|
333
|
2 |
5 |
if exists $self->{'_vocab_hist'}{lc $_} |
|
338
|
0 |
1 |
unless $query_word_count_total |
|
341
|
1 |
0 |
if ($self->{'_idf_filter_option'}) { } |
|
343
|
0 |
1 |
if $self->{'_debug'} |
|
346
|
8 |
2 |
if $self->_similarity_to_query($_) > 0 |
|
350
|
0 |
0 |
if $self->{'_debug'} |
|
353
|
0 |
0 |
if $self->_similarity_to_query($_) > 0 |
|
356
|
0 |
1 |
if ($self->{'_debug'}) |
|
371
|
0 |
0 |
unless -s "$self->{'_corpus_vocab_db'}.pag" and -s $self->{'_doc_vectors_db'} |
|
374
|
0 |
0 |
unless tie %{$$self{'_vocab_hist_on_disk'};}, 'SDBM_File', $self->{'_corpus_vocab_db'}, 0, 416 |
|
376
|
0 |
0 |
if ($self->{'_debug'}) |
|
387
|
0 |
0 |
if $self->{'_debug'} |
|
396
|
0 |
0 |
unless -s "$self->{'_corpus_vocab_db'}.pag" and -s $self->{'_normalized_doc_vecs_db'} |
|
399
|
0 |
0 |
unless tie %{$$self{'_vocab_hist_on_disk'};}, 'SDBM_File', $self->{'_corpus_vocab_db'}, 0, 416 |
|
401
|
0 |
0 |
if ($self->{'_debug'}) |
|
412
|
0 |
0 |
if $self->{'_debug'} |
|
426
|
0 |
0 |
if $iter > $self->{'_max_number_retrievals'} |
|
437
|
0 |
4 |
unless chdir $dir |
|
439
|
0 |
40 |
if (-d $_ and not -l $_) { } |
|
|
40 |
0 |
elsif (-r _ and -T _ and -M _ > 1e-05 and not -l $_ and $self->ok_to_filetype($_)) { } |
|
441
|
0 |
0 |
unless chdir $dir |
|
448
|
0 |
40 |
if $self->{'_scan_dir_for_rels'} |
|
449
|
20 |
20 |
unless $self->{'_corpus_vocab_done'} |
|
450
|
20 |
20 |
if $self->{'_corpus_vocab_done'} |
|
463
|
126 |
730 |
if /^[ ]*\r?\n?$/ |
|
466
|
730 |
0 |
if ($self->{'_break_camelcased_and_underscored'}) { } |
|
470
|
0 |
0 |
$1 ? : |
|
471
|
1736 |
996 |
$1 ? : |
|
|
0 |
730 |
$self->{'_case_sensitive'} ? : |
|
476
|
286 |
444 |
unless @clean_words |
|
478
|
444 |
0 |
if $self->{'_want_stemming'} |
|
481
|
0 |
444 |
$self->{'_case_sensitive'} ? : |
|
482
|
0 |
444 |
if ($self->{'_case_sensitive'}) { } |
|
498
|
0 |
40 |
unless @{$self->{'_file_types'};} > 0 |
|
499
|
40 |
0 |
if contained_in($suffix, @{$$self{'_file_types'};}) |
|
507
|
1 |
0 |
if ($self->{'_idf_filter_option'}) { } |
|
508
|
0 |
1 |
if (not $self->{'_normalized_doc_vecs'} and -s $self->{'_normalized_doc_vecs_db'}) |
|
523
|
0 |
0 |
if (not $self->{'_corpus_doc_vectors'} and -s $self->{'_doc_vectors_db'}) |
|
537
|
0 |
1 |
if $self->{'_debug'} |
|
538
|
0 |
1 |
if $self->{'_debug'} |
|
542
|
0 |
1 |
if $self->{'_debug'} |
|
556
|
0 |
1 |
if $self->{'_debug'} |
|
559
|
1 |
0 |
$self->{'_idf_filter_option'} ? : |
|
566
|
0 |
10 |
if $self->{'_debug'} |
|
578
|
1 |
0 |
if ($self->{'_break_camelcased_and_underscored'}) { } |
|
581
|
7 |
1 |
$1 ? : |
|
587
|
0 |
1 |
if $self->{'_debug'} |
|
589
|
0 |
1 |
unless scalar keys %{$$self{'_vocab_hist'};} |
|
591
|
0 |
1 |
unless scalar keys %{$$self{'_doc_vecs_trunc_lsa'};} |
|
597
|
2 |
5 |
if exists $self->{'_vocab_hist'}{lc $_} |
|
601
|
0 |
1 |
unless $query_word_count_total |
|
607
|
0 |
1 |
if $self->{'_debug'} |
|
611
|
0 |
1 |
if $self->{'_debug'} |
|
618
|
0 |
10 |
if $self->{'_debug'} |
|
619
|
5 |
5 |
if $dot_product->sclr > 0 |
|
621
|
0 |
1 |
if ($self->{'_debug'}) |
|
640
|
0 |
20 |
unless (open IN, $file) |
|
642
|
0 |
0 |
if $self->{'_debug'} |
|
646
|
126 |
730 |
if /^[ ]*\r?\n?$/ |
|
650
|
286 |
444 |
unless @clean_words |
|
653
|
444 |
0 |
if $self->{'_want_stemming'} |
|
657
|
0 |
444 |
$self->{'_case_sensitive'} ? : |
|
661
|
0 |
20 |
unless $self->{'_vocab_size'} == scalar keys %document_vector |
|
666
|
20 |
0 |
if ($self->{'_idf_filter_option'}) |
|
678
|
0 |
20 |
if (not $1 eq '') { } |
|
691
|
0 |
0 |
unless open IN, "$self->{'_working_directory'}/$self->{'_stop_words_file'}" |
|
694
|
0 |
0 |
if /^#/ |
|
695
|
0 |
0 |
if /^[ ]*\r?\n?$/ |
|
697
|
0 |
0 |
if exists $self->{'_vocab_hist'}{$_} |
|
709
|
24 |
0 |
$self->{'_idf_filter_option'} ? : |
|
712
|
24 |
0 |
$self->{'_idf_filter_option'} ? : |
|
728
|
10 |
14 |
if $product1 < $product2 |
|
729
|
1 |
13 |
if $product1 == $product2 |
|
730
|
13 |
0 |
if $product1 > $product2 |
|
738
|
18 |
0 |
$self->{'_idf_filter_option'} ? : |
|
771
|
0 |
0 |
unless $self->{'_query_file'} |
|
773
|
0 |
0 |
unless open IN, $self->{'_query_file'} |
|
776
|
0 |
0 |
unless $self->{'_relevancy_file'} |
|
778
|
0 |
0 |
if /^#/ |
|
779
|
0 |
0 |
if /^[ ]*\r?\n?$/ |
|
781
|
0 |
0 |
unless /^[ ]*q[0-9]+:/ |
|
785
|
0 |
0 |
unless $query |
|
788
|
0 |
0 |
if ($self->{'_debug'}) |
|
797
|
0 |
0 |
unless open OUT, ">$self->{'_relevancy_file'}" |
|
821
|
0 |
0 |
unless open IN, $self->{'_relevancy_file'} |
|
823
|
0 |
0 |
if /^#/ |
|
824
|
0 |
0 |
if /^[ ]*\r?\n?$/ |
|
826
|
0 |
0 |
unless /^[ ]*q[0-9]+[ ]*=>/ |
|
830
|
0 |
0 |
unless $relevancy_docs_string |
|
835
|
0 |
0 |
if ($self->{'_debug'}) |
|
846
|
0 |
0 |
unless scalar keys %{$$self{'_relevancy_estimates'};} |
|
872
|
0 |
0 |
if $self->{'_debug'} |
|
876
|
0 |
0 |
if $self->{'_debug'} |
|
877
|
0 |
0 |
if @matches |
|
880
|
0 |
0 |
if $self->{'_debug'} |
|
882
|
0 |
0 |
if $count >= $self->{'_relevancy_threshold'} |
|
891
|
0 |
0 |
unless $retrieval_type |
|
895
|
0 |
0 |
unless scalar keys %{$$self{'_relevancy_estimates'};} |
|
896
|
0 |
0 |
unless (scalar keys %{$$self{'_queries_for_relevancy'};}) |
|
898
|
0 |
0 |
unless open IN, $self->{'_query_file'} |
|
900
|
0 |
0 |
if /^#/ |
|
901
|
0 |
0 |
if /^[ ]*\r?\n?$/ |
|
903
|
0 |
0 |
unless /^[ ]*q[0-9]+:/ |
|
907
|
0 |
0 |
unless $query |
|
910
|
0 |
0 |
if ($self->{'_debug'}) |
|
919
|
0 |
0 |
if $self->{'_debug'} |
|
920
|
0 |
0 |
if $self->{'_debug'} |
|
923
|
0 |
0 |
unless defined $retrieval_type |
|
928
|
0 |
0 |
if ($retrieval_type eq 'vsm') { } |
|
|
0 |
0 |
elsif ($retrieval_type eq 'lsa') { } |
|
934
|
0 |
0 |
if ($@) |
|
946
|
0 |
0 |
if ($self->{'_debug'}) |
|
972
|
0 |
0 |
if ($self->{'_debug'}) |
|
979
|
0 |
0 |
unless @relevant_set |
|
982
|
0 |
0 |
unless @relevant_set |
|
984
|
0 |
0 |
if $self->{'_debug'} |
|
991
|
0 |
0 |
if $self->{'_debug'} |
|
1004
|
0 |
0 |
@retrieved_at_rank ? : |
|
1012
|
0 |
0 |
if $self->{'_debug'} |
|
1014
|
0 |
0 |
if $self->{'_debug'} |
|
1022
|
0 |
0 |
if $self->{'_debug'} |
|
1034
|
0 |
0 |
unless scalar keys %{$$self{'_avg_precision_for_queries'};} |
|
1049
|
0 |
0 |
unless scalar keys %{$$self{'_precision_for_queries'};} |
|
1069
|
0 |
0 |
unless scalar keys %{$$self{'_avg_precision_for_queries'};} |
|
1107
|
17 |
85 |
if ($param eq $legal) |
|
1112
|
0 |
17 |
if $found_match_flag == 0 |
|
1122
|
40 |
40 |
if $ele eq $_ |
|
1139
|
0 |
66 |
unless @$vec1 == @$vec2 |
|
1169
|
0 |
3346 |
if $debug |
|
1182
|
0 |
3346 |
if $debug |
|
1203
|
0 |
0 |
@common_elements ? : |