Branch Coverage

blib/lib/CrawlerCommons/RobotRulesParser.pm
Criterion Covered Total %
branch 77 82 93.9


line true false branch
108 0 2 unless $Log::Log4perl::Logger::INITIALIZED
195 2 71 if ($content // '') eq ''
203 1 70 if ($content_len >= 3 and substr($content, 0, 1) eq "\357" and substr($content, 1, 1) eq "\273" and substr($content, 2, 1) eq "\277") { }
1 69 elsif ($content_len >= 2 and substr($content, 0, 1) eq "\377" and substr($content, 1, 1) eq "\376") { }
1 136 elsif ($content_len >= 2 and substr($content, 0, 1) eq "\376" and substr($content, 1, 1) eq "\377") { }
227 0 71 ($content_type // '') ne '' && lc($content_type // '') =~ m[^text/html] ? :
231 3 68 if ($is_html_type or ($content // '') =~ /$SIMPLE_HTML_PATTERN/)
232 1 2 if (not +($content // '') =~ /$USER_AGENT_PATTERN/) { }
240 0 2 if ($is_html_type) { }
263 30 1805 if $has_html
266 1799 36 if (my $hash_idx = index($line, '#'))
267 13 1786 if $hash_idx >= 0
272 274 1561 if length $line == 0
276 923 638 if ($robot_token->directive->is_user_agent)
281 494 144 if ($robot_token->directive->is_disallow)
286 57 87 if ($robot_token->directive->is_allow)
291 17 70 if ($robot_token->directive->is_crawl_delay)
296 19 51 if ($robot_token->directive->is_sitemap)
301 1 50 if ($robot_token->directive->is_http)
306 13 37 if ($robot_token->directive->is_missing)
319 26 11 if ($robot_token->directive->is_unknown)
335 1 69 if ($robot_rules->crawl_delay > $MAX_CRAWL_DELAY) { }
354 138 413 if $state->is_skip_agents
358 148 265 unless $state->is_adding_rules
364 11 254 if (length $path == 0) { }
386 5 12 if $state->is_skip_agents
390 3 9 unless $state->is_adding_rules
410 1 0 if (index $url_fragment, 'sitemap') { }
441 20 0 if $host ne ''
451 121 802 if ($state->is_matched_real_name)
452 41 80 if $state->is_finished_agent_fields
456 62 740 if ($state->is_finished_agent_fields)
465 39 1323 if ($agent_name eq '*' and not $state->is_matched_wildcard) { }
1315 8 elsif ($agent_name ne '') { }
471 33 1307 if (index($target_name_split, $agent_name) == 0)
488 21 18 if $warning_count == 1
491 34 5 if $warning_count < $MAX_WARNINGS
503 1522 39 if ($directive =~ /^acap\-/ or 'CrawlerCommons::RobotDirective'->directive_exists($directive)) { }
508 10 1512 unless defined $data
522 2 1520 $directive =~ /^acap-/i ? :
531 26 13 $lower_line =~ /[ \t]*:[ \t]*(.*)/ ? :