File Coverage

blib/lib/SignalWire/Agents/Skills/Builtin/Spider.pm
Criterion Covered Total %
statement 23 29 79.3
branch n/a
condition n/a
subroutine 9 11 81.8
pod 0 4 0.0
total 32 44 72.7


line stmt bran cond sub pod time code
1             package SignalWire::Agents::Skills::Builtin::Spider;
2 3     3   20 use strict;
  3         6  
  3         154  
3 3     3   18 use warnings;
  3         6  
  3         170  
4 3     3   17 use Moo;
  3         6  
  3         21  
5             extends 'SignalWire::Agents::Skills::SkillBase';
6              
7 3     3   1144 use SignalWire::Agents::Skills::SkillRegistry;
  3         8  
  3         1883  
8             SignalWire::Agents::Skills::SkillRegistry->register_skill('spider', __PACKAGE__);
9              
10             has '+skill_name' => (default => sub { 'spider' });
11             has '+skill_description' => (default => sub { 'Fast web scraping and crawling capabilities' });
12             has '+supports_multiple_instances' => (default => sub { 1 });
13              
14 4     4 0 2270 sub setup { 1 }
15              
16             sub register_tools {
17 3     3 0 603 my ($self) = @_;
18              
19             $self->define_tool(
20             name => 'scrape_url',
21             description => 'Scrape content from a URL',
22             parameters => {
23             type => 'object',
24             properties => {
25             url => { type => 'string', description => 'The URL to scrape' },
26             },
27             required => ['url'],
28             },
29             handler => sub {
30 1     1   2 my ($args, $raw) = @_;
31 1         712 require SignalWire::Agents::SWAIG::FunctionResult;
32 1         8 return SignalWire::Agents::SWAIG::FunctionResult->new(
33             response => "Scraped content from: $args->{url}"
34             );
35             },
36 3         49 );
37              
38             $self->define_tool(
39             name => 'crawl_site',
40             description => 'Crawl a website starting from a URL',
41             parameters => {
42             type => 'object',
43             properties => {
44             start_url => { type => 'string', description => 'Starting URL for crawl' },
45             },
46             required => ['start_url'],
47             },
48             handler => sub {
49 0     0   0 my ($args, $raw) = @_;
50 0         0 require SignalWire::Agents::SWAIG::FunctionResult;
51 0         0 return SignalWire::Agents::SWAIG::FunctionResult->new(
52             response => "Crawling site from: $args->{start_url}"
53             );
54             },
55 3         36 );
56              
57             $self->define_tool(
58             name => 'extract_structured_data',
59             description => 'Extract structured data from a URL',
60             parameters => {
61             type => 'object',
62             properties => {
63             url => { type => 'string', description => 'URL to extract data from' },
64             },
65             required => ['url'],
66             },
67             handler => sub {
68 0     0   0 my ($args, $raw) = @_;
69 0         0 require SignalWire::Agents::SWAIG::FunctionResult;
70 0         0 return SignalWire::Agents::SWAIG::FunctionResult->new(
71             response => "Extracted structured data from: $args->{url}"
72             );
73             },
74 3         30 );
75             }
76              
77             sub get_hints {
78 1     1 0 7 return ['scrape', 'crawl', 'extract', 'web page', 'website', 'spider'];
79             }
80              
81             sub get_parameter_schema {
82             return {
83 1     1 0 3960 %{ SignalWire::Agents::Skills::SkillBase->get_parameter_schema },
  1         6  
84             delay => { type => 'number' },
85             concurrent_requests => { type => 'integer' },
86             timeout => { type => 'integer' },
87             max_pages => { type => 'integer' },
88             max_depth => { type => 'integer' },
89             };
90             }
91              
92             1;