Class: SignalWire::Skills::Builtin::SpiderSkill

Inherits:
SkillBase
  • Object
show all
Defined in:
lib/signalwire/skills/builtin/spider.rb

Instance Attribute Summary

Attributes inherited from SkillBase

#agent, #logger, #params, #swaig_fields

Instance Method Summary collapse

Methods inherited from SkillBase

#cleanup, #get_global_data, #get_param, #get_prompt_sections, #initialize, #required_env_vars, #version

Constructor Details

This class inherits a constructor from SignalWire::Skills::SkillBase

Instance Method Details

#description ⇒ Object



14
# File 'lib/signalwire/skills/builtin/spider.rb', line 14

# Short human-readable summary of this skill.
#
# @return [String] one-line description of what the skill offers
def description
  'Fast web scraping and crawling capabilities'
end

#get_hints ⇒ Object



53
54
55
# File 'lib/signalwire/skills/builtin/spider.rb', line 53

# Returns the hint terms advertised by this skill.
#
# @return [Array<String>] six entries; "web page" is a single two-word entry
def get_hints
  ['scrape', 'crawl', 'extract', 'web page', 'website', 'spider']
end

#get_parameter_schema ⇒ Object



57
58
59
60
61
62
63
# File 'lib/signalwire/skills/builtin/spider.rb', line 57

# Describes the configuration parameters this skill accepts.
#
# @return [Hash{String => Hash}] schema keyed by parameter name; 'timeout' and
#   'max_text_length' are integers with defaults, 'user_agent' is a string
def get_parameter_schema
  # Both integer parameters share the same shape and differ only in default.
  integer_with_default = ->(value) { { 'type' => 'integer', 'default' => value } }

  {
    'timeout' => integer_with_default.call(5),
    'max_text_length' => integer_with_default.call(10_000),
    'user_agent' => { 'type' => 'string' }
  }
end

#instance_key ⇒ Object



26
27
28
# File 'lib/signalwire/skills/builtin/spider.rb', line 26

# Builds the key identifying this skill instance from its 'tool_name'
# parameter, falling back to 'spider' as the default passed to get_param.
#
# @return [String] "spider_" followed by the tool name
def instance_key
  tool_name = get_param('tool_name', default: 'spider')
  "spider_#{tool_name}"
end

#name ⇒ Object



13
# File 'lib/signalwire/skills/builtin/spider.rb', line 13

# Identifier of this skill.
#
# @return [String] always 'spider'
def name
  'spider'
end

#register_tools ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/signalwire/skills/builtin/spider.rb', line 30

# Builds the definitions of the three tools exposed by this skill.
#
# Every tool name is prefixed with @tool_prefix, and each tool takes a single
# string parameter. Handlers are bound Method objects for handle_scrape,
# handle_crawl and handle_extract on this instance.
#
# @return [Array<Hash>] tool definitions with :name, :description,
#   :parameters and :handler keys
def register_tools
  # [name suffix, tool description, parameter name, parameter description, handler]
  tool_specs = [
    ['scrape_url', 'Extract text content from a single web page',
     'url', 'The URL to scrape', :handle_scrape],
    ['crawl_site', 'Crawl multiple pages starting from a URL',
     'start_url', 'Starting URL for the crawl', :handle_crawl],
    ['extract_structured_data', 'Extract specific data from a web page using selectors',
     'url', 'The URL to scrape', :handle_extract]
  ]

  tool_specs.map do |suffix, summary, param_name, param_summary, handler_name|
    {
      name: "#{@tool_prefix}#{suffix}",
      description: summary,
      parameters: { param_name => { 'type' => 'string', 'description' => param_summary } },
      handler: method(handler_name)
    }
  end
end

#setup ⇒ Object



17
18
19
20
21
22
23
24
# File 'lib/signalwire/skills/builtin/spider.rb', line 17

# Reads this skill's configuration through get_param and stores it in
# instance variables.
#
# Sets @max_text_length and @timeout (both coerced with to_i), @user_agent,
# and @tool_prefix. A non-empty 'tool_name' gets a trailing underscore so it
# can be prepended directly to tool names; an empty one is left as-is.
#
# @return [true] always
def setup
  @max_text_length = get_param('max_text_length', default: 10_000).to_i
  @timeout = get_param('timeout', default: 5).to_i
  @user_agent = get_param(
    'user_agent',
    default: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
  )

  @tool_prefix = get_param('tool_name', default: '')
  # No tool name configured: keep the empty prefix untouched.
  return true if @tool_prefix.empty?

  @tool_prefix = "#{@tool_prefix}_"
  true
end

#supports_multiple_instances? ⇒ Boolean

Returns:

  • (Boolean)


15
# File 'lib/signalwire/skills/builtin/spider.rb', line 15

# Whether this skill supports multiple instances.
#
# @return [Boolean] always true
def supports_multiple_instances?
  true
end