Kward

Class: Kward::WebFetch

Inherits:
Object
  • Object
show all
Defined in:
lib/kward/tools/search/web_fetch.rb

Overview

Fetches specific web resources for agent research workflows.

Constant Summary collapse

# Byte limit of 16 KiB. NOTE(review): presumably the default applied when a
# caller gives no max_bytes — confirm where it is used.
DEFAULT_MAX_BYTES =
16 * 1024
# Byte limit of 128 KiB. NOTE(review): presumably the upper bound allowed for
# a caller-supplied max_bytes — confirm where it is used.
MAX_MAX_BYTES =
128 * 1024
# NOTE(review): presumably the maximum number of HTTP redirects followed for
# one fetch — confirm where it is used.
MAX_REDIRECTS =
5
# NOTE(review): presumably the HTTP timeout in seconds — confirm which
# timeouts (open/read) it is applied to.
HTTP_TIMEOUT_SECONDS =
10

Instance Method Summary collapse

Constructor Details

#initialize(http_client: WebSearch::NetHttpClient.new) ⇒ WebFetch

Creates a fetcher for web content and raw resources.



15
16
17
# File 'lib/kward/tools/search/web_fetch.rb', line 15

# Stores the HTTP client this fetcher keeps in @http_client.
#
# @param http_client [Object] client to keep; defaults to a freshly built
#   WebSearch::NetHttpClient. NOTE(review): the interface the client must
#   implement is not visible here — confirm against the code that uses
#   @http_client.
def initialize(http_client: WebSearch::NetHttpClient.new)
  @http_client = http_client
end

Instance Method Details

#fetch_content(args) ⇒ Object

Fetches a URL and extracts readable text for human-facing pages.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/kward/tools/search/web_fetch.rb', line 20

# Fetches a URL and reports the readable text extracted from the response.
#
# Options are looked up through args_value with these keys: "url" (required),
# "max_bytes" or "maxBytes", and "extract" (falls back to "auto").
#
# @param args [Object] tool arguments; only ever accessed through args_value
# @return [String] a report listing URL, content type, byte count and the
#   extracted text; an "Error: ..." message when validation fails or an
#   exception is rescued; or the String that fetch_url returned
def fetch_content(args)
  target = args_value(args, "url").to_s.strip
  return "Error: url is required" if target.empty?

  requested_limit = args_value(args, "max_bytes") || args_value(args, "maxBytes")
  byte_limit = bounded_max_bytes(requested_limit)
  mode = normalize_extract(args_value(args, "extract") || "auto")
  return "Error: extract must be one of: auto, text, markdown" unless mode

  result = fetch_url(target, max_bytes: byte_limit)
  # fetch_url may hand back a String instead of a response hash (presumably
  # its own error message); pass it straight through to the caller.
  return result if result.is_a?(String)

  raw_body = result[:body].to_s
  media_type = header_value(result[:headers], "content-type")
  readable = extract_readable_text(raw_body, content_type: media_type, mode: mode)
  # The limit is applied again to the extracted text, not only to the fetch.
  readable = truncate_bytes(readable, byte_limit)

  lines = []
  lines << "# Fetched content"
  lines << "- URL: #{result[:url]}"
  type_label = media_type.empty? ? "unknown" : media_type
  lines << "- Content type: #{type_label}"
  lines << "- Bytes returned: #{readable.bytesize}"
  lines << ""
  lines << (readable.empty? ? "(No readable text extracted.)" : readable)
  lines.join("\n")
rescue => error
  # Any StandardError is reported as text rather than raised.
  "Error: fetch_content failed: #{error.message}"
end

#fetch_raw(args) ⇒ Object

Fetches a URL and returns bounded raw response content.



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/kward/tools/search/web_fetch.rb', line 49

# Fetches a URL and reports the response body, cut down to the byte limit.
#
# Options are looked up through args_value with these keys: "url" (required),
# "max_bytes" or "maxBytes", and "accept" (sent as "*/*" when blank).
#
# @param args [Object] tool arguments; only ever accessed through args_value
# @return [String] a report listing URL, content type, byte count and the
#   body; an "Error: ..." message when the url is missing or an exception is
#   rescued; or the String that fetch_url returned
def fetch_raw(args)
  target = args_value(args, "url").to_s.strip
  return "Error: url is required" if target.empty?

  requested_limit = args_value(args, "max_bytes") || args_value(args, "maxBytes")
  byte_limit = bounded_max_bytes(requested_limit)
  accept_header = args_value(args, "accept").to_s.strip
  # A missing or blank accept value means "any content type".
  accept_header = "*/*" if accept_header.empty?

  result = fetch_url(target, max_bytes: byte_limit, accept: accept_header)
  # fetch_url may hand back a String instead of a response hash (presumably
  # its own error message); pass it straight through to the caller.
  return result if result.is_a?(String)

  payload = truncate_bytes(result[:body].to_s, byte_limit)
  media_type = header_value(result[:headers], "content-type")

  lines = []
  lines << "# Fetched raw content"
  lines << "- URL: #{result[:url]}"
  type_label = media_type.empty? ? "unknown" : media_type
  lines << "- Content type: #{type_label}"
  lines << "- Bytes returned: #{payload.bytesize}"
  lines << ""
  lines << payload
  lines.join("\n")
rescue => error
  # Any StandardError is reported as text rather than raised.
  "Error: fetch_raw failed: #{error.message}"
end