Class: Kward::WebFetch
- Inherits:
-
Object
- Object
- Kward::WebFetch
- Defined in:
- lib/kward/tools/search/web_fetch.rb
Overview
Fetches specific web resources for agent research workflows.
Constant Summary collapse
- DEFAULT_MAX_BYTES =
16 * 1024
- MAX_MAX_BYTES =
128 * 1024
- MAX_REDIRECTS =
5
- HTTP_TIMEOUT_SECONDS =
10
Instance Method Summary collapse
-
#fetch_content(args) ⇒ Object
Fetches a URL and extracts readable text for human-facing pages.
-
#fetch_raw(args) ⇒ Object
Fetches a URL and returns bounded raw response content.
-
#initialize(http_client: WebSearch::NetHttpClient.new) ⇒ WebFetch
constructor
Creates a fetcher for web content and raw resources.
Constructor Details
#initialize(http_client: WebSearch::NetHttpClient.new) ⇒ WebFetch
Creates a fetcher for web content and raw resources.
15 16 17 |
# File 'lib/kward/tools/search/web_fetch.rb', line 15

# Creates a fetcher for web content and raw resources.
#
# @param http_client [Object] client object kept in +@http_client+;
#   when omitted, a new +WebSearch::NetHttpClient+ is built.
def initialize(http_client: WebSearch::NetHttpClient.new)
  @http_client = http_client
end
Instance Method Details
#fetch_content(args) ⇒ Object
Fetches a URL and extracts readable text for human-facing pages.
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/kward/tools/search/web_fetch.rb', line 20

# Fetches a URL and extracts readable text for human-facing pages.
#
# Arguments are read through +args_value+ under these keys:
# - "url" (required): converted to a string and stripped; a blank value
#   produces an error string.
# - "max_bytes" or "maxBytes": byte budget, passed through
#   +bounded_max_bytes+.
# - "extract": extraction mode, "auto" when absent; passed through
#   +normalize_extract+, whose falsy result produces an error string.
#
# @param args [Object] tool-call arguments (presumably a Hash — confirm
#   against +args_value+)
# @return [String] a text report (heading line, bulleted URL / content
#   type / byte count, blank line, extracted text), or a string starting
#   with "Error:". Any StandardError raised while fetching or extracting
#   is rescued and reported in the returned string instead of propagating.
def fetch_content(args)
  url = args_value(args, "url").to_s.strip
  return "Error: url is required" if url.empty?

  max_bytes = bounded_max_bytes(args_value(args, "max_bytes") || args_value(args, "maxBytes"))
  extract = normalize_extract(args_value(args, "extract") || "auto")
  return "Error: extract must be one of: auto, text, markdown" unless extract

  response = fetch_url(url, max_bytes: max_bytes)
  # A String result is handed back unchanged (presumably an error message
  # produced by fetch_url — confirm against its definition).
  return response if response.is_a?(String)

  body = response[:body].to_s
  content_type = header_value(response[:headers], "content-type")
  text = extract_readable_text(body, content_type: content_type, mode: extract)
  # The byte budget is applied to the extracted text, so the reported
  # byte count below describes what is actually returned.
  text = truncate_bytes(text, max_bytes)

  [
    "# Fetched content",
    "- URL: #{response[:url]}",
    "- Content type: #{content_type.empty? ? "unknown" : content_type}",
    "- Bytes returned: #{text.bytesize}",
    "",
    text.empty? ? "(No readable text extracted.)" : text
  ].join("\n")
rescue StandardError => e
  "Error: fetch_content failed: #{e.message}"
end
#fetch_raw(args) ⇒ Object
Fetches a URL and returns bounded raw response content.
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/kward/tools/search/web_fetch.rb', line 49

# Fetches a URL and returns bounded raw response content.
#
# Arguments are read through +args_value+ under these keys:
# - "url" (required): converted to a string and stripped; a blank value
#   produces an error string.
# - "max_bytes" or "maxBytes": byte budget, passed through
#   +bounded_max_bytes+.
# - "accept": value forwarded to +fetch_url+ as +accept:+; "*/*" is used
#   when it is absent or blank.
#
# @param args [Object] tool-call arguments (presumably a Hash — confirm
#   against +args_value+)
# @return [String] a text report (heading line, bulleted URL / content
#   type / byte count, blank line, body truncated to the byte budget), or
#   a string starting with "Error:". Any StandardError raised while
#   fetching is rescued and reported in the returned string instead of
#   propagating.
def fetch_raw(args)
  url = args_value(args, "url").to_s.strip
  return "Error: url is required" if url.empty?

  max_bytes = bounded_max_bytes(args_value(args, "max_bytes") || args_value(args, "maxBytes"))
  accept = args_value(args, "accept").to_s.strip

  response = fetch_url(url, max_bytes: max_bytes, accept: accept.empty? ? "*/*" : accept)
  # A String result is handed back unchanged (presumably an error message
  # produced by fetch_url — confirm against its definition).
  return response if response.is_a?(String)

  body = truncate_bytes(response[:body].to_s, max_bytes)
  content_type = header_value(response[:headers], "content-type")

  [
    "# Fetched raw content",
    "- URL: #{response[:url]}",
    "- Content type: #{content_type.empty? ? "unknown" : content_type}",
    "- Bytes returned: #{body.bytesize}",
    "",
    body
  ].join("\n")
rescue StandardError => e
  "Error: fetch_raw failed: #{e.message}"
end