Module: Legion::CLI::Chat::WebSearch
- Defined in:
- lib/legion/cli/chat/web_search.rb
Defined Under Namespace
Classes: SearchError
Constant Summary collapse
- MAX_RESULTS = 5
- TIMEOUT = 10
- AUTO_FETCH = true
Class Method Summary collapse
- .duckduckgo_html(query, max_results) ⇒ Object
- .extract_real_url(ddg_url) ⇒ Object
- .fetch_top_result(url) ⇒ Object
- .parse_duckduckgo_results(html, max_results) ⇒ Object
- .search(query, max_results: MAX_RESULTS, auto_fetch: AUTO_FETCH) ⇒ Object
- .strip_tags(html) ⇒ Object
Class Method Details
.duckduckgo_html(query, max_results) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/legion/cli/chat/web_search.rb', line 29

# Sends the query to DuckDuckGo's HTML endpoint and hands the response body
# to parse_duckduckgo_results.
#
# @param query [String] search terms, sent form-encoded as the `q` parameter
# @param max_results [Integer] forwarded unchanged to parse_duckduckgo_results
# @return [Object] whatever parse_duckduckgo_results returns for the body
# @raise [SearchError] on a non-success HTTP status, a SocketError, or an
#   open/read timeout
def duckduckgo_html(query, max_results)
  uri = URI('https://html.duckduckgo.com/html/')
  uri.query = URI.encode_www_form(q: query)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.open_timeout = TIMEOUT
  http.read_timeout = TIMEOUT

  request = Net::HTTP::Get.new(uri)
  request['User-Agent'] = 'LegionIO/1.0 (CLI web search)'
  request['Accept'] = 'text/html'

  response = http.request(request)
  raise SearchError, "Search failed: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

  # The body is relabelled as UTF-8 on a copy; scrub then replaces any bytes
  # that are not valid UTF-8 so the regex scans in the parser cannot raise
  # "invalid byte sequence".
  body = response.body&.dup&.force_encoding('UTF-8')&.scrub || ''
  parse_duckduckgo_results(body, max_results)
rescue SocketError => e
  raise SearchError, "Connection failed: #{e.message}"
rescue Net::OpenTimeout, Net::ReadTimeout
  raise SearchError, "Search timed out (#{TIMEOUT}s)"
end
.extract_real_url(ddg_url) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/legion/cli/chat/web_search.rb', line 80

# Resolves a DuckDuckGo redirect link to the URL carried in its `uddg`
# query parameter.
#
# @param ddg_url [String] href taken from a result anchor
# @return [String, nil] the input unchanged when it does not mention
#   duckduckgo.com; the decoded `uddg` value when present; nil when the
#   parameter is missing or anything raises while extracting it
def extract_real_url(ddg_url)
  return ddg_url unless ddg_url.include?('duckduckgo.com')

  match = ddg_url.match(/uddg=([^&]+)/)
  return nil unless match

  URI.decode_www_form_component(match[1])
rescue StandardError => e
  # Best effort: a bad link is logged (when the logger is loaded) and dropped.
  Legion::Logging.debug("WebSearch#extract_real_url failed: #{e.message}") if defined?(Legion::Logging)
  nil
end
.fetch_top_result(url) ⇒ Object
97 98 99 100 101 102 103 |
# File 'lib/legion/cli/chat/web_search.rb', line 97

# Fetches the page behind a result URL via WebFetch, treating any
# StandardError as "no content".
#
# @param url [String] URL handed to WebFetch.fetch
# @return [Object, nil] the value returned by WebFetch.fetch, or nil when a
#   StandardError is raised (the error is logged at debug level if
#   Legion::Logging is defined)
def fetch_top_result(url)
  # Required here rather than at file load, so web_fetch is only loaded on
  # the first call.
  require 'legion/cli/chat/web_fetch'
  WebFetch.fetch(url)
rescue StandardError => e
  Legion::Logging.debug("WebSearch#fetch_top_result failed for #{url}: #{e.message}") if defined?(Legion::Logging)
  nil
end
.parse_duckduckgo_results(html, max_results) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/legion/cli/chat/web_search.rb', line 53

# Extracts result links and their snippets from DuckDuckGo's HTML results
# page.
#
# Title anchors (class "result__a") and snippet anchors (class
# "result__snippet") are walked in document order, so each snippet is
# attached to the result anchor that precedes it. A snippet that follows a
# skipped result (blank title or unresolvable URL) is discarded instead of
# shifting onto the next result.
#
# @param html [String] body of the results page
# @param max_results [Integer] upper bound on returned entries; zero or less
#   yields an empty array
# @return [Array<Hash>] hashes with :title, :url and :snippet keys; :snippet
#   is '' when no snippet anchor followed the result
def parse_duckduckgo_results(html, max_results)
  results = []
  awaiting_snippet = nil # most recent accepted result that has no snippet yet

  anchor = %r{<a\b([^>]*class="result__(a|snippet)"[^>]*)>(.*?)</a>}mi
  html.scan(anchor) do |attrs, kind, inner|
    if kind.casecmp?('a')
      # Stop only when the NEXT title shows up, so the last accepted result
      # still gets the snippet that trails it.
      break if results.length >= max_results

      awaiting_snippet = nil
      href = attrs[/href="([^"]*)"/i, 1]
      next unless href

      title = strip_tags(inner).strip
      next if title.empty?

      url = extract_real_url(href)
      next unless url

      awaiting_snippet = { title: title, url: url, snippet: '' }
      results << awaiting_snippet
    elsif awaiting_snippet
      awaiting_snippet[:snippet] = strip_tags(inner).strip
      awaiting_snippet = nil
    end
  end

  results
end
.search(query, max_results: MAX_RESULTS, auto_fetch: AUTO_FETCH) ⇒ Object
19 20 21 22 23 24 25 26 27 |
# File 'lib/legion/cli/chat/web_search.rb', line 19

# Runs a web search and optionally fetches the content of the top hit.
#
# @param query [String] search terms
# @param max_results [Integer] forwarded to duckduckgo_html
# @param auto_fetch [Boolean] when truthy, the first result's URL is passed
#   to fetch_top_result
# @return [Hash] { query:, results:, fetched_content: }; :fetched_content is
#   nil when auto_fetch is falsy or fetch_top_result returned nil
# @raise [SearchError] when the search yields no results (and whatever
#   duckduckgo_html raises)
def search(query, max_results: MAX_RESULTS, auto_fetch: AUTO_FETCH)
  results = duckduckgo_html(query, max_results)
  raise SearchError, 'No results found.' if results.empty?

  # results is known to be non-empty past the guard above.
  fetched_content = auto_fetch ? fetch_top_result(results.first[:url]) : nil

  { query: query, results: results, fetched_content: fetched_content }
end
.strip_tags(html) ⇒ Object
92 93 94 95 |
# File 'lib/legion/cli/chat/web_search.rb', line 92

# Removes HTML tags from a fragment and decodes a small set of entities.
#
# Entities are decoded in one pass, so text that was escaped twice
# (e.g. "&amp;lt;") is unescaped exactly once ("&lt;") rather than
# collapsing all the way to "<".
#
# @param html [String] HTML fragment
# @return [String] the text with tags removed and &amp; &lt; &gt; &quot;
#   &#39; &nbsp; replaced by their characters (&nbsp; becomes a plain space)
def strip_tags(html)
  entities = {
    '&amp;' => '&',
    '&lt;' => '<',
    '&gt;' => '>',
    '&quot;' => '"',
    '&#39;' => "'",
    '&nbsp;' => ' '
  }

  html.gsub(/<[^>]+>/, '').gsub(/&(?:amp|lt|gt|quot|#39|nbsp);/, entities)
end