Module: Legion::CLI::Chat::WebSearch

Defined in:
lib/legion/cli/chat/web_search.rb

Defined Under Namespace

Classes: SearchError

Constant Summary

MAX_RESULTS =
5
TIMEOUT =
10
AUTO_FETCH =
true

Class Method Summary

Class Method Details

.duckduckgo_html(query, max_results) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/legion/cli/chat/web_search.rb', line 29

# Runs +query+ against the DuckDuckGo HTML endpoint and parses the response.
#
# @param query [String] search terms, sent as the +q+ query parameter
# @param max_results [Integer] passed through to parse_duckduckgo_results
# @return [Object] whatever parse_duckduckgo_results returns for the page
# @raise [SearchError] on a non-success HTTP status, a SocketError, or an
#   open/read timeout
def duckduckgo_html(query, max_results)
  uri = URI('https://html.duckduckgo.com/html/')
  uri.query = URI.encode_www_form(q: query)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.open_timeout = TIMEOUT
  http.read_timeout = TIMEOUT

  request = Net::HTTP::Get.new(uri)
  request['User-Agent'] = 'LegionIO/1.0 (CLI web search)'
  request['Accept'] = 'text/html'

  response = http.request(request)
  raise SearchError, "Search failed: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

  # The body is only *tagged* as UTF-8 here, not validated. Scrub replaces
  # any invalid byte sequences so the regex scans in the parser cannot raise
  # "invalid byte sequence in UTF-8" on a malformed page.
  body = response.body.to_s.dup.force_encoding('UTF-8').scrub
  parse_duckduckgo_results(body, max_results)
rescue SocketError => e
  raise SearchError, "Connection failed: #{e.message}"
rescue Net::OpenTimeout, Net::ReadTimeout
  raise SearchError, "Search timed out (#{TIMEOUT}s)"
end

.extract_real_url(ddg_url) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/legion/cli/chat/web_search.rb', line 80

# Resolves a result link to its destination URL.
#
# Links that do not mention 'duckduckgo.com' are returned untouched. For the
# rest, the destination is read from the +uddg+ query parameter and
# form-decoded.
#
# @param ddg_url [String] href taken from a result anchor
# @return [String, nil] the destination URL; nil when no +uddg+ parameter is
#   present or when any StandardError occurs while extracting it
def extract_real_url(ddg_url)
  if ddg_url.include?('duckduckgo.com')
    encoded_target = ddg_url[/uddg=([^&]+)/, 1]
    encoded_target && URI.decode_www_form_component(encoded_target)
  else
    ddg_url
  end
rescue StandardError => error
  if defined?(Legion::Logging)
    Legion::Logging.debug("WebSearch#extract_real_url failed: #{error.message}")
  end
  nil
end

.fetch_top_result(url) ⇒ Object



97
98
99
100
101
102
103
# File 'lib/legion/cli/chat/web_search.rb', line 97

# Best-effort fetch of the page behind +url+.
#
# The WebFetch helper is required lazily, on first use, and the work is
# delegated to WebFetch.fetch. A StandardError is logged at debug level
# (when Legion::Logging is defined) and turned into a nil result.
#
# @param url [String] address handed to WebFetch.fetch
# @return [Object, nil] the value returned by WebFetch.fetch, or nil on failure
def fetch_top_result(url)
  require 'legion/cli/chat/web_fetch'
  WebFetch.fetch(url)
rescue StandardError => error
  if defined?(Legion::Logging)
    Legion::Logging.debug("WebSearch#fetch_top_result failed for #{url}: #{error.message}")
  end
  nil
end

.parse_duckduckgo_results(html, max_results) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/legion/cli/chat/web_search.rb', line 53

# Extracts up to +max_results+ results from a DuckDuckGo HTML results page.
#
# Each result comes from an anchor with class "result__a". Its snippet is the
# first "result__snippet" anchor found between that link and the next result
# link, so a skipped link or a result without a snippet can never shift
# snippets onto the wrong result.
#
# @param html [String] results page markup
# @param max_results [Integer] maximum number of results to return
# @return [Array<Hash>] hashes with :title, :url and :snippet (snippet is ''
#   when the result has none); empty when max_results is not positive
def parse_duckduckgo_results(html, max_results)
  return [] unless max_results.positive?

  link_pattern = %r{<a[^>]+class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>}mi
  snippet_pattern = %r{<a[^>]+class="result__snippet"[^>]*>(.*?)</a>}mi

  # Keep the MatchData so each link's position in the page is known.
  links = []
  html.scan(link_pattern) { links << Regexp.last_match }

  results = []
  links.each_with_index do |link, index|
    clean_title = strip_tags(link[2]).strip
    next if clean_title.empty?

    real_url = extract_real_url(link[1])
    next unless real_url

    # Only look for the snippet inside this result's own slice of the page.
    following = links[index + 1]
    slice_end = following ? following.begin(0) : html.length
    raw_snippet = html[link.end(0)...slice_end][snippet_pattern, 1]

    results << {
      title: clean_title,
      url: real_url,
      snippet: raw_snippet ? strip_tags(raw_snippet).strip : ''
    }
    break if results.length >= max_results
  end

  results
end

.search(query, max_results: MAX_RESULTS, auto_fetch: AUTO_FETCH) ⇒ Object

Raises:

(SearchError) — if the search request fails or no results are found



19
20
21
22
23
24
25
26
27
# File 'lib/legion/cli/chat/web_search.rb', line 19

# Searches for +query+ and optionally fetches the first hit's content.
#
# @param query [String] search terms
# @param max_results [Integer] forwarded to duckduckgo_html
# @param auto_fetch [Boolean] when truthy, the first result's URL is passed
#   to fetch_top_result
# @return [Hash] :query, :results and :fetched_content (nil when auto_fetch
#   is off or fetch_top_result returned nil)
# @raise [SearchError] when the result list is empty
def search(query, max_results: MAX_RESULTS, auto_fetch: AUTO_FETCH)
  hits = duckduckgo_html(query, max_results)
  raise SearchError, 'No results found.' if hits.empty?

  # hits is guaranteed non-empty past the raise above.
  top_page = auto_fetch ? fetch_top_result(hits.first[:url]) : nil

  { query: query, results: hits, fetched_content: top_page }
end

.strip_tags(html) ⇒ Object



92
93
94
95
# File 'lib/legion/cli/chat/web_search.rb', line 92

# Removes HTML tags from +html+ and decodes character references.
#
# Decoding happens in a single pass so already-decoded text is never decoded
# again: "&amp;lt;" yields the literal "&lt;", not "<". Decimal and hex
# numeric references (e.g. "&#39;", "&#x27;") are decoded as well; a numeric
# reference that is not a valid character in the string's encoding is left
# as written.
#
# @param html [String] markup fragment
# @return [String] plain text with tags removed and entities decoded
def strip_tags(html)
  named = { 'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'apos' => "'", 'nbsp' => ' ' }

  html.gsub(/<[^>]+>/, '').gsub(/&(?:(amp|lt|gt|quot|apos|nbsp)|#(\d+)|#[xX](\h+));/) do |entity|
    name, decimal, hex = Regexp.last_match.captures
    next named[name] if name

    begin
      (decimal ? decimal.to_i : hex.to_i(16)).chr(html.encoding)
    rescue RangeError
      # Out-of-range or otherwise invalid code point: keep the original text.
      entity
    end
  end
end