Class: Relaton::Cie::BrowserAgent

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton/cie/data_fetcher.rb

Overview

Thin Ferrum-backed HTTP agent that mimics the Mechanize#get interface used elsewhere in DataFetcher. Drives headless Chrome with stealth tweaks so the CIE catalogue host (Cloudflare-protected accuristech) serves real HTML instead of a “Just a moment…” challenge.

Constant Summary collapse

UA =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " \
"(KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
CHALLENGE_MARKERS =
["Just a moment", "challenge-platform"].freeze
MAX_CHALLENGE_WAIT =
30

Instance Method Summary collapse

Constructor Details

#initialize ⇒ BrowserAgent

Returns a new instance of BrowserAgent.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/relaton/cie/data_fetcher.rb', line 24

# Starts a headless Chrome session via Ferrum and prepares it for fetching:
# sets browser-like request headers and registers a script that runs on
# every new document to hide common automation indicators.
#
# @return [BrowserAgent] a new instance of BrowserAgent
def initialize
  # Launch settings handed to Ferrum. The entries under :browser_options are
  # presumably forwarded to Chrome as command-line switches (nil meaning a
  # bare flag) -- confirm against the Ferrum documentation.
  launch_options = {
    headless: true,
    timeout: 90,
    process_timeout: 90,
    window_size: [1366, 768],
    browser_options: {
      "disable-blink-features" => "AutomationControlled",
      "disable-quic" => nil,
      "no-sandbox" => nil
    }
  }

  # Extra request headers, including the desktop Chrome user agent in UA.
  request_headers = {
    "Accept-Language" => "en-US,en;q=0.9",
    "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9," \
                "image/webp,*/*;q=0.8",
    "User-Agent" => UA
  }

  # Pre-mask the most common headless-Chrome tells before any page JS runs.
  stealth_script = <<~JS
    Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
    Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
    Object.defineProperty(navigator, 'plugins', { get: () => [1,2,3,4,5] });
    window.chrome = { runtime: {} };
  JS

  @browser = Ferrum::Browser.new(**launch_options)
  @browser.headers.set(request_headers)
  @browser.evaluate_on_new_document(stealth_script)
end

Instance Method Details

#get(url) ⇒ Object



51
52
53
54
55
# File 'lib/relaton/cie/data_fetcher.rb', line 51

# Navigates the browser to +url+, runs the challenge wait step, and parses
# the resulting page body with Nokogiri.
#
# @param url [String] address to load (passed straight to the browser)
# @return [Object] whatever Nokogiri::HTML returns for the page body
def get(url)
  @browser.go_to(url)
  # NOTE(review): wait_for_challenge is defined outside this listing;
  # presumably it waits until the anti-bot interstitial is gone -- confirm.
  wait_for_challenge
  page_markup = @browser.body
  Nokogiri::HTML(page_markup)
end

#quit ⇒ Object



57
58
59
60
61
# File 'lib/relaton/cie/data_fetcher.rb', line 57

# Shuts down the underlying browser, if there is one, and drops the
# reference to it. Calling it again afterwards is a no-op that returns nil.
#
# @return [Object, nil] the result of the browser's own quit, or nil when
#   no browser is held
def quit
  return if @browser.nil?

  begin
    @browser.quit
  ensure
    # Clear the reference even when the browser's quit raises.
    @browser = nil
  end
end