Class: Archaeo::HttpClient

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/http_client.rb

Overview

HTTP client with retry logic, gzip decompression, rotating realistic User-Agent profiles, and connection pooling.

Injected via constructor for testability. Connections are reused across requests to the same host for improved performance.

Defined Under Namespace

Classes: Response, RetriableStatusError

Constant Summary collapse

# Default for the constructor's +timeout:+ option.
# NOTE(review): presumably seconds — confirm where the value is applied.
DEFAULT_TIMEOUT =
30
# Default for the constructor's +max_retries:+ option.
DEFAULT_MAX_RETRIES =
3
# Default for the constructor's +retry_delay:+ option.
# NOTE(review): presumably seconds; whether the delay grows between
# attempts is not visible here.
DEFAULT_RETRY_DELAY =
2
# Reported by #pool_stats as +:max_pool_size+.
# NOTE(review): presumably the cap on pooled connections — the enforcement
# code is not visible here.
MAX_POOL_SIZE =
8
# NOTE(review): presumably how long (in seconds) an unused pooled connection
# is kept before being closed — confirm against the pooling code.
MAX_IDLE_TIME =
60
# HTTP status codes: 429 Too Many Requests, 502 Bad Gateway,
# 503 Service Unavailable, 504 Gateway Timeout.
# NOTE(review): presumably responses with these statuses are retried.
RETRIABLE_STATUSES =
[429, 502, 503, 504].freeze
# Exception classes for network-level failures.
# NOTE(review): presumably these are rescued and retried. EOFError is a
# subclass of IOError, so it is already matched by the IOError entry.
TRANSIENT_ERRORS =
[
  Net::ReadTimeout,
  Net::OpenTimeout,
  IOError,
  Errno::ECONNRESET,
  Errno::ECONNREFUSED,
  EOFError,
  Errno::EPIPE,
].freeze
# Desktop Chrome User-Agent strings (macOS, Windows and Linux variants).
# Each entry is ONE string: the adjacent literals are joined by the trailing
# backslash line continuations, so the list holds five profiles.
USER_AGENT_PROFILES =
[
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " \
  "AppleWebKit/537.36 (KHTML, like Gecko) " \
  "Chrome/131.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
  "AppleWebKit/537.36 (KHTML, like Gecko) " \
  "Chrome/130.0.0.0 Safari/537.36",
  "Mozilla/5.0 (X11; Linux x86_64) " \
  "AppleWebKit/537.36 (KHTML, like Gecko) " \
  "Chrome/131.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " \
  "AppleWebKit/537.36 (KHTML, like Gecko) " \
  "Chrome/129.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
  "AppleWebKit/537.36 (KHTML, like Gecko) " \
  "Chrome/131.0.0.0 Safari/537.36",
].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, retry_delay: DEFAULT_RETRY_DELAY, user_agent: nil, on_request: nil) ⇒ HttpClient

Returns a new instance of HttpClient.



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/archaeo/http_client.rb', line 62

# Stores the caller's settings and sets up empty pool bookkeeping.
# Only instance variables are assigned here; no connection is opened.
#
# @param timeout [Integer] stored as @timeout
# @param max_retries [Integer] stored as @max_retries
# @param retry_delay [Integer] stored as @retry_delay
# @param user_agent [String, nil] stored as @user_agent
#   (NOTE(review): presumably nil selects a rotating profile — confirm
#   against default_headers, which is not visible here)
# @param on_request [#call, nil] stored as @on_request
#   (NOTE(review): presumably a per-request callback; it is not invoked here)
def initialize(
  timeout: DEFAULT_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  retry_delay: DEFAULT_RETRY_DELAY,
  user_agent: nil,
  on_request: nil
)
  # Pool state: nothing is pooled yet and the client is live.
  @shutdown = false
  @mutex = Mutex.new
  @connections = {}
  @last_used = {}

  # Request behaviour, kept exactly as supplied.
  @timeout = timeout
  @max_retries = max_retries
  @retry_delay = retry_delay

  # Optional customisation hooks.
  @user_agent = user_agent
  @on_request = on_request
end

Class Method Details

.open(**opts) ⇒ Object



78
79
80
81
82
83
# File 'lib/archaeo/http_client.rb', line 78

# Builds a client from +opts+, yields it to the block, and shuts it down
# when the block finishes — whether it returns normally or raises.
#
# @param opts [Hash] keyword options forwarded unchanged to +new+
# @yieldparam instance [HttpClient] the freshly built client
# @return [Object] whatever the block returns
def self.open(**opts)
  instance = new(**opts)
  begin
    yield instance
  ensure
    instance&.shutdown
  end
end

Instance Method Details

#get(url, headers: {}) ⇒ Object



85
86
87
88
89
# File 'lib/archaeo/http_client.rb', line 85

# Issues a GET request for +url+.
#
# The caller's +headers+ are layered on top of +default_headers+, so a
# caller-supplied key wins over the default with the same key.
#
# @param url [String, URI] target, converted with Kernel#URI
# @param headers [Hash] extra request headers
# @return [Object] whatever +attempt_with_retries+ returns
def get(url, headers: {})
  request_headers = default_headers.merge(headers)
  attempt_with_retries(URI(url), request_headers, Net::HTTP::Get)
end

#head(url, headers: {}) ⇒ Object



91
92
93
94
95
# File 'lib/archaeo/http_client.rb', line 91

# Issues a HEAD request for +url+.
#
# The caller's +headers+ are layered on top of +default_headers+, so a
# caller-supplied key wins over the default with the same key.
#
# @param url [String, URI] target, converted with Kernel#URI
# @param headers [Hash] extra request headers
# @return [Object] whatever +attempt_with_retries+ returns
def head(url, headers: {})
  request_headers = default_headers.merge(headers)
  attempt_with_retries(URI(url), request_headers, Net::HTTP::Head)
end

#pool_stats ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
# File 'lib/archaeo/http_client.rb', line 111

# Returns a snapshot of the connection pool taken under the pool lock.
#
# @return [Hash] frozen hash with these keys:
#   - +:active_connections+ number of entries in @connections
#   - +:max_pool_size+ the MAX_POOL_SIZE constant
#   - +:hosts+ the keys of @connections
#   - +:idle_times+ @last_used with each timestamp replaced by the rounded
#     difference between the current time and that timestamp
#   Only the outer hash is frozen; the nested array and hash are fresh
#   copies, not references to internal state.
def pool_stats
  @mutex.synchronize do
    # Read the clock while holding the lock. Sampling it beforehand lets a
    # timestamp that was refreshed while this call waited for the lock end
    # up later than "now", which produced negative idle times.
    # NOTE(review): assumes @last_used holds Time values that are written
    # under @mutex — confirm against the pooling code.
    now = Time.now
    {
      active_connections: @connections.size,
      max_pool_size: MAX_POOL_SIZE,
      hosts: @connections.keys,
      idle_times: @last_used.transform_values { |t| (now - t).round },
    }.freeze
  end
end

#shutdown ⇒ Object



97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/archaeo/http_client.rb', line 97

# Closes every pooled connection and marks the client as shut down.
#
# Runs under the pool lock. The first call does the work; later calls see
# @shutdown already set and return immediately without touching anything.
#
# @return [Hash, nil] the (now empty) connection table on the first call,
#   nil on subsequent calls
def shutdown
  @mutex.synchronize do
    return if @shutdown

    @shutdown = true
    @connections.each_value do |http|
      http.finish
    rescue StandardError
      # Best-effort close: one connection failing to finish must not stop
      # the remaining connections from being closed.
      nil
    end
    # Drop the last-used timestamps together with the connections;
    # otherwise pool_stats keeps reporting idle times for hosts that no
    # longer have a connection.
    @last_used.clear
    @connections.clear
  end
end