Module: Coelacanth::HTTP

Defined in:
lib/coelacanth/http.rb

Defined Under Namespace

Classes: ErrorResponse

Constant Summary

# Default value for the open_timeout option handed to Net::HTTP.start
# (Net::HTTP interprets it as seconds to wait for the connection to open).
DEFAULT_OPEN_TIMEOUT =
5
# Default value for the read_timeout option handed to Net::HTTP.start
# (Net::HTTP interprets it as seconds to wait for a read to complete).
DEFAULT_READ_TIMEOUT =
10
# Default number of retries after a timeout; a request is therefore attempted
# at most MAX_RETRIES + 1 times before a timeout error is raised.
MAX_RETRIES =
2
# Decorator around a raw HTTP response object. Every call it does not define
# itself is delegated to the wrapped response; on top of that it exposes a
# normalized status code, a header hash and the URI the response came from.
Response =
Class.new(SimpleDelegator) do
  attr_reader :status_code, :headers, :final_uri

  # @param response [Object] the raw response to wrap
  # @param final_uri [Object, nil] fallback used when the response does not
  #   report a URI of its own
  def initialize(response, final_uri: nil)
    super(response)

    # Each attribute is only derived when the wrapped object supports it.
    code = response.code.to_i if response.respond_to?(:code)
    header_map = response.respond_to?(:each_header) ? response.each_header.to_h : {}
    reported_uri = response.uri if response.respond_to?(:uri)

    @status_code = code
    @headers = header_map
    @final_uri = reported_uri || final_uri
  end

  # @return [String, nil] string form of {#final_uri}, or nil when it is unknown
  def final_url
    return nil if final_uri.nil?

    final_uri.to_s
  end

  # Also answers true for the classes of the wrapped response, so type checks
  # written against the raw response keep working on the wrapper.
  def is_a?(klass)
    return true if super

    __getobj__.is_a?(klass)
  end

  # Same answer as {#is_a?}.
  def kind_of?(klass)
    is_a?(klass)
  end
end

Class Method Summary

Class Method Details

.ensure_allowed!(uri) ⇒ Object



78
79
80
81
82
83
# File 'lib/coelacanth/http.rb', line 78

# Guards a request behind the robots check.
#
# @param uri [Object] target handed to Coelacanth::Robots.allowed?
# @return [nil] when the robots check permits access
# @raise [Coelacanth::RobotsDisallowedError] when the robots check rejects the URI
def ensure_allowed!(uri)
  unless Coelacanth::Robots.allowed?(uri)
    denial = "Access to #{uri} is disallowed by robots.txt for user-agent '#{Coelacanth::Robots.user_agent}'"
    raise Coelacanth::RobotsDisallowedError, denial
  end
end

.get_response(uri, open_timeout: DEFAULT_OPEN_TIMEOUT, read_timeout: DEFAULT_READ_TIMEOUT, retries: MAX_RETRIES) ⇒ Object



51
52
53
54
55
# File 'lib/coelacanth/http.rb', line 51

# Performs a robots-checked GET and wraps the result.
#
# @param uri [Object] target of the request; also used as the wrapper's
#   fallback final URI
# @param open_timeout [Object] forwarded to raw_get_response
# @param read_timeout [Object] forwarded to raw_get_response
# @param retries [Object] forwarded to raw_get_response
# @return [Response] wrapper around whatever raw_get_response returned
# @raise whatever ensure_allowed! or raw_get_response raise
def get_response(uri, open_timeout: DEFAULT_OPEN_TIMEOUT, read_timeout: DEFAULT_READ_TIMEOUT, retries: MAX_RETRIES)
  # The robots check runs first so a disallowed URI is never requested.
  ensure_allowed!(uri)

  fetch_options = { open_timeout: open_timeout, read_timeout: read_timeout, retries: retries }
  raw = raw_get_response(uri, **fetch_options)
  Response.new(raw, final_uri: uri)
end

.raise_http_error(uri, response) ⇒ Object

Raises:

  • (OpenURI::HTTPError)


85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/coelacanth/http.rb', line 85

# Converts an HTTP response into an OpenURI::HTTPError and raises it.
#
# The error carries an ErrorResponse built from the response's status line,
# headers and body, so callers can inspect it through the exception's +io+.
#
# @param uri [Object] the URI that was requested; used in the message, as the
#   error's base URI, and as the final URI when the response reports none
# @param response [Object] response answering +code+, +message+,
#   +each_header+ and +body+ (and optionally +uri+)
# @raise [OpenURI::HTTPError] always
def raise_http_error(uri, response)
  message = format("%s %s for GET %s", response.code, response.message, uri)

  # A response may answer to #uri and still return nil, so fall back to the
  # requested URI in that case too rather than recording a nil final URI.
  reported_uri = response.uri if response.respond_to?(:uri)

  io = ErrorResponse.new(
    status: [response.code, response.message],
    meta: response.each_header.to_h,
    base_uri: uri,
    final_uri: reported_uri || uri,
    body: response.body
  )

  # HTTPError takes (message, io), so it has to be instantiated explicitly.
  raise OpenURI::HTTPError.new(message, io)
end

.raw_get_response(uri, open_timeout: DEFAULT_OPEN_TIMEOUT, read_timeout: DEFAULT_READ_TIMEOUT, retries: MAX_RETRIES) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/coelacanth/http.rb', line 57

# Issues a GET request with Net::HTTP, retrying when the request times out.
#
# @param uri [URI] target; its host, port and scheme select the connection
#   (SSL is enabled only when the scheme is "https")
# @param open_timeout [Object] open_timeout option for Net::HTTP.start
# @param read_timeout [Object] read_timeout option for Net::HTTP.start
# @param retries [Integer] how many times a timed-out request is retried
# @return [Object] whatever the connection's +request+ call returns
# @raise [Coelacanth::TimeoutError] once retries + 1 attempts have timed out
def raw_get_response(uri, open_timeout: DEFAULT_OPEN_TIMEOUT, read_timeout: DEFAULT_READ_TIMEOUT, retries: MAX_RETRIES)
  tries = 0
  begin
    tries += 1
    get = Net::HTTP::Get.new(uri)
    secure = uri.scheme == "https"

    # The block's value is returned by Net::HTTP.start and thus by this method.
    Net::HTTP.start(uri.host, uri.port, use_ssl: secure, open_timeout: open_timeout, read_timeout: read_timeout) do |http|
      http.request(get)
    end
  rescue Net::OpenTimeout, Net::ReadTimeout, Timeout::Error => e
    # Give up only after the retry budget is spent; otherwise run the block again.
    raise Coelacanth::TimeoutError, "GET #{uri} timed out after #{tries} attempts: #{e.message}" if tries > retries

    retry
  end
end