Class: Crawlora::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlora/client.rb

Overview

Synchronous client for the Crawlora API.

Call operations via grouped helpers (`client.bing.search(q: "…")`) or dynamically (`client.request("bing-search", q: "…")`). Supports configurable retries, an `on_retry` hook, opt-in `request_id` and `idempotency_keys`, `before_request`/`after_response` middleware, client-side `rate_limit`/`max_concurrency`, pagination (`paginate`/`paginate_items`), and `response_type: "stream"`. Uses a keep-alive connection pool by default; call `close` (or use the block form of `Crawlora.client`) to release connections.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, jwt_token: nil, base_url: nil, timeout: 30, retries: 0, retry_delay: 0.25, max_retry_delay: DEFAULT_MAX_RETRY_DELAY, retry_statuses: nil, retry_predicate: nil, on_retry: nil, request_id: false, idempotency_keys: false, rate_limit: nil, max_concurrency: nil, logger: nil, before_request: nil, after_response: nil, headers: nil, user_agent: nil, transport: nil) ⇒ Client

Returns a new instance of Client.



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/crawlora/client.rb', line 148

# Build a client from explicit options. Only the API key and the base URL
# fall back to environment variables when the argument is not given.
#
# @param api_key API key; falls back to ENV["CRAWLORA_API_KEY"], then ""
# @param jwt_token JWT token; defaults to "" (no environment fallback)
# @param base_url base URL; falls back to ENV["CRAWLORA_BASE_URL"], then
#   DEFAULT_BASE_URL. One trailing "/" is removed.
# @param timeout stored as given (default 30)
# @param retries default retry budget; converted with to_i, negatives become 0
# @param retry_delay converted with to_f, negatives become 0.0
# @param max_retry_delay converted with to_f, negatives become 0.0
# @param retry_statuses converted to a Set when given, otherwise nil
# @param retry_predicate stored as given
# @param on_retry stored as given; #request calls it as (attempt, error, delay)
# @param request_id stored as given
# @param idempotency_keys stored as given; #request consults it for POST/PATCH operations
# @param rate_limit passed to RateLimiter.new together with max_concurrency
# @param max_concurrency see rate_limit; a limiter is built only when either is truthy
# @param logger stored as given
# @param before_request normalized through as_hook_list
# @param after_response normalized through as_hook_list
# @param headers duplicated (shallow) when given, otherwise an empty Hash
# @param user_agent defaults to "crawlora-ruby-sdk/<VERSION>"
# @param transport defaults to a new DefaultTransport
def initialize(
  api_key: nil, jwt_token: nil, base_url: nil, timeout: 30,
  retries: 0, retry_delay: 0.25, max_retry_delay: DEFAULT_MAX_RETRY_DELAY,
  retry_statuses: nil, retry_predicate: nil, on_retry: nil,
  request_id: false, idempotency_keys: false,
  rate_limit: nil, max_concurrency: nil, logger: nil,
  before_request: nil, after_response: nil,
  headers: nil, user_agent: nil, transport: nil
)
  # Credentials and endpoint: explicit argument > environment variable > default.
  @api_key = api_key || ENV["CRAWLORA_API_KEY"] || ""
  @jwt_token = jwt_token || ""
  configured_url = base_url || ENV["CRAWLORA_BASE_URL"] || DEFAULT_BASE_URL
  @base_url = configured_url.chomp("/")
  @timeout = timeout

  # Retry policy: numeric settings are coerced and never allowed below zero.
  retry_budget = retries.to_i
  @retries = retry_budget.negative? ? 0 : retry_budget
  @retry_delay = [0.0, retry_delay.to_f].max
  @max_retry_delay = [0.0, max_retry_delay.to_f].max
  @retry_statuses = retry_statuses&.to_a&.to_set
  @retry_predicate = retry_predicate
  @on_retry = on_retry

  # Opt-in request features.
  @request_id = request_id
  @idempotency_keys = idempotency_keys

  # A limiter exists only when at least one throttling option was supplied.
  @rate_limiter = (RateLimiter.new(rate_limit, max_concurrency) if rate_limit || max_concurrency)
  @logger = logger

  # Middleware hooks and outgoing request defaults.
  @before_request = as_hook_list(before_request)
  @after_response = as_hook_list(after_response)
  @headers = (headers || {}).dup
  @user_agent = user_agent || "crawlora-ruby-sdk/#{VERSION}"
  @transport = transport || DefaultTransport.new

  # Expose every operation group as a reader on this instance
  # (one singleton method per GROUPS entry).
  @groups = {}
  GROUPS.each do |name, group_operations|
    group = OperationGroup.new(self, group_operations)
    @groups[name] = group
    define_singleton_method(name) { @groups[name] }
  end
end

Instance Attribute Details

#api_key ⇒ Object (readonly)

Returns the value of attribute api_key.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# API key held by this client: the +api_key:+ constructor argument when given,
# otherwise the CRAWLORA_API_KEY environment variable, otherwise "".
def api_key
  @api_key
end

#base_url ⇒ Object (readonly)

Returns the value of attribute base_url.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# Base URL configured for this client: the +base_url:+ constructor argument,
# else the CRAWLORA_BASE_URL environment variable, else DEFAULT_BASE_URL,
# with one trailing "/" removed.
def base_url
  @base_url
end

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# Headers hash held by this client: a shallow copy (+dup+) of the +headers:+
# constructor argument, or an empty Hash when none was given.
# The reader returns the stored object itself, not a fresh copy.
def headers
  @headers
end

#jwt_token ⇒ Object (readonly)

Returns the value of attribute jwt_token.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# JWT token passed to the constructor as +jwt_token:+, or "" when none was
# given. Unlike the API key, the constructor reads no environment variable
# for it.
def jwt_token
  @jwt_token
end

#max_retry_delay ⇒ Object (readonly)

Returns the value of attribute max_retry_delay.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# The +max_retry_delay:+ constructor argument converted with +to_f+ and
# floored at 0.0 (default: DEFAULT_MAX_RETRY_DELAY).
# NOTE(review): presumably the upper bound applied when computing retry
# delays — confirm against compute_retry_delay.
def max_retry_delay
  @max_retry_delay
end

#retries ⇒ Object (readonly)

Returns the value of attribute retries.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# Default retry budget: the +retries:+ constructor argument converted with
# +to_i+ and floored at 0. #request uses it whenever its own +retries:+
# argument is nil.
def retries
  @retries
end

#retry_delay ⇒ Object (readonly)

Returns the value of attribute retry_delay.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# The +retry_delay:+ constructor argument (default 0.25) converted with
# +to_f+ and floored at 0.0.
# NOTE(review): presumably the base delay in seconds between attempts —
# confirm against compute_retry_delay.
def retry_delay
  @retry_delay
end

#retry_statuses ⇒ Object (readonly)

Returns the value of attribute retry_statuses.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# Set built from the +retry_statuses:+ constructor argument, or nil when the
# argument was not given.
def retry_statuses
  @retry_statuses
end

#timeout ⇒ Object (readonly)

Returns the value of attribute timeout.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# The +timeout:+ constructor argument, stored exactly as given (default 30).
# NOTE(review): units are presumably seconds — confirm against the transport.
def timeout
  @timeout
end

#user_agent ⇒ Object (readonly)

Returns the value of attribute user_agent.



145
146
147
# File 'lib/crawlora/client.rb', line 145

# User-agent string held by this client: the +user_agent:+ constructor
# argument, or "crawlora-ruby-sdk/<VERSION>" when none was given.
def user_agent
  @user_agent
end

Instance Method Details

#close ⇒ Object

Release pooled keep-alive connections, if the transport supports it.



186
187
188
# File 'lib/crawlora/client.rb', line 186

# Close the underlying transport when it exposes a public +close+ method;
# otherwise do nothing.
#
# @return [Object, nil] whatever the transport's +close+ returns, or nil when
#   the transport does not respond to +close+
def close
  return unless @transport.respond_to?(:close)

  @transport.close
end

#operation(operation_id, params = {}, **options) ⇒ Object



190
191
192
# File 'lib/crawlora/client.rb', line 190

# Convenience entry point that forwards every argument unchanged to #request
# and returns its result.
#
# @param operation_id identifier of the operation, resolved by #request
# @param params [Hash] operation parameters (default: empty Hash)
# @param options keyword options accepted by #request
# @return [Object] whatever #request returns
# @raise [ArgumentError] raised by #request when the operation id is unknown
def operation(operation_id, params = {}, **options)
  request(operation_id, params, **options)
end

#paginate(operation_id, params = {}, page_param: nil, cursor_param: nil, next_cursor: nil, start: nil, step: 1, max_pages: nil, response_type: "auto", timeout: nil, headers: nil, &block) ⇒ Object

Yield successive pages of a paginated operation.

Numeric mode (default) advances the page/offset query parameter and stops on an empty page. Cursor mode (pass both cursor_param and a next_cursor extractor) sends the cursor parameter and stops when next_cursor returns a falsy value.

Raises:

  • (ArgumentError)


227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/crawlora/client.rb', line 227

# Yield successive pages of a paginated operation, or return an Enumerator
# over them when called without a block.
#
# Cursor pagination is delegated to paginate_cursor as soon as either
# +cursor_param+ or +next_cursor+ is given; otherwise paginate_numeric
# handles page/offset pagination.
#
# @param operation_id key into OPERATIONS
# @param params [Hash] base parameters; keys are passed through stringify_keys
# @param page_param forwarded to paginate_numeric
# @param cursor_param forwarded to paginate_cursor
# @param next_cursor forwarded to paginate_cursor
# @param start forwarded to whichever helper runs
# @param step forwarded to paginate_numeric (default 1)
# @param max_pages forwarded to whichever helper runs
# @param response_type bundled into the per-request options (default "auto")
# @param timeout bundled into the per-request options
# @param headers bundled into the per-request options
# @return [Enumerator] when no block is given; otherwise the helper's result
# @raise [ArgumentError] if +operation_id+ is not in OPERATIONS. With no
#   block the lookup is deferred until the Enumerator is consumed.
def paginate(operation_id, params = {}, page_param: nil, cursor_param: nil, next_cursor: nil,
             start: nil, step: 1, max_pages: nil, response_type: "auto", timeout: nil, headers: nil, &block)
  if block.nil?
    # Replay the exact same call lazily once the enumerator is iterated.
    replay = {
      page_param: page_param, cursor_param: cursor_param, next_cursor: next_cursor,
      start: start, step: step, max_pages: max_pages,
      response_type: response_type, timeout: timeout, headers: headers
    }
    return enum_for(:paginate, operation_id, params, **replay)
  end

  operation = OPERATIONS[operation_id]
  raise ArgumentError, "unknown Crawlora operation: #{operation_id}" if operation.nil?

  query = stringify_keys(params)
  request_options = { response_type: response_type, timeout: timeout, headers: headers }

  unless cursor_param || next_cursor
    return paginate_numeric(operation_id, operation, query,
                            page_param: page_param, start: start, step: step,
                            max_pages: max_pages, opts: request_options, &block)
  end

  paginate_cursor(operation_id, operation, query,
                  cursor_param: cursor_param, next_cursor: next_cursor,
                  start: start, max_pages: max_pages, opts: request_options, &block)
end

#paginate_items(operation_id, params = {}, items: nil, **options, &block) ⇒ Object

Yield individual items across pages. items extracts the list from a page (default: the Crawlora data array).



252
253
254
255
256
257
258
259
# File 'lib/crawlora/client.rb', line 252

# Yield each individual item across all pages of a paginated operation, or
# return an Enumerator over the items when called without a block.
#
# @param operation_id forwarded to #paginate
# @param params [Hash] forwarded to #paginate
# @param items [#call, nil] receives a page and returns its list of items;
#   defaults to Pagination.default_items
# @param options remaining keyword options, forwarded to #paginate
# @yield [item] each element of each extracted list, in page order
# @return [Enumerator] when no block is given; otherwise #paginate's result
def paginate_items(operation_id, params = {}, items: nil, **options, &block)
  if block
    extractor = items || Pagination.method(:default_items)
    paginate(operation_id, params, **options) { |page| extractor.call(page).each(&block) }
  else
    enum_for(:paginate_items, operation_id, params, items: items, **options)
  end
end

#request(operation_id, params = {}, response_type: "auto", timeout: nil, headers: nil, retries: nil, retry_predicate: nil) ⇒ Object

Raises:

  • (ArgumentError)


194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/crawlora/client.rb', line 194

# Perform one Crawlora operation, retrying failed attempts.
#
# An attempt that raises Error is retried while the retry budget is not
# exhausted and the error is judged retryable; otherwise the error is
# re-raised. Any other exception propagates immediately.
#
# @param operation_id key into OPERATIONS
# @param params [Hash] operation parameters; passed through stringify_keys
#   on every attempt
# @param response_type checked by validate_response_type (default "auto")
# @param timeout forwarded to send_request
# @param headers forwarded to send_request
# @param retries [Integer, nil] per-call retry budget; nil uses the client's
#   setting, other values are converted with to_i and floored at 0
# @param retry_predicate [#call, nil] called as (status, error); a truthy
#   result means "retry". When nil, retryable? decides.
# @return [Object] whatever send_request returns
# @raise [ArgumentError] if +operation_id+ is not in OPERATIONS
# @raise [Error] the most recent error once retries are exhausted or the
#   error is not retryable
def request(operation_id, params = {}, response_type: "auto", timeout: nil, headers: nil,
            retries: nil, retry_predicate: nil)
  operation = OPERATIONS[operation_id]
  raise ArgumentError, "unknown Crawlora operation: #{operation_id}" if operation.nil?

  response_type = validate_response_type(response_type)
  log(event: "request", operation: operation_id)
  max_retries = retries.nil? ? @retries : [0, retries.to_i].max
  # Generated once, before the first attempt, so every retry of this call
  # sends the same key.
  idempotency_key =
    (@idempotency_keys && %w[POST PATCH].include?(operation["method"])) ? SecureRandom.hex(16) : nil

  attempt = 0
  # begin/retry rather than Kernel#loop: loop rescues StopIteration, which
  # would silently turn a StopIteration escaping from send_request into a
  # normal return value instead of letting it propagate.
  begin
    send_request(operation, stringify_keys(params), response_type: response_type,
                                                    timeout: timeout, headers: headers,
                                                    idempotency_key: idempotency_key)
  rescue Error => e
    retryable = retry_predicate ? retry_predicate.call(e.status, e) : retryable?(e.status, e)
    raise if attempt >= max_retries || !retryable

    attempt += 1
    delay = compute_retry_delay(attempt, e.headers)
    log(event: "retry", operation: operation_id, attempt: attempt, status: e.status, delay: delay)
    @on_retry&.call(attempt, e, delay)
    sleep(delay) if delay.positive?
    retry # bounded: attempt has just been incremented towards max_retries
  end
end