Class: Crawlora::Client
- Inherits:
-
Object
- Object
- Crawlora::Client
- Defined in:
- lib/crawlora/client.rb
Overview
Synchronous client for the Crawlora API.
Call operations via grouped helpers (`client.bing.search(q: "...")`) or dynamically (`client.request("bing-search", q: "...")`). Supports configurable retries, an `on_retry` hook, opt-in `request_id` and `idempotency_keys`, `before_request`/`after_response` middleware, client-side `rate_limit`/`max_concurrency`, pagination (`paginate`/`paginate_items`), and `response_type: "stream"`. Uses a keep-alive connection pool by default; call `close` (or use the block form of `Crawlora.client`) to release connections.
Instance Attribute Summary collapse
-
#api_key ⇒ Object
readonly
Returns the value of attribute api_key.
-
#base_url ⇒ Object
readonly
Returns the value of attribute base_url.
-
#headers ⇒ Object
readonly
Returns the value of attribute headers.
-
#jwt_token ⇒ Object
readonly
Returns the value of attribute jwt_token.
-
#max_retry_delay ⇒ Object
readonly
Returns the value of attribute max_retry_delay.
-
#retries ⇒ Object
readonly
Returns the value of attribute retries.
-
#retry_delay ⇒ Object
readonly
Returns the value of attribute retry_delay.
-
#retry_statuses ⇒ Object
readonly
Returns the value of attribute retry_statuses.
-
#timeout ⇒ Object
readonly
Returns the value of attribute timeout.
-
#user_agent ⇒ Object
readonly
Returns the value of attribute user_agent.
Instance Method Summary collapse
-
#close ⇒ Object
Release pooled keep-alive connections, if the transport supports it.
-
#initialize(api_key: nil, jwt_token: nil, base_url: nil, timeout: 30, retries: 0, retry_delay: 0.25, max_retry_delay: DEFAULT_MAX_RETRY_DELAY, retry_statuses: nil, retry_predicate: nil, on_retry: nil, request_id: false, idempotency_keys: false, rate_limit: nil, max_concurrency: nil, logger: nil, before_request: nil, after_response: nil, headers: nil, user_agent: nil, transport: nil) ⇒ Client
constructor
A new instance of Client.
- #operation(operation_id, params = {}, **options) ⇒ Object
-
#paginate(operation_id, params = {}, page_param: nil, cursor_param: nil, next_cursor: nil, start: nil, step: 1, max_pages: nil, response_type: "auto", timeout: nil, headers: nil, &block) ⇒ Object
Yield successive pages of a paginated operation.
-
#paginate_items(operation_id, params = {}, items: nil, **options, &block) ⇒ Object
Yield individual items across pages.
- #request(operation_id, params = {}, response_type: "auto", timeout: nil, headers: nil, retries: nil, retry_predicate: nil) ⇒ Object
Constructor Details
#initialize(api_key: nil, jwt_token: nil, base_url: nil, timeout: 30, retries: 0, retry_delay: 0.25, max_retry_delay: DEFAULT_MAX_RETRY_DELAY, retry_statuses: nil, retry_predicate: nil, on_retry: nil, request_id: false, idempotency_keys: false, rate_limit: nil, max_concurrency: nil, logger: nil, before_request: nil, after_response: nil, headers: nil, user_agent: nil, transport: nil) ⇒ Client
Returns a new instance of Client.
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/crawlora/client.rb', line 148

# Builds a client from keyword options.
#
# For +api_key+ and +base_url+ an explicit argument wins over the matching
# environment variable (CRAWLORA_API_KEY / CRAWLORA_BASE_URL), which in turn
# wins over the built-in default. One singleton method per GROUPS entry is
# defined on the new instance; each returns the OperationGroup built for
# that group.
#
# @param api_key [String, nil] falls back to ENV["CRAWLORA_API_KEY"], then ""
# @param jwt_token [String, nil] stored as "" when not given
# @param base_url [String, nil] falls back to ENV["CRAWLORA_BASE_URL"], then
#   DEFAULT_BASE_URL; one trailing "/" is removed
# @param timeout [Object] stored as given (presumably seconds - confirm against the transport)
# @param retries [#to_i] coerced to an Integer and floored at 0
# @param retry_delay [#to_f] coerced to a Float and floored at 0.0
# @param max_retry_delay [#to_f] coerced to a Float and floored at 0.0
# @param retry_statuses [#to_a, nil] converted to a Set when given, otherwise nil
# @param retry_predicate [#call, nil] stored as given
# @param on_retry [#call, nil] stored as given
# @param request_id [Object] stored as given
# @param idempotency_keys [Object] stored as given
# @param rate_limit [Object, nil] passed to RateLimiter.new together with max_concurrency
# @param max_concurrency [Object, nil] a RateLimiter is built only when this or rate_limit is truthy
# @param logger [Object, nil] stored as given
# @param before_request [Object, nil] normalized through as_hook_list
# @param after_response [Object, nil] normalized through as_hook_list
# @param headers [Hash, nil] duplicated so the caller's object is not shared; {} when not given
# @param user_agent [String, nil] defaults to "crawlora-ruby-sdk/<VERSION>"
# @param transport [Object, nil] defaults to a new DefaultTransport
def initialize(
  api_key: nil,
  jwt_token: nil,
  base_url: nil,
  timeout: 30,
  retries: 0,
  retry_delay: 0.25,
  max_retry_delay: DEFAULT_MAX_RETRY_DELAY,
  retry_statuses: nil,
  retry_predicate: nil,
  on_retry: nil,
  request_id: false,
  idempotency_keys: false,
  rate_limit: nil,
  max_concurrency: nil,
  logger: nil,
  before_request: nil,
  after_response: nil,
  headers: nil,
  user_agent: nil,
  transport: nil
)
  # Credentials and endpoint: explicit argument > environment variable > default.
  @api_key = api_key || ENV.fetch("CRAWLORA_API_KEY", "")
  @jwt_token = jwt_token || ""
  endpoint = base_url || ENV["CRAWLORA_BASE_URL"] || DEFAULT_BASE_URL
  @base_url = endpoint.chomp("/")
  @timeout = timeout

  # Retry tuning: negative inputs are floored at zero.
  @retries = [0, retries.to_i].max
  @retry_delay = [0.0, retry_delay.to_f].max
  @max_retry_delay = [0.0, max_retry_delay.to_f].max
  @retry_statuses = retry_statuses&.to_a&.to_set
  @retry_predicate = retry_predicate
  @on_retry = on_retry

  @request_id = request_id
  @idempotency_keys = idempotency_keys

  # A limiter exists only when at least one of the two limits was requested.
  @rate_limiter =
    if rate_limit || max_concurrency
      RateLimiter.new(rate_limit, max_concurrency)
    end

  @logger = logger
  @before_request = as_hook_list(before_request)
  @after_response = as_hook_list(after_response)
  @headers = (headers || {}).dup
  @user_agent = user_agent || "crawlora-ruby-sdk/#{VERSION}"
  @transport = transport || DefaultTransport.new

  # Expose each operation group as a method on this instance.
  @groups = {}
  GROUPS.each do |name, ops|
    @groups[name] = OperationGroup.new(self, ops)
    define_singleton_method(name) { @groups[name] }
  end
end
Instance Attribute Details
#api_key ⇒ Object (readonly)
Returns the value of attribute api_key.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the API key.
#
# @return [Object] the value held in @api_key (the constructor stores the
#   explicit argument, else ENV["CRAWLORA_API_KEY"], else "")
def api_key = @api_key
#base_url ⇒ Object (readonly)
Returns the value of attribute base_url.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the API base URL.
#
# @return [Object] the value held in @base_url (the constructor removes one
#   trailing "/" before storing it)
def base_url = @base_url
#headers ⇒ Object (readonly)
Returns the value of attribute headers.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the client-level headers.
#
# @return [Object] the object held in @headers itself, not a copy (the
#   constructor stores a duplicate of the +headers:+ argument, or {})
def headers = @headers
#jwt_token ⇒ Object (readonly)
Returns the value of attribute jwt_token.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the JWT token.
#
# @return [Object] the value held in @jwt_token (the constructor stores ""
#   when no token is supplied)
def jwt_token = @jwt_token
#max_retry_delay ⇒ Object (readonly)
Returns the value of attribute max_retry_delay.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the retry delay ceiling.
#
# @return [Object] the value held in @max_retry_delay (the constructor
#   coerces it with to_f and floors it at 0.0)
def max_retry_delay = @max_retry_delay
#retries ⇒ Object (readonly)
Returns the value of attribute retries.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the default retry count.
#
# @return [Object] the value held in @retries (the constructor coerces it
#   with to_i and floors it at 0)
def retries = @retries
#retry_delay ⇒ Object (readonly)
Returns the value of attribute retry_delay.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the base retry delay.
#
# @return [Object] the value held in @retry_delay (the constructor coerces
#   it with to_f and floors it at 0.0)
def retry_delay = @retry_delay
#retry_statuses ⇒ Object (readonly)
Returns the value of attribute retry_statuses.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the configured retryable statuses.
#
# @return [Object] the value held in @retry_statuses (the constructor stores
#   a Set built from the argument, or nil when none was given)
def retry_statuses = @retry_statuses
#timeout ⇒ Object (readonly)
Returns the value of attribute timeout.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the default request timeout.
#
# @return [Object] the value held in @timeout, stored by the constructor
#   exactly as given (presumably seconds - confirm against the transport)
def timeout = @timeout
#user_agent ⇒ Object (readonly)
Returns the value of attribute user_agent.
145 146 147 |
# File 'lib/crawlora/client.rb', line 145

# Read-only accessor for the User-Agent string.
#
# @return [Object] the value held in @user_agent (the constructor defaults
#   it to "crawlora-ruby-sdk/<VERSION>")
def user_agent = @user_agent
Instance Method Details
#close ⇒ Object
Release pooled keep-alive connections, if the transport supports it.
186 187 188 |
# File 'lib/crawlora/client.rb', line 186

# Asks the transport to close, when it offers a public +close+ method.
# Transports without one are left untouched.
#
# @return [Object, nil] whatever the transport's +close+ returns, or nil
#   when the transport does not respond to +close+
def close
  return unless @transport.respond_to?(:close)

  @transport.close
end
#operation(operation_id, params = {}, **options) ⇒ Object
190 191 192 |
# File 'lib/crawlora/client.rb', line 190

# Thin alias for #request: forwards the operation id, the params and every
# keyword option unchanged.
#
# @param operation_id [Object] passed through to #request
# @param params [Hash] passed through to #request
# @param options [Hash] keyword options passed through to #request
# @return [Object] whatever #request returns
def operation(operation_id, params = {}, **options) = request(operation_id, params, **options)
#paginate(operation_id, params = {}, page_param: nil, cursor_param: nil, next_cursor: nil, start: nil, step: 1, max_pages: nil, response_type: "auto", timeout: nil, headers: nil, &block) ⇒ Object
Yield successive pages of a paginated operation.
Numeric mode (default) advances the page/offset query parameter and stops on an empty page. Cursor mode (pass both cursor_param and a next_cursor extractor) sends the cursor parameter and stops when next_cursor returns a falsy value.
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
# File 'lib/crawlora/client.rb', line 227

# Walks a paginated operation, handing each page to the block.
#
# Cursor pagination is chosen when either +cursor_param+ or +next_cursor+ is
# supplied; otherwise numeric pagination is used. The page walking itself is
# delegated to paginate_cursor / paginate_numeric.
#
# @param operation_id [Object] key looked up in OPERATIONS
# @param params [Hash] request parameters; passed through stringify_keys
# @param page_param [Object, nil] forwarded to paginate_numeric
# @param cursor_param [Object, nil] forwarded to paginate_cursor
# @param next_cursor [Object, nil] forwarded to paginate_cursor
# @param start [Object, nil] forwarded to whichever paginator runs
# @param step [Object] forwarded to paginate_numeric
# @param max_pages [Object, nil] forwarded to whichever paginator runs
# @param response_type [Object] bundled into the per-request options
# @param timeout [Object, nil] bundled into the per-request options
# @param headers [Object, nil] bundled into the per-request options
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever the delegated paginator returns
# @raise [ArgumentError] when a block is given and operation_id is not in
#   OPERATIONS (without a block the lookup is deferred to enumeration)
def paginate(operation_id, params = {}, page_param: nil, cursor_param: nil, next_cursor: nil, start: nil,
             step: 1, max_pages: nil, response_type: "auto", timeout: nil, headers: nil, &block)
  if block.nil?
    # No block: hand back a lazy enumerator that re-enters this method.
    replay = {
      page_param: page_param, cursor_param: cursor_param, next_cursor: next_cursor,
      start: start, step: step, max_pages: max_pages,
      response_type: response_type, timeout: timeout, headers: headers
    }
    return to_enum(:paginate, operation_id, params, **replay)
  end

  operation = OPERATIONS[operation_id]
  raise ArgumentError, "unknown Crawlora operation: #{operation_id}" if operation.nil?

  query = stringify_keys(params)
  request_opts = { response_type: response_type, timeout: timeout, headers: headers }

  unless cursor_param || next_cursor
    return paginate_numeric(operation_id, operation, query,
                            page_param: page_param, start: start, step: step,
                            max_pages: max_pages, opts: request_opts, &block)
  end

  paginate_cursor(operation_id, operation, query,
                  cursor_param: cursor_param, next_cursor: next_cursor, start: start,
                  max_pages: max_pages, opts: request_opts, &block)
end
#paginate_items(operation_id, params = {}, items: nil, **options, &block) ⇒ Object
Yield individual items across pages. items extracts the list from a page (default: the Crawlora data array).
252 253 254 255 256 257 258 259 |
# File 'lib/crawlora/client.rb', line 252

# Flattens pagination: runs #paginate and yields every item of every page.
#
# @param operation_id [Object] forwarded to #paginate
# @param params [Hash] forwarded to #paginate
# @param items [#call, nil] extracts the item list from a page; defaults to
#   Pagination.default_items
# @param options [Hash] remaining keyword options, forwarded to #paginate
# @yield [item] each element of the list extracted from each page
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever #paginate returns
def paginate_items(operation_id, params = {}, items: nil, **options, &block)
  return to_enum(:paginate_items, operation_id, params, items: items, **options) if block.nil?

  extractor = items || Pagination.method(:default_items)
  paginate(operation_id, params, **options) { |page| extractor.call(page).each(&block) }
end
#request(operation_id, params = {}, response_type: "auto", timeout: nil, headers: nil, retries: nil, retry_predicate: nil) ⇒ Object
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
# File 'lib/crawlora/client.rb', line 194

# Performs one operation, retrying on Error according to the retry policy.
#
# The idempotency key (when enabled and the operation's method is POST or
# PATCH) is generated once, before the first attempt, and the same key is
# sent on every retry.
#
# @param operation_id [Object] key looked up in OPERATIONS
# @param params [Hash] request parameters; passed through stringify_keys on
#   every attempt
# @param response_type [Object] checked by validate_response_type
# @param timeout [Object, nil] forwarded to send_request
# @param headers [Object, nil] forwarded to send_request
# @param retries [#to_i, nil] per-call retry budget, floored at 0; nil uses
#   the client's configured value
# @param retry_predicate [#call, nil] called as (status, error); when given
#   it replaces the retryable? check for this call
# @return [Object] whatever send_request returns
# @raise [ArgumentError] if operation_id is not in OPERATIONS
# @raise [Error] when the error is not retryable or the budget is exhausted
def request(operation_id, params = {}, response_type: "auto", timeout: nil, headers: nil, retries: nil,
            retry_predicate: nil)
  operation = OPERATIONS[operation_id]
  raise ArgumentError, "unknown Crawlora operation: #{operation_id}" if operation.nil?

  response_type = validate_response_type(response_type)
  log(event: "request", operation: operation_id)

  max_retries = retries.nil? ? @retries : [0, retries.to_i].max
  idempotency_key = SecureRandom.hex(16) if @idempotency_keys && %w[POST PATCH].include?(operation["method"])

  attempt = 0
  # begin/rescue/retry instead of Kernel#loop: loop rescues StopIteration, so
  # a StopIteration escaping the transport or a hook would end the loop and
  # make this method return silently instead of propagating.
  begin
    send_request(operation, stringify_keys(params), response_type: response_type, timeout: timeout,
                                                    headers: headers, idempotency_key: idempotency_key)
  rescue Error => e
    retryable = retry_predicate ? retry_predicate.call(e.status, e) : retryable?(e.status, e)
    raise if attempt >= max_retries || !retryable

    attempt += 1
    delay = compute_retry_delay(attempt, e.headers)
    log(event: "retry", operation: operation_id, attempt: attempt, status: e.status, delay: delay)
    @on_retry&.call(attempt, e, delay)
    sleep(delay) if delay.positive?
    retry # bounded: attempt is compared against max_retries above
  end
end