Class: Firecrawl::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/firecrawl/client.rb

Overview

Client for the Firecrawl v2 API.

Examples:

Quick start

client = Firecrawl::Client.new(api_key: "fc-your-api-key")

# Scrape a single page
doc = client.scrape("https://example.com",
  Firecrawl::Models::ScrapeOptions.new(formats: ["markdown"]))

# Crawl a website
job = client.crawl("https://example.com",
  Firecrawl::Models::CrawlOptions.new(limit: 50))

Constant Summary collapse

DEFAULT_API_URL =
"https://api.firecrawl.dev"
DEFAULT_TIMEOUT =
300 # seconds
DEFAULT_MAX_RETRIES =
3
DEFAULT_BACKOFF_FACTOR =
0.5
DEFAULT_POLL_INTERVAL =
2 # seconds
DEFAULT_JOB_TIMEOUT =
300 # seconds

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client

Creates a new Firecrawl client.

Parameters:

  • api_key (String, nil) (defaults to: nil)

    API key (falls back to FIRECRAWL_API_KEY env var)

  • api_url (String) (defaults to: nil)

    API base URL

  • timeout (Integer) (defaults to: DEFAULT_TIMEOUT)

    HTTP request timeout in seconds

  • max_retries (Integer) (defaults to: DEFAULT_MAX_RETRIES)

    maximum automatic retries for transient failures

  • backoff_factor (Float) (defaults to: DEFAULT_BACKOFF_FACTOR)

    exponential backoff factor in seconds



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/firecrawl/client.rb', line 34

# Builds a client and the HTTP transport it delegates to.
#
# The API key comes from +api_key+ or, failing that, the FIRECRAWL_API_KEY
# environment variable. The base URL comes from +api_url+, then
# FIRECRAWL_API_URL, then DEFAULT_API_URL.
#
# @param api_key [String, nil] API key (falls back to FIRECRAWL_API_KEY env var)
# @param api_url [String, nil] API base URL
# @param timeout [Integer] HTTP request timeout in seconds
# @param max_retries [Integer] maximum automatic retries for transient failures
# @param backoff_factor [Float] exponential backoff factor in seconds
# @raise [FirecrawlError] if the resolved URL does not start with http:// or https://
def initialize(
  api_key: nil,
  api_url: nil,
  timeout: DEFAULT_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  backoff_factor: DEFAULT_BACKOFF_FACTOR
)
  key = api_key || ENV["FIRECRAWL_API_KEY"]
  # Blank keys are normalised to nil rather than rejected: scrape, search, and
  # interact fall back to the keyless free tier (rate-limited per IP), while
  # other methods get a 401 from the API until a key is supplied.
  key = nil if key && key.strip.empty?

  base_url = api_url || ENV["FIRECRAWL_API_URL"] || DEFAULT_API_URL
  http_scheme = %r{\Ahttps?://}i
  unless base_url.match?(http_scheme)
    raise FirecrawlError, "API URL must be a fully qualified HTTP or HTTPS URL (got: #{base_url})."
  end

  transport_settings = {
    api_key: key,
    base_url: base_url,
    timeout: timeout,
    max_retries: max_retries,
    backoff_factor: backoff_factor
  }
  @http = HttpClient.new(**transport_settings)
end

Class Method Details

.from_env ⇒ Client

Creates a client from the FIRECRAWL_API_KEY environment variable.

Returns:



64
65
66
# File 'lib/firecrawl/client.rb', line 64

# Convenience constructor that takes all configuration from the environment.
#
# Delegates to +new+ with no arguments, so every constructor default applies.
#
# @return [Client]
def self.from_env
  self.new
end

Instance Method Details

#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse

Runs an agent task and waits for completion (auto-polling).

Parameters:

  • options (Models::AgentOptions)

    agent configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:

Raises:



472
473
474
475
476
477
478
479
480
481
482
483
484
# File 'lib/firecrawl/client.rb', line 472

# Runs an agent task and waits for completion (auto-polling).
#
# @param options [Models::AgentOptions] agent configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::AgentStatusResponse] the first status whose +done?+ is true
# @raise [FirecrawlError] if the start response carries no job ID
# @raise [JobTimeoutError] if the task is not done before +timeout+ elapses
def agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_agent(options)
  raise FirecrawlError, "Agent start did not return a job ID" if start.id.nil?

  # Measure the deadline on the monotonic clock. Wall-clock time (Time.now) can
  # jump when the system clock is adjusted, which would cut the wait short or
  # stretch it well past +timeout+.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
  while Process.clock_gettime(Process::CLOCK_MONOTONIC) < deadline
    status = get_agent_status(start.id)
    return status if status.done?

    sleep(poll_interval)
  end
  raise JobTimeoutError.new(start.id, timeout, "Agent")
end

#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob

Batch-scrapes URLs and waits for completion (auto-polling).

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:



304
305
306
307
# File 'lib/firecrawl/client.rb', line 304

# Batch-scrapes URLs and waits for completion (auto-polling).
#
# Starts the job via +start_batch_scrape+ and hands its ID to
# +poll_batch_scrape+ together with the polling settings.
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::BatchScrapeJob]
def batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  job_id = start_batch_scrape(urls, options).id
  poll_batch_scrape(job_id, poll_interval, timeout)
end

#cancel_agent(job_id) ⇒ Hash

Cancels a running agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


490
491
492
493
494
# File 'lib/firecrawl/client.rb', line 490

# Cancels a running agent task by issuing DELETE /v2/agent/:job_id.
#
# @param job_id [String] the agent job ID
# @return [Hash] whatever the HTTP layer returns for the DELETE
# @raise [ArgumentError] if +job_id+ is nil
def cancel_agent(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  endpoint = "/v2/agent/#{job_id}"
  @http.delete(endpoint)
end

#cancel_batch_scrape(job_id) ⇒ Hash

Cancels a running batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


313
314
315
316
317
# File 'lib/firecrawl/client.rb', line 313

# Cancels a running batch scrape job by issuing DELETE /v2/batch/scrape/:job_id.
#
# @param job_id [String] the batch scrape job ID
# @return [Hash] whatever the HTTP layer returns for the DELETE
# @raise [ArgumentError] if +job_id+ is nil
def cancel_batch_scrape(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  endpoint = "/v2/batch/scrape/#{job_id}"
  @http.delete(endpoint)
end

#cancel_crawl(job_id) ⇒ Hash

Cancels a running crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


239
240
241
242
243
# File 'lib/firecrawl/client.rb', line 239

# Cancels a running crawl job by issuing DELETE /v2/crawl/:job_id.
#
# @param job_id [String] the crawl job ID
# @return [Hash] whatever the HTTP layer returns for the DELETE
# @raise [ArgumentError] if +job_id+ is nil
def cancel_crawl(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  endpoint = "/v2/crawl/#{job_id}"
  @http.delete(endpoint)
end

#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob

Crawls a website and waits for completion (auto-polling).

Parameters:

  • url (String)

    the URL to crawl

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:



230
231
232
233
# File 'lib/firecrawl/client.rb', line 230

# Crawls a website and waits for completion (auto-polling).
#
# Starts the job via +start_crawl+ and hands its ID to +poll_crawl+ together
# with the polling settings.
#
# @param url [String] the URL to crawl
# @param options [Models::CrawlOptions, nil] crawl configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::CrawlJob]
def crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  job_id = start_crawl(url, options).id
  poll_crawl(job_id, poll_interval, timeout)
end

#create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil, retention_days: nil, goal: nil, judge_enabled: nil) ⇒ Object

MONITOR



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
# File 'lib/firecrawl/client.rb', line 342

# Creates a monitor by POSTing to /v2/monitor.
#
# Keyword arguments are mapped onto the camelCase JSON keys sent in the body;
# arguments left as nil are omitted from the request entirely.
#
# @return [Models::Monitor] built from the response's "data" entry, or from the
#   whole response when "data" is absent
def create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil,
                   retention_days: nil, goal: nil, judge_enabled: nil)
  candidate_fields = {
    "name" => name,
    "schedule" => schedule,
    "targets" => targets,
    "webhook" => webhook,
    "notification" => notification,
    "retentionDays" => retention_days,
    "goal" => goal,
    "judgeEnabled" => judge_enabled
  }
  # Only nil is dropped; an explicit false (e.g. judge_enabled: false) is sent.
  payload = candidate_fields.reject { |_key, value| value.nil? }
  response = @http.post("/v2/monitor", payload)
  monitor_attrs = response["data"] || response
  Models::Monitor.new(monitor_attrs)
end

#delete_monitor(monitor_id) ⇒ Object

Raises:

  • (ArgumentError)


388
389
390
391
392
# File 'lib/firecrawl/client.rb', line 388

# Deletes a monitor by issuing DELETE /v2/monitor/:monitor_id.
#
# @param monitor_id [String] the monitor ID
# @return [Boolean] true only when the response's "success" entry is exactly true
# @raise [ArgumentError] if +monitor_id+ is nil
def delete_monitor(monitor_id)
  if monitor_id.nil?
    raise ArgumentError, "Monitor ID is required"
  end

  response = @http.delete("/v2/monitor/#{monitor_id}")
  response["success"] == true
end

#get_agent_status(job_id) ⇒ Models::AgentStatusResponse

Gets the status of an agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

Raises:

  • (ArgumentError)


459
460
461
462
463
464
# File 'lib/firecrawl/client.rb', line 459

# Gets the status of an agent task via GET /v2/agent/:job_id.
#
# @param job_id [String] the agent job ID
# @return [Models::AgentStatusResponse] built from the raw response
# @raise [ArgumentError] if +job_id+ is nil
def get_agent_status(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  response = @http.get("/v2/agent/#{job_id}")
  Models::AgentStatusResponse.new(response)
end

#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob

Gets the status and results of a batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

Raises:

  • (ArgumentError)


290
291
292
293
294
295
# File 'lib/firecrawl/client.rb', line 290

# Gets the status and results of a batch scrape job via
# GET /v2/batch/scrape/:job_id.
#
# @param job_id [String] the batch scrape job ID
# @return [Models::BatchScrapeJob] built from the raw response
# @raise [ArgumentError] if +job_id+ is nil
def get_batch_scrape_status(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  response = @http.get("/v2/batch/scrape/#{job_id}")
  Models::BatchScrapeJob.new(response)
end

#get_concurrency ⇒ Models::ConcurrencyCheck

Gets current concurrency usage.



503
504
505
506
# File 'lib/firecrawl/client.rb', line 503

# Gets current concurrency usage via GET /v2/concurrency-check.
#
# @return [Models::ConcurrencyCheck] built from the raw response
def get_concurrency
  response = @http.get("/v2/concurrency-check")
  Models::ConcurrencyCheck.new(response)
end

#get_crawl_errors(job_id) ⇒ Hash

Gets errors from a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


249
250
251
252
253
# File 'lib/firecrawl/client.rb', line 249

# Gets errors from a crawl job via GET /v2/crawl/:job_id/errors.
#
# @param job_id [String] the crawl job ID
# @return [Hash] whatever the HTTP layer returns for the GET
# @raise [ArgumentError] if +job_id+ is nil
def get_crawl_errors(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  endpoint = "/v2/crawl/#{job_id}/errors"
  @http.get(endpoint)
end

#get_crawl_status(job_id) ⇒ Models::CrawlJob

Gets the status and results of a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

Raises:

  • (ArgumentError)


216
217
218
219
220
221
# File 'lib/firecrawl/client.rb', line 216

# Gets the status and results of a crawl job via GET /v2/crawl/:job_id.
#
# @param job_id [String] the crawl job ID
# @return [Models::CrawlJob] built from the raw response
# @raise [ArgumentError] if +job_id+ is nil
def get_crawl_status(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  response = @http.get("/v2/crawl/#{job_id}")
  Models::CrawlJob.new(response)
end

#get_credit_usage ⇒ Models::CreditUsage

Gets current credit usage.

Returns:



511
512
513
514
515
# File 'lib/firecrawl/client.rb', line 511

# Gets current credit usage via GET /v2/team/credit-usage.
#
# @return [Models::CreditUsage] built from the response's "data" entry, or from
#   the whole response when "data" is absent
def get_credit_usage
  response = @http.get("/v2/team/credit-usage")
  Models::CreditUsage.new(response["data"] || response)
end

#get_monitor(monitor_id) ⇒ Object

Raises:

  • (ArgumentError)


363
364
365
366
367
368
# File 'lib/firecrawl/client.rb', line 363

# Fetches a single monitor via GET /v2/monitor/:monitor_id.
#
# @param monitor_id [String] the monitor ID
# @return [Models::Monitor] built from the response's "data" entry, or from the
#   whole response when "data" is absent
# @raise [ArgumentError] if +monitor_id+ is nil
def get_monitor(monitor_id)
  if monitor_id.nil?
    raise ArgumentError, "Monitor ID is required"
  end

  response = @http.get("/v2/monitor/#{monitor_id}")
  monitor_attrs = response["data"] || response
  Models::Monitor.new(monitor_attrs)
end

#get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true) ⇒ Object

Raises:

  • (ArgumentError)


408
409
410
411
412
413
414
415
416
417
418
# File 'lib/firecrawl/client.rb', line 408

# Fetches one check of a monitor via
# GET /v2/monitor/:monitor_id/checks/:check_id.
#
# +limit+, +skip+ and +status+ are passed to the +query+ helper, whose result is
# appended to the path. A top-level "next" value in the response is copied into
# the detail payload before the model is built.
#
# @param monitor_id [String] the monitor ID
# @param check_id [String] the check ID
# @param auto_paginate [Boolean] when truthy, the detail is passed through
#   +paginate_monitor_check+ and that result is returned instead
# @return [Models::MonitorCheckDetail, Object] the detail, or whatever
#   +paginate_monitor_check+ returns when auto-paginating
# @raise [ArgumentError] if +monitor_id+ or +check_id+ is nil
def get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true)
  raise ArgumentError, "Monitor ID is required" if monitor_id.nil?
  raise ArgumentError, "Check ID is required" if check_id.nil?

  query_string = query(limit: limit, skip: skip, status: status)
  response = @http.get("/v2/monitor/#{monitor_id}/checks/#{check_id}#{query_string}")

  detail_attrs = response["data"] || response
  next_link = response["next"]
  detail_attrs["next"] = next_link if next_link

  detail = Models::MonitorCheckDetail.new(detail_attrs)
  return detail unless auto_paginate

  paginate_monitor_check(detail)
end

#inspect_paper(paper_id) ⇒ Hash

Inspect paper metadata.

Parameters:

  • paper_id (String)

    paper identifier

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


101
102
103
104
# File 'lib/firecrawl/client.rb', line 101

# Inspect paper metadata via GET /v2/search/research/papers/:paper_id.
#
# @param paper_id [String] paper identifier; it is percent-encoded before being
#   placed in the path
# @return [Hash] whatever the HTTP layer returns for the GET
# @raise [ArgumentError] if +paper_id+ is nil
def inspect_paper(paper_id)
  raise ArgumentError, "Paper ID is required" if paper_id.nil?

  # encode_www_form_component is a form encoder and writes a space as "+",
  # which is a literal plus sign inside a URL path. A real "+" is emitted as
  # "%2B", so any "+" in its output came from a space and is rewritten to "%20".
  segment = URI.encode_www_form_component(paper_id).gsub("+", "%20")
  @http.get("/v2/search/research/papers/#{segment}")
end

#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash

Interacts with the scrape-bound browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

  • code (String)

    the code to execute

  • language (String) (defaults to: "node")

    “python”, “node”, or “bash” (default: “node”)

  • timeout (Integer, nil) (defaults to: nil)

    execution timeout in seconds (1-300)

Returns:

  • (Hash)

    execution result with stdout, stderr, exit_code

Raises:

  • (ArgumentError)


146
147
148
149
150
151
152
153
154
# File 'lib/firecrawl/client.rb', line 146

# Interacts with the scrape-bound browser session for a scrape job via
# POST /v2/scrape/:job_id/interact.
#
# @param job_id [String] the scrape job ID
# @param code [String] the code to execute
# @param language [String] "python", "node", or "bash" (default: "node")
# @param timeout [Integer, nil] execution timeout in seconds (1-300); left out
#   of the request when nil
# @return [Hash] execution result with stdout, stderr, exit_code
# @raise [ArgumentError] if +job_id+ or +code+ is nil
def interact(job_id, code, language: "node", timeout: nil)
  raise ArgumentError, "Job ID is required" if job_id.nil?
  raise ArgumentError, "Code is required" if code.nil?

  payload = { "code" => code, "language" => language }
  payload["timeout"] = timeout if timeout
  # The payload is built locally and never has an "origin" yet, so this always
  # stamps the SDK identifier.
  payload["origin"] = "ruby-sdk@#{Firecrawl::VERSION}"

  endpoint = "/v2/scrape/#{job_id}/interact"
  @http.post(endpoint, payload)
end

#list_monitor_checks(monitor_id, limit: nil, offset: nil) ⇒ Object

Raises:

  • (ArgumentError)


401
402
403
404
405
406
# File 'lib/firecrawl/client.rb', line 401

# Lists the checks of a monitor via GET /v2/monitor/:monitor_id/checks.
#
# +limit+ and +offset+ are passed to the +query+ helper, whose result is
# appended to the path.
#
# @param monitor_id [String] the monitor ID
# @return [Array<Models::MonitorCheck>] one model per entry of the response's
#   "data" list; empty when "data" is absent
# @raise [ArgumentError] if +monitor_id+ is nil
def list_monitor_checks(monitor_id, limit: nil, offset: nil)
  raise ArgumentError, "Monitor ID is required" if monitor_id.nil?

  query_string = query(limit: limit, offset: offset)
  response = @http.get("/v2/monitor/#{monitor_id}/checks#{query_string}")
  entries = response["data"] || []
  entries.map { |entry| Models::MonitorCheck.new(entry) }
end

#list_monitors(limit: nil, offset: nil) ⇒ Object



358
359
360
361
# File 'lib/firecrawl/client.rb', line 358

# Lists monitors via GET /v2/monitor.
#
# +limit+ and +offset+ are passed to the +query+ helper, whose result is
# appended to the path.
#
# @return [Array<Models::Monitor>] one model per entry of the response's "data"
#   list; empty when "data" is absent
def list_monitors(limit: nil, offset: nil)
  query_string = query(limit: limit, offset: offset)
  response = @http.get("/v2/monitor#{query_string}")
  entries = response["data"] || []
  entries.map { |entry| Models::Monitor.new(entry) }
end

#map(url, options = nil) ⇒ Models::MapData

Discovers URLs on a website.

Parameters:

  • url (String)

    the URL to map

  • options (Models::MapOptions, nil) (defaults to: nil)

    map configuration

Returns:

Raises:

  • (ArgumentError)


328
329
330
331
332
333
334
335
336
# File 'lib/firecrawl/client.rb', line 328

# Discovers URLs on a website via POST /v2/map.
#
# The request body starts as { "url" => url }; entries from +options.to_h+ are
# merged on top of it when options are given.
#
# @param url [String] the URL to map
# @param options [Models::MapOptions, nil] map configuration
# @return [Models::MapData] built from the response's "data" entry, or from the
#   whole response when "data" is absent
# @raise [ArgumentError] if +url+ is nil
def map(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.update(options.to_h) if options
  response = @http.post("/v2/map", payload)
  Models::MapData.new(response["data"] || response)
end

#parse(file, options = nil) ⇒ Models::Document

Parses an uploaded file and returns the extracted document.

Parameters:

Returns:

Raises:

  • (ArgumentError)


175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/firecrawl/client.rb', line 175

# Parses an uploaded file and returns the extracted document.
#
# Sends a multipart POST to /v2/parse with the file under the "file" field and
# the options serialised as JSON under the "options" field ("{}" when no
# options are given).
#
# @param file [Models::ParseFile] the file to upload
# @param options [#to_h, nil] parse configuration
# @return [Models::Document] built from the response's "data" entry, or from the
#   whole response when "data" is absent
# @raise [ArgumentError] if +file+ is nil or not a Models::ParseFile
def parse(file, options = nil)
  raise ArgumentError, "File is required" if file.nil?
  raise ArgumentError, "File must be a Firecrawl::Models::ParseFile" unless file.is_a?(Models::ParseFile)

  serialized_options = JSON.generate(options.nil? ? {} : options.to_h)
  response = @http.post_multipart(
    "/v2/parse",
    fields: { "options" => serialized_options },
    file_field: "file",
    filename: file.filename,
    content: file.content,
    content_type: file.content_type
  )
  Models::Document.new(response["data"] || response)
end

#read_paper(paper_id, query_text, options = {}) ⇒ Hash

Read a paper with query-guided passages.

Parameters:

  • paper_id (String)

    paper identifier

  • query_text (String)

    passage query

  • options (Hash) (defaults to: {})

    optional query parameters

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


112
113
114
115
116
# File 'lib/firecrawl/client.rb', line 112

# Read a paper with query-guided passages via
# GET /v2/search/research/papers/:paper_id.
#
# +options+ is merged with the "query" and "origin" entries (which win over
# same-named string keys in +options+) and passed to the +query+ helper, whose
# result is appended to the path.
#
# @param paper_id [String] paper identifier; it is percent-encoded before being
#   placed in the path
# @param query_text [String] passage query
# @param options [Hash] optional query parameters
# @return [Hash] whatever the HTTP layer returns for the GET
# @raise [ArgumentError] if +paper_id+ is nil
def read_paper(paper_id, query_text, options = {})
  raise ArgumentError, "Paper ID is required" if paper_id.nil?

  # encode_www_form_component is a form encoder and writes a space as "+",
  # which is a literal plus sign inside a URL path. A real "+" is emitted as
  # "%2B", so any "+" in its output came from a space and is rewritten to "%20".
  segment = URI.encode_www_form_component(paper_id).gsub("+", "%20")
  params = options.merge("query" => query_text, "origin" => "ruby-sdk@#{Firecrawl::VERSION}")
  @http.get("/v2/search/research/papers/#{segment}#{query(params)}")
end

#related_papers(paper_id, intent, options = {}) ⇒ Hash

Find papers related to a paper.

Parameters:

  • paper_id (String)

    paper identifier

  • intent (String)

    relatedness intent

  • options (Hash) (defaults to: {})

    optional query parameters

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


124
125
126
127
128
# File 'lib/firecrawl/client.rb', line 124

# Find papers related to a paper via
# GET /v2/search/research/papers/:paper_id/similar.
#
# +options+ is merged with the "intent" and "origin" entries (which win over
# same-named string keys in +options+) and passed to the +query+ helper, whose
# result is appended to the path.
#
# @param paper_id [String] paper identifier; it is percent-encoded before being
#   placed in the path
# @param intent [String] relatedness intent
# @param options [Hash] optional query parameters
# @return [Hash] whatever the HTTP layer returns for the GET
# @raise [ArgumentError] if +paper_id+ is nil
def related_papers(paper_id, intent, options = {})
  raise ArgumentError, "Paper ID is required" if paper_id.nil?

  # encode_www_form_component is a form encoder and writes a space as "+",
  # which is a literal plus sign inside a URL path. A real "+" is emitted as
  # "%2B", so any "+" in its output came from a space and is rewritten to "%20".
  segment = URI.encode_www_form_component(paper_id).gsub("+", "%20")
  params = options.merge("intent" => intent, "origin" => "ruby-sdk@#{Firecrawl::VERSION}")
  @http.get("/v2/search/research/papers/#{segment}/similar#{query(params)}")
end

#run_monitor(monitor_id) ⇒ Object

Raises:

  • (ArgumentError)


394
395
396
397
398
399
# File 'lib/firecrawl/client.rb', line 394

# Triggers a run of a monitor via POST /v2/monitor/:monitor_id/run with an
# empty body.
#
# @param monitor_id [String] the monitor ID
# @return [Models::MonitorCheck] built from the response's "data" entry, or from
#   the whole response when "data" is absent
# @raise [ArgumentError] if +monitor_id+ is nil
def run_monitor(monitor_id)
  if monitor_id.nil?
    raise ArgumentError, "Monitor ID is required"
  end

  response = @http.post("/v2/monitor/#{monitor_id}/run", {})
  check_attrs = response["data"] || response
  Models::MonitorCheck.new(check_attrs)
end

#scrape(url, options = nil) ⇒ Models::Document

Scrapes a single URL and returns the document.

Parameters:

  • url (String)

    the URL to scrape

  • options (Models::ScrapeOptions, nil) (defaults to: nil)

    scrape configuration

Returns:

Raises:

  • (ArgumentError)


77
78
79
80
81
82
83
84
85
86
# File 'lib/firecrawl/client.rb', line 77

# Scrapes a single URL via POST /v2/scrape and returns the document.
#
# The request body starts as { "url" => url }; entries from +options.to_h+ are
# merged on top of it when options are given. An "origin" of
# "ruby-sdk@<version>" is added unless the merged body already has a truthy
# "origin".
#
# @param url [String] the URL to scrape
# @param options [Models::ScrapeOptions, nil] scrape configuration
# @return [Models::Document] built from the response's "data" entry, or from the
#   whole response when "data" is absent
# @raise [ArgumentError] if +url+ is nil
def scrape(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.update(options.to_h) if options
  payload["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless payload["origin"]
  response = @http.post("/v2/scrape", payload)
  Models::Document.new(response["data"] || response)
end

#search(query, options = nil) ⇒ Models::SearchData

Performs a web search.

Parameters:

  • query (String)

    the search query

  • options (Models::SearchOptions, nil) (defaults to: nil)

    search configuration

Returns:

Raises:

  • (ArgumentError)


429
430
431
432
433
434
435
436
437
438
# File 'lib/firecrawl/client.rb', line 429

# Performs a web search via POST /v2/search.
#
# The request body starts as { "query" => query }; entries from +options.to_h+
# are merged on top of it when options are given. An "origin" of
# "ruby-sdk@<version>" is added unless the merged body already has a truthy
# "origin".
#
# @param query [String] the search query
# @param options [Models::SearchOptions, nil] search configuration
# @return [Models::SearchData] built from the response's "data" entry, or from
#   the whole response when "data" is absent
# @raise [ArgumentError] if +query+ is nil
def search(query, options = nil)
  raise ArgumentError, "Query is required" if query.nil?

  payload = { "query" => query }
  payload.update(options.to_h) if options
  payload["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless payload["origin"]
  response = @http.post("/v2/search", payload)
  Models::SearchData.new(response["data"] || response)
end

#search_github(query_text, options = {}) ⇒ Hash

Search GitHub research content.

Parameters:

  • query_text (String)

    GitHub query

  • options (Hash) (defaults to: {})

    optional query parameters

Returns:

  • (Hash)


135
136
137
# File 'lib/firecrawl/client.rb', line 135

# Search GitHub research content via GET /v2/search/research/github.
#
# +options+ is merged with the "query" and "origin" entries (which win over
# same-named string keys in +options+) and passed to the +query+ helper, whose
# result is appended to the path.
#
# @param query_text [String] GitHub query
# @param options [Hash] optional query parameters
# @return [Hash] whatever the HTTP layer returns for the GET
def search_github(query_text, options = {})
  sdk_origin = "ruby-sdk@#{Firecrawl::VERSION}"
  params = options.merge("query" => query_text, "origin" => sdk_origin)
  @http.get("/v2/search/research/github#{query(params)}")
end

#search_papers(query, options = {}) ⇒ Hash

Search research papers.

Parameters:

  • query (String)

    research query

  • options (Hash) (defaults to: {})

    optional query parameters

Returns:

  • (Hash)


93
94
95
# File 'lib/firecrawl/client.rb', line 93

# Search research papers via GET /v2/search/research/papers.
#
# +options+ is merged with the "query" and "origin" entries (which win over
# same-named string keys in +options+) and passed to the +query+ helper, whose
# result is appended to the path.
#
# @param query [String] research query
# @param options [Hash] optional query parameters
# @return [Hash] whatever the HTTP layer returns for the GET
def search_papers(query, options = {})
  sdk_origin = "ruby-sdk@#{Firecrawl::VERSION}"
  params = options.merge("query" => query, "origin" => sdk_origin)
  # The +query+ parameter shadows the +query+ helper as a bare name; the
  # parenthesised call below still dispatches to the helper method.
  @http.get("/v2/search/research/papers#{query(params)}")
end

#start_agent(options) ⇒ Models::AgentResponse

Starts an async agent task.

Parameters:

Returns:

Raises:

  • (ArgumentError)


448
449
450
451
452
453
# File 'lib/firecrawl/client.rb', line 448

# Starts an async agent task via POST /v2/agent, sending +options.to_h+ as the
# request body.
#
# @param options [#to_h] agent configuration
# @return [Models::AgentResponse] built from the raw response
# @raise [ArgumentError] if +options+ is nil
def start_agent(options)
  if options.nil?
    raise ArgumentError, "Agent options are required"
  end

  response = @http.post("/v2/agent", options.to_h)
  Models::AgentResponse.new(response)
end

#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse

Starts an async batch scrape job.

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

Returns:

Raises:

  • (ArgumentError)


264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# File 'lib/firecrawl/client.rb', line 264

# Starts an async batch scrape job via POST /v2/batch/scrape.
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @return [Models::BatchScrapeResponse] built from the raw response
# @raise [ArgumentError] if +urls+ is nil
def start_batch_scrape(urls, options = nil)
  raise ArgumentError, "URLs list is required" if urls.nil?

  payload = { "urls" => urls }
  headers = {}
  if options
    option_fields = options.to_h

    # A non-empty idempotency key is sent as the x-idempotency-key header.
    idempotency_key = options.idempotency_key
    headers["x-idempotency-key"] = idempotency_key if idempotency_key && !idempotency_key.empty?

    # The API expects scrape options at the top level, so the nested "options"
    # hash is lifted out and merged last (it wins on key collisions).
    scrape_options = option_fields.delete("options")
    payload.merge!(option_fields)
    payload.merge!(scrape_options) if scrape_options
  end

  response = @http.post("/v2/batch/scrape", payload, extra_headers: headers)
  Models::BatchScrapeResponse.new(response)
end

#start_crawl(url, options = nil) ⇒ Models::CrawlResponse

Starts an async crawl job and returns immediately.

Parameters:

  • url (String)

    the URL to start crawling from

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

Returns:

Raises:

  • (ArgumentError)


203
204
205
206
207
208
209
210
# File 'lib/firecrawl/client.rb', line 203

# Starts an async crawl job via POST /v2/crawl and returns immediately.
#
# The request body starts as { "url" => url }; entries from +options.to_h+ are
# merged on top of it when options are given.
#
# @param url [String] the URL to start crawling from
# @param options [Models::CrawlOptions, nil] crawl configuration
# @return [Models::CrawlResponse] built from the raw response
# @raise [ArgumentError] if +url+ is nil
def start_crawl(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.update(options.to_h) if options
  response = @http.post("/v2/crawl", payload)
  Models::CrawlResponse.new(response)
end

#stop_interactive_browser(job_id) ⇒ Hash

Stops the interactive browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

Returns:

  • (Hash)

    stop response

Raises:

  • (ArgumentError)


160
161
162
163
164
# File 'lib/firecrawl/client.rb', line 160

# Stops the interactive browser session for a scrape job via
# DELETE /v2/scrape/:job_id/interact.
#
# @param job_id [String] the scrape job ID
# @return [Hash] stop response
# @raise [ArgumentError] if +job_id+ is nil
def stop_interactive_browser(job_id)
  if job_id.nil?
    raise ArgumentError, "Job ID is required"
  end

  endpoint = "/v2/scrape/#{job_id}/interact"
  @http.delete(endpoint)
end

#update_monitor(monitor_id, **attrs) ⇒ Object

Raises:

  • (ArgumentError)


370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# File 'lib/firecrawl/client.rb', line 370

# Updates a monitor via PATCH /v2/monitor/:monitor_id.
#
# Recognised keywords are :name, :status, :schedule, :webhook, :notification,
# :targets, :retention_days, :goal and :judge_enabled. Each is sent under its
# JSON key only when its value is not nil; any other keyword is ignored.
#
# @param monitor_id [String] the monitor ID
# @param attrs [Hash] fields to change
# @return [Models::Monitor] built from the response's "data" entry, or from the
#   whole response when "data" is absent
# @raise [ArgumentError] if +monitor_id+ is nil
def update_monitor(monitor_id, **attrs)
  raise ArgumentError, "Monitor ID is required" if monitor_id.nil?

  # Keyword => JSON key, in the order the keys appear in the request body.
  json_keys = {
    name: "name",
    status: "status",
    schedule: "schedule",
    webhook: "webhook",
    notification: "notification",
    targets: "targets",
    retention_days: "retentionDays",
    goal: "goal",
    judge_enabled: "judgeEnabled"
  }
  payload = json_keys.each_with_object({}) do |(keyword, json_key), acc|
    value = attrs[keyword]
    acc[json_key] = value unless value.nil?
  end

  response = @http.patch("/v2/monitor/#{monitor_id}", payload)
  Models::Monitor.new(response["data"] || response)
end