Class: Firecrawl::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/firecrawl/client.rb

Overview

Client for the Firecrawl v2 API.

Examples:

Quick start

client = Firecrawl::Client.new(api_key: "fc-your-api-key")

# Scrape a single page
doc = client.scrape("https://example.com",
  Firecrawl::Models::ScrapeOptions.new(formats: ["markdown"]))

# Crawl a website
job = client.crawl("https://example.com",
  Firecrawl::Models::CrawlOptions.new(limit: 50))

Constant Summary collapse

DEFAULT_API_URL =
"https://api.firecrawl.dev"
DEFAULT_TIMEOUT =
300 # seconds
DEFAULT_MAX_RETRIES =
3
DEFAULT_BACKOFF_FACTOR =
0.5
DEFAULT_POLL_INTERVAL =
2 # seconds
DEFAULT_JOB_TIMEOUT =
300 # seconds

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client

Creates a new Firecrawl client.

Parameters:

  • api_key (String, nil) (defaults to: nil)

    API key (falls back to FIRECRAWL_API_KEY env var)

  • api_url (String) (defaults to: nil)

    API base URL

  • timeout (Integer) (defaults to: DEFAULT_TIMEOUT)

    HTTP request timeout in seconds

  • max_retries (Integer) (defaults to: DEFAULT_MAX_RETRIES)

    maximum automatic retries for transient failures

  • backoff_factor (Float) (defaults to: DEFAULT_BACKOFF_FACTOR)

    exponential backoff factor in seconds



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/firecrawl/client.rb', line 34

# Creates a new Firecrawl client.
#
# The API key comes from +api_key+, falling back to the FIRECRAWL_API_KEY
# environment variable. The base URL comes from +api_url+, then the
# FIRECRAWL_API_URL environment variable, then DEFAULT_API_URL. A blank
# environment value is treated as unset in both cases.
#
# @param api_key [String, nil] API key (falls back to FIRECRAWL_API_KEY env var)
# @param api_url [String, nil] API base URL
# @param timeout [Integer] HTTP request timeout in seconds
# @param max_retries [Integer] maximum automatic retries for transient failures
# @param backoff_factor [Float] exponential backoff factor in seconds
# @raise [FirecrawlError] if the resolved URL does not start with http:// or https://
def initialize(
  api_key: nil,
  api_url: nil,
  timeout: DEFAULT_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  backoff_factor: DEFAULT_BACKOFF_FACTOR
)
  resolved_key = api_key || ENV["FIRECRAWL_API_KEY"]
  # A nil/empty key is allowed: scrape, search, and interact fall back to the
  # keyless free tier (rate-limited per IP). Other methods return 401 from the
  # API until a key is provided.
  resolved_key = nil if resolved_key.nil? || resolved_key.strip.empty?

  # An exported-but-empty FIRECRAWL_API_URL (common in .env files) is truthy in
  # Ruby; ignore it so the default URL is used instead of failing validation.
  env_url = ENV["FIRECRAWL_API_URL"]
  env_url = nil if env_url.nil? || env_url.strip.empty?

  resolved_url = api_url || env_url || DEFAULT_API_URL
  unless resolved_url.match?(%r{\Ahttps?://}i)
    raise FirecrawlError, "API URL must be a fully qualified HTTP or HTTPS URL (got: #{resolved_url})."
  end

  @http = HttpClient.new(
    api_key: resolved_key,
    base_url: resolved_url,
    timeout: timeout,
    max_retries: max_retries,
    backoff_factor: backoff_factor
  )
end

Class Method Details

.from_env ⇒ Client

Creates a client from the FIRECRAWL_API_KEY environment variable.

Returns:

  • (Client)



64
65
66
# File 'lib/firecrawl/client.rb', line 64

# Builds a client by calling +new+ with no arguments, so every setting
# takes the constructor's defaults.
#
# @return [Client]
def self.from_env
  new
end

Instance Method Details

#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse

Runs an agent task and waits for completion (auto-polling).

Parameters:

  • options (Models::AgentOptions)

    agent configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

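Returns:

  • (Models::AgentStatusResponse)

Raises:

  • (FirecrawlError)

    if starting the agent did not return a job ID

  • (JobTimeoutError)

    if the task is not done before the timeout elapses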



421
422
423
424
425
426
427
428
429
430
431
432
433
# File 'lib/firecrawl/client.rb', line 421

# Runs an agent task and waits for completion (auto-polling).
#
# Elapsed time is measured with the monotonic clock so wall-clock adjustments
# (NTP, DST, manual changes) cannot shorten or extend the wait, and the sleep
# between polls is capped at the time left so the call does not overshoot
# +timeout+.
#
# @param options [Models::AgentOptions] agent configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::AgentStatusResponse] the first status whose +done?+ is true
# @raise [FirecrawlError] if starting the agent did not return a job ID
# @raise [JobTimeoutError] if the task is not done before +timeout+ elapses
def agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_agent(options)
  raise FirecrawlError, "Agent start did not return a job ID" if start.id.nil?

  clock = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  deadline = clock.call + timeout
  while clock.call < deadline
    status = get_agent_status(start.id)
    return status if status.done?

    # Never sleep past the deadline; the loop condition then ends the wait.
    remaining = deadline - clock.call
    sleep([poll_interval, remaining].min) if remaining.positive?
  end
  raise JobTimeoutError.new(start.id, timeout, "Agent")
end

#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob

Batch-scrapes URLs and waits for completion (auto-polling).

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:

  • (Models::BatchScrapeJob)



253
254
255
256
# File 'lib/firecrawl/client.rb', line 253

# Batch-scrapes URLs and waits for completion (auto-polling).
#
# Starts the job with +start_batch_scrape+ and hands its ID to
# +poll_batch_scrape+, whose result is returned.
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::BatchScrapeJob]
def batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  job_id = start_batch_scrape(urls, options).id
  poll_batch_scrape(job_id, poll_interval, timeout)
end

#cancel_agent(job_id) ⇒ Hash

Cancels a running agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


439
440
441
442
443
# File 'lib/firecrawl/client.rb', line 439

# Cancels a running agent task.
#
# @param job_id [String] the agent job ID
# @return [Hash] the response of the DELETE request
# @raise [ArgumentError] if +job_id+ is nil or blank
def cancel_agent(job_id)
  # A blank ID would be interpolated into "/v2/agent/" and hit another route.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/agent/#{job_id}")
end

#cancel_batch_scrape(job_id) ⇒ Hash

Cancels a running batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


262
263
264
265
266
# File 'lib/firecrawl/client.rb', line 262

# Cancels a running batch scrape job.
#
# @param job_id [String] the batch scrape job ID
# @return [Hash] the response of the DELETE request
# @raise [ArgumentError] if +job_id+ is nil or blank
def cancel_batch_scrape(job_id)
  # A blank ID would be interpolated into "/v2/batch/scrape/" and hit another route.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/batch/scrape/#{job_id}")
end

#cancel_crawl(job_id) ⇒ Hash

Cancels a running crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


188
189
190
191
192
# File 'lib/firecrawl/client.rb', line 188

# Cancels a running crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Hash] the response of the DELETE request
# @raise [ArgumentError] if +job_id+ is nil or blank
def cancel_crawl(job_id)
  # A blank ID would be interpolated into "/v2/crawl/" and hit another route.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/crawl/#{job_id}")
end

#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob

Crawls a website and waits for completion (auto-polling).

Parameters:

  • url (String)

    the URL to crawl

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:

  • (Models::CrawlJob)



179
180
181
182
# File 'lib/firecrawl/client.rb', line 179

# Crawls a website and waits for completion (auto-polling).
#
# Starts the job with +start_crawl+ and hands its ID to +poll_crawl+,
# whose result is returned.
#
# @param url [String] the URL to crawl
# @param options [Models::CrawlOptions, nil] crawl configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::CrawlJob]
def crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  job_id = start_crawl(url, options).id
  poll_crawl(job_id, poll_interval, timeout)
end

#create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil, retention_days: nil, goal: nil, judge_enabled: nil) ⇒ Object

Creates a new monitor.



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/firecrawl/client.rb', line 291

def create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil,
                   retention_days: nil, goal: nil, judge_enabled: nil)
  body = {
    "name" => name,
    "schedule" => schedule,
    "targets" => targets,
    "webhook" => webhook,
    "notification" => notification,
    "retentionDays" => retention_days,
    "goal" => goal,
    "judgeEnabled" => judge_enabled,
  }.compact
  raw = @http.post("/v2/monitor", body)
  Models::Monitor.new(raw["data"] || raw)
end

#delete_monitor(monitor_id) ⇒ Object

Raises:

  • (ArgumentError)


337
338
339
340
341
# File 'lib/firecrawl/client.rb', line 337

# Deletes a monitor.
#
# @param monitor_id [String] the monitor ID
# @return [Boolean] true only when the response's "success" entry is exactly true
# @raise [ArgumentError] if +monitor_id+ is nil or blank
def delete_monitor(monitor_id)
  # A blank ID would be interpolated into "/v2/monitor/" and hit another route.
  raise ArgumentError, "Monitor ID is required" if monitor_id.to_s.strip.empty?

  @http.delete("/v2/monitor/#{monitor_id}")["success"] == true
end

#get_agent_status(job_id) ⇒ Models::AgentStatusResponse

Gets the status of an agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

  • (Models::AgentStatusResponse)

Raises:

  • (ArgumentError)


408
409
410
411
412
413
# File 'lib/firecrawl/client.rb', line 408

# Gets the status of an agent task.
#
# @param job_id [String] the agent job ID
# @return [Models::AgentStatusResponse] built from the raw response
# @raise [ArgumentError] if +job_id+ is nil or blank
def get_agent_status(job_id)
  # A blank ID would be interpolated into "/v2/agent/" and hit another route.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  raw = @http.get("/v2/agent/#{job_id}")
  Models::AgentStatusResponse.new(raw)
end

#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob

Gets the status and results of a batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

  • (Models::BatchScrapeJob)

Raises:

  • (ArgumentError)


239
240
241
242
243
244
# File 'lib/firecrawl/client.rb', line 239

# Gets the status and results of a batch scrape job.
#
# @param job_id [String] the batch scrape job ID
# @return [Models::BatchScrapeJob] built from the raw response
# @raise [ArgumentError] if +job_id+ is nil or blank
def get_batch_scrape_status(job_id)
  # A blank ID would be interpolated into "/v2/batch/scrape/" and hit another route.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  raw = @http.get("/v2/batch/scrape/#{job_id}")
  Models::BatchScrapeJob.new(raw)
end

#get_concurrency ⇒ Models::ConcurrencyCheck

Gets current concurrency usage.



452
453
454
455
# File 'lib/firecrawl/client.rb', line 452

# Gets current concurrency usage.
#
# @return [Models::ConcurrencyCheck] built from the raw response of
#   GET /v2/concurrency-check
def get_concurrency
  Models::ConcurrencyCheck.new(@http.get("/v2/concurrency-check"))
end

#get_crawl_errors(job_id) ⇒ Hash

Gets errors from a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


198
199
200
201
202
# File 'lib/firecrawl/client.rb', line 198

# Gets errors from a crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Hash] the response of GET /v2/crawl/{job_id}/errors
# @raise [ArgumentError] if +job_id+ is nil or blank
def get_crawl_errors(job_id)
  # A blank ID would produce "/v2/crawl//errors", which is not a valid job path.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.get("/v2/crawl/#{job_id}/errors")
end

#get_crawl_status(job_id) ⇒ Models::CrawlJob

Gets the status and results of a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Models::CrawlJob)

Raises:

  • (ArgumentError)


165
166
167
168
169
170
# File 'lib/firecrawl/client.rb', line 165

# Gets the status and results of a crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Models::CrawlJob] built from the raw response
# @raise [ArgumentError] if +job_id+ is nil or blank
def get_crawl_status(job_id)
  # A blank ID would be interpolated into "/v2/crawl/" and hit another route.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  raw = @http.get("/v2/crawl/#{job_id}")
  Models::CrawlJob.new(raw)
end

#get_credit_usage ⇒ Models::CreditUsage

Gets current credit usage.

Returns:

  • (Models::CreditUsage)



460
461
462
463
464
# File 'lib/firecrawl/client.rb', line 460

# Gets current credit usage.
#
# @return [Models::CreditUsage] built from the response's "data" entry when
#   present, otherwise from the whole response
def get_credit_usage
  response = @http.get("/v2/team/credit-usage")
  Models::CreditUsage.new(response["data"] || response)
end

#get_monitor(monitor_id) ⇒ Object

Raises:

  • (ArgumentError)


312
313
314
315
316
317
# File 'lib/firecrawl/client.rb', line 312

def get_monitor(monitor_id)
  raise ArgumentError, "Monitor ID is required" if monitor_id.nil?

  raw = @http.get("/v2/monitor/#{monitor_id}")
  Models::Monitor.new(raw["data"] || raw)
end

#get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true) ⇒ Object

Raises:

  • (ArgumentError)


357
358
359
360
361
362
363
364
365
366
367
# File 'lib/firecrawl/client.rb', line 357

# Fetches the detail of one monitor check.
#
# @param monitor_id [String] the monitor ID
# @param check_id [String] the check ID
# @param limit [Object, nil] forwarded to the +query+ helper
# @param skip [Object, nil] forwarded to the +query+ helper
# @param status [Object, nil] forwarded to the +query+ helper
# @param auto_paginate [Boolean] when truthy, the check is passed through
#   +paginate_monitor_check+ before being returned
# @return [Object] a Models::MonitorCheckDetail, or whatever
#   +paginate_monitor_check+ returns when +auto_paginate+ is truthy
# @raise [ArgumentError] if +monitor_id+ or +check_id+ is nil or blank
def get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true)
  # Blank IDs would collapse path segments and address a different route.
  raise ArgumentError, "Monitor ID is required" if monitor_id.to_s.strip.empty?
  raise ArgumentError, "Check ID is required" if check_id.to_s.strip.empty?

  params = query(limit: limit, skip: skip, status: status)
  raw = @http.get("/v2/monitor/#{monitor_id}/checks/#{check_id}#{params}")
  data = raw["data"] || raw
  # Carry the top-level "next" entry into the data handed to the model.
  data["next"] = raw["next"] if raw["next"]
  check = Models::MonitorCheckDetail.new(data)
  auto_paginate ? paginate_monitor_check(check) : check
end

#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash

Interacts with the scrape-bound browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

  • code (String)

    the code to execute

  • language (String) (defaults to: "node")

    “python”, “node”, or “bash” (default: “node”)

  • timeout (Integer, nil) (defaults to: nil)

    execution timeout in seconds (1-300)

Returns:

  • (Hash)

    execution result with stdout, stderr, exit_code

Raises:

  • (ArgumentError)


95
96
97
98
99
100
101
102
103
# File 'lib/firecrawl/client.rb', line 95

# Interacts with the scrape-bound browser session for a scrape job.
#
# @param job_id [String] the scrape job ID
# @param code [String] the code to execute
# @param language [String] "python", "node", or "bash" (default: "node")
# @param timeout [Integer, nil] execution timeout in seconds (1-300); omitted
#   from the request when nil
# @return [Hash] execution result with stdout, stderr, exit_code
# @raise [ArgumentError] if +job_id+ is nil or blank, or +code+ is nil
def interact(job_id, code, language: "node", timeout: nil)
  # A blank ID would produce "/v2/scrape//interact", which is not a valid job path.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?
  raise ArgumentError, "Code is required" if code.nil?

  body = { "code" => code, "language" => language }
  body["timeout"] = timeout if timeout
  body["origin"] ||= "ruby-sdk@#{Firecrawl::VERSION}"
  @http.post("/v2/scrape/#{job_id}/interact", body)
end

#list_monitor_checks(monitor_id, limit: nil, offset: nil) ⇒ Object

Raises:

  • (ArgumentError)


350
351
352
353
354
355
# File 'lib/firecrawl/client.rb', line 350

# Lists the checks recorded for a monitor.
#
# @param monitor_id [String] the monitor ID
# @param limit [Object, nil] forwarded to the +query+ helper
# @param offset [Object, nil] forwarded to the +query+ helper
# @return [Array<Models::MonitorCheck>] one model per entry of the response's
#   "data" list; empty when "data" is missing
# @raise [ArgumentError] if +monitor_id+ is nil or blank
def list_monitor_checks(monitor_id, limit: nil, offset: nil)
  # A blank ID would produce "/v2/monitor//checks", which is not a valid monitor path.
  raise ArgumentError, "Monitor ID is required" if monitor_id.to_s.strip.empty?

  raw = @http.get("/v2/monitor/#{monitor_id}/checks#{query(limit: limit, offset: offset)}")
  (raw["data"] || []).map { |item| Models::MonitorCheck.new(item) }
end

#list_monitors(limit: nil, offset: nil) ⇒ Object



307
308
309
310
# File 'lib/firecrawl/client.rb', line 307

def list_monitors(limit: nil, offset: nil)
  raw = @http.get("/v2/monitor#{query(limit: limit, offset: offset)}")
  (raw["data"] || []).map { |item| Models::Monitor.new(item) }
end

#map(url, options = nil) ⇒ Models::MapData

Discovers URLs on a website.

Parameters:

  • url (String)

    the URL to map

  • options (Models::MapOptions, nil) (defaults to: nil)

    map configuration

Returns:

  • (Models::MapData)

Raises:

  • (ArgumentError)


277
278
279
280
281
282
283
284
285
# File 'lib/firecrawl/client.rb', line 277

# Discovers URLs on a website.
#
# @param url [String] the URL to map
# @param options [Models::MapOptions, nil] map configuration; its +to_h+ is
#   merged over the request body
# @return [Models::MapData] built from the response's "data" entry when
#   present, otherwise from the whole response
# @raise [ArgumentError] if +url+ is nil
def map(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.update(options.to_h) if options
  response = @http.post("/v2/map", payload)
  Models::MapData.new(response["data"] || response)
end

#parse(file, options = nil) ⇒ Models::Document

Parses an uploaded file and returns the extracted document.

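Parameters:

  • file (Models::ParseFile)

    the file to upload; must be a Firecrawl::Models::ParseFile

  • options (defaults to: nil)

    parse configuration, sent as the JSON-encoded "options" form field

Returns:

  • (Models::Document)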

Raises:

  • (ArgumentError)


124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/firecrawl/client.rb', line 124

# Parses an uploaded file and returns the extracted document.
#
# The file is sent as the "file" part of a multipart POST to /v2/parse; the
# options are JSON-encoded into the "options" form field ("{}" when nil).
#
# @param file [Models::ParseFile] the file to upload
# @param options [#to_h, nil] parse configuration
# @return [Models::Document] built from the response's "data" entry when
#   present, otherwise from the whole response
# @raise [ArgumentError] if +file+ is nil or is not a Models::ParseFile
def parse(file, options = nil)
  raise ArgumentError, "File is required" if file.nil?
  raise ArgumentError, "File must be a Firecrawl::Models::ParseFile" unless file.is_a?(Models::ParseFile)

  serialized_options = JSON.generate(options.nil? ? {} : options.to_h)
  response = @http.post_multipart(
    "/v2/parse",
    fields: { "options" => serialized_options },
    file_field: "file",
    filename: file.filename,
    content: file.content,
    content_type: file.content_type
  )
  Models::Document.new(response["data"] || response)
end

#run_monitor(monitor_id) ⇒ Object

Raises:

  • (ArgumentError)


343
344
345
346
347
348
# File 'lib/firecrawl/client.rb', line 343

# Triggers a run of a monitor by POSTing an empty body to
# /v2/monitor/{monitor_id}/run.
#
# @param monitor_id [String] the monitor ID
# @return [Models::MonitorCheck] built from the response's "data" entry when
#   present, otherwise from the whole response
# @raise [ArgumentError] if +monitor_id+ is nil or blank
def run_monitor(monitor_id)
  # A blank ID would produce "/v2/monitor//run", which is not a valid monitor path.
  raise ArgumentError, "Monitor ID is required" if monitor_id.to_s.strip.empty?

  raw = @http.post("/v2/monitor/#{monitor_id}/run", {})
  Models::MonitorCheck.new(raw["data"] || raw)
end

#scrape(url, options = nil) ⇒ Models::Document

Scrapes a single URL and returns the document.

Parameters:

  • url (String)

    the URL to scrape

  • options (Models::ScrapeOptions, nil) (defaults to: nil)

    scrape configuration

Returns:

  • (Models::Document)

Raises:

  • (ArgumentError)


77
78
79
80
81
82
83
84
85
86
# File 'lib/firecrawl/client.rb', line 77

# Scrapes a single URL and returns the document.
#
# @param url [String] the URL to scrape
# @param options [Models::ScrapeOptions, nil] scrape configuration; its +to_h+
#   is merged over the request body
# @return [Models::Document] built from the response's "data" entry when
#   present, otherwise from the whole response
# @raise [ArgumentError] if +url+ is nil
def scrape(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.update(options.to_h) if options
  # Tag the request with the SDK version unless the options already set an origin.
  payload["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless payload["origin"]
  response = @http.post("/v2/scrape", payload)
  Models::Document.new(response["data"] || response)
end

#search(query, options = nil) ⇒ Models::SearchData

Performs a web search.

Parameters:

  • query (String)

    the search query

  • options (Models::SearchOptions, nil) (defaults to: nil)

    search configuration

Returns:

  • (Models::SearchData)

Raises:

  • (ArgumentError)


378
379
380
381
382
383
384
385
386
387
# File 'lib/firecrawl/client.rb', line 378

# Performs a web search.
#
# @param query [String] the search query
# @param options [Models::SearchOptions, nil] search configuration; its +to_h+
#   is merged over the request body
# @return [Models::SearchData] built from the response's "data" entry when
#   present, otherwise from the whole response
# @raise [ArgumentError] if +query+ is nil
def search(query, options = nil)
  raise ArgumentError, "Query is required" if query.nil?

  payload = { "query" => query }
  payload.update(options.to_h) if options
  # Tag the request with the SDK version unless the options already set an origin.
  payload["origin"] = "ruby-sdk@#{Firecrawl::VERSION}" unless payload["origin"]
  response = @http.post("/v2/search", payload)
  Models::SearchData.new(response["data"] || response)
end

#start_agent(options) ⇒ Models::AgentResponse

Starts an async agent task.

Parameters:

  • options (Models::AgentOptions)

    agent configuration

Returns:

  • (Models::AgentResponse)

Raises:

  • (ArgumentError)


397
398
399
400
401
402
# File 'lib/firecrawl/client.rb', line 397

# Starts an async agent task.
#
# @param options [Models::AgentOptions] agent configuration; its +to_h+ is the
#   request body
# @return [Models::AgentResponse] built from the raw response
# @raise [ArgumentError] if +options+ is nil
def start_agent(options)
  raise ArgumentError, "Agent options are required" if options.nil?

  Models::AgentResponse.new(@http.post("/v2/agent", options.to_h))
end

#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse

Starts an async batch scrape job.

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

Returns:

  • (Models::BatchScrapeResponse)

Raises:

  • (ArgumentError)


213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/firecrawl/client.rb', line 213

# Starts an async batch scrape job.
#
# The idempotency key, when set and non-empty, is sent as the
# "x-idempotency-key" header. Nested scrape options found under the "options"
# key are flattened into the top level of the request body.
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @return [Models::BatchScrapeResponse] built from the raw response
# @raise [ArgumentError] if +urls+ is nil
def start_batch_scrape(urls, options = nil)
  raise ArgumentError, "URLs list is required" if urls.nil?

  body = { "urls" => urls }
  extra_headers = {}
  if options
    # Work on a copy: the deletes below must not alter a hash that the options
    # object may still own (or that may be frozen).
    opts_hash = options.to_h.dup

    # idempotencyKey goes as a header, not in body
    idempotency_key = options.idempotency_key
    extra_headers["x-idempotency-key"] = idempotency_key if idempotency_key && !idempotency_key.empty?
    # NOTE(review): assumes to_h would expose the key as "idempotencyKey";
    # this is a no-op if to_h already leaves it out.
    opts_hash.delete("idempotencyKey")

    # Flatten nested scrape options to top level (API expects this)
    nested = opts_hash.delete("options")
    body.merge!(opts_hash)
    body.merge!(nested) if nested
  end
  raw = @http.post("/v2/batch/scrape", body, extra_headers: extra_headers)
  Models::BatchScrapeResponse.new(raw)
end

#start_crawl(url, options = nil) ⇒ Models::CrawlResponse

Starts an async crawl job and returns immediately.

Parameters:

  • url (String)

    the URL to start crawling from

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

Returns:

  • (Models::CrawlResponse)

Raises:

  • (ArgumentError)


152
153
154
155
156
157
158
159
# File 'lib/firecrawl/client.rb', line 152

# Starts an async crawl job and returns immediately.
#
# @param url [String] the URL to start crawling from
# @param options [Models::CrawlOptions, nil] crawl configuration; its +to_h+
#   is merged over the request body
# @return [Models::CrawlResponse] built from the raw response
# @raise [ArgumentError] if +url+ is nil
def start_crawl(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.update(options.to_h) if options
  Models::CrawlResponse.new(@http.post("/v2/crawl", payload))
end

#stop_interactive_browser(job_id) ⇒ Hash

Stops the interactive browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

Returns:

  • (Hash)

    stop response

Raises:

  • (ArgumentError)


109
110
111
112
113
# File 'lib/firecrawl/client.rb', line 109

# Stops the interactive browser session for a scrape job.
#
# @param job_id [String] the scrape job ID
# @return [Hash] stop response
# @raise [ArgumentError] if +job_id+ is nil or blank
def stop_interactive_browser(job_id)
  # A blank ID would produce "/v2/scrape//interact", which is not a valid job path.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/scrape/#{job_id}/interact")
end

#update_monitor(monitor_id, **attrs) ⇒ Object

Raises:

  • (ArgumentError)


319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/firecrawl/client.rb', line 319

def update_monitor(monitor_id, **attrs)
  raise ArgumentError, "Monitor ID is required" if monitor_id.nil?

  body = {
    "name" => attrs[:name],
    "status" => attrs[:status],
    "schedule" => attrs[:schedule],
    "webhook" => attrs[:webhook],
    "notification" => attrs[:notification],
    "targets" => attrs[:targets],
    "retentionDays" => attrs[:retention_days],
    "goal" => attrs[:goal],
    "judgeEnabled" => attrs[:judge_enabled],
  }.compact
  raw = @http.patch("/v2/monitor/#{monitor_id}", body)
  Models::Monitor.new(raw["data"] || raw)
end