Class: Firecrawl::Client

Inherits:
Object
Defined in:
lib/firecrawl/client.rb

Overview

Client for the Firecrawl v2 API.

Examples:

Quick start

client = Firecrawl::Client.new(api_key: "fc-your-api-key")

# Scrape a single page
doc = client.scrape("https://example.com",
  Firecrawl::Models::ScrapeOptions.new(formats: ["markdown"]))

# Crawl a website
job = client.crawl("https://example.com",
  Firecrawl::Models::CrawlOptions.new(limit: 50))

Constant Summary collapse

DEFAULT_API_URL =
"https://api.firecrawl.dev"
DEFAULT_TIMEOUT =
300 # seconds
DEFAULT_MAX_RETRIES =
3
DEFAULT_BACKOFF_FACTOR =
0.5
DEFAULT_POLL_INTERVAL =
2 # seconds
DEFAULT_JOB_TIMEOUT =
300 # seconds

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client

Creates a new Firecrawl client.

Parameters:

  • api_key (String, nil) (defaults to: nil)

    API key (falls back to FIRECRAWL_API_KEY env var)

  • api_url (String) (defaults to: nil)

    API base URL

  • timeout (Integer) (defaults to: DEFAULT_TIMEOUT)

    HTTP request timeout in seconds

  • max_retries (Integer) (defaults to: DEFAULT_MAX_RETRIES)

    maximum automatic retries for transient failures

  • backoff_factor (Float) (defaults to: DEFAULT_BACKOFF_FACTOR)

    exponential backoff factor in seconds



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/firecrawl/client.rb', line 31

# Creates a new Firecrawl client.
#
# The API key falls back to the FIRECRAWL_API_KEY environment variable,
# and the base URL falls back to FIRECRAWL_API_URL, then DEFAULT_API_URL.
#
# @param api_key [String, nil] API key (falls back to FIRECRAWL_API_KEY)
# @param api_url [String, nil] API base URL
# @param timeout [Integer] HTTP request timeout in seconds
# @param max_retries [Integer] maximum automatic retries for transient failures
# @param backoff_factor [Float] exponential backoff factor in seconds
# @raise [FirecrawlError] when no usable key is found or the URL is not
#   an absolute http(s) URL
def initialize(
  api_key: nil,
  api_url: nil,
  timeout: DEFAULT_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  backoff_factor: DEFAULT_BACKOFF_FACTOR
)
  key = api_key || ENV["FIRECRAWL_API_KEY"]
  if key.nil? || key.strip.empty?
    raise FirecrawlError, "API key is required. Provide api_key: or set FIRECRAWL_API_KEY environment variable."
  end

  url = api_url || ENV["FIRECRAWL_API_URL"] || DEFAULT_API_URL
  unless url.match?(%r{\Ahttps?://}i)
    raise FirecrawlError, "API URL must be a fully qualified HTTP or HTTPS URL (got: #{url})."
  end

  # All requests go through a single shared HTTP client.
  @http = HttpClient.new(
    api_key: key,
    base_url: url,
    timeout: timeout,
    max_retries: max_retries,
    backoff_factor: backoff_factor
  )
end

Class Method Details

.from_env ⇒ Client

Creates a client from the FIRECRAWL_API_KEY environment variable.

Returns:



60
61
62
# File 'lib/firecrawl/client.rb', line 60

# Convenience constructor: builds a client with all defaults, so the
# API key is resolved from the FIRECRAWL_API_KEY environment variable
# inside #initialize.
#
# @return [Client]
def self.from_env
  new
end

Instance Method Details

#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse

Runs an agent task and waits for completion (auto-polling).

Parameters:

  • options (Models::AgentOptions)

    agent configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:

Raises:



304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'lib/firecrawl/client.rb', line 304

# Runs an agent task and waits for completion (auto-polling).
#
# @param options [Models::AgentOptions] agent configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::AgentStatusResponse] the terminal status
# @raise [FirecrawlError] when the start response carries no job ID
# @raise [JobTimeoutError] when the job does not finish within +timeout+
def agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_agent(options)
  raise FirecrawlError, "Agent start did not return a job ID" if start.id.nil?

  deadline = Time.now + timeout
  loop do
    # Always poll at least once, even if timeout is zero or has already
    # elapsed, so a job that is already done is reported instead of
    # raising spuriously.
    status = get_agent_status(start.id)
    return status if status.done?

    # Check the deadline *after* polling so we never sleep a full extra
    # interval only to discover time ran out during the last check.
    break if Time.now >= deadline

    sleep(poll_interval)
  end
  raise JobTimeoutError.new(start.id, timeout, "Agent")
end

#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob

Batch-scrapes URLs and waits for completion (auto-polling).

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:



219
220
221
222
# File 'lib/firecrawl/client.rb', line 219

# Batch-scrapes URLs and waits for completion (auto-polling).
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::BatchScrapeJob]
def batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  response = start_batch_scrape(urls, options)
  poll_batch_scrape(response.id, poll_interval, timeout)
end

#cancel_agent(job_id) ⇒ Hash

Cancels a running agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


322
323
324
325
326
# File 'lib/firecrawl/client.rb', line 322

# Cancels a running agent task via DELETE.
#
# @param job_id [String] the agent job ID
# @return [Hash] raw API response
# @raise [ArgumentError] when job_id is nil
def cancel_agent(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  path = "/v2/agent/#{job_id}"
  @http.delete(path)
end

#cancel_batch_scrape(job_id) ⇒ Hash

Cancels a running batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


228
229
230
231
232
# File 'lib/firecrawl/client.rb', line 228

# Cancels a running batch scrape job via DELETE.
#
# @param job_id [String] the batch scrape job ID
# @return [Hash] raw API response
# @raise [ArgumentError] when job_id is nil
def cancel_batch_scrape(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  path = "/v2/batch/scrape/#{job_id}"
  @http.delete(path)
end

#cancel_crawl(job_id) ⇒ Hash

Cancels a running crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


154
155
156
157
158
# File 'lib/firecrawl/client.rb', line 154

# Cancels a running crawl job via DELETE.
#
# @param job_id [String] the crawl job ID
# @return [Hash] raw API response
# @raise [ArgumentError] when job_id is nil
def cancel_crawl(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  path = "/v2/crawl/#{job_id}"
  @http.delete(path)
end

#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob

Crawls a website and waits for completion (auto-polling).

Parameters:

  • url (String)

    the URL to crawl

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:



145
146
147
148
# File 'lib/firecrawl/client.rb', line 145

# Crawls a website and waits for completion (auto-polling).
#
# @param url [String] the URL to crawl
# @param options [Models::CrawlOptions, nil] crawl configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::CrawlJob]
def crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  response = start_crawl(url, options)
  poll_crawl(response.id, poll_interval, timeout)
end

#get_agent_status(job_id) ⇒ Models::AgentStatusResponse

Gets the status of an agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

Raises:

  • (ArgumentError)


291
292
293
294
295
296
# File 'lib/firecrawl/client.rb', line 291

# Gets the status of an agent task.
#
# @param job_id [String] the agent job ID
# @return [Models::AgentStatusResponse]
# @raise [ArgumentError] when job_id is nil
def get_agent_status(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  Models::AgentStatusResponse.new(@http.get("/v2/agent/#{job_id}"))
end

#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob

Gets the status and results of a batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

Raises:

  • (ArgumentError)


205
206
207
208
209
210
# File 'lib/firecrawl/client.rb', line 205

# Gets the status and results of a batch scrape job.
#
# @param job_id [String] the batch scrape job ID
# @return [Models::BatchScrapeJob]
# @raise [ArgumentError] when job_id is nil
def get_batch_scrape_status(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  Models::BatchScrapeJob.new(@http.get("/v2/batch/scrape/#{job_id}"))
end

#get_concurrency ⇒ Models::ConcurrencyCheck

Gets current concurrency usage.



335
336
337
338
# File 'lib/firecrawl/client.rb', line 335

# Gets current concurrency usage from the concurrency-check endpoint.
#
# @return [Models::ConcurrencyCheck]
def get_concurrency
  Models::ConcurrencyCheck.new(@http.get("/v2/concurrency-check"))
end

#get_crawl_errors(job_id) ⇒ Hash

Gets errors from a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


164
165
166
167
168
# File 'lib/firecrawl/client.rb', line 164

# Gets errors from a crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Hash] raw API response
# @raise [ArgumentError] when job_id is nil
def get_crawl_errors(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  path = "/v2/crawl/#{job_id}/errors"
  @http.get(path)
end

#get_crawl_status(job_id) ⇒ Models::CrawlJob

Gets the status and results of a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

Raises:

  • (ArgumentError)


131
132
133
134
135
136
# File 'lib/firecrawl/client.rb', line 131

# Gets the status and results of a crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Models::CrawlJob]
# @raise [ArgumentError] when job_id is nil
def get_crawl_status(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  Models::CrawlJob.new(@http.get("/v2/crawl/#{job_id}"))
end

#get_credit_usage ⇒ Models::CreditUsage

Gets current credit usage.

Returns:



343
344
345
346
# File 'lib/firecrawl/client.rb', line 343

# Gets current credit usage for the team.
#
# @return [Models::CreditUsage]
def get_credit_usage
  Models::CreditUsage.new(@http.get("/v2/team/credit-usage"))
end

#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash

Interacts with the scrape-bound browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

  • code (String)

    the code to execute

  • language (String) (defaults to: "node")

    “python”, “node”, or “bash” (default: “node”)

  • timeout (Integer, nil) (defaults to: nil)

    execution timeout in seconds (1-300)

Returns:

  • (Hash)

    execution result with stdout, stderr, exit_code

Raises:

  • (ArgumentError)


90
91
92
93
94
95
96
97
# File 'lib/firecrawl/client.rb', line 90

# Interacts with the scrape-bound browser session for a scrape job by
# executing code against it.
#
# @param job_id [String] the scrape job ID
# @param code [String] the code to execute
# @param language [String] "python", "node", or "bash" (default: "node")
# @param timeout [Integer, nil] execution timeout in seconds (1-300)
# @return [Hash] execution result with stdout, stderr, exit_code
# @raise [ArgumentError] when job_id or code is nil
def interact(job_id, code, language: "node", timeout: nil)
  raise ArgumentError, "Job ID is required" if job_id.nil?
  raise ArgumentError, "Code is required" if code.nil?

  payload = { "code" => code, "language" => language }
  # timeout is optional; omit the key entirely when not given
  payload["timeout"] = timeout if timeout
  @http.post("/v2/scrape/#{job_id}/interact", payload)
end

#map(url, options = nil) ⇒ Models::MapData

Discovers URLs on a website.

Parameters:

  • url (String)

    the URL to map

  • options (Models::MapOptions, nil) (defaults to: nil)

    map configuration

Returns:

Raises:

  • (ArgumentError)


243
244
245
246
247
248
249
250
251
# File 'lib/firecrawl/client.rb', line 243

# Discovers URLs on a website.
#
# @param url [String] the URL to map
# @param options [Models::MapOptions, nil] map configuration
# @return [Models::MapData]
# @raise [ArgumentError] when url is nil
def map(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.merge!(options.to_h) if options
  response = @http.post("/v2/map", payload)
  # Some responses wrap the payload under "data"; fall back to the root.
  Models::MapData.new(response["data"] || response)
end

#scrape(url, options = nil) ⇒ Models::Document

Scrapes a single URL and returns the document.

Parameters:

  • url (String)

    the URL to scrape

  • options (Models::ScrapeOptions, nil) (defaults to: nil)

    scrape configuration

Returns:

Raises:

  • (ArgumentError)


73
74
75
76
77
78
79
80
81
# File 'lib/firecrawl/client.rb', line 73

# Scrapes a single URL and returns the document.
#
# @param url [String] the URL to scrape
# @param options [Models::ScrapeOptions, nil] scrape configuration
# @return [Models::Document]
# @raise [ArgumentError] when url is nil
def scrape(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.merge!(options.to_h) if options
  response = @http.post("/v2/scrape", payload)
  # Some responses wrap the payload under "data"; fall back to the root.
  Models::Document.new(response["data"] || response)
end

#search(query, options = nil) ⇒ Models::SearchData

Performs a web search.

Parameters:

  • query (String)

    the search query

  • options (Models::SearchOptions, nil) (defaults to: nil)

    search configuration

Returns:

Raises:

  • (ArgumentError)


262
263
264
265
266
267
268
269
270
# File 'lib/firecrawl/client.rb', line 262

# Performs a web search.
#
# @param query [String] the search query
# @param options [Models::SearchOptions, nil] search configuration
# @return [Models::SearchData]
# @raise [ArgumentError] when query is nil
def search(query, options = nil)
  raise ArgumentError, "Query is required" if query.nil?

  payload = { "query" => query }
  payload.merge!(options.to_h) if options
  response = @http.post("/v2/search", payload)
  # Some responses wrap the payload under "data"; fall back to the root.
  Models::SearchData.new(response["data"] || response)
end

#start_agent(options) ⇒ Models::AgentResponse

Starts an async agent task.

Parameters:

Returns:

Raises:

  • (ArgumentError)


280
281
282
283
284
285
# File 'lib/firecrawl/client.rb', line 280

# Starts an async agent task and returns immediately.
#
# @param options [Models::AgentOptions] agent configuration
# @return [Models::AgentResponse]
# @raise [ArgumentError] when options is nil
def start_agent(options)
  raise ArgumentError, "Agent options are required" if options.nil?

  Models::AgentResponse.new(@http.post("/v2/agent", options.to_h))
end

#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse

Starts an async batch scrape job.

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

Returns:

Raises:

  • (ArgumentError)


179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/firecrawl/client.rb', line 179

# Starts an async batch scrape job.
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @return [Models::BatchScrapeResponse]
# @raise [ArgumentError] when urls is nil
def start_batch_scrape(urls, options = nil)
  raise ArgumentError, "URLs list is required" if urls.nil?

  payload = { "urls" => urls }
  headers = {}
  if options
    opts = options.to_h

    # The idempotency key travels as a header, never in the body.
    idem = options.idempotency_key
    headers["x-idempotency-key"] = idem if idem && !idem.empty?

    # The API expects nested scrape options flattened to the top level.
    scrape_opts = opts.delete("options")
    payload.merge!(opts)
    payload.merge!(scrape_opts) if scrape_opts
  end
  response = @http.post("/v2/batch/scrape", payload, extra_headers: headers)
  Models::BatchScrapeResponse.new(response)
end

#start_crawl(url, options = nil) ⇒ Models::CrawlResponse

Starts an async crawl job and returns immediately.

Parameters:

  • url (String)

    the URL to start crawling from

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

Returns:

Raises:

  • (ArgumentError)


118
119
120
121
122
123
124
125
# File 'lib/firecrawl/client.rb', line 118

# Starts an async crawl job and returns immediately.
#
# @param url [String] the URL to start crawling from
# @param options [Models::CrawlOptions, nil] crawl configuration
# @return [Models::CrawlResponse]
# @raise [ArgumentError] when url is nil
def start_crawl(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  payload = { "url" => url }
  payload.merge!(options.to_h) if options
  Models::CrawlResponse.new(@http.post("/v2/crawl", payload))
end

#stop_interactive_browser(job_id) ⇒ Hash

Stops the interactive browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

Returns:

  • (Hash)

    stop response

Raises:

  • (ArgumentError)


103
104
105
106
107
# File 'lib/firecrawl/client.rb', line 103

# Stops the interactive browser session for a scrape job via DELETE.
#
# @param job_id [String] the scrape job ID
# @return [Hash] stop response
# @raise [ArgumentError] when job_id is nil
def stop_interactive_browser(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  path = "/v2/scrape/#{job_id}/interact"
  @http.delete(path)
end