Class: Firecrawl::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/firecrawl/client.rb

Overview

Client for the Firecrawl v2 API.

Examples:

Quick start

client = Firecrawl::Client.new(api_key: "fc-your-api-key")

# Scrape a single page
doc = client.scrape("https://example.com",
  Firecrawl::Models::ScrapeOptions.new(formats: ["markdown"]))

# Crawl a website
job = client.crawl("https://example.com",
  Firecrawl::Models::CrawlOptions.new(limit: 50))

Constant Summary collapse

DEFAULT_API_URL =
"https://api.firecrawl.dev"
DEFAULT_TIMEOUT =
300  # seconds
DEFAULT_MAX_RETRIES =
3
DEFAULT_BACKOFF_FACTOR =
0.5
DEFAULT_POLL_INTERVAL =
2  # seconds
DEFAULT_JOB_TIMEOUT =
300  # seconds

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client

Creates a new Firecrawl client.

Parameters:

  • api_key (String, nil) (defaults to: nil)

    API key (falls back to FIRECRAWL_API_KEY env var)

  • api_url (String) (defaults to: nil)

    API base URL

  • timeout (Integer) (defaults to: DEFAULT_TIMEOUT)

    HTTP request timeout in seconds

  • max_retries (Integer) (defaults to: DEFAULT_MAX_RETRIES)

    maximum automatic retries for transient failures

  • backoff_factor (Float) (defaults to: DEFAULT_BACKOFF_FACTOR)

    exponential backoff factor in seconds



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/firecrawl/client.rb', line 33

# Builds a Firecrawl client. Credentials and endpoint are resolved from the
# keyword arguments first, then from the FIRECRAWL_API_KEY / FIRECRAWL_API_URL
# environment variables.
#
# @param api_key [String, nil] API key (falls back to FIRECRAWL_API_KEY env var)
# @param api_url [String, nil] API base URL (falls back to FIRECRAWL_API_URL, then DEFAULT_API_URL)
# @param timeout [Integer] HTTP request timeout in seconds
# @param max_retries [Integer] maximum automatic retries for transient failures
# @param backoff_factor [Float] exponential backoff factor in seconds
# @raise [FirecrawlError] when no API key can be resolved or the URL is not HTTP(S)
def initialize(
  api_key: nil,
  api_url: nil,
  timeout: DEFAULT_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  backoff_factor: DEFAULT_BACKOFF_FACTOR
)
  key = api_key || ENV["FIRECRAWL_API_KEY"]
  if key.nil? || key.strip.empty?
    raise FirecrawlError, "API key is required. Provide api_key: or set FIRECRAWL_API_KEY environment variable."
  end

  url = api_url || ENV["FIRECRAWL_API_URL"] || DEFAULT_API_URL
  # Only fully qualified http(s) URLs are accepted as the API base.
  unless url.match?(%r{\Ahttps?://}i)
    raise FirecrawlError, "API URL must be a fully qualified HTTP or HTTPS URL (got: #{url})."
  end

  @http = HttpClient.new(
    api_key: key,
    base_url: url,
    timeout: timeout,
    max_retries: max_retries,
    backoff_factor: backoff_factor
  )
end

Class Method Details

.from_env ⇒ Client

Creates a client from the FIRECRAWL_API_KEY environment variable.

Returns:



62
63
64
# File 'lib/firecrawl/client.rb', line 62

# Convenience constructor that relies entirely on environment configuration
# (FIRECRAWL_API_KEY, and optionally FIRECRAWL_API_URL).
#
# @return [Client] a client configured from the environment
def self.from_env
  self.new
end

Instance Method Details

#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse

Runs an agent task and waits for completion (auto-polling).

Parameters:

  • options (Models::AgentOptions)

    agent configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:

Raises:



334
335
336
337
338
339
340
341
342
343
344
345
346
# File 'lib/firecrawl/client.rb', line 334

# Runs an agent task and waits for completion (auto-polling).
#
# @param options [Models::AgentOptions] agent configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::AgentStatusResponse] the terminal status
# @raise [FirecrawlError] when the start call returns no job ID
# @raise [JobTimeoutError] when the job does not finish within +timeout+
def agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_agent(options)
  raise FirecrawlError, "Agent start did not return a job ID" if start.id.nil?

  # Use the monotonic clock for the deadline: wall-clock time (Time.now) can
  # jump under NTP adjustments and skew the timeout.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
  loop do
    status = get_agent_status(start.id)
    return status if status.done?

    remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
    break if remaining <= 0

    # Clamp the sleep so we never overshoot the deadline by up to a full
    # poll_interval.
    sleep([poll_interval, remaining].min)
  end
  raise JobTimeoutError.new(start.id, timeout, "Agent")
end

#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob

Batch-scrapes URLs and waits for completion (auto-polling).

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:



249
250
251
252
# File 'lib/firecrawl/client.rb', line 249

# Batch-scrapes URLs and waits for completion (auto-polling).
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::BatchScrapeJob] the finished job with its results
def batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  job = start_batch_scrape(urls, options)
  poll_batch_scrape(job.id, poll_interval, timeout)
end

#cancel_agent(job_id) ⇒ Hash

Cancels a running agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


352
353
354
355
356
# File 'lib/firecrawl/client.rb', line 352

# Cancels a running agent task.
#
# @param job_id [String] the agent job ID
# @return [Hash] the raw cancellation response
# @raise [ArgumentError] when +job_id+ is nil or blank
def cancel_agent(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and send a
  # DELETE to the wrong endpoint ("/v2/agent/").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/agent/#{job_id}")
end

#cancel_batch_scrape(job_id) ⇒ Hash

Cancels a running batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


258
259
260
261
262
# File 'lib/firecrawl/client.rb', line 258

# Cancels a running batch scrape job.
#
# @param job_id [String] the batch scrape job ID
# @return [Hash] the raw cancellation response
# @raise [ArgumentError] when +job_id+ is nil or blank
def cancel_batch_scrape(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and send a
  # DELETE to the wrong endpoint ("/v2/batch/scrape/").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/batch/scrape/#{job_id}")
end

#cancel_crawl(job_id) ⇒ Hash

Cancels a running crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


184
185
186
187
188
# File 'lib/firecrawl/client.rb', line 184

# Cancels a running crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Hash] the raw cancellation response
# @raise [ArgumentError] when +job_id+ is nil or blank
def cancel_crawl(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and send a
  # DELETE to the wrong endpoint ("/v2/crawl/").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/crawl/#{job_id}")
end

#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob

Crawls a website and waits for completion (auto-polling).

Parameters:

  • url (String)

    the URL to crawl

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

  • poll_interval (Integer) (defaults to: DEFAULT_POLL_INTERVAL)

    seconds between status checks

  • timeout (Integer) (defaults to: DEFAULT_JOB_TIMEOUT)

    maximum seconds to wait

Returns:



175
176
177
178
# File 'lib/firecrawl/client.rb', line 175

# Crawls a website and waits for completion (auto-polling).
#
# @param url [String] the URL to crawl
# @param options [Models::CrawlOptions, nil] crawl configuration
# @param poll_interval [Integer] seconds between status checks
# @param timeout [Integer] maximum seconds to wait
# @return [Models::CrawlJob] the finished job with its results
def crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  job = start_crawl(url, options)
  poll_crawl(job.id, poll_interval, timeout)
end

#get_agent_status(job_id) ⇒ Models::AgentStatusResponse

Gets the status of an agent task.

Parameters:

  • job_id (String)

    the agent job ID

Returns:

Raises:

  • (ArgumentError)


321
322
323
324
325
326
# File 'lib/firecrawl/client.rb', line 321

# Gets the status of an agent task.
#
# @param job_id [String] the agent job ID
# @return [Models::AgentStatusResponse]
# @raise [ArgumentError] when +job_id+ is nil or blank
def get_agent_status(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and query
  # the wrong endpoint ("/v2/agent/").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  raw = @http.get("/v2/agent/#{job_id}")
  Models::AgentStatusResponse.new(raw)
end

#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob

Gets the status and results of a batch scrape job.

Parameters:

  • job_id (String)

    the batch scrape job ID

Returns:

Raises:

  • (ArgumentError)


235
236
237
238
239
240
# File 'lib/firecrawl/client.rb', line 235

# Gets the status and results of a batch scrape job.
#
# @param job_id [String] the batch scrape job ID
# @return [Models::BatchScrapeJob]
# @raise [ArgumentError] when +job_id+ is nil or blank
def get_batch_scrape_status(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and query
  # the wrong endpoint ("/v2/batch/scrape/").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  raw = @http.get("/v2/batch/scrape/#{job_id}")
  Models::BatchScrapeJob.new(raw)
end

#get_concurrency ⇒ Models::ConcurrencyCheck

Gets current concurrency usage.



365
366
367
368
# File 'lib/firecrawl/client.rb', line 365

# Gets current concurrency usage for the team.
#
# @return [Models::ConcurrencyCheck]
def get_concurrency
  Models::ConcurrencyCheck.new(@http.get("/v2/concurrency-check"))
end

#get_crawl_errors(job_id) ⇒ Hash

Gets errors from a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

  • (Hash)

Raises:

  • (ArgumentError)


194
195
196
197
198
# File 'lib/firecrawl/client.rb', line 194

# Gets errors from a crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Hash] the raw errors payload
# @raise [ArgumentError] when +job_id+ is nil or blank
def get_crawl_errors(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and query
  # a malformed path ("/v2/crawl//errors").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.get("/v2/crawl/#{job_id}/errors")
end

#get_crawl_status(job_id) ⇒ Models::CrawlJob

Gets the status and results of a crawl job.

Parameters:

  • job_id (String)

    the crawl job ID

Returns:

Raises:

  • (ArgumentError)


161
162
163
164
165
166
# File 'lib/firecrawl/client.rb', line 161

# Gets the status and results of a crawl job.
#
# @param job_id [String] the crawl job ID
# @return [Models::CrawlJob]
# @raise [ArgumentError] when +job_id+ is nil or blank
def get_crawl_status(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and query
  # the wrong endpoint ("/v2/crawl/").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  raw = @http.get("/v2/crawl/#{job_id}")
  Models::CrawlJob.new(raw)
end

#get_credit_usage ⇒ Models::CreditUsage

Gets current credit usage.

Returns:



373
374
375
376
# File 'lib/firecrawl/client.rb', line 373

# Gets current credit usage for the team.
#
# @return [Models::CreditUsage]
def get_credit_usage
  Models::CreditUsage.new(@http.get("/v2/team/credit-usage"))
end

#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash

Interacts with the scrape-bound browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

  • code (String)

    the code to execute

  • language (String) (defaults to: "node")

    “python”, “node”, or “bash” (default: “node”)

  • timeout (Integer, nil) (defaults to: nil)

    execution timeout in seconds (1-300)

Returns:

  • (Hash)

    execution result with stdout, stderr, exit_code

Raises:

  • (ArgumentError)


92
93
94
95
96
97
98
99
# File 'lib/firecrawl/client.rb', line 92

# Interacts with the scrape-bound browser session for a scrape job.
#
# @param job_id [String] the scrape job ID
# @param code [String] the code to execute
# @param language [String] "python", "node", or "bash" (default: "node")
# @param timeout [Integer, nil] execution timeout in seconds (1-300)
# @return [Hash] execution result with stdout, stderr, exit_code
# @raise [ArgumentError] when +job_id+ or +code+ is nil or empty
def interact(job_id, code, language: "node", timeout: nil)
  # Reject blank/empty values too: a nil-only guard would let "" through and
  # post to a malformed path or send an empty program to execute.
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?
  raise ArgumentError, "Code is required" if code.to_s.empty?

  body = { "code" => code, "language" => language }
  # timeout is optional; omit the key entirely when not given.
  body["timeout"] = timeout if timeout
  @http.post("/v2/scrape/#{job_id}/interact", body)
end

#map(url, options = nil) ⇒ Models::MapData

Discovers URLs on a website.

Parameters:

  • url (String)

    the URL to map

  • options (Models::MapOptions, nil) (defaults to: nil)

    map configuration

Returns:

Raises:

  • (ArgumentError)


273
274
275
276
277
278
279
280
281
# File 'lib/firecrawl/client.rb', line 273

# Discovers URLs on a website.
#
# @param url [String] the URL to map
# @param options [Models::MapOptions, nil] map configuration
# @return [Models::MapData]
# @raise [ArgumentError] when +url+ is nil or blank
def map(url, options = nil)
  # Reject blank URLs too: a nil-only guard would let "" through to the API.
  raise ArgumentError, "URL is required" if url.to_s.strip.empty?

  body = { "url" => url }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/map", body)
  # Some responses wrap the payload in "data"; fall back to the raw hash.
  data = raw["data"] || raw
  Models::MapData.new(data)
end

#parse(file, options = nil) ⇒ Models::Document

Parses an uploaded file and returns the extracted document.

Parameters:

Returns:

Raises:

  • (ArgumentError)


120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/firecrawl/client.rb', line 120

# Parses an uploaded file and returns the extracted document.
#
# @param file [Models::ParseFile] the file to upload
# @param options [Models::ParseOptions, nil] parse configuration
# @return [Models::Document]
# @raise [ArgumentError] when +file+ is nil or not a Models::ParseFile
def parse(file, options = nil)
  raise ArgumentError, "File is required" if file.nil?
  raise ArgumentError, "File must be a Firecrawl::Models::ParseFile" unless file.is_a?(Models::ParseFile)

  # Options travel as a JSON-encoded form field alongside the file part.
  encoded_options = JSON.generate(options.nil? ? {} : options.to_h)
  raw = @http.post_multipart(
    "/v2/parse",
    fields: { "options" => encoded_options },
    file_field: "file",
    filename: file.filename,
    content: file.content,
    content_type: file.content_type,
  )
  # Some responses wrap the payload in "data"; fall back to the raw hash.
  Models::Document.new(raw["data"] || raw)
end

#scrape(url, options = nil) ⇒ Models::Document

Scrapes a single URL and returns the document.

Parameters:

  • url (String)

    the URL to scrape

  • options (Models::ScrapeOptions, nil) (defaults to: nil)

    scrape configuration

Returns:

Raises:

  • (ArgumentError)


75
76
77
78
79
80
81
82
83
# File 'lib/firecrawl/client.rb', line 75

# Scrapes a single URL and returns the document.
#
# @param url [String] the URL to scrape
# @param options [Models::ScrapeOptions, nil] scrape configuration
# @return [Models::Document]
# @raise [ArgumentError] when +url+ is nil or blank
def scrape(url, options = nil)
  # Reject blank URLs too: a nil-only guard would let "" through to the API.
  raise ArgumentError, "URL is required" if url.to_s.strip.empty?

  body = { "url" => url }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/scrape", body)
  # Some responses wrap the payload in "data"; fall back to the raw hash.
  data = raw["data"] || raw
  Models::Document.new(data)
end

#search(query, options = nil) ⇒ Models::SearchData

Performs a web search.

Parameters:

  • query (String)

    the search query

  • options (Models::SearchOptions, nil) (defaults to: nil)

    search configuration

Returns:

Raises:

  • (ArgumentError)


292
293
294
295
296
297
298
299
300
# File 'lib/firecrawl/client.rb', line 292

# Performs a web search.
#
# @param query [String] the search query
# @param options [Models::SearchOptions, nil] search configuration
# @return [Models::SearchData]
# @raise [ArgumentError] when +query+ is nil or blank
def search(query, options = nil)
  # Reject blank queries too: a nil-only guard would let "" through to the API.
  raise ArgumentError, "Query is required" if query.to_s.strip.empty?

  body = { "query" => query }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/search", body)
  # Some responses wrap the payload in "data"; fall back to the raw hash.
  data = raw["data"] || raw
  Models::SearchData.new(data)
end

#start_agent(options) ⇒ Models::AgentResponse

Starts an async agent task.

Parameters:

Returns:

Raises:

  • (ArgumentError)


310
311
312
313
314
315
# File 'lib/firecrawl/client.rb', line 310

# Starts an async agent task and returns immediately.
#
# @param options [Models::AgentOptions] agent configuration
# @return [Models::AgentResponse]
# @raise [ArgumentError] when +options+ is nil
def start_agent(options)
  raise ArgumentError, "Agent options are required" if options.nil?

  Models::AgentResponse.new(@http.post("/v2/agent", options.to_h))
end

#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse

Starts an async batch scrape job.

Parameters:

  • urls (Array<String>)

    the URLs to scrape

  • options (Models::BatchScrapeOptions, nil) (defaults to: nil)

    batch scrape configuration

Returns:

Raises:

  • (ArgumentError)


209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/firecrawl/client.rb', line 209

# Starts an async batch scrape job.
#
# @param urls [Array<String>] the URLs to scrape
# @param options [Models::BatchScrapeOptions, nil] batch scrape configuration
# @return [Models::BatchScrapeResponse]
# @raise [ArgumentError] when +urls+ is nil or empty
def start_batch_scrape(urls, options = nil)
  # Reject empty lists too: an empty batch is never meaningful and would
  # otherwise surface only as a confusing server-side error.
  raise ArgumentError, "URLs list is required" if urls.nil? || urls.empty?

  body = { "urls" => urls }
  extra_headers = {}
  if options
    opts_hash = options.to_h

    # idempotencyKey goes as a header, not in body
    if options.idempotency_key && !options.idempotency_key.empty?
      extra_headers["x-idempotency-key"] = options.idempotency_key
    end

    # Flatten nested scrape options to top level (API expects this)
    nested = opts_hash.delete("options")
    body.merge!(opts_hash)
    body.merge!(nested) if nested
  end
  raw = @http.post("/v2/batch/scrape", body, extra_headers: extra_headers)
  Models::BatchScrapeResponse.new(raw)
end

#start_crawl(url, options = nil) ⇒ Models::CrawlResponse

Starts an async crawl job and returns immediately.

Parameters:

  • url (String)

    the URL to start crawling from

  • options (Models::CrawlOptions, nil) (defaults to: nil)

    crawl configuration

Returns:

Raises:

  • (ArgumentError)


148
149
150
151
152
153
154
155
# File 'lib/firecrawl/client.rb', line 148

# Starts an async crawl job and returns immediately.
#
# @param url [String] the URL to start crawling from
# @param options [Models::CrawlOptions, nil] crawl configuration
# @return [Models::CrawlResponse]
# @raise [ArgumentError] when +url+ is nil or blank
def start_crawl(url, options = nil)
  # Reject blank URLs too: a nil-only guard would let "" through to the API.
  raise ArgumentError, "URL is required" if url.to_s.strip.empty?

  body = { "url" => url }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/crawl", body)
  Models::CrawlResponse.new(raw)
end

#stop_interactive_browser(job_id) ⇒ Hash

Stops the interactive browser session for a scrape job.

Parameters:

  • job_id (String)

    the scrape job ID

Returns:

  • (Hash)

    stop response

Raises:

  • (ArgumentError)


105
106
107
108
109
# File 'lib/firecrawl/client.rb', line 105

# Stops the interactive browser session for a scrape job.
#
# @param job_id [String] the scrape job ID
# @return [Hash] the raw stop response
# @raise [ArgumentError] when +job_id+ is nil or blank
def stop_interactive_browser(job_id)
  # Reject blank IDs too: a nil-only guard would let "" through and send a
  # DELETE to a malformed path ("/v2/scrape//interact").
  raise ArgumentError, "Job ID is required" if job_id.to_s.strip.empty?

  @http.delete("/v2/scrape/#{job_id}/interact")
end