Class: Firecrawl::Client
- Inherits: Object
- Defined in: lib/firecrawl/client.rb
Overview
Client for the Firecrawl v2 API.
Constant Summary
- DEFAULT_API_URL = "https://api.firecrawl.dev"
- DEFAULT_TIMEOUT = 300 (seconds)
- DEFAULT_MAX_RETRIES = 3
- DEFAULT_BACKOFF_FACTOR = 0.5
- DEFAULT_POLL_INTERVAL = 2 (seconds)
- DEFAULT_JOB_TIMEOUT = 300 (seconds)
Class Method Summary
- .from_env ⇒ Client
  Creates a client from the FIRECRAWL_API_KEY environment variable.

Instance Method Summary
- #agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse
  Runs an agent task and waits for completion (auto-polling).
- #batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob
  Batch-scrapes URLs and waits for completion (auto-polling).
- #cancel_agent(job_id) ⇒ Hash
  Cancels a running agent task.
- #cancel_batch_scrape(job_id) ⇒ Hash
  Cancels a running batch scrape job.
- #cancel_crawl(job_id) ⇒ Hash
  Cancels a running crawl job.
- #crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob
  Crawls a website and waits for completion (auto-polling).
- #get_agent_status(job_id) ⇒ Models::AgentStatusResponse
  Gets the status of an agent task.
- #get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob
  Gets the status and results of a batch scrape job.
- #get_concurrency ⇒ Models::ConcurrencyCheck
  Gets current concurrency usage.
- #get_crawl_errors(job_id) ⇒ Hash
  Gets errors from a crawl job.
- #get_crawl_status(job_id) ⇒ Models::CrawlJob
  Gets the status and results of a crawl job.
- #get_credit_usage ⇒ Models::CreditUsage
  Gets current credit usage.
- #initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client (constructor)
  Creates a new Firecrawl client.
- #interact(job_id, code, language: "node", timeout: nil) ⇒ Hash
  Interacts with the scrape-bound browser session for a scrape job.
- #map(url, options = nil) ⇒ Models::MapData
  Discovers URLs on a website.
- #scrape(url, options = nil) ⇒ Models::Document
  Scrapes a single URL and returns the document.
- #search(query, options = nil) ⇒ Models::SearchData
  Performs a web search.
- #start_agent(options) ⇒ Models::AgentResponse
  Starts an async agent task.
- #start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse
  Starts an async batch scrape job.
- #start_crawl(url, options = nil) ⇒ Models::CrawlResponse
  Starts an async crawl job and returns immediately.
- #stop_interactive_browser(job_id) ⇒ Hash
  Stops the interactive browser session for a scrape job.
Constructor Details
#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client
Creates a new Firecrawl client.
# File 'lib/firecrawl/client.rb', line 31

def initialize(
  api_key: nil,
  api_url: nil,
  timeout: DEFAULT_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  backoff_factor: DEFAULT_BACKOFF_FACTOR
)
  resolved_key = api_key || ENV["FIRECRAWL_API_KEY"]
  if resolved_key.nil? || resolved_key.strip.empty?
    raise FirecrawlError,
          "API key is required. Provide api_key: or set FIRECRAWL_API_KEY environment variable."
  end

  resolved_url = api_url || ENV["FIRECRAWL_API_URL"] || DEFAULT_API_URL
  unless resolved_url.match?(%r{\Ahttps?://}i)
    raise FirecrawlError,
          "API URL must be a fully qualified HTTP or HTTPS URL (got: #{resolved_url})."
  end

  @http = HttpClient.new(
    api_key: resolved_key,
    base_url: resolved_url,
    timeout: timeout,
    max_retries: max_retries,
    backoff_factor: backoff_factor
  )
end
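The retry behavior implied by DEFAULT_MAX_RETRIES and DEFAULT_BACKOFF_FACTOR can be sketched as follows. This assumes the common exponential pattern (delay = backoff_factor × 2^attempt); the actual schedule used by HttpClient is not shown in this file, so treat the formula as illustrative.

```ruby
# Hypothetical sketch of an exponential backoff schedule, assuming
# delay = backoff_factor * 2**attempt. HttpClient's real schedule may differ.
def backoff_delays(max_retries, backoff_factor)
  (0...max_retries).map { |attempt| backoff_factor * (2**attempt) }
end

# With the client defaults (3 retries, factor 0.5):
backoff_delays(3, 0.5) # => [0.5, 1.0, 2.0]
```

With these defaults a request that keeps failing would wait roughly 3.5 seconds in total before raising.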
Class Method Details
.from_env ⇒ Client
Creates a client from the FIRECRAWL_API_KEY environment variable.
# File 'lib/firecrawl/client.rb', line 60

def self.from_env
  new
end
Instance Method Details
#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse
Runs an agent task and waits for completion (auto-polling).
# File 'lib/firecrawl/client.rb', line 304

def agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_agent(options)
  raise FirecrawlError, "Agent start did not return a job ID" if start.id.nil?

  deadline = Time.now + timeout
  while Time.now < deadline
    status = get_agent_status(start.id)
    return status if status.done?

    sleep(poll_interval)
  end
  raise JobTimeoutError.new(start.id, timeout, "Agent")
end
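The deadline-and-sleep loop in #agent is the same pattern the crawl and batch-scrape helpers rely on. A minimal self-contained sketch, with a stub job standing in for the API (`StubJob` and `poll_until_done` are illustrative names, not part of the gem):

```ruby
# Illustrative deadline-polling loop; StubJob stands in for the remote job,
# reporting done? as true after a fixed number of status checks.
StubJob = Struct.new(:checks_until_done) do
  def done?
    self.checks_until_done -= 1
    checks_until_done <= 0
  end
end

def poll_until_done(job, poll_interval:, timeout:)
  deadline = Time.now + timeout
  while Time.now < deadline
    return :completed if job.done?

    sleep(poll_interval)
  end
  :timed_out
end

poll_until_done(StubJob.new(3), poll_interval: 0.01, timeout: 5) # => :completed
```

Note that the status check happens before each sleep, so a job that is already done returns immediately without waiting a poll interval.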
#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob
Batch-scrapes URLs and waits for completion (auto-polling).
# File 'lib/firecrawl/client.rb', line 219

def batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_batch_scrape(urls, options)
  poll_batch_scrape(start.id, poll_interval, timeout)
end
#cancel_agent(job_id) ⇒ Hash
Cancels a running agent task.
# File 'lib/firecrawl/client.rb', line 322

def cancel_agent(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  @http.delete("/v2/agent/#{job_id}")
end
#cancel_batch_scrape(job_id) ⇒ Hash
Cancels a running batch scrape job.
# File 'lib/firecrawl/client.rb', line 228

def cancel_batch_scrape(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  @http.delete("/v2/batch/scrape/#{job_id}")
end
#cancel_crawl(job_id) ⇒ Hash
Cancels a running crawl job.
# File 'lib/firecrawl/client.rb', line 154

def cancel_crawl(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  @http.delete("/v2/crawl/#{job_id}")
end
#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob
Crawls a website and waits for completion (auto-polling).
# File 'lib/firecrawl/client.rb', line 145

def crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT)
  start = start_crawl(url, options)
  poll_crawl(start.id, poll_interval, timeout)
end
#get_agent_status(job_id) ⇒ Models::AgentStatusResponse
Gets the status of an agent task.
# File 'lib/firecrawl/client.rb', line 291

def get_agent_status(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  raw = @http.get("/v2/agent/#{job_id}")
  Models::AgentStatusResponse.new(raw)
end
#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob
Gets the status and results of a batch scrape job.
# File 'lib/firecrawl/client.rb', line 205

def get_batch_scrape_status(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  raw = @http.get("/v2/batch/scrape/#{job_id}")
  Models::BatchScrapeJob.new(raw)
end
#get_concurrency ⇒ Models::ConcurrencyCheck
Gets current concurrency usage.
# File 'lib/firecrawl/client.rb', line 335

def get_concurrency
  raw = @http.get("/v2/concurrency-check")
  Models::ConcurrencyCheck.new(raw)
end
#get_crawl_errors(job_id) ⇒ Hash
Gets errors from a crawl job.
# File 'lib/firecrawl/client.rb', line 164

def get_crawl_errors(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  @http.get("/v2/crawl/#{job_id}/errors")
end
#get_crawl_status(job_id) ⇒ Models::CrawlJob
Gets the status and results of a crawl job.
# File 'lib/firecrawl/client.rb', line 131

def get_crawl_status(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  raw = @http.get("/v2/crawl/#{job_id}")
  Models::CrawlJob.new(raw)
end
#get_credit_usage ⇒ Models::CreditUsage
Gets current credit usage.
# File 'lib/firecrawl/client.rb', line 343

def get_credit_usage
  raw = @http.get("/v2/team/credit-usage")
  Models::CreditUsage.new(raw)
end
#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash
Interacts with the scrape-bound browser session for a scrape job.
# File 'lib/firecrawl/client.rb', line 90

def interact(job_id, code, language: "node", timeout: nil)
  raise ArgumentError, "Job ID is required" if job_id.nil?
  raise ArgumentError, "Code is required" if code.nil?

  body = { "code" => code, "language" => language }
  body["timeout"] = timeout if timeout
  @http.post("/v2/scrape/#{job_id}/interact", body)
end
#map(url, options = nil) ⇒ Models::MapData
Discovers URLs on a website.
# File 'lib/firecrawl/client.rb', line 243

def map(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  body = { "url" => url }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/map", body)
  data = raw["data"] || raw
  Models::MapData.new(data)
end
#scrape(url, options = nil) ⇒ Models::Document
Scrapes a single URL and returns the document.
# File 'lib/firecrawl/client.rb', line 73

def scrape(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  body = { "url" => url }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/scrape", body)
  data = raw["data"] || raw
  Models::Document.new(data)
end
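#scrape, #map, and #search all unwrap the response the same way: if the payload carries a top-level "data" envelope, the model is built from that; otherwise the whole payload is used. A standalone sketch of this unwrapping (`unwrap` is an illustrative name, not a method of the gem):

```ruby
# Unwrap a Firecrawl-style response: prefer the "data" envelope when present,
# otherwise fall back to the raw payload itself.
def unwrap(raw)
  raw["data"] || raw
end

unwrap({ "success" => true, "data" => { "markdown" => "# Hi" } })
# => { "markdown" => "# Hi" }
unwrap({ "markdown" => "# Hi" })
# => { "markdown" => "# Hi" }
```

This makes the client tolerant of both enveloped and bare responses from the API.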
#search(query, options = nil) ⇒ Models::SearchData
Performs a web search.
# File 'lib/firecrawl/client.rb', line 262

def search(query, options = nil)
  raise ArgumentError, "Query is required" if query.nil?

  body = { "query" => query }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/search", body)
  data = raw["data"] || raw
  Models::SearchData.new(data)
end
#start_agent(options) ⇒ Models::AgentResponse
Starts an async agent task.
# File 'lib/firecrawl/client.rb', line 280

def start_agent(options)
  raise ArgumentError, "Agent options are required" if options.nil?

  raw = @http.post("/v2/agent", options.to_h)
  Models::AgentResponse.new(raw)
end
#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse
Starts an async batch scrape job.
# File 'lib/firecrawl/client.rb', line 179

def start_batch_scrape(urls, options = nil)
  raise ArgumentError, "URLs list is required" if urls.nil?

  body = { "urls" => urls }
  extra_headers = {}
  if options
    opts_hash = options.to_h
    # idempotencyKey goes as a header, not in body
    if options.idempotency_key && !options.idempotency_key.empty?
      extra_headers["x-idempotency-key"] = options.idempotency_key
    end
    # Flatten nested scrape options to top level (API expects this)
    nested = opts_hash.delete("options")
    body.merge!(opts_hash)
    body.merge!(nested) if nested
  end
  raw = @http.post("/v2/batch/scrape", body, extra_headers: extra_headers)
  Models::BatchScrapeResponse.new(raw)
end
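The request shaping in #start_batch_scrape (idempotency key promoted to a header, nested scrape options flattened into the body) can be sketched with plain hashes. The option hash shapes below are assumptions made for illustration; the gem's actual options object is not shown here.

```ruby
# Sketch of start_batch_scrape's request shaping, using plain hashes in
# place of the gem's options object. Field names mirror the code above.
def build_batch_request(urls, opts_hash, idempotency_key: nil)
  body = { "urls" => urls }
  headers = {}
  # The idempotency key travels as a header, not in the JSON body.
  headers["x-idempotency-key"] = idempotency_key if idempotency_key && !idempotency_key.empty?
  # Nested scrape options are flattened to the top level of the body.
  nested = opts_hash.delete("options")
  body.merge!(opts_hash)
  body.merge!(nested) if nested
  [body, headers]
end

body, headers = build_batch_request(
  ["https://example.com"],
  { "maxConcurrency" => 2, "options" => { "formats" => ["markdown"] } },
  idempotency_key: "abc-123"
)
# body    => { "urls" => ["https://example.com"], "maxConcurrency" => 2, "formats" => ["markdown"] }
# headers => { "x-idempotency-key" => "abc-123" }
```

Sending the same idempotency key on a retried request lets the API deduplicate the job instead of starting a second batch.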
#start_crawl(url, options = nil) ⇒ Models::CrawlResponse
Starts an async crawl job and returns immediately.
# File 'lib/firecrawl/client.rb', line 118

def start_crawl(url, options = nil)
  raise ArgumentError, "URL is required" if url.nil?

  body = { "url" => url }
  body.merge!(options.to_h) if options
  raw = @http.post("/v2/crawl", body)
  Models::CrawlResponse.new(raw)
end
#stop_interactive_browser(job_id) ⇒ Hash
Stops the interactive browser session for a scrape job.
# File 'lib/firecrawl/client.rb', line 103

def stop_interactive_browser(job_id)
  raise ArgumentError, "Job ID is required" if job_id.nil?

  @http.delete("/v2/scrape/#{job_id}/interact")
end