Class: Firecrawl::Client
- Inherits:
-
Object
- Object
- Firecrawl::Client
- Defined in:
- lib/firecrawl/client.rb
Overview
Client for the Firecrawl v2 API.
Constant Summary collapse
- DEFAULT_API_URL = "https://api.firecrawl.dev"
- DEFAULT_TIMEOUT = 300 (seconds)
- DEFAULT_MAX_RETRIES = 3
- DEFAULT_BACKOFF_FACTOR = 0.5
- DEFAULT_POLL_INTERVAL = 2 (seconds)
- DEFAULT_JOB_TIMEOUT = 300 (seconds)
Class Method Summary collapse
-
.from_env ⇒ Client
Creates a client from the FIRECRAWL_API_KEY environment variable.
Instance Method Summary collapse
-
#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse
Runs an agent task and waits for completion (auto-polling).
-
#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob
Batch-scrapes URLs and waits for completion (auto-polling).
-
#cancel_agent(job_id) ⇒ Hash
Cancels a running agent task.
-
#cancel_batch_scrape(job_id) ⇒ Hash
Cancels a running batch scrape job.
-
#cancel_crawl(job_id) ⇒ Hash
Cancels a running crawl job.
-
#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob
Crawls a website and waits for completion (auto-polling).
-
#create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil, retention_days: nil) ⇒ Object
Creates a scheduled monitor (monitor-management endpoint).
- #delete_monitor(monitor_id) ⇒ Object
-
#get_agent_status(job_id) ⇒ Models::AgentStatusResponse
Gets the status of an agent task.
-
#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob
Gets the status and results of a batch scrape job.
-
#get_concurrency ⇒ Models::ConcurrencyCheck
Gets current concurrency usage.
-
#get_crawl_errors(job_id) ⇒ Hash
Gets errors from a crawl job.
-
#get_crawl_status(job_id) ⇒ Models::CrawlJob
Gets the status and results of a crawl job.
-
#get_credit_usage ⇒ Models::CreditUsage
Gets current credit usage.
- #get_monitor(monitor_id) ⇒ Object
- #get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true) ⇒ Object
-
#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client
constructor
Creates a new Firecrawl client.
-
#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash
Interacts with the scrape-bound browser session for a scrape job.
- #list_monitor_checks(monitor_id, limit: nil, offset: nil) ⇒ Object
- #list_monitors(limit: nil, offset: nil) ⇒ Object
-
#map(url, options = nil) ⇒ Models::MapData
Discovers URLs on a website.
-
#parse(file, options = nil) ⇒ Models::Document
Parses an uploaded file and returns the extracted document.
- #run_monitor(monitor_id) ⇒ Object
-
#scrape(url, options = nil) ⇒ Models::Document
Scrapes a single URL and returns the document.
-
#search(query, options = nil) ⇒ Models::SearchData
Performs a web search.
-
#start_agent(options) ⇒ Models::AgentResponse
Starts an async agent task.
-
#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse
Starts an async batch scrape job.
-
#start_crawl(url, options = nil) ⇒ Models::CrawlResponse
Starts an async crawl job and returns immediately.
-
#stop_interactive_browser(job_id) ⇒ Hash
Stops the interactive browser session for a scrape job.
- #update_monitor(monitor_id, **attrs) ⇒ Object
Constructor Details
#initialize(api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR) ⇒ Client
Creates a new Firecrawl client.
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/firecrawl/client.rb', line 34 def initialize( api_key: nil, api_url: nil, timeout: DEFAULT_TIMEOUT, max_retries: DEFAULT_MAX_RETRIES, backoff_factor: DEFAULT_BACKOFF_FACTOR ) resolved_key = api_key || ENV["FIRECRAWL_API_KEY"] if resolved_key.nil? || resolved_key.strip.empty? raise FirecrawlError, "API key is required. Provide api_key: or set FIRECRAWL_API_KEY environment variable." end resolved_url = api_url || ENV["FIRECRAWL_API_URL"] || DEFAULT_API_URL unless resolved_url.match?(%r{\Ahttps?://}i) raise FirecrawlError, "API URL must be a fully qualified HTTP or HTTPS URL (got: #{resolved_url})." end @http = HttpClient.new( api_key: resolved_key, base_url: resolved_url, timeout: timeout, max_retries: max_retries, backoff_factor: backoff_factor ) end |
Class Method Details
.from_env ⇒ Client
Creates a client from the FIRECRAWL_API_KEY environment variable.
63 64 65 |
# File 'lib/firecrawl/client.rb', line 63 def self.from_env new end |
Instance Method Details
#agent(options, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::AgentStatusResponse
Runs an agent task and waits for completion (auto-polling).
412 413 414 415 416 417 418 419 420 421 422 423 424 |
# File 'lib/firecrawl/client.rb', line 412 def agent(, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) start = start_agent() raise FirecrawlError, "Agent start did not return a job ID" if start.id.nil? deadline = Time.now + timeout while Time.now < deadline status = get_agent_status(start.id) return status if status.done? sleep(poll_interval) end raise JobTimeoutError.new(start.id, timeout, "Agent") end |
#batch_scrape(urls, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::BatchScrapeJob
Batch-scrapes URLs and waits for completion (auto-polling).
250 251 252 253 |
# File 'lib/firecrawl/client.rb', line 250 def batch_scrape(urls, = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) start = start_batch_scrape(urls, ) poll_batch_scrape(start.id, poll_interval, timeout) end |
#cancel_agent(job_id) ⇒ Hash
Cancels a running agent task.
430 431 432 433 434 |
# File 'lib/firecrawl/client.rb', line 430 def cancel_agent(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? @http.delete("/v2/agent/#{job_id}") end |
#cancel_batch_scrape(job_id) ⇒ Hash
Cancels a running batch scrape job.
259 260 261 262 263 |
# File 'lib/firecrawl/client.rb', line 259 def cancel_batch_scrape(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? @http.delete("/v2/batch/scrape/#{job_id}") end |
#cancel_crawl(job_id) ⇒ Hash
Cancels a running crawl job.
185 186 187 188 189 |
# File 'lib/firecrawl/client.rb', line 185 def cancel_crawl(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? @http.delete("/v2/crawl/#{job_id}") end |
#crawl(url, options = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) ⇒ Models::CrawlJob
Crawls a website and waits for completion (auto-polling).
176 177 178 179 |
# File 'lib/firecrawl/client.rb', line 176 def crawl(url, = nil, poll_interval: DEFAULT_POLL_INTERVAL, timeout: DEFAULT_JOB_TIMEOUT) start = start_crawl(url, ) poll_crawl(start.id, poll_interval, timeout) end |
#create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil, retention_days: nil) ⇒ Object
MONITOR
288 289 290 291 292 293 294 295 296 297 298 299 |
# File 'lib/firecrawl/client.rb', line 288 def create_monitor(name:, schedule:, targets:, webhook: nil, notification: nil, retention_days: nil) body = { "name" => name, "schedule" => schedule, "targets" => targets, "webhook" => webhook, "notification" => notification, "retentionDays" => retention_days, }.compact raw = @http.post("/v2/monitor", body) Models::Monitor.new(raw["data"] || raw) end |
#delete_monitor(monitor_id) ⇒ Object
329 330 331 332 333 |
# File 'lib/firecrawl/client.rb', line 329 def delete_monitor(monitor_id) raise ArgumentError, "Monitor ID is required" if monitor_id.nil? @http.delete("/v2/monitor/#{monitor_id}")["success"] == true end |
#get_agent_status(job_id) ⇒ Models::AgentStatusResponse
Gets the status of an agent task.
399 400 401 402 403 404 |
# File 'lib/firecrawl/client.rb', line 399 def get_agent_status(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? raw = @http.get("/v2/agent/#{job_id}") Models::AgentStatusResponse.new(raw) end |
#get_batch_scrape_status(job_id) ⇒ Models::BatchScrapeJob
Gets the status and results of a batch scrape job.
236 237 238 239 240 241 |
# File 'lib/firecrawl/client.rb', line 236 def get_batch_scrape_status(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? raw = @http.get("/v2/batch/scrape/#{job_id}") Models::BatchScrapeJob.new(raw) end |
#get_concurrency ⇒ Models::ConcurrencyCheck
Gets current concurrency usage.
443 444 445 446 |
# File 'lib/firecrawl/client.rb', line 443 def get_concurrency raw = @http.get("/v2/concurrency-check") Models::ConcurrencyCheck.new(raw) end |
#get_crawl_errors(job_id) ⇒ Hash
Gets errors from a crawl job.
195 196 197 198 199 |
# File 'lib/firecrawl/client.rb', line 195 def get_crawl_errors(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? @http.get("/v2/crawl/#{job_id}/errors") end |
#get_crawl_status(job_id) ⇒ Models::CrawlJob
Gets the status and results of a crawl job.
162 163 164 165 166 167 |
# File 'lib/firecrawl/client.rb', line 162 def get_crawl_status(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? raw = @http.get("/v2/crawl/#{job_id}") Models::CrawlJob.new(raw) end |
#get_credit_usage ⇒ Models::CreditUsage
Gets current credit usage.
451 452 453 454 455 |
# File 'lib/firecrawl/client.rb', line 451 def get_credit_usage raw = @http.get("/v2/team/credit-usage") data = raw["data"] || raw Models::CreditUsage.new(data) end |
#get_monitor(monitor_id) ⇒ Object
306 307 308 309 310 311 |
# File 'lib/firecrawl/client.rb', line 306 def get_monitor(monitor_id) raise ArgumentError, "Monitor ID is required" if monitor_id.nil? raw = @http.get("/v2/monitor/#{monitor_id}") Models::Monitor.new(raw["data"] || raw) end |
#get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true) ⇒ Object
349 350 351 352 353 354 355 356 357 358 359 |
# File 'lib/firecrawl/client.rb', line 349 def get_monitor_check(monitor_id, check_id, limit: nil, skip: nil, status: nil, auto_paginate: true) raise ArgumentError, "Monitor ID is required" if monitor_id.nil? raise ArgumentError, "Check ID is required" if check_id.nil? params = query(limit: limit, skip: skip, status: status) raw = @http.get("/v2/monitor/#{monitor_id}/checks/#{check_id}#{params}") data = raw["data"] || raw data["next"] = raw["next"] if raw["next"] check = Models::MonitorCheckDetail.new(data) auto_paginate ? paginate_monitor_check(check) : check end |
#interact(job_id, code, language: "node", timeout: nil) ⇒ Hash
Interacts with the scrape-bound browser session for a scrape job.
93 94 95 96 97 98 99 100 |
# File 'lib/firecrawl/client.rb', line 93 def interact(job_id, code, language: "node", timeout: nil) raise ArgumentError, "Job ID is required" if job_id.nil? raise ArgumentError, "Code is required" if code.nil? body = { "code" => code, "language" => language } body["timeout"] = timeout if timeout @http.post("/v2/scrape/#{job_id}/interact", body) end |
#list_monitor_checks(monitor_id, limit: nil, offset: nil) ⇒ Object
342 343 344 345 346 347 |
# File 'lib/firecrawl/client.rb', line 342 def list_monitor_checks(monitor_id, limit: nil, offset: nil) raise ArgumentError, "Monitor ID is required" if monitor_id.nil? raw = @http.get("/v2/monitor/#{monitor_id}/checks#{query(limit: limit, offset: offset)}") (raw["data"] || []).map { |item| Models::MonitorCheck.new(item) } end |
#list_monitors(limit: nil, offset: nil) ⇒ Object
301 302 303 304 |
# File 'lib/firecrawl/client.rb', line 301 def list_monitors(limit: nil, offset: nil) raw = @http.get("/v2/monitor#{query(limit: limit, offset: offset)}") (raw["data"] || []).map { |item| Models::Monitor.new(item) } end |
#map(url, options = nil) ⇒ Models::MapData
Discovers URLs on a website.
274 275 276 277 278 279 280 281 282 |
# File 'lib/firecrawl/client.rb', line 274 def map(url, = nil) raise ArgumentError, "URL is required" if url.nil? body = { "url" => url } body.merge!(.to_h) if raw = @http.post("/v2/map", body) data = raw["data"] || raw Models::MapData.new(data) end |
#parse(file, options = nil) ⇒ Models::Document
Parses an uploaded file and returns the extracted document.
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/firecrawl/client.rb', line 121 def parse(file, = nil) raise ArgumentError, "File is required" if file.nil? unless file.is_a?(Models::ParseFile) raise ArgumentError, "File must be a Firecrawl::Models::ParseFile" end = .nil? ? {} : .to_h raw = @http.post_multipart( "/v2/parse", fields: { "options" => JSON.generate() }, file_field: "file", filename: file.filename, content: file.content, content_type: file.content_type, ) data = raw["data"] || raw Models::Document.new(data) end |
#run_monitor(monitor_id) ⇒ Object
335 336 337 338 339 340 |
# File 'lib/firecrawl/client.rb', line 335 def run_monitor(monitor_id) raise ArgumentError, "Monitor ID is required" if monitor_id.nil? raw = @http.post("/v2/monitor/#{monitor_id}/run", {}) Models::MonitorCheck.new(raw["data"] || raw) end |
#scrape(url, options = nil) ⇒ Models::Document
Scrapes a single URL and returns the document.
76 77 78 79 80 81 82 83 84 |
# File 'lib/firecrawl/client.rb', line 76 def scrape(url, = nil) raise ArgumentError, "URL is required" if url.nil? body = { "url" => url } body.merge!(.to_h) if raw = @http.post("/v2/scrape", body) data = raw["data"] || raw Models::Document.new(data) end |
#search(query, options = nil) ⇒ Models::SearchData
Performs a web search.
370 371 372 373 374 375 376 377 378 |
# File 'lib/firecrawl/client.rb', line 370 def search(query, = nil) raise ArgumentError, "Query is required" if query.nil? body = { "query" => query } body.merge!(.to_h) if raw = @http.post("/v2/search", body) data = raw["data"] || raw Models::SearchData.new(data) end |
#start_agent(options) ⇒ Models::AgentResponse
Starts an async agent task.
388 389 390 391 392 393 |
# File 'lib/firecrawl/client.rb', line 388 def start_agent() raise ArgumentError, "Agent options are required" if .nil? raw = @http.post("/v2/agent", .to_h) Models::AgentResponse.new(raw) end |
#start_batch_scrape(urls, options = nil) ⇒ Models::BatchScrapeResponse
Starts an async batch scrape job.
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
# File 'lib/firecrawl/client.rb', line 210 def start_batch_scrape(urls, = nil) raise ArgumentError, "URLs list is required" if urls.nil? body = { "urls" => urls } extra_headers = {} if opts_hash = .to_h # idempotencyKey goes as a header, not in body if .idempotency_key && !.idempotency_key.empty? extra_headers["x-idempotency-key"] = .idempotency_key end # Flatten nested scrape options to top level (API expects this) nested = opts_hash.delete("options") body.merge!(opts_hash) body.merge!(nested) if nested end raw = @http.post("/v2/batch/scrape", body, extra_headers: extra_headers) Models::BatchScrapeResponse.new(raw) end |
#start_crawl(url, options = nil) ⇒ Models::CrawlResponse
Starts an async crawl job and returns immediately.
149 150 151 152 153 154 155 156 |
# File 'lib/firecrawl/client.rb', line 149 def start_crawl(url, = nil) raise ArgumentError, "URL is required" if url.nil? body = { "url" => url } body.merge!(.to_h) if raw = @http.post("/v2/crawl", body) Models::CrawlResponse.new(raw) end |
#stop_interactive_browser(job_id) ⇒ Hash
Stops the interactive browser session for a scrape job.
106 107 108 109 110 |
# File 'lib/firecrawl/client.rb', line 106 def stop_interactive_browser(job_id) raise ArgumentError, "Job ID is required" if job_id.nil? @http.delete("/v2/scrape/#{job_id}/interact") end |
#update_monitor(monitor_id, **attrs) ⇒ Object
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
# File 'lib/firecrawl/client.rb', line 313 def update_monitor(monitor_id, **attrs) raise ArgumentError, "Monitor ID is required" if monitor_id.nil? body = { "name" => attrs[:name], "status" => attrs[:status], "schedule" => attrs[:schedule], "webhook" => attrs[:webhook], "notification" => attrs[:notification], "targets" => attrs[:targets], "retentionDays" => attrs[:retention_days], }.compact raw = @http.patch("/v2/monitor/#{monitor_id}", body) Models::Monitor.new(raw["data"] || raw) end |