Class: LlmScraper::ContentFetchers::Firecrawl

Inherits:
Base
  • Object
show all
Defined in:
lib/llm_scraper/content_fetchers/firecrawl.rb

Constant Summary collapse

BASE_URL =
"https://api.firecrawl.dev"

Instance Method Summary collapse

Constructor Details

#initialize(config = LlmScraper.configuration) ⇒ Firecrawl

Returns a new instance of Firecrawl.



8
9
10
11
# File 'lib/llm_scraper/content_fetchers/firecrawl.rb', line 8

# Stores the configuration and prepares the HTTP connection used for
# Firecrawl requests.
#
# @param config [Object] configuration object; when omitted, the value of
#   LlmScraper.configuration is used
def initialize(config = LlmScraper.configuration)
  # Assigned before the connection is built, as build_connection is
  # inherited and may rely on it — NOTE(review): confirm against Base.
  @config = config

  connection_options = { base_url: BASE_URL, timeout: 60 }
  @conn = build_connection(**connection_options)
end

Instance Method Details

#fetch(url) ⇒ String

Returns markdown content.

Parameters:

  • url (String)

Returns:

  • (String)

    markdown content



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/llm_scraper/content_fetchers/firecrawl.rb', line 15

# Posts +url+ to the Firecrawl +/v1/scrape+ endpoint, requesting the
# "markdown" format, and returns the markdown found at +data.markdown+ in the
# JSON response.
#
# Every failure mode is reported as LlmScraper::FetchError so callers only
# need to rescue a single error class.
#
# @param url [String] address of the page to scrape
# @return [String] markdown content
# @raise [LlmScraper::FetchError] on a Faraday transport error, a
#   non-success HTTP status, a body that is not valid JSON, or a JSON
#   payload that carries no markdown
def fetch(url)
  response = @conn.post("/v1/scrape") do |req|
    req.headers["Authorization"] = "Bearer #{@config.firecrawl_api_key}"
    req.headers["Content-Type"]  = "application/json"
    req.body = JSON.generate(url: url, formats: ["markdown"])
  end

  raise LlmScraper::FetchError, "Firecrawl error (#{response.status}): #{response.body}" unless response.success?

  # to_s turns a nil body into "", which JSON.parse rejects with
  # JSON::ParserError (handled below) instead of an unhandled TypeError.
  payload = JSON.parse(response.body.to_s)

  # Valid JSON is not necessarily an object (it may be null, an array, a
  # scalar), and "data" is not necessarily an object either. Check each level
  # so an unexpected shape reads as "no markdown" rather than leaking
  # NoMethodError/TypeError from a blind dig.
  data     = payload["data"] if payload.is_a?(Hash)
  markdown = data["markdown"] if data.is_a?(Hash)

  markdown || raise(LlmScraper::FetchError, "Firecrawl returned no markdown for #{url}")
rescue Faraday::Error => e
  raise LlmScraper::FetchError, "Firecrawl fetch error: #{e.message}"
rescue JSON::ParserError => e
  raise LlmScraper::FetchError, "Firecrawl response parse error: #{e.message}"
end