Class: LlmScraper::ContentFetchers::Markdownify

Inherits:
Base
  • Object
show all
Defined in:
lib/llm_scraper/content_fetchers/markdownify.rb

Constant Summary collapse

BASE_URL =
"https://api.scrapegraphai.com"

Instance Method Summary collapse

Constructor Details

#initialize(config = LlmScraper.configuration) ⇒ Markdownify

Returns a new instance of Markdownify.



8
9
10
11
# File 'lib/llm_scraper/content_fetchers/markdownify.rb', line 8

# Builds a fetcher bound to the given configuration.
#
# The configuration is stored before the connection is created; keep that
# order, since the inherited build_connection helper is not visible here and
# may read @config.
#
# @param config [Object] configuration object; defaults to LlmScraper.configuration
def initialize(config = LlmScraper.configuration)
  @config = config
  # timeout is presumably in seconds — confirm against Base#build_connection.
  @conn = build_connection(
    base_url: BASE_URL,
    timeout: 60
  )
end

Instance Method Details

#fetch(url) ⇒ String

Returns markdown content.

Parameters:

  • url (String) — address of the web page to convert to markdown

Returns:

  • (String)

    markdown content



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/llm_scraper/content_fetchers/markdownify.rb', line 15

# Converts the page at +url+ to markdown through the Markdownify endpoint.
#
# Sends a JSON POST to "/v1/markdownify" carrying the URL as "website_url"
# and the configured API key in the "SGAI-APIKEY" header, then returns the
# value stored under the "result" key of the JSON response.
#
# @param url [String] address of the page to convert
# @return [String] the "result" value of the response (expected to be markdown)
# @raise [LlmScraper::FetchError] when the request fails at the transport
#   level, the response status is not successful, the body is not valid JSON,
#   the JSON is not an object, or the object carries no "result"
def fetch(url)
  response = @conn.post("/v1/markdownify") do |req|
    req.headers["SGAI-APIKEY"]  = @config.markdownify_api_key
    req.headers["Content-Type"] = "application/json"
    req.body = JSON.generate(website_url: url)
  end

  raise LlmScraper::FetchError, "Markdownify error (#{response.status}): #{response.body}" unless response.success?

  payload = JSON.parse(response.body)

  # Valid JSON is not necessarily an object: `null`, `[]` or a bare string
  # would make the key lookup below raise NoMethodError/TypeError, which would
  # bypass the rescue clauses and leak out as something other than FetchError.
  unless payload.is_a?(Hash)
    raise LlmScraper::FetchError, "Markdownify returned an unexpected response for #{url}"
  end

  payload["result"] ||
    raise(LlmScraper::FetchError, "Markdownify returned no content for #{url}")
rescue Faraday::Error => e
  raise LlmScraper::FetchError, "Markdownify fetch error: #{e.message}"
rescue JSON::ParserError => e
  raise LlmScraper::FetchError, "Markdownify response parse error: #{e.message}"
end