Class: LlmScraper::ContentFetchers::Jina

Inherits:
Base
  • Object
show all
Defined in:
lib/llm_scraper/content_fetchers/jina.rb

Constant Summary collapse

BASE_URL =
"https://r.jina.ai"

Instance Method Summary collapse

Constructor Details

#initialize(config = LlmScraper.configuration) ⇒ Jina

Returns a new instance of Jina.



8
9
10
11
# File 'lib/llm_scraper/content_fetchers/jina.rb', line 8

# Creates a Jina content fetcher.
#
# Stores the given configuration and then builds the HTTP connection that
# #fetch issues its requests through. The configuration is assigned first;
# keep that order, since build_connection is defined outside this snippet
# and may read it (NOTE(review): confirm against Base).
#
# @param config [Object] configuration object; defaults to
#   LlmScraper.configuration. #fetch reads +jina_api_key+ from it.
def initialize(config = LlmScraper.configuration)
  @config = config
  @conn = build_connection(
    base_url: BASE_URL,
    timeout: 30 # presumably seconds; verify against build_connection
  )
end

Instance Method Details

#fetch(url) ⇒ String

Returns markdown content.

Parameters:

  • url (String)

Returns:

  • (String)

    markdown content



15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/llm_scraper/content_fetchers/jina.rb', line 15

# Fetches +url+ through the Jina connection and returns the response body.
#
# The target URL is appended as the request path ("/<url>") and markdown is
# requested via the Accept header. When an API key is configured it is sent
# as a Bearer token; otherwise the request goes out unauthenticated and a
# warning is printed.
#
# A nil, empty or whitespace-only key is treated as "not configured", and
# surrounding whitespace is stripped from a real key, so a blank value never
# produces a malformed "Authorization: Bearer " header.
#
# @param url [String] address of the page to fetch
# @return [String] markdown content (the response body)
# @raise [LlmScraper::FetchError] when the response is not successful or the
#   connection raises a Faraday::Error
def fetch(url)
  # Normalise once so the warning and the header decision always agree.
  api_key = @config.jina_api_key.to_s.strip
  authenticated = !api_key.empty?

  warn "[LlmScraper] No jina_api_key set — unauthenticated (~200 req/day limit)" unless authenticated

  response = @conn.get("/#{url}") do |req|
    req.headers["Accept"] = "text/markdown"
    req.headers["Authorization"] = "Bearer #{api_key}" if authenticated
  end

  raise LlmScraper::FetchError, "Jina error (#{response.status}): #{response.body}" unless response.success?

  response.body
rescue Faraday::Error => e
  # Surface transport-level failures under the library's own error type.
  raise LlmScraper::FetchError, "Jina fetch error: #{e.message}"
end