Class: LlmScraper::ContentFetchers::Jina
- Defined in:
- lib/llm_scraper/content_fetchers/jina.rb
Constant Summary collapse
- BASE_URL =
"https://r.jina.ai"
Instance Method Summary collapse
-
#fetch(url) ⇒ String
Markdown content.
-
#initialize(config = LlmScraper.configuration) ⇒ Jina
constructor
A new instance of Jina.
Constructor Details
#initialize(config = LlmScraper.configuration) ⇒ Jina
Returns a new instance of Jina.
8 9 10 11 |
# File 'lib/llm_scraper/content_fetchers/jina.rb', line 8

# Builds a fetcher that talks to the Jina reader endpoint at BASE_URL.
#
# @param config [Object] configuration object; #fetch reads
#   +jina_api_key+ from it. Defaults to +LlmScraper.configuration+.
# @return [Jina] a new instance of Jina
def initialize(config = LlmScraper.configuration)
  # Keep the config assignment first: build_connection is defined outside
  # this listing and may rely on instance state.
  @config = config

  # NOTE(review): timeout is presumably in seconds; confirm against
  # build_connection.
  @conn = build_connection(
    base_url: BASE_URL,
    timeout: 30
  )
end
Instance Method Details
#fetch(url) ⇒ String
Returns markdown content.
15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/llm_scraper/content_fetchers/jina.rb', line 15

# Fetches +url+ through the Jina connection and returns the response body.
#
# The request asks for "text/markdown" and sends a bearer token when
# +jina_api_key+ is configured. When the key is nil, a warning is printed
# and the request is sent without an Authorization header.
#
# @param url [String] the page to fetch; appended to the connection's
#   base URL as "/<url>"
# @return [String] markdown content (the response body)
# @raise [LlmScraper::FetchError] when the response is not successful, or
#   when the connection raises a Faraday::Error
def fetch(url)
  api_key = @config.jina_api_key
  warn "[LlmScraper] No jina_api_key set — unauthenticated (~200 req/day limit)" if api_key.nil?

  response = @conn.get("/#{url}") do |req|
    req.headers["Accept"] = "text/markdown"
    req.headers["Authorization"] = "Bearer #{api_key}" if api_key
  end

  raise LlmScraper::FetchError, "Jina error (#{response.status}): #{response.body}" unless response.success?

  response.body
rescue Faraday::Error => e
  # Surface transport-level failures as the library's own error type,
  # keeping the underlying message for diagnosis.
  raise LlmScraper::FetchError, "Jina fetch error: #{e.message}"
end