Module: Pdf2MarkdownOCR::LlmApi

Defined in: lib/pdf2markdownOCR/llm_api.rb

Class Method Summary

.ocr_images(images) ⇒ Object
.payload(image_path) ⇒ Object

Class Method Details

.ocr_images(images) ⇒ `Object`

# File 'lib/pdf2markdownOCR/llm_api.rb', line 32

# Sends every image to the configured LLM chat-completions endpoint and
# stitches the returned Markdown pages together.
#
# One POST request per image is queued on a Typhoeus::Hydra and all queued
# requests are executed by a single +hydra.run+. Pages are re-ordered by
# their position in +images+ before joining, so the order in which the
# responses complete does not matter.
#
# A page is skipped (and logged) when the HTTP request fails, when the
# response body is not valid JSON or does not have the expected
# choices/message/content shape, or when the returned content is empty.
# One bad response never aborts the remaining pages.
#
# @param images [Array<String>] paths of the page images, in page order
# @return [String] the Markdown of all successfully processed pages,
#   separated by "\n\n---\n\n" ("" when no page produced content)
def self.ocr_images(images)
  logger = Pdf2MarkdownOCR.configuration.logger
  markdown_pages = []

  logger.info "OCR #{images.size} images"
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  hydra = Typhoeus::Hydra.new
  images.each_with_index do |image_path, index|
    payload = Pdf2MarkdownOCR::LlmApi.payload(image_path)
    request = Typhoeus::Request.new(
      "#{Pdf2MarkdownOCR.configuration.llm_api_url}/v1/chat/completions",
      method: :post,
      body: payload.to_json,
      headers: { "Content-Type" => "application/json" },
      timeout: 600 # Default OpenAI timeout is 600 seconds
    )

    request.on_complete do |response|
      unless response.success?
        logger.error "Error processing #{image_path}: #{response.return_message} (#{response.code})"
        next
      end

      begin
        parsed_response = JSON.parse(response.body)
        # A non-container top level (e.g. JSON null) simply yields no content;
        # a container of the wrong shape raises TypeError from #dig.
        markdown_page = parsed_response.dig("choices", 0, "message", "content") if parsed_response.respond_to?(:dig)
      rescue JSON::ParserError, TypeError => e
        # An exception escaping this callback would propagate out of hydra.run
        # and discard every page collected so far, so contain it here.
        logger.error "Error processing #{image_path}: invalid response body (#{e.class}: #{e.message})"
        next
      end

      if markdown_page.nil? || markdown_page.empty?
        logger.warn "Warning: No Markdown content generated for #{image_path}"
      else
        # Keep the source index: completion order is not the page order.
        markdown_pages << { index: index, content: markdown_page }
      end
    end

    hydra.queue(request)
  end
  hydra.run

  t2 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  logger.info "Total Image processing time: #{(t2 - t1).round(2)} seconds"

  markdown_pages.sort_by { |page| page[:index] }.map { |page| page[:content] }.join("\n\n---\n\n")
end

.payload(image_path) ⇒ `Object`

# File 'lib/pdf2markdownOCR/llm_api.rb', line 4

# Builds the chat-completion request body for a single page image.
#
# The file at +image_path+ is read in binary mode, Base64-encoded (strict,
# no line breaks) and embedded as a data URL next to the fixed OCR prompt.
# NOTE(review): the data URL always declares image/png regardless of the
# file's actual format — confirm callers only pass PNG files.
#
# @param image_path [String] path of the image file to embed
# @return [Hash] body with :model (from the configuration) and a single
#   user message holding a text part and an image_url part
def self.payload(image_path)
  encoded_image = Base64.strict_encode64(File.binread(image_path))

  prompt_part = { type: "text", text: "<image>\nFree OCR." }
  image_part = {
    type: "image_url",
    image_url: { url: "data:image/png;base64,#{encoded_image}" }
  }

  {
    model: Pdf2MarkdownOCR.configuration.llm_model,
    messages: [
      { role: "user", content: [prompt_part, image_part] }
    ]
  }
end

Module: Pdf2MarkdownOCR::LlmApi

Class Method Summary

Class Method Details

.ocr_images(images) ⇒ Object

.payload(image_path) ⇒ Object

.ocr_images(images) ⇒ `Object`

.payload(image_path) ⇒ `Object`