Module: Pdf2MarkdownOCR
- Defined in:
  - lib/pdf2markdownOCR.rb
  - lib/pdf2markdownOCR/cli.rb
  - lib/pdf2markdownOCR/llm_api.rb
  - lib/pdf2markdownOCR/version.rb
  - lib/pdf2markdownOCR/pdf2image.rb
  - lib/pdf2markdownOCR/configuration.rb
Defined Under Namespace
- Modules: LlmApi, Pdf2Image
- Classes: CLI, Configuration, FileNotFoundError
Constant Summary
- VERSION = "0.0.3"
Class Attribute Summary
- .configuration ⇒ Object
Class Method Summary
- .configure {|configuration| ... } ⇒ Object
- .convert_pdf(pdf_path:, output_file: nil, pages: nil) ⇒ Object
- .gem_version ⇒ Object
Class Attribute Details
.configuration ⇒ Object
```ruby
# File 'lib/pdf2markdownOCR.rb', line 20

def configuration
  @configuration ||= Configuration.new
end
```
Class Method Details
.configure {|configuration| ... } ⇒ Object
```ruby
# File 'lib/pdf2markdownOCR.rb', line 24

def configure
  yield(configuration)
end
```
.convert_pdf(pdf_path:, output_file: nil, pages: nil) ⇒ Object
```ruby
# File 'lib/pdf2markdownOCR.rb', line 30

def self.convert_pdf(pdf_path:, output_file: nil, pages: nil)
  Pdf2MarkdownOCR.configuration.logger.info "Parsing PDF file: #{pdf_path}"

  unless File.exist?(pdf_path)
    Pdf2MarkdownOCR.configuration.logger.error "File not found: #{pdf_path}"
    raise FileNotFoundError, "File not found: #{pdf_path}"
  end

  markdown_content = ""

  begin
    tempdir = Dir.mktmpdir("pdf2markdownocr")
    images = []
    if Pdf2MarkdownOCR.configuration.mode == :multi_thread
      images = Pdf2MarkdownOCR::Pdf2Image.multi_thread_conversion(pdf_path: pdf_path, pages: pages, output_prefix: tempdir)
    else
      images = Pdf2MarkdownOCR::Pdf2Image.single_thread_conversion(pdf_path: pdf_path, pages: pages, output_prefix: tempdir)
    end
    markdown_content = Pdf2MarkdownOCR::LlmApi.ocr_images(images)
  ensure
    # Clean up temporary directory after processing
    FileUtils.remove_entry(tempdir) if tempdir && Dir.exist?(tempdir)
  end

  # If output file is configured, write the markdown content to the file, otherwise return it as a string
  if output_file && !output_file.empty?
    File.write(output_file, markdown_content)
    return nil
  end

  markdown_content
end
```
.gem_version ⇒ Object
```ruby
# File 'lib/pdf2markdownOCR/version.rb', line 6

def self.gem_version
  Gem::Version.new(VERSION)
end
```