Class: Legion::Data::Extract::Handlers::Pdf

Inherits:
Base
  • Object
show all
Defined in:
lib/legion/data/extract/handlers/pdf.rb

Class Method Summary collapse

Methods inherited from Base

available?, for_type, inherited, register, supported_types

Methods included from Logging::Helper

#handle_exception

Class Method Details

.extensions ⇒ Object



9
# File 'lib/legion/data/extract/handlers/pdf.rb', line 9

# File extensions declared by this handler.
#
# @return [Array<String>] a single-element list containing '.pdf'
def self.extensions
  ['.pdf']
end

.extract(source) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/legion/data/extract/handlers/pdf.rb', line 12

# Extracts the text and basic metadata of a PDF using the pdf-reader gem.
#
# The gem is required lazily inside the method, so a missing gem is reported
# through the returned hash instead of failing when this file is loaded.
# Failures are logged via +handle_exception+ and never propagate to the caller.
#
# @param source [Object] handed unchanged to +PDF::Reader.new+
#   (presumably a file path or IO -- confirm against callers)
# @return [Hash] one of:
#   - +{ text: String, metadata: { pages:, title: } }+ on success, where the
#     text of each page is joined with a blank line; +title+ is nil when the
#     document exposes no title
#   - +{ text: nil, error: :gem_not_installed, gem: String }+ when the gem
#     cannot be loaded
#   - +{ text: nil, error: String }+ (the exception message) for any other
#     StandardError
def self.extract(source)
  require 'pdf-reader'

  reader = ::PDF::Reader.new(source)
  text = reader.pages.map(&:text).join("\n\n")
  # A document without an Info dictionary may yield a nil +info+; use nil-safe
  # access so the extracted text is not thrown away over a missing title.
  { text: text, metadata: { pages: reader.page_count, title: reader.info&.dig(:Title) } }
rescue LoadError => e
  # LoadError is not a StandardError, so it needs its own rescue clause.
  handle_exception(e, level: :warn, handled: true, operation: :extract_pdf, gem: gem_name)
  { text: nil, error: :gem_not_installed, gem: gem_name }
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_pdf)
  { text: nil, error: e.message }
end

.gem_name ⇒ Object



10
# File 'lib/legion/data/extract/handlers/pdf.rb', line 10

# Name of the gem this handler reports when PDF support cannot be loaded.
#
# @return [String] always 'pdf-reader'
def self.gem_name
  'pdf-reader'
end

.type ⇒ Object



8
# File 'lib/legion/data/extract/handlers/pdf.rb', line 8

# Type identifier declared by this handler.
#
# @return [Symbol] always :pdf
def self.type
  :pdf
end