Class: Legion::Data::Extract::Handlers::Docx

Inherits:
Base
  • Object
show all
Defined in:
lib/legion/data/extract/handlers/docx.rb

Class Method Summary collapse

Methods inherited from Base

available?, for_type, inherited, register, supported_types

Methods included from Logging::Helper

#handle_exception

Class Method Details

.extensions ⇒ Object



9
# File 'lib/legion/data/extract/handlers/docx.rb', line 9

# Returns the file extensions listed for this handler.
#
# @return [Array<String>] a single-element array holding ".docx"
def self.extensions
  ['.docx']
end

.extract(source) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/legion/data/extract/handlers/docx.rb', line 12

# Opens +source+ with the docx gem and collects the text of every
# non-empty paragraph.
#
# @param source [Object] passed straight to ::Docx::Document.open
# @return [Hash] on success, +{ text:, metadata: { paragraphs: } }+ where
#   +text+ is the kept paragraphs joined by a blank line and +paragraphs+
#   is how many were kept; when the gem cannot be required,
#   +{ text: nil, error: :gem_not_installed, gem: }+; on any other
#   StandardError, +{ text: nil, error: }+ carrying the exception message
def self.extract(source)
  # Required at call time so a missing gem surfaces as the LoadError
  # branch below rather than failing when this file is loaded.
  require 'docx'

  document = ::Docx::Document.open(source)
  contents = document.paragraphs.map { |paragraph| paragraph.text }
  kept = contents.reject { |content| content.empty? }
  body = kept.join("\n\n")
  summary = { paragraphs: kept.size }

  { text: body, metadata: summary }
rescue LoadError => load_failure
  # LoadError is not a StandardError, so it needs its own clause.
  handle_exception(load_failure, level: :warn, handled: true, operation: :extract_docx, gem: gem_name)
  { text: nil, error: :gem_not_installed, gem: gem_name }
rescue StandardError => failure
  handle_exception(failure, level: :warn, handled: true, operation: :extract_docx)
  { text: nil, error: failure.message }
end

.gem_name ⇒ Object



10
# File 'lib/legion/data/extract/handlers/docx.rb', line 10

# Returns the name of the gem this handler requires.
#
# @return [String] the literal "docx"
def self.gem_name
  'docx'
end

.type ⇒ Object



8
# File 'lib/legion/data/extract/handlers/docx.rb', line 8

# Returns the type identifier for this handler.
#
# @return [Symbol] the literal :docx
def self.type
  :docx
end