Class: Legion::Data::Extract::Handlers::Pptx

Inherits:
Base
  • Object
show all
Defined in:
lib/legion/data/extract/handlers/pptx.rb

Class Method Summary collapse

Methods inherited from Base

available?, for_type, inherited, register, supported_types

Methods included from Logging::Helper

#handle_exception

Class Method Details

.extensions ⇒ Object



9
# File 'lib/legion/data/extract/handlers/pptx.rb', line 9

# File extensions declared by this handler.
#
# @return [Array<String>] a one-element list containing ".pptx"
def self.extensions
  %w[.pptx]
end

.extract(source) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/legion/data/extract/handlers/pptx.rb', line 12

# Extracts the text of a .pptx archive, one paragraph per slide.
#
# Opens +source+ as a zip archive, parses every entry matching
# ppt/slides/slide*.xml and joins the text of all <a:t> elements found in it
# with single spaces. Entries are visited in numeric order of the number that
# precedes ".xml" in the entry name, so slide2.xml comes before slide10.xml
# (a plain name sort would place slide10 ahead of slide2).
#
# Slides that contain no <a:t> element are skipped; the "Slide N" labels and
# the +slides+ count therefore number only the slides that produced text.
#
# @param source [Object] passed unchanged to Zip::File.open
# @return [Hash] on success +{ text: String, metadata: { slides: Integer } }+;
#   on LoadError +{ text: nil, error: :gem_not_installed, gem: gem_name }+;
#   on any other StandardError +{ text: nil, error: String }+
def self.extract(source)
  # Loaded lazily so a missing gem surfaces as the LoadError branch below
  # instead of failing when this file is required.
  require 'zip'
  require 'rexml/document'

  slides = []
  ::Zip::File.open(source) do |zip|
    # Numeric sort key first; the full name breaks ties and orders any entry
    # whose name carries no trailing number (to_i turns nil into 0).
    ordered = zip.glob('ppt/slides/slide*.xml').sort_by do |entry|
      [entry.name[/(\d+)\.xml\z/, 1].to_i, entry.name]
    end

    ordered.each do |entry|
      doc = REXML::Document.new(entry.get_input_stream.read)
      texts = []
      doc.each_element('//a:t') { |e| texts << e.text }
      slides << texts.join(' ') unless texts.empty?
    end
  end
  text = slides.each_with_index.map { |s, i| "Slide #{i + 1}: #{s}" }.join("\n\n")
  { text: text, metadata: { slides: slides.size } }
rescue LoadError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_pptx, gem: gem_name)
  { text: nil, error: :gem_not_installed, gem: gem_name }
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_pptx)
  { text: nil, error: e.message }
end

.gem_name ⇒ Object



10
# File 'lib/legion/data/extract/handlers/pptx.rb', line 10

# Name of the gem associated with this handler.
#
# @return [String] "rubyzip"
def self.gem_name
  'rubyzip'
end

.type ⇒ Object



8
# File 'lib/legion/data/extract/handlers/pptx.rb', line 8

# Type identifier of this handler.
#
# @return [Symbol] :pptx
def self.type
  :pptx
end