Class: Legion::Data::Extract::Handlers::Pptx
- Inherits:
- Base
- Object
- Base
- Legion::Data::Extract::Handlers::Pptx
show all
- Defined in:
- lib/legion/data/extract/handlers/pptx.rb
Class Method Summary
collapse
Methods inherited from Base
available?, for_type, inherited, register, supported_types
#handle_exception
Class Method Details
.extensions ⇒ Object
9
|
# File 'lib/legion/data/extract/handlers/pptx.rb', line 9
# Returns the file extensions associated with this handler.
#
# @return [Array<String>] always `['.pptx']`
def self.extensions
  %w[.pptx]
end
|
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
# File 'lib/legion/data/extract/handlers/pptx.rb', line 12
# Pulls the visible text out of each slide of a .pptx archive.
#
# NOTE(review): the method name was missing from this listing
# (`def self.(source)`); `extract` is inferred from the `:extract_pptx`
# operation tag used below — confirm against the handler's callers.
#
# Every `ppt/slides/slide*.xml` entry is parsed and the text of its `a:t`
# elements is joined with single spaces. Slides that contain no `a:t`
# element are skipped, so both the "Slide N" labels and the `slides` count
# refer to slides that yielded text, not to positions in the deck.
#
# @param source the value handed to `Zip::File.open`
#   (presumably a filesystem path — verify against callers)
# @return [Hash] on success `{ text: String, metadata: { slides: Integer } }`;
#   `{ text: nil, error: :gem_not_installed, gem: gem_name }` when a required
#   library cannot be loaded; `{ text: nil, error: String }` for any other
#   StandardError (the string is the exception message)
def self.extract(source)
  # Loaded lazily so a missing gem surfaces as the LoadError handled below.
  require 'zip'
  require 'rexml/document'

  slides = []
  ::Zip::File.open(source) do |zip|
    # Order by the numeric suffix: a plain name sort is lexicographic and
    # would place slide10.xml ahead of slide2.xml. The name is kept as a
    # tie-breaker so entries without a number still sort deterministically.
    entries = zip.glob('ppt/slides/slide*.xml').sort_by do |entry|
      [entry.name[/(\d+)\.xml\z/, 1].to_i, entry.name]
    end

    entries.each do |entry|
      doc = REXML::Document.new(entry.get_input_stream.read)
      texts = []
      doc.each_element('//a:t') { |e| texts << e.text }
      slides << texts.join(' ') unless texts.empty?
    end
  end

  text = slides.each_with_index.map { |s, i| "Slide #{i + 1}: #{s}" }.join("\n\n")
  { text: text, metadata: { slides: slides.size } }
rescue LoadError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_pptx, gem: gem_name)
  # Report the same gem name that was logged instead of a second hard-coded copy.
  { text: nil, error: :gem_not_installed, gem: gem_name }
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: :extract_pptx)
  { text: nil, error: e.message }
end
|
.gem_name ⇒ Object
10
|
# File 'lib/legion/data/extract/handlers/pptx.rb', line 10
# Returns the name of the gem this handler reports as its dependency.
#
# @return [String] always `'rubyzip'`
def self.gem_name
  'rubyzip'
end
|
.type ⇒ Object
8
|
# File 'lib/legion/data/extract/handlers/pptx.rb', line 8
# Returns the type symbol identifying this handler.
#
# @return [Symbol] always `:pptx`
def self.type
  :pptx
end
|