Class: AcroForge::AllTextProcessor

Inherits:
HexaPDF::Content::Processor
  • Object
show all
Defined in:
lib/acroforge/all_text_processor.rb

Instance Method Summary

Constructor Details

#initialize ⇒ AllTextProcessor

Returns a new instance of AllTextProcessor.



7
8
9
10
# File 'lib/acroforge/all_text_processor.rb', line 7

# Sets up a processor whose chunk buffer starts out empty.
#
# The parent initializer runs first and receives no arguments; afterwards
# @raw_chunks is assigned a fresh empty Array.
def initialize
  super()
  @raw_chunks = []
end

Instance Method Details

#show_text(str) ⇒ Object



12
13
14
# File 'lib/acroforge/all_text_processor.rb', line 12

# Hands the operand of a text-showing operation to process_text.
#
# NOTE(review): presumably invoked by the HexaPDF content processor for the
# PDF `Tj` operator — confirm against HexaPDF::Content::Processor.
#
# @param str [Object] operand forwarded unchanged to process_text
# @return [Object] whatever process_text returns
def show_text(str)
  process_text(str)
end

#show_text_with_positioning(arr) ⇒ Object



16
17
18
# File 'lib/acroforge/all_text_processor.rb', line 16

# Hands the operand of a positioned text-showing operation to process_text,
# exactly as show_text does for its own operand.
#
# NOTE(review): presumably invoked by the HexaPDF content processor for the
# PDF `TJ` operator, in which case arr would mix strings and numeric
# offsets — confirm against HexaPDF::Content::Processor.
#
# @param arr [Object] operand forwarded unchanged to process_text
# @return [Object] whatever process_text returns
def show_text_with_positioning(arr)
  process_text(arr)
end

#text_chunks ⇒ Object



20
21
22
23
24
25
26
27
28
# File 'lib/acroforge/all_text_processor.rb', line 20

# Returns the collected chunks after merging and a second normalization pass.
#
# Why normalize again: merge_fragments glues neighbouring chunks together
# with a literal " ". When the PDF emitted every glyph as its own text
# object, the joined string can look like "M o d e O f R e p a y m e n t".
# Running normalize_extracted_text over the merged text lets the
# spaced-letter collapse (and the other fragment fixes) fire on it.
#
# Each result is produced with Hash#merge, so it is a copy of the merged
# chunk with only :text replaced.
#
# @return [Object] the mapped result of merge_fragments(@raw_chunks);
#   presumably an Array of Hashes with a :text key — confirm against
#   merge_fragments
def text_chunks
  merge_fragments(@raw_chunks).map do |chunk|
    renormalized = normalize_extracted_text(chunk[:text])
    chunk.merge(text: renormalized)
  end
end