Class: MsgExtractor::Rtf::Decapsulator
- Inherits:
- Object
- Inheritance chain: Object → MsgExtractor::Rtf::Decapsulator
- Defined in:
- lib/msg_extractor/rtf/decapsulator.rb
Overview
Extracts HTML encapsulated in RTF ([MS-OXRTFEX]). Targets well-formed Outlook-generated RTF; returns nil on anything it cannot handle.
Constant Summary collapse
- SKIP_DESTINATIONS =
%w[ fonttbl colortbl stylesheet info generator pntext listtable listoverridetable themedata colorschememapping datastore latentstyles xmlnstbl rsidtbl pgptbl background pict object header footer footnote ].freeze
Class Method Summary collapse
- .html_from(rtf) ⇒ Object
Instance Method Summary collapse
-
#initialize(rtf) ⇒ Decapsulator
constructor
A new instance of Decapsulator.
- #run ⇒ Object
Constructor Details
#initialize(rtf) ⇒ Decapsulator
Returns a new instance of Decapsulator.
20 21 22 23 24 25 26 27 28 29 |
# File 'lib/msg_extractor/rtf/decapsulator.rb', line 20
#
# Sets up a decapsulator over a binary copy of +rtf+ with the cursor at the
# first byte, empty output/pending buffers and an empty group stack.
#
# @param rtf [String] the RTF source; it is copied with String#b, so the
#   caller's string is left untouched.
def initialize(rtf)
  @rtf = rtf.b
  @pos = 0

  # Accumulates decoded UTF-8 text.
  @out = String.new(encoding: Encoding::UTF_8)
  # Raw codepage bytes not yet decoded.
  @pending = String.new(encoding: Encoding::BINARY)
  @codepage = 1252

  # Group-scoped state, saved on "{" and restored on "}".
  @stack = []
  @state = { suppress: false, destination: :normal, uc: 1 }
end
Class Method Details
.html_from(rtf) ⇒ Object
12 13 14 15 16 17 18 |
# File 'lib/msg_extractor/rtf/decapsulator.rb', line 12
#
# Convenience entry point: builds a decapsulator for +rtf+ and runs it, but
# only when the "\fromhtml1" control word occurs within the first 512 bytes.
#
# @param rtf [String, nil] RTF source to inspect.
# @return [Object, nil] the result of #run, or nil when +rtf+ is nil/false,
#   when the marker is not found in the leading 512 bytes, or when any
#   StandardError is raised along the way.
def self.html_from(rtf)
  if rtf
    header = rtf.byteslice(0, 512).to_s
    new(rtf).run if header.include?("\\fromhtml1")
  end
rescue StandardError
  # Malformed RTF: html_body falls back to nil rather than raising.
  nil
end
Instance Method Details
#run ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/msg_extractor/rtf/decapsulator.rb', line 31
#
# Walks the RTF input one byte at a time, dispatching on the byte under the
# cursor, then hands over to #finalize.
#
# @return [Object] whatever #finalize returns.
def run
  until @pos >= @rtf.bytesize
    current = @rtf.getbyte(@pos)

    # A backslash is handed to handle_control with the cursor still on it;
    # advancing @pos is left to that helper.
    if current == 0x5C
      handle_control
      next
    end

    # Every other byte is consumed here, before it is acted upon.
    @pos += 1

    case current
    when 0x7B # {
      @stack.push(@state.dup)
      handle_group_start
    when 0x7D # }
      # An unmatched "}" pops nothing, so the current state is kept.
      saved = @stack.pop
      @state = saved if saved
    when 0x0A, 0x0D
      # Bare CR/LF are not document text in RTF.
    else
      emit(current.chr)
    end
  end

  finalize
end