Module: MsgExtractor::Rtf::CompressedRtf

Defined in:
lib/msg_extractor/rtf/compressed_rtf.rb

Overview

LZFu decompression for PR_RTF_COMPRESSED per [MS-OXRTFCP].

Constant Summary collapse

MAGIC_COMPRESSED =

“LZFu”

0x75465A4C
MAGIC_UNCOMPRESSED =

“MELA”

0x414C454D
INITIAL_DICTIONARY =

The fixed 207-byte initial dictionary defined by the spec.

# Adjacent string literals are joined into a single string; `\r\n` is an
# embedded CR LF pair and every `\\` is one literal backslash. `.b` returns a
# binary (ASCII-8BIT) copy, and `.freeze` keeps the shared constant immutable,
# so callers must `dup` it before writing into it.
("{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
"{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
"\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
"{\\colortbl\\red0\\green0\\blue0\r\n\\par " \
"\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx").b.freeze
CRC_TABLE =

CRC32 table (polynomial 0xEDB88320), init 0, no final XOR — per spec.

# Builds the 256-entry lookup table: entry i is byte value i after eight
# shift-right steps, XOR-ing in 0xEDB88320 whenever the bit shifted out is 1.
(0...256).map { |i|
  crc = i
  # One iteration per bit, least-significant bit first.
  8.times { crc = crc.odd? ? (0xEDB88320 ^ (crc >> 1)) : (crc >> 1) }
  crc
}.freeze

Class Method Summary collapse

Class Method Details

.crc32(bytes) ⇒ Object



25
26
27
# File 'lib/msg_extractor/rtf/compressed_rtf.rb', line 25

# Computes the table-driven CRC-32 of a byte string, starting from 0 and
# applying no final XOR.
#
# @param bytes [String] data to checksum; processed byte by byte
# @return [Integer] the checksum (0 for an empty string)
def crc32(bytes)
  checksum = 0
  bytes.each_byte do |octet|
    # The low byte of (checksum ^ octet) selects the table entry; the
    # remaining high bits of the checksum are shifted down and folded in.
    checksum = (checksum >> 8) ^ CRC_TABLE[(checksum ^ octet) & 0xFF]
  end
  checksum
end

.decompress(data) ⇒ Object

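Raises:

CorruptFileError — if the data is nil or shorter than the 16-byte header, the magic is neither LZFu nor MELA, a MELA body is shorter than its declared raw size, the LZFu compressed size is smaller than 12, or the payload CRC does not match.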



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/msg_extractor/rtf/compressed_rtf.rb', line 29

# Decodes a compressed-RTF property value into raw RTF bytes.
#
# The value begins with a 16-byte header of four little-endian 32-bit fields:
# compressed size, raw size, magic and CRC. A MELA magic means the body is
# stored uncompressed and the first +raw_size+ bytes after the header are
# returned as-is. An LZFu magic means the body is CRC-checked and then
# expanded with {lzfu}.
#
# @param data [String, nil] the complete property value, header included
# @return [String] the RTF bytes
# @raise [CorruptFileError] if the data is nil or shorter than the header,
#   the magic is unknown, a MELA body is shorter than +raw_size+, the LZFu
#   compressed size is below 12, or the payload CRC does not match
def decompress(data)
  if data.nil? || data.bytesize < 16
    raise CorruptFileError, "compressed RTF too short"
  end

  comp_size, raw_size, magic, crc = data.unpack("VVVV")

  if magic == MAGIC_UNCOMPRESSED
    body_bytes = data.bytesize - 16
    raise CorruptFileError, "MELA RTF truncated" if body_bytes < raw_size
    return data.byteslice(16, raw_size)
  end

  unless magic == MAGIC_COMPRESSED
    raise CorruptFileError, format("bad compressed RTF magic 0x%08x", magic)
  end

  # The payload length is comp_size minus 12, so anything smaller than 12
  # cannot describe a valid payload.
  raise CorruptFileError, "compressed RTF truncated header" if comp_size < 12

  # byteslice returns fewer bytes than requested when data is short; such a
  # payload is not rejected here but is left to the CRC comparison below.
  payload = data.byteslice(16, comp_size - 12)
  raise CorruptFileError, "compressed RTF CRC mismatch" if crc32(payload) != crc

  lzfu(payload)
end

.lzfu(payload) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/msg_extractor/rtf/compressed_rtf.rb', line 48

# Expands an LZFu token stream into raw bytes.
#
# The stream is a series of runs: one control byte followed by up to eight
# tokens. Bit n of the control byte (least-significant bit first) selects the
# kind of token n: 0 is a single literal byte, 1 is a two-byte big-endian
# dictionary reference holding a 12-bit offset and a 4-bit length stored as
# length - 2. Every byte produced is also written into a 4096-byte circular
# dictionary that starts out holding INITIAL_DICTIONARY. A reference whose
# offset equals the current write position marks the end of the stream.
#
# Decoding is lenient: if the payload runs out part-way through a run, the
# bytes decoded so far are returned rather than raising.
#
# @param payload [String] the LZFu token stream
# @return [String] the decompressed bytes as a binary string
def lzfu(payload)
  wrap = 0xFFF # 4096-byte window, so positions wrap with a 12-bit mask
  window = INITIAL_DICTIONARY + ("\0".b * (4096 - INITIAL_DICTIONARY.bytesize))
  cursor = INITIAL_DICTIONARY.bytesize # next dictionary slot to write (207)
  result = +"".b
  total = payload.bytesize
  index = 0

  until index >= total
    flags = payload.getbyte(index)
    index += 1

    (0..7).each do |slot|
      if flags[slot].zero?
        # Literal token: copy one byte straight through.
        literal = payload.getbyte(index)
        return result if literal.nil?

        index += 1
        result << literal
        window.setbyte(cursor, literal)
        cursor = (cursor + 1) & wrap
        next
      end

      # Reference token: two bytes, big-endian.
      hi = payload.getbyte(index)
      lo = payload.getbyte(index + 1)
      return result if hi.nil? || lo.nil?

      index += 2
      source, nibble = ((hi << 8) | lo).divmod(16)
      return result if source == cursor # end-of-stream marker

      # Copy byte by byte so a reference may overlap the bytes it is writing.
      (nibble + 2).times do
        copied = window.getbyte(source)
        source = (source + 1) & wrap
        result << copied
        window.setbyte(cursor, copied)
        cursor = (cursor + 1) & wrap
      end
    end
  end

  result
end