Module: Optimize::Codec

Defined in:
lib/optimize/codec.rb,
lib/optimize/codec/header.rb,
lib/optimize/codec/iseq_list.rb,
lib/optimize/codec/line_info.rb,
lib/optimize/codec/stack_max.rb,
lib/optimize/codec/ci_entries.rb,
lib/optimize/codec/catch_table.rb,
lib/optimize/codec/local_table.rb,
lib/optimize/codec/object_table.rb,
lib/optimize/codec/arg_positions.rb,
lib/optimize/codec/binary_reader.rb,
lib/optimize/codec/binary_writer.rb,
lib/optimize/codec/iseq_envelope.rb,
lib/optimize/codec/instruction_stream.rb

Defined Under Namespace

Modules: ArgPositions, CatchTable, CiEntries, InstructionStream, IseqEnvelope, LineInfo, LocalTable, StackMax

Classes: BinaryReader, BinaryWriter, EncoderSizeChange, Header, IseqList, MalformedBinary, ObjectTable, UnmatchedIBFVersion, UnsupportedObjectKind, UnsupportedOpcode

Class Method Summary collapse

Class Method Details

.decode(binary) ⇒ IR::Function

Decodes a YARB binary blob (from RubyVM::InstructionSequence#to_binary) into an IR::Function tree.

The returned IR::Function is a synthetic “root container” whose children are the iseq-list functions. For binaries with a single top-level iseq, root.children contains just that iseq, and its children are the nested iseqs.

Parameters:

  • binary (String)

    raw YARB binary; any string encoding is accepted because the bytes are re-tagged as ASCII-8BIT (BINARY) before decoding

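Returns:

  • (IR::Function)

    synthetic root container; its children are the decoded iseq-list functions and its misc hash carries the decode state (header, object table, iseq list, raw binary)
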

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/optimize/codec.rb', line 87

# Decodes a YARB binary string into a synthetic root IR::Function.
#
# @param binary [String] raw YARB bytes; a binary-encoded copy is taken,
#   so the caller's string is left untouched
# @return [IR::Function] root container whose children come from the
#   decoded iseq list and whose misc hash holds the decode state
def self.decode(binary)
  # String#b yields an ASCII-8BIT copy, so every later step works on bytes.
  bytes = binary.b

  # Only the header is read through a sequential reader.
  header = Header.decode(BinaryReader.new(bytes))

  # The object table and the iseq list both receive the complete byte
  # string together with the header; the iseq list also needs the
  # already-decoded object table.
  objects = ObjectTable.decode(bytes, header)
  iseqs   = IseqList.decode(bytes, header, objects)

  # Everything encode needs later travels along in the root's misc hash.
  decode_state = {
    header:       header,
    object_table: objects,
    iseq_list:    iseqs,
    raw_binary:   bytes,
  }

  # The root is a pure container: it has no code of its own, so all
  # per-iseq fields are nil and only children/misc carry data.
  IR::Function.new(
    name:          "<root>",
    path:          "",
    absolute_path: nil,
    first_lineno:  0,
    type:          :root,
    arg_spec:      {},
    local_table:   nil,
    catch_table:   nil,
    line_info:     nil,
    instructions:  nil,
    children:      iseqs.functions,
    misc:          decode_state
  )
end

.encode(ir) ⇒ String

Encodes an IR::Function (as returned by decode) back into YARB binary form.

Uses byte-identical (identity) encoding for unmodified IR. When IR has been mutated in ways that change bytecode size, the iseq data region grows or shrinks; the header fields size, iseq_list_offset, and global_object_list_offset are therefore rewritten in the output buffer to reflect the fresh layout. If the object table reports appended objects, global_object_list_size is additionally increased by the appended count. All other header fields are unchanged.

Parameters:

  • ir (IR::Function)

    root container as returned by Codec.decode

Returns:

  • (String)

    YARB binary (ASCII-8BIT)



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/optimize/codec.rb', line 132

# Serializes a decoded root IR::Function back into a YARB byte string.
#
# @param ir [IR::Function] root container whose misc hash provides
#   :header, :object_table and :iseq_list
# @return [String] the writer's buffer with layout-dependent header
#   fields overwritten in place
def self.encode(ir)
  state   = ir.misc
  header  = state[:header]
  objects = state[:object_table]
  iseqs   = state[:iseq_list]

  out = BinaryWriter.new

  # The header goes out first with its decoded values; any field that
  # depends on the final layout is overwritten once the layout is known.
  header.encode(out)

  # iseqs.encode returns the absolute offset of the iseq offset array,
  # i.e. the value for the header's iseq_list_offset field.
  iseq_list_offset = iseqs.encode(out)

  # Difference between the new and the decoded position of the iseq list;
  # the object table is told to shift by the same amount.
  shift = iseq_list_offset - header.iseq_list_offset

  object_list_offset = objects.encode(out, iseq_list_delta: shift)
  total_size = out.pos

  # When the object table does not report an offset, fall back to the
  # decoded offset moved by the iseq region's shift.
  object_list_offset ||= header.global_object_list_offset + shift

  bytes = out.buffer
  # Overwrites a 4-byte field with a little-endian unsigned 32-bit value.
  patch_u32 = ->(offset, value) { bytes[offset, 4] = [value].pack("V") }

  # Header byte offsets: size@12, iseq_list_offset@28,
  # global_object_list_offset@32.
  patch_u32.call(12, total_size)
  patch_u32.call(28, iseq_list_offset)
  patch_u32.call(32, object_list_offset)

  # global_object_list_size@24 is only touched when objects were appended.
  extra = objects.appended_count
  patch_u32.call(24, header.global_object_list_size + extra) if extra.positive?

  bytes
end