Module: Optimize::Demo::Claude::Serializer

Defined in:
lib/optimize/demo/claude/serializer.rb

Overview

Serialize an IR::Function’s instruction stream into a JSON-ready Array of [opcode_string, *operands] tuples. VALUE/ID operands are resolved through the object table so the output is self-describing (no bare table indices). CALLDATA is expanded to a Hash.

Intended for round-tripping through an LLM (“gag pass”); the inverse is Serializer.deserialize (Task 3).

Defined Under Namespace

Classes: DeserializeError

Constant Summary collapse

OPCODE_OPERAND_TYPES =

opcode_name (Symbol) => Array<operand_type (Symbol)>

Codec::InstructionStream::OPCODE_TO_INFO
.each_with_object({}) { |(_num, (name, ops)), h| h[name] = ops }
.freeze

Class Method Summary collapse

Class Method Details

.deserialize(json, template:, object_table:, strict: false) ⇒ IR::Function

Inverse of serialize. Reconstructs an IR::Function whose instructions mirror json (an Array of [opcode_string, *operands] tuples). All other Function fields are shallow-copied from template — this is the “gag pass” boundary: Claude only gets to rewrite the instruction stream, never the iseq envelope.

Parameters:

  • json (Array<Array>)
  • template (IR::Function)
  • object_table (Codec::ObjectTable)
  • strict (Boolean) (defaults to: false)

    when true, raise on unknown opcodes

Returns:

  • (IR::Function)


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/optimize/demo/claude/serializer.rb', line 73

# Rebuilds an IR::Function from a JSON-style instruction listing.
#
# Only the instruction stream comes from +json+; the returned function is a
# +dup+ of +template+ with its instructions (and, when present, its line
# entries) replaced.
#
# @param json [Array<Array>] one [opcode_string, *operands] tuple per instruction
# @param template [IR::Function] function whose remaining fields are copied via +dup+
# @param object_table [Codec::ObjectTable] forwarded to deserialize_operand
# @param strict [Boolean] when true, an opcode missing from
#   OPCODE_OPERAND_TYPES raises instead of being kept with raw operands
# @return [IR::Function] the duplicated template carrying the rebuilt instructions
# @raise [DeserializeError] when +json+ or one of its tuples is malformed, or
#   (strict mode only) when an opcode is unknown
def deserialize(json, template:, object_table:, strict: false)
  raise DeserializeError, "expected a top-level JSON array, got #{json.class}" unless json.is_a?(Array)

  instructions = json.map.with_index do |entry, position|
    if !entry.is_a?(Array) || entry.empty?
      raise DeserializeError, "instruction #{position}: expected [opcode_string, ...operands], got #{entry.inspect}"
    end

    name, *raw_operands = entry
    unless name.is_a?(String)
      raise DeserializeError, "instruction #{position}: opcode must be a String, got #{name.inspect}"
    end

    opcode = name.to_sym
    slot_types = OPCODE_OPERAND_TYPES[opcode]

    if slot_types.nil?
      raise DeserializeError, "unknown opcode #{name.inspect}" if strict

      # Lax mode: no operand types are known, so the operands pass through untouched.
      next IR::Instruction.new(opcode: opcode, operands: raw_operands, line: nil)
    end

    decoded = raw_operands.map.with_index do |raw, slot|
      deserialize_operand(slot_types[slot], raw, object_table)
    end
    IR::Instruction.new(opcode: opcode, operands: decoded, line: nil)
  end

  # The whole body is swapped out at once. splice_instructions! is not
  # usable here: its branch-target sanity check expects replacement offsets
  # expressed against the PRE-splice array, whereas rewritten streams carry
  # targets in the NEW array's coordinates. The one splice side-effect that
  # matters — repointing line_entries — is therefore done manually. Every
  # `inst` reference held by the template names an instruction that is gone,
  # so each copied entry is anchored at the first new instruction; the codec
  # needs at least one valid `inst` ref per line entry during encode.
  result = template.dup
  result.instructions = instructions

  previous_entries = template.line_entries
  if previous_entries
    first_instruction = instructions.first
    result.line_entries =
      if first_instruction.nil?
        []
      else
        previous_entries.map { |entry| entry.dup.tap { |copy| copy.inst = first_instruction } }
      end
  end

  result.invalidate_cfg if result.respond_to?(:invalidate_cfg)
  result
end

.deserialize_operand(op_type, operand, object_table) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/optimize/demo/claude/serializer.rb', line 131

# Rebuilds a single operand from its JSON-ready form, dispatching on the
# operand type declared for its slot. Inverse of serialize_operand.
#
# @param op_type [Symbol, nil] operand type for this slot (e.g. :VALUE, :CALLDATA)
# @param operand [Object] the JSON-decoded operand
# @param object_table [#intern] used to intern :VALUE/:ID operands and the
#   CALLDATA method name
# @return [Object] the rebuilt operand:
#   - :VALUE / :ID — whatever intern_value returns
#   - :CALLDATA — an IR::CallData (kw_indices is always set to [])
#   - :BUILTIN — [idx, name_bytes.bytesize, name_bytes]
#   - the remaining known types — +operand+ unchanged
# @raise [DeserializeError] when the operand is malformed for its type or
#   +op_type+ is not one of the handled types
def deserialize_operand(op_type, operand, object_table)
  case op_type
  when :VALUE, :ID
    intern_value(operand, object_table)
  when :CALLDATA
    unless operand.is_a?(Hash)
      raise DeserializeError, "expected Hash for CALLDATA, got #{operand.inspect}"
    end
    mid_str = operand["mid"]
    unless mid_str.is_a?(String)
      raise DeserializeError, "CALLDATA 'mid' must be a String, got #{mid_str.inspect}"
    end
    mid_idx =
      begin
        object_table.intern(mid_str.to_sym)
      rescue ArgumentError => e
        raise DeserializeError, "cannot intern calldata mid #{mid_str.inspect}: #{e.message}"
      end
    IR::CallData.new(
      mid_idx: mid_idx,
      flag: operand["flag"],
      argc: operand["argc"],
      # A missing "kwlen" key is treated as zero keyword arguments.
      kwlen: operand["kwlen"] || 0,
      kw_indices: [],
    )
  when :OFFSET, :LINDEX, :NUM, :ISE, :IVC, :ICVARC, :IC, :CDHASH, :ISEQ
    # These operand kinds are carried through verbatim.
    operand
  when :BUILTIN
    unless operand.is_a?(Array) && operand[0] == "__builtin__"
      raise DeserializeError, "expected [\"__builtin__\", idx, name_bytes] for BUILTIN, got #{operand.inspect}"
    end
    _tag, idx, name_bytes = operand
    # Without this check a short or malformed tuple would surface as a
    # NoMethodError from +bytesize+ instead of a DeserializeError.
    unless name_bytes.is_a?(String)
      raise DeserializeError, "BUILTIN name_bytes must be a String, got #{name_bytes.inspect}"
    end
    # The length slot is recomputed from the bytes rather than read from input.
    [idx, name_bytes.bytesize, name_bytes]
  else
    raise DeserializeError, "unknown operand type #{op_type.inspect}"
  end
end

.intern_value(value, object_table) ⇒ Object



169
170
171
172
173
174
175
176
177
178
179
# File 'lib/optimize/demo/claude/serializer.rb', line 169

# Interns a JSON-decoded VALUE/ID operand through the object table.
#
# @param value [Integer, true, false, nil, String] the decoded operand
# @param object_table [#intern] table the value is interned into
# @return [Object] whatever +object_table.intern+ returns
# @raise [DeserializeError] when +value+ is of any other class, or when the
#   table's +intern+ raises ArgumentError
def intern_value(value, object_table)
  accepted = [Integer, TrueClass, FalseClass, NilClass, String].any? { |klass| klass === value }
  return object_table.intern(value) if accepted

  raise DeserializeError,
    "unsupported VALUE/ID operand #{value.inspect}; only Integer/true/false/nil/String are internable in v1"
rescue ArgumentError => error
  raise DeserializeError, "cannot intern value #{value.inspect}: #{error.message}"
end

.serialize(function, object_table:) ⇒ Array<Array>

Returns an Array in which each entry is [opcode_string, *operands].

Parameters:

Returns:

  • (Array<Array>)

    each entry is [opcode_string, *operands]



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/optimize/demo/claude/serializer.rb', line 29

# Turns a function's instruction list into JSON-ready tuples.
#
# @param function [#instructions] function whose instructions are serialized
# @param object_table [Object] forwarded to serialize_operand for every operand
# @return [Array<Array>] one [opcode_string, *operands] tuple per instruction
# @raise [ArgumentError] when an instruction's opcode has no entry in
#   OPCODE_OPERAND_TYPES
def serialize(function, object_table:)
  function.instructions.map do |insn|
    opcode = insn.opcode
    raise ArgumentError, "unknown opcode #{opcode.inspect}" unless OPCODE_OPERAND_TYPES.key?(opcode)

    slot_types = OPCODE_OPERAND_TYPES[opcode]
    # Each operand is paired with the type declared for its position.
    encoded = insn.operands.map.with_index do |raw, slot|
      serialize_operand(slot_types[slot], raw, object_table)
    end
    [opcode.to_s].concat(encoded)
  end
end

.serialize_operand(op_type, operand, object_table) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/optimize/demo/claude/serializer.rb', line 41

# Converts one instruction operand into its JSON-ready form, dispatching on
# the operand type declared for its slot.
#
# @param op_type [Symbol, nil] operand type for this slot (e.g. :VALUE, :CALLDATA)
# @param operand [Object] the operand as stored on the instruction
# @param object_table [#resolve] resolves :VALUE/:ID operands and is handed to
#   +mid_symbol+ for :CALLDATA operands
# @return [Object] the serialized operand:
#   - :VALUE / :ID — the resolved object passed through serialize_value
#   - :CALLDATA — a Hash with "mid", "argc", "flag" and "kwlen" keys
#   - :BUILTIN — ["__builtin__", idx, name_bytes]
#   - the remaining known types — +operand+ unchanged
# @raise [ArgumentError] when +op_type+ is not one of the handled types
def serialize_operand(op_type, operand, object_table)
  # These operand kinds are emitted verbatim.
  verbatim = %i[OFFSET LINDEX NUM ISE IVC ICVARC IC CDHASH ISEQ]
  return operand if verbatim.include?(op_type)

  if %i[VALUE ID].include?(op_type)
    resolved = object_table.resolve(operand)
    serialize_value(resolved)
  elsif op_type == :CALLDATA
    method_name = operand.mid_symbol(object_table).to_s
    { "mid" => method_name, "argc" => operand.argc, "flag" => operand.flag, "kwlen" => operand.kwlen }
  elsif op_type == :BUILTIN
    # The middle (length) element is dropped from the serialized form.
    builtin_index, _length, bytes = operand
    ["__builtin__", builtin_index, bytes]
  else
    raise ArgumentError, "unknown operand type #{op_type.inspect}"
  end
end

.serialize_value(value) ⇒ Object



181
182
183
184
185
186
187
188
189
190
# File 'lib/optimize/demo/claude/serializer.rb', line 181

# Maps a resolved object-table value to something JSON can carry.
#
# @param value [Object] the resolved value
# @return [Object] +value+ itself for Integer/true/false/nil/String, the
#   name string for a Symbol, and +value.inspect+ for anything else
def serialize_value(value)
  json_native = [Integer, TrueClass, FalseClass, NilClass, String].any? { |klass| klass === value }
  return value if json_native

  value.is_a?(Symbol) ? value.to_s : value.inspect
end