Module: GPT2FFIMetal

Defined in:
lib/toy/llm/engine/gpt2_fwd_engine_metal.rb

Class Method Summary collapse

Class Method Details

.forward(fwd_cache, token_ids) ⇒ Object

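Run the forward pass. token_ids is a length-t_seq padded Array<Int>. Returns the (t_seq, vocab) logits Mat. ggml’s mul_mat result has ne=[vocab, t_seq], which, read as row-major with rows=t_seq and cols=vocab, is exactly the layout Mat#flat[t*vocab + v] expects. A non-zero return code from tnn_compute is only printed ("tnn_compute failed: rc=..."); the logits are still downloaded and returned.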



353
354
355
356
357
358
359
360
# File 'lib/toy/llm/engine/gpt2_fwd_engine_metal.rb', line 353

# Runs one forward pass through the session held by +fwd_cache+.
#
# Uploads +token_ids+ into the cache's token-id tensor, calls
# TinyNNMetal.tnn_compute, then downloads the logits tensor as a
# t_seq x vocab_size row-major result.
#
# A non-zero return code from tnn_compute is only printed to stdout; the
# download still runs and its result is still returned.
#
# @param fwd_cache [Object] cache exposing sess, t_token_ids, t_logits,
#   t_seq and vocab_size
# @param token_ids [Array<Integer>] token IDs, padded to length t_seq
# @return [Object] whatever TinyNNMetal.download_row_major returns
#   (documented as the (t_seq, vocab) logits Mat)
def self.forward(fwd_cache, token_ids)
  session = fwd_cache.sess

  TinyNNMetal.upload_int_array(session, fwd_cache.t_token_ids, token_ids)

  status = TinyNNMetal.tnn_compute(session)
  puts "tnn_compute failed: rc=" + status.to_s unless status == 0

  # Last expression: the downloaded logits are the method's return value.
  TinyNNMetal.download_row_major(
    session,
    fwd_cache.t_logits,
    fwd_cache.t_seq,
    fwd_cache.vocab_size
  )
end

.make_pos_slice(model, t_seq) ⇒ Object

Build the (t_seq, d_model) pos_slice that pairs with token_ids padded to t_seq. Slice rows 0..t_seq-1 of model.pos_embed.



322
323
324
325
326
327
328
329
330
331
# File 'lib/toy/llm/engine/gpt2_fwd_engine_metal.rb', line 322

# Builds a fresh t_seq x d_model Mat holding the leading entries of the
# model's positional-embedding table.
#
# Copies the first t_seq * d_model elements of model.pos_embed.flat into
# the new Mat's flat storage, element by element.
#
# NOTE(review): there is no bounds check here; this presumably relies on
# model.pos_embed being row-major with d_model columns and at least t_seq
# rows. Confirm against callers.
#
# @param model [Object] model exposing d_model and pos_embed (with #flat)
# @param t_seq [Integer] number of leading positions to copy
# @return [Mat] the newly allocated slice
def self.make_pos_slice(model, t_seq)
  slice = Mat.new(t_seq, model.d_model)

  (t_seq * model.d_model).times do |idx|
    slice.flat[idx] = model.pos_embed.flat[idx]
  end

  slice
end

.pad_ids(ids, t_seq) ⇒ Object

Pad an Array<Int> of token IDs to length t_seq with zeros (the “<unk>” / EOS-style fallback). Input longer than t_seq is truncated to its first t_seq entries. Returns a new Array; the input is not modified.



335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/toy/llm/engine/gpt2_fwd_engine_metal.rb', line 335

# Returns a new Array of exactly t_seq token IDs built from +ids+.
#
# Positions covered by +ids+ keep their value; any remaining positions
# are filled with 0. When +ids+ is longer than t_seq, only its first
# t_seq entries are kept. +ids+ itself is not modified.
#
# @param ids [Array<Integer>] token IDs to pad or truncate
# @param t_seq [Integer] length of the returned Array
# @return [Array<Integer>] a new Array of length t_seq
def self.pad_ids(ids, t_seq)
  given = ids.length
  Array.new(t_seq) { |pos| pos < given ? ids[pos] : 0 }
end

.upload_from(fwd_cache, model, pos_slice_mat) ⇒ Object

Upload all weights from a populated GPT2LM into a freshly-realized GPT2FullForwardFFICacheMetal. Transposed-upload for the per-head Q/K/V and for w_o/w_ff1/w_ff2; row-major bulk for token_embed/pos_slice; direct 1-D upload for biases and LayerNorm params.



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/toy/llm/engine/gpt2_fwd_engine_metal.rb', line 276

# Uploads every weight of +model+ into the tensors held by +fwd_cache+.
#
# Upload order:
#   1. token_embed and pos_slice_mat via upload_row_major, then the final
#      LayerNorm gamma/beta as float arrays of length d_model.
#   2. For each of the n_layers blocks:
#      - ln1/ln2 gamma and beta (length d_model);
#      - per head: w_q, w_k, w_v via stage_transposed_and_upload, then
#        b_q, b_k, b_v (length d_head);
#      - w_o, w_ff1, w_ff2 via stage_transposed_and_upload;
#      - b_o (d_model), b_ff1 (d_ff), b_ff2 (d_model).
#
# All sizes (n_layers, n_heads, d_model, d_head, d_ff) are read from
# +fwd_cache+, not from +model+.
#
# @param fwd_cache [Object] cache exposing sess, the size fields above,
#   the top-level tensor handles and gpt2_blocks_ffi
# @param model [Object] model exposing token_embed, ln_f_gamma,
#   ln_f_beta and gpt2_blocks
# @param pos_slice_mat [Object] matrix uploaded into fwd_cache.t_pos_slice
# @return [nil]
def self.upload_from(fwd_cache, model, pos_slice_mat)
  session     = fwd_cache.sess
  layer_count = fwd_cache.n_layers
  head_count  = fwd_cache.n_heads
  width       = fwd_cache.d_model
  head_width  = fwd_cache.d_head
  ff_width    = fwd_cache.d_ff

  # Embeddings and the final LayerNorm.
  TinyNNMetal.upload_row_major(session, fwd_cache.t_token_embed, model.token_embed)
  TinyNNMetal.upload_row_major(session, fwd_cache.t_pos_slice, pos_slice_mat)
  TinyNNMetal.tnn_upload_from_float_array(session, fwd_cache.t_ln_f_gamma, model.ln_f_gamma, width)
  TinyNNMetal.tnn_upload_from_float_array(session, fwd_cache.t_ln_f_beta, model.ln_f_beta, width)

  layer_count.times do |layer|
    src = model.gpt2_blocks[layer]
    dst = fwd_cache.gpt2_blocks_ffi[layer]

    # Both LayerNorms of this block.
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_ln1_gamma, src.ln1_gamma, width)
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_ln1_beta, src.ln1_beta, width)
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_ln2_gamma, src.ln2_gamma, width)
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_ln2_beta, src.ln2_beta, width)

    # Per-head attention projections: weights first, then biases.
    head_count.times do |head|
      TinyNNMetal.stage_transposed_and_upload(session, dst.t_w_q[head], src.w_q[head])
      TinyNNMetal.stage_transposed_and_upload(session, dst.t_w_k[head], src.w_k[head])
      TinyNNMetal.stage_transposed_and_upload(session, dst.t_w_v[head], src.w_v[head])
      TinyNNMetal.tnn_upload_from_float_array(session, dst.t_b_q[head], src.b_q[head], head_width)
      TinyNNMetal.tnn_upload_from_float_array(session, dst.t_b_k[head], src.b_k[head], head_width)
      TinyNNMetal.tnn_upload_from_float_array(session, dst.t_b_v[head], src.b_v[head], head_width)
    end

    # Attention output projection and the feed-forward layers.
    TinyNNMetal.stage_transposed_and_upload(session, dst.t_w_o, src.w_o)
    TinyNNMetal.stage_transposed_and_upload(session, dst.t_w_ff1, src.w_ff1)
    TinyNNMetal.stage_transposed_and_upload(session, dst.t_w_ff2, src.w_ff2)
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_b_o, src.b_o, width)
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_b_ff1, src.b_ff1, ff_width)
    TinyNNMetal.tnn_upload_from_float_array(session, dst.t_b_ff2, src.b_ff2, width)
  end

  # Integer#times returns its receiver; return nil explicitly instead.
  nil
end