Module: GPT2FFI

Defined in:
lib/toy/llm/engine/gpt2_fwd_engine.rb

Class Method Summary collapse

Class Method Details

.forward(fwd_cache, token_ids) ⇒ Object

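Runs one forward pass. token_ids must be an Array<Int> already padded to length t_seq. Returns the (t_seq, vocab) logits Mat. ggml’s mul_mat result has ne=[vocab, t_seq], which, read as row-major with rows=t_seq and cols=vocab, is exactly the layout Mat#flat[t*vocab + v] expects. If tnn_compute returns a non-zero status, the failure is printed to stdout and the logits are still downloaded and returned.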



351
352
353
354
355
356
357
358
# File 'lib/toy/llm/engine/gpt2_fwd_engine.rb', line 351

# Runs one forward pass on the session held by +fwd_cache+.
#
# Steps: upload +token_ids+ into the cache's token-id tensor, run the
# compute step, then download the logits tensor with t_seq rows and
# vocab_size columns.
#
# A non-zero status from TinyNN.tnn_compute is only reported on stdout; the
# download is still performed and its result is still returned.
#
# @param fwd_cache [Object] cache exposing sess, t_token_ids, t_logits,
#   t_seq and vocab_size
# @param token_ids [Array<Integer>] token ids (presumably already padded to
#   t_seq by the caller; this method does not check the length)
# @return [Object] whatever TinyNN.download_row_major returns
def self.forward(fwd_cache, token_ids)
  session = fwd_cache.sess
  TinyNN.upload_int_array(session, fwd_cache.t_token_ids, token_ids)

  status = TinyNN.tnn_compute(session)
  # Best-effort reporting only: execution deliberately continues on failure.
  puts "tnn_compute failed: rc=" + status.to_s unless status == 0

  TinyNN.download_row_major(session, fwd_cache.t_logits, fwd_cache.t_seq, fwd_cache.vocab_size)
end

.make_pos_slice(model, t_seq) ⇒ Object

Build the (t_seq, d_model) pos_slice that pairs with token_ids padded to t_seq. Slice rows 0..t_seq-1 of model.pos_embed.



320
321
322
323
324
325
326
327
328
329
# File 'lib/toy/llm/engine/gpt2_fwd_engine.rb', line 320

# Builds a fresh Mat with t_seq rows and model.d_model columns, filled with
# the first t_seq * d_model entries of model.pos_embed.flat.
#
# The copy is element-by-element over the flat buffers, so the result does
# not share storage with model.pos_embed.
#
# @param model [Object] model exposing d_model and pos_embed (with #flat)
# @param t_seq [Integer] number of leading positions to copy
# @return [Mat] the newly allocated slice
def self.make_pos_slice(model, t_seq)
  slice = Mat.new(t_seq, model.d_model)
  source = model.pos_embed.flat
  target = slice.flat
  (t_seq * model.d_model).times { |idx| target[idx] = source[idx] }
  slice
end

.pad_ids(ids, t_seq) ⇒ Object

Pad an Array<Int> of token IDs to length t_seq with zeros (the “<unk>” / EOS-style fallback). Inputs longer than t_seq are truncated to their first t_seq entries. Returns a new Array.



333
334
335
336
337
338
339
340
341
342
343
344
345
# File 'lib/toy/llm/engine/gpt2_fwd_engine.rb', line 333

# Returns a new Array of exactly t_seq entries: the leading elements of
# +ids+ followed by zero padding.
#
# When +ids+ is longer than t_seq only its first t_seq elements are kept.
# The input array is never modified.
#
# @param ids [Array<Integer>] token ids to pad or truncate
# @param t_seq [Integer] required output length
# @return [Array<Integer>] padded (or truncated) copy of +ids+
def self.pad_ids(ids, t_seq)
  padded = Array.new(t_seq, 0)
  # Copy only as many ids as fit; the remaining slots keep their 0 fill.
  [ids.length, t_seq].min.times { |pos| padded[pos] = ids[pos] }
  padded
end

.upload_from(fwd_cache, model, pos_slice_mat) ⇒ Object

Upload all weights from a populated GPT2LM into a freshly-realized GPT2FullForwardFFICache. Transposed-upload for the per-head Q/K/V and for w_o/w_ff1/w_ff2; row-major bulk for token_embed/pos_slice; direct 1-D upload for biases and LayerNorm params.



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'lib/toy/llm/engine/gpt2_fwd_engine.rb', line 274

# Uploads every weight of +model+ into the tensors held by +fwd_cache+.
#
# Upload kinds used, in the order the calls are made:
# * TinyNN.upload_row_major for token_embed and the position slice,
# * TinyNN.tnn_upload_from_float_array for LayerNorm parameters and biases
#   (with the element count taken from d_model, d_head or d_ff),
# * TinyNN.stage_transposed_and_upload for the per-head Q/K/V matrices and
#   for w_o, w_ff1 and w_ff2.
#
# @param fwd_cache [Object] cache exposing sess, n_layers, n_heads, d_model,
#   d_head, d_ff, the top-level tensor handles and gpt2_blocks_ffi
# @param model [Object] model exposing token_embed, ln_f_gamma, ln_f_beta
#   and gpt2_blocks
# @param pos_slice_mat [Object] matrix uploaded into fwd_cache.t_pos_slice
# @return [nil]
def self.upload_from(fwd_cache, model, pos_slice_mat)
  session     = fwd_cache.sess
  layer_count = fwd_cache.n_layers
  head_count  = fwd_cache.n_heads
  width       = fwd_cache.d_model

  # Embeddings and the final LayerNorm.
  TinyNN.upload_row_major(session, fwd_cache.t_token_embed, model.token_embed)
  TinyNN.upload_row_major(session, fwd_cache.t_pos_slice, pos_slice_mat)
  TinyNN.tnn_upload_from_float_array(session, fwd_cache.t_ln_f_gamma, model.ln_f_gamma, width)
  TinyNN.tnn_upload_from_float_array(session, fwd_cache.t_ln_f_beta, model.ln_f_beta, width)

  layer_count.times do |layer|
    src = model.gpt2_blocks[layer]
    dst = fwd_cache.gpt2_blocks_ffi[layer]

    # Both LayerNorms of this block.
    TinyNN.tnn_upload_from_float_array(session, dst.t_ln1_gamma, src.ln1_gamma, width)
    TinyNN.tnn_upload_from_float_array(session, dst.t_ln1_beta, src.ln1_beta, width)
    TinyNN.tnn_upload_from_float_array(session, dst.t_ln2_gamma, src.ln2_gamma, width)
    TinyNN.tnn_upload_from_float_array(session, dst.t_ln2_beta, src.ln2_beta, width)

    # Per-head attention projections: three weight matrices, then three biases.
    head_width = fwd_cache.d_head
    head_count.times do |head|
      TinyNN.stage_transposed_and_upload(session, dst.t_w_q[head], src.w_q[head])
      TinyNN.stage_transposed_and_upload(session, dst.t_w_k[head], src.w_k[head])
      TinyNN.stage_transposed_and_upload(session, dst.t_w_v[head], src.w_v[head])
      TinyNN.tnn_upload_from_float_array(session, dst.t_b_q[head], src.b_q[head], head_width)
      TinyNN.tnn_upload_from_float_array(session, dst.t_b_k[head], src.b_k[head], head_width)
      TinyNN.tnn_upload_from_float_array(session, dst.t_b_v[head], src.b_v[head], head_width)
    end

    # Attention output projection and the feed-forward pair.
    TinyNN.stage_transposed_and_upload(session, dst.t_w_o, src.w_o)
    TinyNN.stage_transposed_and_upload(session, dst.t_w_ff1, src.w_ff1)
    TinyNN.stage_transposed_and_upload(session, dst.t_w_ff2, src.w_ff2)
    TinyNN.tnn_upload_from_float_array(session, dst.t_b_o, src.b_o, width)
    TinyNN.tnn_upload_from_float_array(session, dst.t_b_ff1, src.b_ff1, fwd_cache.d_ff)
    TinyNN.tnn_upload_from_float_array(session, dst.t_b_ff2, src.b_ff2, width)
  end

  # The method has no meaningful result; return nil explicitly.
  nil
end