Class: Toy::LLM::Engine::GPT2SeqEngineMetal
- Inherits:
-
Object
- Object
- Toy::LLM::Engine::GPT2SeqEngineMetal
- Defined in:
- lib/toy/llm/engine/gpt2_seq_engine_metal.rb
Constant Summary collapse
- LN_EPS =
1.0e-5
Instance Attribute Summary collapse
-
#g_b_k ⇒ Object
Returns the value of attribute g_b_k.
-
#g_b_o ⇒ Object
Returns the value of attribute g_b_o.
-
#g_b_q ⇒ Object
Returns the value of attribute g_b_q.
-
#g_b_v ⇒ Object
Returns the value of attribute g_b_v.
-
#g_cb_rc ⇒ Object
Returns the value of attribute g_cb_rc.
-
#g_context ⇒ Object
Returns the value of attribute g_context.
-
#g_d_ff ⇒ Object
Returns the value of attribute g_d_ff.
-
#g_d_head ⇒ Object
Returns the value of attribute g_d_head.
-
#g_d_model ⇒ Object
Returns the value of attribute g_d_model.
-
#g_fc_b ⇒ Object
Returns the value of attribute g_fc_b.
-
#g_fc_w ⇒ Object
Returns the value of attribute g_fc_w.
-
#g_ln1_b ⇒ Object
Returns the value of attribute g_ln1_b.
-
#g_ln1_g ⇒ Object
Returns the value of attribute g_ln1_g.
-
#g_ln2_b ⇒ Object
Returns the value of attribute g_ln2_b.
-
#g_ln2_g ⇒ Object
Returns the value of attribute g_ln2_g.
-
#g_lnf_b ⇒ Object
Returns the value of attribute g_lnf_b.
-
#g_lnf_g ⇒ Object
Returns the value of attribute g_lnf_g.
-
#g_n_heads ⇒ Object
Returns the value of attribute g_n_heads.
-
#g_n_layers ⇒ Object
Returns the value of attribute g_n_layers.
-
#g_opt_m ⇒ Object
Returns the value of attribute g_opt_m.
-
#g_opt_v ⇒ Object
Returns the value of attribute g_opt_v.
-
#g_pr_b ⇒ Object
Returns the value of attribute g_pr_b.
-
#g_pr_w ⇒ Object
Returns the value of attribute g_pr_w.
-
#g_rb_rc ⇒ Object
Returns the value of attribute g_rb_rc.
-
#g_rng ⇒ Object
Returns the value of attribute g_rng.
-
#g_t_hp ⇒ Object
Returns the value of attribute g_t_hp.
-
#g_t_labels ⇒ Object
Returns the value of attribute g_t_labels.
-
#g_t_logits ⇒ Object
Returns the value of attribute g_t_logits.
-
#g_t_loss ⇒ Object
Returns the value of attribute g_t_loss.
-
#g_t_pos ⇒ Object
Returns the value of attribute g_t_pos.
-
#g_t_tok ⇒ Object
Returns the value of attribute g_t_tok.
-
#g_vocab ⇒ Object
Returns the value of attribute g_vocab.
-
#g_w_k ⇒ Object
Returns the value of attribute g_w_k.
-
#g_w_o ⇒ Object
Returns the value of attribute g_w_o.
-
#g_w_q ⇒ Object
Returns the value of attribute g_w_q.
-
#g_w_v ⇒ Object
Returns the value of attribute g_w_v.
-
#g_weights ⇒ Object
Returns the value of attribute g_weights.
-
#g_wpe ⇒ Object
Returns the value of attribute g_wpe.
-
#g_wte ⇒ Object
Returns the value of attribute g_wte.
-
#sess ⇒ Object
Returns the value of attribute sess.
Instance Method Summary collapse
- #alloc_w1(inits, n, init_mat) ⇒ Object
-
#alloc_w2(inits, rows, cols, init_mat) ⇒ Object
Allocation only (buffers don’t exist until tnn_finalize_weights); records (weight, m, v) into the optimizer arrays and the init Mat into `inits`.
-
#build_forward! ⇒ Object
GPT-2 forward → @g_t_logits (tied unembed).
-
#build_train_step! ⇒ Object
CE loss + backward + opt_step_adamw per weight.
- #const_mat(rows, cols, value) ⇒ Object
-
#initialize ⇒ GPT2SeqEngineMetal
constructor
A new instance of GPT2SeqEngineMetal.
-
#rand_unit ⇒ Object
seeded LCG → ~[-1,1).
- #random_mat(rows, cols, scale) ⇒ Object
-
#realize!(vocab, d_model, n_heads, d_ff, n_layers, context, seed) ⇒ Object
Build the full random-init training graph.
-
#step!(seq_ids, positions, m_labels, m_hp, is_first) ⇒ Object
One training step.
Constructor Details
#initialize ⇒ GPT2SeqEngineMetal
Returns a new instance of GPT2SeqEngineMetal.
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 52

# Puts the engine into its empty, pre-realize! state: a null session, zeroed
# dimensions, null tensor handles and empty handle lists.
#
# NOTE(review): @g_rb_rc and @g_cb_rc have readers but are not assigned here;
# they are first written by build_train_step! and step! respectively.
def initialize
  # No native session yet.
  @sess = TinyNNMetal.tnn_null_ptr

  # Model dimensions (filled in by realize!).
  @g_vocab = 0
  @g_d_model = 0
  @g_n_heads = 0
  @g_d_head = 0
  @g_d_ff = 0
  @g_n_layers = 0
  @g_context = 0

  # Graph input / output tensor handles.
  @g_t_tok = TinyNNMetal.tnn_null_ptr
  @g_t_pos = TinyNNMetal.tnn_null_ptr
  @g_t_labels = TinyNNMetal.tnn_null_ptr
  @g_t_hp = TinyNNMetal.tnn_null_ptr
  @g_t_loss = TinyNNMetal.tnn_null_ptr
  @g_t_logits = TinyNNMetal.tnn_null_ptr

  # Single (non-list) weight handles.
  @g_wte = TinyNNMetal.tnn_null_ptr
  @g_wpe = TinyNNMetal.tnn_null_ptr
  @g_lnf_g = TinyNNMetal.tnn_null_ptr
  @g_lnf_b = TinyNNMetal.tnn_null_ptr

  # Handle lists. Each is seeded with one null handle that is popped right
  # away, leaving an empty array.
  # NOTE(review): presumably this seed-then-pop pattern fixes the element type
  # for a static compiler -- confirm before replacing it with a bare [].
  @g_ln1_g = [TinyNNMetal.tnn_null_ptr]
  @g_ln1_g.pop
  @g_ln1_b = [TinyNNMetal.tnn_null_ptr]
  @g_ln1_b.pop
  @g_ln2_g = [TinyNNMetal.tnn_null_ptr]
  @g_ln2_g.pop
  @g_ln2_b = [TinyNNMetal.tnn_null_ptr]
  @g_ln2_b.pop

  @g_w_q = [TinyNNMetal.tnn_null_ptr]
  @g_w_q.pop
  @g_b_q = [TinyNNMetal.tnn_null_ptr]
  @g_b_q.pop
  @g_w_k = [TinyNNMetal.tnn_null_ptr]
  @g_w_k.pop
  @g_b_k = [TinyNNMetal.tnn_null_ptr]
  @g_b_k.pop
  @g_w_v = [TinyNNMetal.tnn_null_ptr]
  @g_w_v.pop
  @g_b_v = [TinyNNMetal.tnn_null_ptr]
  @g_b_v.pop
  @g_w_o = [TinyNNMetal.tnn_null_ptr]
  @g_w_o.pop
  @g_b_o = [TinyNNMetal.tnn_null_ptr]
  @g_b_o.pop

  @g_fc_w = [TinyNNMetal.tnn_null_ptr]
  @g_fc_w.pop
  @g_fc_b = [TinyNNMetal.tnn_null_ptr]
  @g_fc_b.pop
  @g_pr_w = [TinyNNMetal.tnn_null_ptr]
  @g_pr_w.pop
  @g_pr_b = [TinyNNMetal.tnn_null_ptr]
  @g_pr_b.pop

  # Parallel lists: every trainable weight and its two optimizer tensors.
  @g_weights = [TinyNNMetal.tnn_null_ptr]
  @g_weights.pop
  @g_opt_m = [TinyNNMetal.tnn_null_ptr]
  @g_opt_m.pop
  @g_opt_v = [TinyNNMetal.tnn_null_ptr]
  @g_opt_v.pop

  # RNG state; realize! overwrites it with the caller's seed.
  @g_rng = 0
end
Instance Attribute Details
#g_b_k ⇒ Object
Returns the value of attribute g_b_k.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_b_k @g_b_k end |
#g_b_o ⇒ Object
Returns the value of attribute g_b_o.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_b_o @g_b_o end |
#g_b_q ⇒ Object
Returns the value of attribute g_b_q.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_b_q @g_b_q end |
#g_b_v ⇒ Object
Returns the value of attribute g_b_v.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_b_v @g_b_v end |
#g_cb_rc ⇒ Object
Returns the value of attribute g_cb_rc.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_cb_rc @g_cb_rc end |
#g_context ⇒ Object
Returns the value of attribute g_context.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_context @g_context end |
#g_d_ff ⇒ Object
Returns the value of attribute g_d_ff.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_d_ff @g_d_ff end |
#g_d_head ⇒ Object
Returns the value of attribute g_d_head.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_d_head @g_d_head end |
#g_d_model ⇒ Object
Returns the value of attribute g_d_model.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_d_model @g_d_model end |
#g_fc_b ⇒ Object
Returns the value of attribute g_fc_b.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_fc_b @g_fc_b end |
#g_fc_w ⇒ Object
Returns the value of attribute g_fc_w.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_fc_w @g_fc_w end |
#g_ln1_b ⇒ Object
Returns the value of attribute g_ln1_b.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_ln1_b @g_ln1_b end |
#g_ln1_g ⇒ Object
Returns the value of attribute g_ln1_g.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_ln1_g @g_ln1_g end |
#g_ln2_b ⇒ Object
Returns the value of attribute g_ln2_b.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_ln2_b @g_ln2_b end |
#g_ln2_g ⇒ Object
Returns the value of attribute g_ln2_g.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_ln2_g @g_ln2_g end |
#g_lnf_b ⇒ Object
Returns the value of attribute g_lnf_b.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_lnf_b @g_lnf_b end |
#g_lnf_g ⇒ Object
Returns the value of attribute g_lnf_g.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_lnf_g @g_lnf_g end |
#g_n_heads ⇒ Object
Returns the value of attribute g_n_heads.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_n_heads @g_n_heads end |
#g_n_layers ⇒ Object
Returns the value of attribute g_n_layers.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_n_layers @g_n_layers end |
#g_opt_m ⇒ Object
Returns the value of attribute g_opt_m.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_opt_m @g_opt_m end |
#g_opt_v ⇒ Object
Returns the value of attribute g_opt_v.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_opt_v @g_opt_v end |
#g_pr_b ⇒ Object
Returns the value of attribute g_pr_b.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_pr_b @g_pr_b end |
#g_pr_w ⇒ Object
Returns the value of attribute g_pr_w.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_pr_w @g_pr_w end |
#g_rb_rc ⇒ Object
Returns the value of attribute g_rb_rc.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_rb_rc @g_rb_rc end |
#g_rng ⇒ Object
Returns the value of attribute g_rng.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_rng @g_rng end |
#g_t_hp ⇒ Object
Returns the value of attribute g_t_hp.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_t_hp @g_t_hp end |
#g_t_labels ⇒ Object
Returns the value of attribute g_t_labels.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_t_labels @g_t_labels end |
#g_t_logits ⇒ Object
Returns the value of attribute g_t_logits.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_t_logits @g_t_logits end |
#g_t_loss ⇒ Object
Returns the value of attribute g_t_loss.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_t_loss @g_t_loss end |
#g_t_pos ⇒ Object
Returns the value of attribute g_t_pos.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_t_pos @g_t_pos end |
#g_t_tok ⇒ Object
Returns the value of attribute g_t_tok.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_t_tok @g_t_tok end |
#g_vocab ⇒ Object
Returns the value of attribute g_vocab.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_vocab @g_vocab end |
#g_w_k ⇒ Object
Returns the value of attribute g_w_k.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_w_k @g_w_k end |
#g_w_o ⇒ Object
Returns the value of attribute g_w_o.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_w_o @g_w_o end |
#g_w_q ⇒ Object
Returns the value of attribute g_w_q.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_w_q @g_w_q end |
#g_w_v ⇒ Object
Returns the value of attribute g_w_v.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_w_v @g_w_v end |
#g_weights ⇒ Object
Returns the value of attribute g_weights.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_weights @g_weights end |
#g_wpe ⇒ Object
Returns the value of attribute g_wpe.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_wpe @g_wpe end |
#g_wte ⇒ Object
Returns the value of attribute g_wte.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def g_wte @g_wte end |
#sess ⇒ Object
Returns the value of attribute sess.
37 38 39 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 37 def sess @sess end |
Instance Method Details
#alloc_w1(inits, n, init_mat) ⇒ Object
122 123 124 125 126 127 128 129 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 122

# Declares a persistent 1-D f32 weight of length +n+ plus two companion
# tensors of the same length, and registers all three.
#
# The weight goes to @g_weights, the companions to @g_opt_m / @g_opt_v, and
# +init_mat+ is appended to +inits+, so the four lists stay index-aligned.
#
# @param inits [Array] list receiving the initial-value Mat for this weight
# @param n [Integer] number of elements
# @param init_mat [Object] initial values, uploaded later by realize!
# @return [Object] the handle of the newly declared weight
def alloc_w1(inits, n, init_mat)
  # Native declarations happen in the order weight, m, v.
  weight = TinyNNMetal.tnn_input_1d_f32_persistent(@sess, n)
  moment_m = TinyNNMetal.tnn_input_1d_f32_persistent(@sess, n)
  moment_v = TinyNNMetal.tnn_input_1d_f32_persistent(@sess, n)

  @g_weights.push(weight)
  @g_opt_m.push(moment_m)
  @g_opt_v.push(moment_v)
  inits.push(init_mat)

  weight
end
#alloc_w2(inits, rows, cols, init_mat) ⇒ Object
Allocation only (buffers don’t exist until tnn_finalize_weights); records (weight, m, v) into the optimizer arrays and the init Mat into `inits`.
113 114 115 116 117 118 119 120 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 113

# Declares a persistent 2-D f32 weight of shape (+rows+, +cols+) plus two
# companion tensors of the same shape, and registers all three.
#
# The weight goes to @g_weights, the companions to @g_opt_m / @g_opt_v, and
# +init_mat+ is appended to +inits+, so the four lists stay index-aligned.
# Nothing is uploaded here; realize! does that after tnn_finalize_weights.
#
# @param inits [Array] list receiving the initial-value Mat for this weight
# @param rows [Integer] first dimension passed to the native allocator
# @param cols [Integer] second dimension passed to the native allocator
# @param init_mat [Object] initial values, uploaded later by realize!
# @return [Object] the handle of the newly declared weight
def alloc_w2(inits, rows, cols, init_mat)
  # Native declarations happen in the order weight, m, v.
  weight = TinyNNMetal.tnn_input_2d_f32_persistent(@sess, rows, cols)
  moment_m = TinyNNMetal.tnn_input_2d_f32_persistent(@sess, rows, cols)
  moment_v = TinyNNMetal.tnn_input_2d_f32_persistent(@sess, rows, cols)

  @g_weights.push(weight)
  @g_opt_m.push(moment_m)
  @g_opt_v.push(moment_v)
  inits.push(init_mat)

  weight
end
#build_forward! ⇒ Object
GPT-2 forward → @g_t_logits (tied unembed).
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 199

# Builds the forward graph and stores the logits tensor in @g_t_logits.
#
# Creates the token and position input tensors (@g_t_tok, @g_t_pos), sums the
# rows gathered from @g_wte and @g_wpe, then for each layer adds an attention
# sub-block and an FFN sub-block onto the running tensor +x+. A final layer
# norm precedes the unembedding, which reuses @g_wte (tied weights).
#
# Expects realize! to have populated the dimensions and weight lists first.
# NOTE(review): operand order / shape conventions of the tnn_* ops are not
# visible from this file -- check the TinyNNMetal binding before reordering
# any matmul arguments.
#
# @return [nil]
def build_forward!
  # Both index inputs hold @g_context entries (one per sequence position).
  @g_t_tok = TinyNNMetal.tnn_input_1d_i32(@sess, @g_context)
  @g_t_pos = TinyNNMetal.tnn_input_1d_i32(@sess, @g_context)
  # Token embedding rows + position embedding rows.
  x = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_get_rows(@sess, @g_wte, @g_t_tok), TinyNNMetal.tnn_get_rows(@sess, @g_wpe, @g_t_pos))
  TinyNNMetal.tnn_set_output(x)
  # 1 / sqrt(d_head), applied to the raw attention scores.
  att_scale = 1.0 / Math.sqrt(@g_d_head.to_f)
  li = 0
  while li < @g_n_layers
    # attention sub-block (per-head loop + concat)
    h1 = TinyNNMetal.tnn_layer_norm(@sess, x, @g_ln1_g[li], @g_ln1_b[li], LN_EPS)
    head_out = TinyNNMetal.tnn_null_ptr
    hh = 0
    while hh < @g_n_heads
      # Per-head Q/K/V weights live in flat lists, layer-major then head
      # (the order in which realize! pushes them).
      hi = li * @g_n_heads + hh
      q = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_matmul(@sess, @g_w_q[hi], h1), @g_b_q[hi])
      k = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_matmul(@sess, @g_w_k[hi], h1), @g_b_k[hi])
      v = TinyNNMetal.tnn_matmul(@sess, @g_w_v[hi], h1) # bias added to output
      scores = TinyNNMetal.tnn_scale(@sess, TinyNNMetal.tnn_matmul(@sess, k, q), att_scale)
      # NOTE(review): presumably the causal mask (n_past = 0) -- confirm.
      scores = TinyNNMetal.tnn_diag_mask_inf(@sess, scores, 0)
      probs = TinyNNMetal.tnn_softmax(@sess, scores)
      v_t = TinyNNMetal.tnn_cont_2d(@sess, TinyNNMetal.tnn_transpose(@sess, v), @g_context, @g_d_head)
      # The V bias is added here, to the head output, instead of to v above.
      head = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_matmul(@sess, v_t, probs), @g_b_v[hi])
      # The first head seeds head_out; later heads are concatenated onto it.
      if hh == 0
        head_out = head
      else
        head_out = TinyNNMetal.tnn_concat(@sess, head_out, head, 0)
      end
      hh = hh + 1
    end
    # Output projection (per-layer weights, indexed by li) and residual add.
    ao = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_matmul(@sess, @g_w_o[li], head_out), @g_b_o[li])
    x = TinyNNMetal.tnn_add(@sess, x, ao)
    TinyNNMetal.tnn_set_output(x)
    # FFN sub-block
    h2 = TinyNNMetal.tnn_layer_norm(@sess, x, @g_ln2_g[li], @g_ln2_b[li], LN_EPS)
    pre = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_matmul(@sess, @g_fc_w[li], h2), @g_fc_b[li])
    act = TinyNNMetal.tnn_gelu(@sess, pre)
    mlp = TinyNNMetal.tnn_add(@sess, TinyNNMetal.tnn_matmul(@sess, @g_pr_w[li], act), @g_pr_b[li])
    # Residual add around the FFN.
    x = TinyNNMetal.tnn_add(@sess, x, mlp)
    TinyNNMetal.tnn_set_output(x)
    li = li + 1
  end
  # Final layer norm, then project back through the token embedding matrix.
  x_final = TinyNNMetal.tnn_layer_norm(@sess, x, @g_lnf_g, @g_lnf_b, LN_EPS)
  TinyNNMetal.tnn_set_output(x_final)
  @g_t_logits = TinyNNMetal.tnn_matmul(@sess, @g_wte, x_final) # tied
  TinyNNMetal.tnn_set_output(@g_t_logits)
  nil
end
#build_train_step! ⇒ Object
CE loss + backward + opt_step_adamw per weight.
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 254

# Extends the forward graph into a full training step.
#
# Adds the label and hyper-parameter inputs, a cross-entropy loss over
# @g_t_logits, the backward pass, and one AdamW update node per entry of
# @g_weights. The result of tnn_realize_backward is kept in @g_rb_rc.
#
# Must run after build_forward! (it reads @g_t_logits).
#
# @return [nil]
def build_train_step!
  # Labels are a (context x vocab) f32 input; the hyper-parameter vector has
  # 7 f32 slots.
  # NOTE(review): the meaning of each of the 7 slots is defined by
  # tnn_opt_step_adamw, not by this file.
  @g_t_labels = TinyNNMetal.tnn_input_2d_f32(@sess, @g_context, @g_vocab)
  @g_t_hp = TinyNNMetal.tnn_input_1d_f32(@sess, 7)

  @g_t_loss = TinyNNMetal.tnn_cross_entropy_loss(@sess, @g_t_logits, @g_t_labels)
  TinyNNMetal.tnn_set_output(@g_t_loss)
  TinyNNMetal.tnn_set_loss(@g_t_loss)

  TinyNNMetal.tnn_build_forward_only(@sess, @g_t_loss)
  TinyNNMetal.tnn_build_backward(@sess)

  # One optimizer step per trainable weight, appended to the backward graph.
  idx = 0
  while idx < @g_weights.length
    weight = @g_weights[idx]
    grad = TinyNNMetal.tnn_tensor_grad(@sess, weight)
    update = TinyNNMetal.tnn_opt_step_adamw(@sess, weight, grad, @g_opt_m[idx], @g_opt_v[idx], @g_t_hp)
    TinyNNMetal.tnn_extend_backward_graph(@sess, update)
    idx = idx + 1
  end

  @g_rb_rc = TinyNNMetal.tnn_realize_backward(@sess)
  nil
end
#const_mat(rows, cols, value) ⇒ Object
100 101 102 103 104 105 106 107 108 109 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 100

# Creates a rows x cols Mat with every element set to +value+.
#
# @param rows [Integer] row count passed to Mat.new
# @param cols [Integer] column count passed to Mat.new
# @param value [Float] value written to each of the rows * cols elements
# @return [Mat] the filled matrix
def const_mat(rows, cols, value)
  mat = Mat.new(rows, cols)
  total = rows * cols
  idx = 0
  while idx < total
    mat.flat[idx] = value
    idx = idx + 1
  end
  mat
end
#rand_unit ⇒ Object
seeded LCG → ~[-1,1)
84 85 86 87 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 84

# Advances the engine's seeded linear congruential generator and maps the new
# state to a Float in [-1.0, 1.0).
#
# The state update (multiplier 1103515245, increment 12345, 31-bit mask) is
# unchanged, so the integer sequence for a given seed is the same as before.
#
# Fix: the top 23 bits of the state were divided by 2**23, which yields
# [0, 1) and, after subtracting 1.0, only ever produced values in [-1, 0).
# Dividing by 2**22 gives [0, 2), so the result covers the documented
# symmetric range [-1, 1).
#
# @return [Float] pseudo-random value v with -1.0 <= v < 1.0
def rand_unit
  @g_rng = ((@g_rng * 1103515245) + 12345) & 0x7fffffff
  # (@g_rng >> 8) is in 0 .. 2**23 - 1; 4194304.0 is 2**22.
  ((@g_rng >> 8).to_f / 4194304.0) - 1.0
end
#random_mat(rows, cols, scale) ⇒ Object
89 90 91 92 93 94 95 96 97 98 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 89

# Creates a rows x cols Mat filled with rand_unit * scale.
#
# Elements are drawn in ascending flat-index order, one rand_unit call each,
# so the call advances @g_rng by rows * cols steps.
#
# @param rows [Integer] row count passed to Mat.new
# @param cols [Integer] column count passed to Mat.new
# @param scale [Float] multiplier applied to every rand_unit draw
# @return [Mat] the filled matrix
def random_mat(rows, cols, scale)
  mat = Mat.new(rows, cols)
  total = rows * cols
  idx = 0
  while idx < total
    mat.flat[idx] = rand_unit * scale
    idx = idx + 1
  end
  mat
end
#realize!(vocab, d_model, n_heads, d_ff, n_layers, context, seed) ⇒ Object
Build the full random-init training graph. Realize ordering is load-bearing (alloc → set_param → finalize_weights → upload → backward → realize_backward); uploading a persistent weight before finalize aborts (“tensor buffer not set”).
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 135

# Creates the session, declares and initializes every weight, then builds the
# forward and training graphs.
#
# Statement order matters: weights are declared first, marked as params,
# finalized, and only then uploaded; the graphs are built last. Random
# initial values are drawn from the seeded RNG in declaration order, so
# reordering the alloc calls changes the initial weights for a given seed.
#
# NOTE(review): d_head is computed with integer division and nothing here
# checks that n_heads > 0 or that d_model is a multiple of n_heads -- confirm
# callers guarantee it.
# NOTE(review): the weight lists are appended to, not reset, and a new session
# is created unconditionally -- looks like this is meant to be called once per
# instance; confirm.
#
# @param vocab [Integer] vocabulary size (rows of the token embedding)
# @param d_model [Integer] model width
# @param n_heads [Integer] attention heads per layer
# @param d_ff [Integer] FFN hidden width
# @param n_layers [Integer] number of transformer layers
# @param context [Integer] sequence length (rows of the position embedding)
# @param seed [Integer] initial state for the rand_unit generator
# @return [nil]
def realize!(vocab, d_model, n_heads, d_ff, n_layers, context, seed)
  @g_vocab = vocab; @g_d_model = d_model; @g_n_heads = n_heads
  @g_d_head = d_model / n_heads; @g_d_ff = d_ff
  @g_n_layers = n_layers; @g_context = context
  @g_rng = seed
  # NOTE(review): the meaning of the argument 2 is defined by tnn_session_new.
  @sess = TinyNNMetal.tnn_session_new(2)
  # Per-head decomposition makes node count scale O(n_layers × n_heads);
  # budget like LlamaSeqEngine (the default cap overflows at backward-expand
  # on bigger shapes). Must precede realize (no compute tensors stored yet).
  TinyNNMetal.tnn_session_set_graph_capacity(@sess, n_layers * n_heads * 1000 + 65536)
  # Starts empty; alloc_w1 / alloc_w2 append one init Mat per weight, keeping
  # inits index-aligned with @g_weights.
  inits = [Mat.new(1, 1)]; inits.pop
  # Embeddings: token (vocab x d_model) and position (context x d_model).
  @g_wte = alloc_w2(inits, vocab, d_model, random_mat(vocab, d_model, 0.02))
  @g_wpe = alloc_w2(inits, context, d_model, random_mat(context, d_model, 0.02))
  li = 0
  while li < n_layers
    # Layer-norm gains start at 1.0 and all biases at 0.0; matrices are random.
    @g_ln1_g.push(alloc_w1(inits, d_model, const_mat(1, d_model, 1.0)))
    @g_ln1_b.push(alloc_w1(inits, d_model, const_mat(1, d_model, 0.0)))
    hh = 0
    while hh < n_heads
      # Per-head Q/K/V are appended layer-major, so head hh of layer li ends
      # up at index li * n_heads + hh (the index build_forward! uses).
      @g_w_q.push(alloc_w2(inits, @g_d_head, d_model, random_mat(@g_d_head, d_model, 0.02)))
      @g_b_q.push(alloc_w1(inits, @g_d_head, const_mat(1, @g_d_head, 0.0)))
      @g_w_k.push(alloc_w2(inits, @g_d_head, d_model, random_mat(@g_d_head, d_model, 0.02)))
      @g_b_k.push(alloc_w1(inits, @g_d_head, const_mat(1, @g_d_head, 0.0)))
      @g_w_v.push(alloc_w2(inits, @g_d_head, d_model, random_mat(@g_d_head, d_model, 0.02)))
      @g_b_v.push(alloc_w1(inits, @g_d_head, const_mat(1, @g_d_head, 0.0)))
      hh = hh + 1
    end
    # Per-layer (not per-head) tensors: output projection, second layer norm,
    # and the two FFN projections.
    @g_w_o.push(alloc_w2(inits, d_model, d_model, random_mat(d_model, d_model, 0.02)))
    @g_b_o.push(alloc_w1(inits, d_model, const_mat(1, d_model, 0.0)))
    @g_ln2_g.push(alloc_w1(inits, d_model, const_mat(1, d_model, 1.0)))
    @g_ln2_b.push(alloc_w1(inits, d_model, const_mat(1, d_model, 0.0)))
    @g_fc_w.push(alloc_w2(inits, d_ff, d_model, random_mat(d_ff, d_model, 0.02)))
    @g_fc_b.push(alloc_w1(inits, d_ff, const_mat(1, d_ff, 0.0)))
    @g_pr_w.push(alloc_w2(inits, d_model, d_ff, random_mat(d_model, d_ff, 0.02)))
    @g_pr_b.push(alloc_w1(inits, d_model, const_mat(1, d_model, 0.0)))
    li = li + 1
  end
  # Final layer norm.
  @g_lnf_g = alloc_w1(inits, d_model, const_mat(1, d_model, 1.0))
  @g_lnf_b = alloc_w1(inits, d_model, const_mat(1, d_model, 0.0))
  # Mark every weight as a parameter before finalizing.
  gi = 0
  while gi < @g_weights.length
    TinyNNMetal.tnn_set_param(@g_weights[gi])
    gi = gi + 1
  end
  TinyNNMetal.tnn_finalize_weights(@sess)
  # Only after finalize: upload the initial values and zero both optimizer
  # tensors for each weight.
  gk = 0
  while gk < @g_weights.length
    TinyNNMetal.upload_row_major(@sess, @g_weights[gk], inits[gk])
    TinyNNMetal.tnn_zero_tensor(@sess, @g_opt_m[gk])
    TinyNNMetal.tnn_zero_tensor(@sess, @g_opt_v[gk])
    gk = gk + 1
  end
  build_forward!
  build_train_step!
  nil
end
#step!(seq_ids, positions, m_labels, m_hp, is_first) ⇒ Object
One training step. is_first selects full reset vs grads-only (momenta persist). Returns the loss Float.
278 279 280 281 282 283 284 285 286 287 288 289 290 291 |
# File 'lib/toy/llm/engine/gpt2_seq_engine_metal.rb', line 278

# Runs one training step on the realized graph and returns the loss.
#
# Resets the graph (full reset when +is_first+ is truthy, grads-only
# otherwise), uploads the four inputs, runs the backward computation, then
# downloads the loss tensor and reads scratch slot 0.
#
# The result of tnn_compute_backward is stored in @g_cb_rc and is not
# inspected here.
#
# @param seq_ids [Array<Integer>] token ids uploaded into @g_t_tok
# @param positions [Array<Integer>] position ids uploaded into @g_t_pos
# @param m_labels [Mat] label matrix uploaded into @g_t_labels
# @param m_hp [Mat] hyper-parameter values uploaded into @g_t_hp
# @param is_first [Boolean] selects full reset vs. grads-only reset
# @return [Float] the value read from scratch slot 0 after the loss download
def step!(seq_ids, positions, m_labels, m_hp, is_first)
  if is_first
    TinyNNMetal.tnn_graph_reset(@sess)
  else
    TinyNNMetal.tnn_graph_reset_grads_only(@sess)
  end

  # Feed this step's inputs.
  TinyNNMetal.upload_int_array(@sess, @g_t_tok, seq_ids)
  TinyNNMetal.upload_int_array(@sess, @g_t_pos, positions)
  TinyNNMetal.upload_row_major(@sess, @g_t_labels, m_labels)
  TinyNNMetal.upload_row_major(@sess, @g_t_hp, m_hp)

  @g_cb_rc = TinyNNMetal.tnn_compute_backward(@sess)

  # Download the loss tensor, then read it back from scratch slot 0.
  TinyNNMetal.tnn_download(@sess, @g_t_loss)
  loss = TinyNNMetal.tnn_scratch_get(@sess, 0)
  loss
end