Class: Toy::FFN

Inherits:
Object
  • Object
show all
Defined in:
lib/toy.rb

Overview

Toy::FFN

GPT-2 MLP: y = up · W2 + b2 where up = gelu(x · W1 + b1).
`act` selects the activation: :gelu_new (tanh approx, HF default)
or :gelu_exact (erf-based).

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(d_model, d_ff, act_sym) ⇒ FFN

Returns a new instance of FFN.



302
303
304
305
306
307
308
309
310
# File 'lib/toy.rb', line 302

# Builds a feed-forward block for the given sizes.
#
# d_model - width of the block's input and output (rows of W1, columns of W2).
# d_ff    - width of the hidden layer (columns of W1, rows of W2).
# act_sym - activation selector symbol, e.g. :gelu_new; stored as-is.
#
# W1/W2 are allocated through Mat.new (their initial contents are decided
# by Mat, which is defined elsewhere); both bias vectors start at 0.0.
def initialize(d_model, d_ff, act_sym)
  @d_model, @d_ff = d_model, d_ff
  @act = act_sym # e.g. :gelu_new

  # Weights: W1 is d_model x d_ff, W2 is d_ff x d_model.
  @w1 = Mat.new(d_model, d_ff)
  @w2 = Mat.new(d_ff, d_model)

  # Biases: one zero per hidden unit, one zero per output unit.
  @b1 = Array.new(d_ff) { 0.0 }
  @b2 = Array.new(d_model) { 0.0 }
end

Instance Attribute Details

#act ⇒ Object

Returns the value of attribute act.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @act, the activation selector symbol stored by the constructor.
def act; @act; end

#b1 ⇒ Object

Returns the value of attribute b1.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @b1, the first-layer bias (the constructor creates it as an
# Array of d_ff zeros).
def b1; @b1; end

#b2 ⇒ Object

Returns the value of attribute b2.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @b2, the second-layer bias (the constructor creates it as an
# Array of d_model zeros).
def b2; @b2; end

#d_ff ⇒ Object

Returns the value of attribute d_ff.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @d_ff, the hidden-layer width passed to the constructor.
def d_ff; @d_ff; end

#d_model ⇒ Object

Returns the value of attribute d_model.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @d_model, the input/output width passed to the constructor.
def d_model; @d_model; end

#w1 ⇒ Object

Returns the value of attribute w1.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @w1, the first-layer weights (the constructor creates it as
# Mat.new(d_model, d_ff)).
def w1; @w1; end

#w2 ⇒ Object

Returns the value of attribute w2.



300
301
302
# File 'lib/toy.rb', line 300

# Reader for @w2, the second-layer weights (the constructor creates it as
# Mat.new(d_ff, d_model)).
def w2; @w2; end

Instance Method Details

#algorithm ⇒ Object



330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# File 'lib/toy.rb', line 330

# Describes the forward pass as a Toy::Card and returns that card.
#
# The card lists the input x and output y, the hyperparameters
# (D, D_f, activation — taken from @d_model, @d_ff and @act), the four
# parameters W_1, b_1, W_2, b_2, and the two binding steps followed by
# the return of y.
def algorithm
  Toy::Card.new("FFN.forward(x)", "GPT-2-style MLP").tap do |card|
    card.add_input("x",  "R^{T×D}", "")
    card.add_output("y", "R^{T×D}", "")

    # Hyperparameters are rendered as strings.
    [["D", @d_model], ["D_f", @d_ff], ["activation", @act]].each do |label, value|
      card.add_hyper(label, value.to_s)
    end

    # Learned parameters with their shapes; descriptions are left empty.
    [
      ["W_1", "R^{D×D_f}"],
      ["b_1", "R^{D_f}"],
      ["W_2", "R^{D_f×D}"],
      ["b_2", "R^{D}"]
    ].each do |name, shape|
      card.add_param(name, shape, "")
    end

    card.step_bind("h", "gelu(x · W_1 + b_1)", "h ∈ R^{T×D_f}")
    card.step_bind("y", "h · W_2 + b_2",       "y ∈ R^{T×D}")
    card.step_return("y")
  end
end

#algorithm_card ⇒ Object



347
# File 'lib/toy.rb', line 347

# Builds the algorithm card and returns whatever its render_pseudocode
# method produces.
def algorithm_card
  card = algorithm
  card.render_pseudocode
end

#forward(x) ⇒ Object

x: [T, D] → [T, D]



313
314
315
316
317
318
319
320
# File 'lib/toy.rb', line 313

# Runs the feed-forward block: y = act(x · W1 + b1) · W2 + b2.
#
# x - input supporting `matmul`; per the inline shape notes, [T, D].
#
# Returns the result of the second matmul after its bias has been added
# (per the inline shape notes, [T, D]).
#
# The activation now follows @act, which was previously ignored:
# :gelu_exact uses Toy.gelu_exact when the Toy module provides it; every
# other value (including :gelu_new) uses Toy.gelu_new, which is what this
# method always did before.
def forward(x)
  pre = x.matmul(@w1)                 # [T, Df]
  Toy.add_bias!(pre, @b1)

  # NOTE(review): Toy.gelu_exact is assumed from the class overview, which
  # names :gelu_exact as a supported activation — confirm the helper name.
  # The respond_to? guard keeps the old behaviour if it does not exist.
  hidden =
    if @act == :gelu_exact && Toy.respond_to?(:gelu_exact)
      Toy.gelu_exact(pre)             # [T, Df]
    else
      Toy.gelu_new(pre)               # [T, Df]
    end

  out = hidden.matmul(@w2)            # [T, D]
  Toy.add_bias!(out, @b2)
  out
end

#param_count ⇒ Object



325
326
327
328
# File 'lib/toy.rb', line 325

# Total number of scalar parameters in the block: the entries of W1 and
# W2 plus one bias value per hidden unit and per output unit.
def param_count
  w1_size = @d_model * @d_ff
  w2_size = @d_ff * @d_model

  # Summed in the order W1, b1, W2, b2.
  w1_size + @d_ff + w2_size + @d_model
end

#summary ⇒ Object



322
323
324
# File 'lib/toy.rb', line 322

# One-line description of the block showing its model width, hidden width
# and activation selector, e.g. "FFN(d=768, hidden=3072, act=gelu_new)".
def summary
  "FFN(d=#{@d_model}, hidden=#{@d_ff}, act=#{@act})"
end