Class: Toy::FFN
- Inherits:
-
Object
- Object
- Toy::FFN
- Defined in:
- lib/toy.rb
Overview
Toy::FFN
GPT-2 MLP: y = up · W2 + b2 where up = gelu(x · W1 + b1).
`act` selects the activation: :gelu_new (tanh approx, HF default)
or :gelu_exact (erf-based).
Instance Attribute Summary collapse
-
#act ⇒ Object
Returns the value of attribute act.
-
#b1 ⇒ Object
Returns the value of attribute b1.
-
#b2 ⇒ Object
Returns the value of attribute b2.
-
#d_ff ⇒ Object
Returns the value of attribute d_ff.
-
#d_model ⇒ Object
Returns the value of attribute d_model.
-
#w1 ⇒ Object
Returns the value of attribute w1.
-
#w2 ⇒ Object
Returns the value of attribute w2.
Instance Method Summary collapse
- #algorithm ⇒ Object
- #algorithm_card ⇒ Object
-
#forward(x) ⇒ Object
x: [T, D] → [T, D].
-
#initialize(d_model, d_ff, act_sym) ⇒ FFN
constructor
A new instance of FFN.
- #param_count ⇒ Object
- #summary ⇒ Object
Constructor Details
#initialize(d_model, d_ff, act_sym) ⇒ FFN
Returns a new instance of FFN.
302 303 304 305 306 307 308 309 310 |
# File 'lib/toy.rb', line 302
#
# Sets up a feed-forward block: records the dimensions and the activation
# selector, then allocates both weight matrices and both bias vectors.
#
# @param d_model [Integer] model width; also the length of the second bias
# @param d_ff [Integer] hidden width; also the length of the first bias
# @param act_sym [Symbol] activation selector, e.g. :gelu_new
def initialize(d_model, d_ff, act_sym)
  @d_model, @d_ff, @act = d_model, d_ff, act_sym

  # Weight matrices: d_model x d_ff, then d_ff x d_model.
  # Initial contents are whatever Mat.new produces.
  @w1 = Mat.new(d_model, d_ff)
  @w2 = Mat.new(d_ff, d_model)

  # Bias vectors start out filled with 0.0.
  @b1 = Array.new(d_ff, 0.0)
  @b2 = Array.new(d_model, 0.0)
end
Instance Attribute Details
#act ⇒ Object
Returns the value of attribute act.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the activation selector.
#
# @return [Object] the current value of @act
def act
  @act
end
#b1 ⇒ Object
Returns the value of attribute b1.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the first bias vector.
#
# @return [Object] the current value of @b1
def b1
  @b1
end
#b2 ⇒ Object
Returns the value of attribute b2.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the second bias vector.
#
# @return [Object] the current value of @b2
def b2
  @b2
end
#d_ff ⇒ Object
Returns the value of attribute d_ff.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the hidden width.
#
# @return [Object] the current value of @d_ff
def d_ff
  @d_ff
end
#d_model ⇒ Object
Returns the value of attribute d_model.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the model width.
#
# @return [Object] the current value of @d_model
def d_model
  @d_model
end
#w1 ⇒ Object
Returns the value of attribute w1.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the first weight matrix.
#
# @return [Object] the current value of @w1
def w1
  @w1
end
#w2 ⇒ Object
Returns the value of attribute w2.
300 301 302 |
# File 'lib/toy.rb', line 300
#
# Reader for the second weight matrix.
#
# @return [Object] the current value of @w2
def w2
  @w2
end
Instance Method Details
#algorithm ⇒ Object
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 |
# File 'lib/toy.rb', line 330
#
# Builds a Toy::Card describing FFN.forward(x): one input, one output,
# three hyper-parameters (taken from @d_model, @d_ff and @act), four
# parameters, two bind steps and a final return step.
#
# @return [Toy::Card] the populated card
def algorithm
  card = Toy::Card.new("FFN.forward(x)", "GPT-2-style MLP")

  card.add_input("x", "R^{T×D}", "")
  card.add_output("y", "R^{T×D}", "")

  # Hyper-parameters are always passed to the card as strings.
  [["D", @d_model], ["D_f", @d_ff], ["activation", @act]].each do |label, value|
    card.add_hyper(label, value.to_s)
  end

  # Parameters in declaration order, each with an empty description.
  [
    ["W_1", "R^{D×D_f}"],
    ["b_1", "R^{D_f}"],
    ["W_2", "R^{D_f×D}"],
    ["b_2", "R^{D}"]
  ].each do |symbol, space|
    card.add_param(symbol, space, "")
  end

  card.step_bind("h", "gelu(x · W_1 + b_1)", "h ∈ R^{T×D_f}")
  card.step_bind("y", "h · W_2 + b_2", "y ∈ R^{T×D}")
  card.step_return("y")

  card
end
#algorithm_card ⇒ Object
347 |
# File 'lib/toy.rb', line 347
#
# Renders the card built by #algorithm as pseudocode.
#
# @return [Object] whatever the card's render_pseudocode returns
def algorithm_card
  card = algorithm
  card.render_pseudocode
end
#forward(x) ⇒ Object
x: [T, D] → [T, D]
313 314 315 316 317 318 319 320 |
# File 'lib/toy.rb', line 313
#
# Runs the MLP: out = act(x · W1 + b1) · W2 + b2.
# x: [T, D] → [T, D]
#
# The activation is selected by @act: :gelu_exact dispatches to
# Toy.gelu_exact; every other value uses Toy.gelu_new, which is what this
# method previously did unconditionally (ignoring @act).
#
# NOTE(review): assumes Toy.gelu_exact exists and is called the same way as
# Toy.gelu_new — confirm against lib/toy.rb.
#
# @param x [#matmul] input activations
# @return [Object] the second matmul result after Toy.add_bias! with @b2
def forward(x)
  pre = x.matmul(@w1) # [T, Df]
  Toy.add_bias!(pre, @b1)

  # Honor the configured activation instead of hard-coding gelu_new.
  hidden =
    case @act
    when :gelu_exact then Toy.gelu_exact(pre)
    else Toy.gelu_new(pre)
    end # [T, Df]

  out = hidden.matmul(@w2) # [T, D]
  Toy.add_bias!(out, @b2)
  out
end
#param_count ⇒ Object
325 326 327 328 |
# File 'lib/toy.rb', line 325
#
# Counts the scalar parameters implied by @d_model and @d_ff.
#
# @return [Integer] (W1 + b1) + (W2 + b2)
def param_count
  first_layer = @d_model * @d_ff + @d_ff      # W1 + b1
  second_layer = @d_ff * @d_model + @d_model  # W2 + b2
  first_layer + second_layer
end
#summary ⇒ Object
322 323 324 |
# File 'lib/toy.rb', line 322
#
# One-line description of this block's configuration.
#
# @return [String] "FFN(d=<d_model>, hidden=<d_ff>, act=<act>)"
def summary
  # Interpolation calls #to_s on each value, matching the former explicit
  # .to_s + concatenation chain.
  "FFN(d=#{@d_model}, hidden=#{@d_ff}, act=#{@act})"
end