Class: Toy::FFN

Inherits:

Object

Object
Toy::FFN

show all

Defined in: lib/toy.rb

Overview

Toy::FFN

GPT-2 MLP: y = up · W2 + b2 where up = gelu(x · W1 + b1).
`act` selects the activation: :gelu_new (tanh approx, HF default)
or :gelu_exact (erf-based).

Instance Attribute Summary collapse

#act ⇒ Object

Returns the value of attribute act.
#b1 ⇒ Object

Returns the value of attribute b1.
#b2 ⇒ Object

Returns the value of attribute b2.
#d_ff ⇒ Object

Returns the value of attribute d_ff.
#d_model ⇒ Object

Returns the value of attribute d_model.
#w1 ⇒ Object

Returns the value of attribute w1.
#w2 ⇒ Object

Returns the value of attribute w2.

Instance Method Summary collapse

#algorithm ⇒ Object
#algorithm_card ⇒ Object
#forward(x) ⇒ Object

x: [T, D] → [T, D].
#initialize(d_model, d_ff, act_sym) ⇒ FFN constructor

A new instance of FFN.
#param_count ⇒ Object
#summary ⇒ Object

Constructor Details

#initialize(d_model, d_ff, act_sym) ⇒ `FFN`

Returns a new instance of FFN.

# File 'lib/toy.rb', line 302

# Builds a feed-forward block from the two sizes and an activation selector.
#
# d_model - stored in @d_model; first dimension of W1, second of W2,
#           and the length of b2.
# d_ff    - stored in @d_ff; second dimension of W1, first of W2,
#           and the length of b1.
# act_sym - stored as-is in @act (e.g. :gelu_new).
def initialize(d_model, d_ff, act_sym)
  @d_model, @d_ff, @act = d_model, d_ff, act_sym

  # Weight matrices come from Mat.new; W1 is constructed before W2.
  @w1 = Mat.new(d_model, d_ff)
  @w2 = Mat.new(d_ff, d_model)

  # Both bias vectors start out filled with 0.0.
  @b1 = Array.new(d_ff) { 0.0 }
  @b2 = Array.new(d_model) { 0.0 }
end

Instance Attribute Details

#act ⇒ `Object`

Returns the value of attribute act.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @act, the activation selector that the constructor stores
# unchanged from its act_sym argument (e.g. :gelu_new).
def act
  @act
end

#b1 ⇒ `Object`

Returns the value of attribute b1.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @b1, which the constructor creates as an Array of d_ff
# elements, each 0.0.
def b1
  @b1
end

#b2 ⇒ `Object`

Returns the value of attribute b2.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @b2, which the constructor creates as an Array of d_model
# elements, each 0.0.
def b2
  @b2
end

#d_ff ⇒ `Object`

Returns the value of attribute d_ff.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @d_ff, the d_ff value passed to the constructor.
def d_ff
  @d_ff
end

#d_model ⇒ `Object`

Returns the value of attribute d_model.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @d_model, the d_model value passed to the constructor.
def d_model
  @d_model
end

#w1 ⇒ `Object`

Returns the value of attribute w1.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @w1, which the constructor builds as Mat.new(d_model, d_ff).
def w1
  @w1
end

#w2 ⇒ `Object`

Returns the value of attribute w2.



300
301
302

# File 'lib/toy.rb', line 300

# Reader for @w2, which the constructor builds as Mat.new(d_ff, d_model).
def w2
  @w2
end

Instance Method Details

#algorithm ⇒ `Object`

# File 'lib/toy.rb', line 330

# Assembles a Toy::Card describing FFN.forward(x): one input, one output,
# three hyperparameters read from this instance, four parameters, and the
# two binding steps followed by the return step. Returns the populated card.
def algorithm
  card = Toy::Card.new("FFN.forward(x)", "GPT-2-style MLP")

  card.add_input("x",  "R^{T×D}", "")
  card.add_output("y", "R^{T×D}", "")

  # Hyperparameter values are stringified at the moment they are added.
  [
    ["D",          @d_model],
    ["D_f",        @d_ff],
    ["activation", @act]
  ].each { |label, value| card.add_hyper(label, value.to_s) }

  # Parameters are listed with their spaces and an empty description.
  [
    ["W_1", "R^{D×D_f}"],
    ["b_1", "R^{D_f}"],
    ["W_2", "R^{D_f×D}"],
    ["b_2", "R^{D}"]
  ].each { |symbol, space| card.add_param(symbol, space, "") }

  card.step_bind("h", "gelu(x · W_1 + b_1)", "h ∈ R^{T×D_f}")
  card.step_bind("y", "h · W_2 + b_2",       "y ∈ R^{T×D}")
  card.step_return("y")

  card
end

#algorithm_card ⇒ `Object`

347

# File 'lib/toy.rb', line 347

def algorithm_card; algorithm.render_pseudocode; end

#forward(x) ⇒ `Object`

x: [T, D] → [T, D]

# File 'lib/toy.rb', line 313

# Runs the two-layer MLP on x: [T, D] → [T, D].
#
# x is multiplied by W1, b1 is added in place, the result goes through
# Toy.gelu_new, and the activated values are multiplied by W2 with b2
# added in place. The final matrix is returned.
#
# NOTE(review): @act is never read here — Toy.gelu_new is applied no matter
# which activation the instance was constructed with. Confirm whether
# :gelu_exact is meant to be dispatched from this method.
def forward(x)
  projected = x.matmul(@w1)               # [T, Df]
  Toy.add_bias!(projected, @b1)

  activated = Toy.gelu_new(projected)     # [T, Df]

  result = activated.matmul(@w2)          # [T, D]
  Toy.add_bias!(result, @b2)
  result
end

#param_count ⇒ `Object`

# File 'lib/toy.rb', line 325

# Total number of scalar parameters: both weight matrices plus both biases,
# computed from @d_model and @d_ff.
def param_count
  w1_size = @d_model * @d_ff
  b1_size = @d_ff
  w2_size = @d_ff * @d_model
  b2_size = @d_model

  w1_size + b1_size + w2_size + b2_size
end

#summary ⇒ `Object`



322
323
324

# File 'lib/toy.rb', line 322

# One-line description of this block: model width, hidden width, and the
# configured activation, each rendered via string interpolation.
def summary
  "FFN(d=#{@d_model}, hidden=#{@d_ff}, act=#{@act})"
end

Class: Toy::FFN

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(d_model, d_ff, act_sym) ⇒ FFN

Instance Attribute Details

#act ⇒ Object

#b1 ⇒ Object

#b2 ⇒ Object

#d_ff ⇒ Object

#d_model ⇒ Object

#w1 ⇒ Object

#w2 ⇒ Object