Class: Toy::LayerNorm

Inherits:

Object

Object
Toy::LayerNorm

show all

Defined in: lib/toy.rb

Overview

Toy::LayerNorm

y = (x - mean) / sqrt(var + eps) * gamma + beta,  row-wise.
gamma, beta: length D. eps default 1e-5 (HF GPT-2).

Instance Attribute Summary collapse

#beta ⇒ Object

Returns the value of attribute beta.
#d ⇒ Object

Returns the value of attribute d.
#eps ⇒ Object

Returns the value of attribute eps.
#gamma ⇒ Object

Returns the value of attribute gamma.

Instance Method Summary collapse

#algorithm_card ⇒ Object

One-line algorithm-card body (Phuong–Hutter style).
#forward(x) ⇒ Object

x: [T, D] → [T, D].
#initialize(d) ⇒ LayerNorm constructor

A new instance of LayerNorm.
#param_count ⇒ Object

gamma + beta.
#summary ⇒ Object

Constructor Details

#initialize(d) ⇒ `LayerNorm`

Returns a new instance of LayerNorm.

# File 'lib/toy.rb', line 36

# Set up a LayerNorm for rows of width d, starting from an identity
# transform: every gamma entry is 1.0 and every beta entry is 0.0.
#
# d - number of entries per row; also the length of gamma and beta.
#
# NOTE(review): eps is taken from RMS_EPS_DEFAULT, which is defined outside
# this listing — confirm its value matches the 1e-5 default described in the
# class overview.
def initialize(d)
  ones   = Array.new(d) { 1.0 }
  zeros  = Array.new(d) { 0.0 }
  @d     = d
  @eps   = RMS_EPS_DEFAULT
  @gamma = ones
  @beta  = zeros
end

Instance Attribute Details

#beta ⇒ `Object`

Returns the value of attribute beta.



34
35
36

# File 'lib/toy.rb', line 34

# Reader for @beta, the per-column shift vector. The constructor fills it
# with d copies of 0.0 and forward adds beta[j] to column j of every row.
# Returns the stored object itself, not a copy.
def beta
  @beta
end

#d ⇒ `Object`

Returns the value of attribute d.



34
35
36

# File 'lib/toy.rb', line 34

# Reader for @d, the size handed to the constructor. forward uses it as the
# row width (stride) and gamma/beta are each created with this many entries.
def d
  @d
end

#eps ⇒ `Object`

Returns the value of attribute eps.



34
35
36

# File 'lib/toy.rb', line 34

# Reader for @eps, which the constructor sets from RMS_EPS_DEFAULT. forward
# adds it to the row variance before taking the square root.
def eps
  @eps
end

#gamma ⇒ `Object`

Returns the value of attribute gamma.



34
35
36

# File 'lib/toy.rb', line 34

# Reader for @gamma, the per-column scale vector. The constructor fills it
# with d copies of 1.0 and forward multiplies column j of every normalized
# row by gamma[j]. Returns the stored object itself, not a copy.
def gamma
  @gamma
end

Instance Method Details

#algorithm_card ⇒ `Object`

One-line algorithm-card body (Phuong–Hutter style).

LN(x; γ, β, ε) := (x − mean(x)) / √(var(x) + ε) ⊙ γ + β



84
85
86

# File 'lib/toy.rb', line 84

# Returns the LayerNorm definition as one display string: the input is
# centered by its mean, divided by the square root of variance plus ε, then
# scaled element-wise by γ and shifted by β. The text is fixed; it does not
# read any instance state.
def algorithm_card
  card = "LN(x; γ, β, ε) := (x − mean(x)) / √(var(x) + ε) ⊙ γ + β"
  card
end

#forward(x) ⇒ `Object`

x: [T, D] → [T, D]

# File 'lib/toy.rb', line 44

# Apply layer normalization to every row of x independently.
#
# For each row this computes the mean and the population variance (sum of
# squared deviations divided by @d), then stores
#   (value - mean) / sqrt(variance + @eps) * @gamma[col] + @beta[col]
# in the matching slot of a freshly allocated Mat.
#
# x - object exposing nrows and a flat buffer indexed as row * @d + col.
#     The column count of x is not checked here; rows are read with a
#     stride of @d.
#
# Returns a new Mat.new(x.nrows, @d). x itself is only read.
def forward(x)
  rows   = x.nrows
  width  = @d
  result = Mat.new(rows, width)

  rows.times do |r|
    offset = r * width

    # Row mean. Plain left-to-right accumulation keeps float rounding stable.
    total = 0.0
    width.times { |c| total += x.flat[offset + c] }
    mu = total / width

    # Population variance of the row, folded into the reciprocal std-dev.
    sq_dev = 0.0
    width.times do |c|
      delta = x.flat[offset + c] - mu
      sq_dev += delta * delta
    end
    scale = 1.0 / Math.sqrt(sq_dev / width + @eps)

    # Normalize, then apply the per-column scale and shift.
    width.times do |c|
      centered = (x.flat[offset + c] - mu) * scale
      result.flat[offset + c] = centered * @gamma[c] + @beta[c]
    end
  end

  result
end

#param_count ⇒ `Object`

gamma + beta

# File 'lib/toy.rb', line 80

# Total number of entries held in gamma and beta. Each has @d entries, so
# the result is 2 * @d.
def param_count
  2 * @d
end

#summary ⇒ `Object`

# File 'lib/toy.rb', line 79

# Short human-readable description of the layer: its width @d and its
# epsilon @eps, each rendered with to_s.
def summary
  "LayerNorm(d=" + @d.to_s + ", eps=" + @eps.to_s + ")"
end

Class: Toy::LayerNorm

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(d) ⇒ LayerNorm

Instance Attribute Details

#beta ⇒ Object

#d ⇒ Object

#eps ⇒ Object

#gamma ⇒ Object