Class: GPT2Block

Inherits:
Object
  • Object
show all
Defined in:
lib/toy/models/gpt2.rb

Overview

One transformer block’s parameters. Per-head Q/K/V/biases match the project’s existing Block layout so a future FFI cache can mirror what FullForwardFFICache already does.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(d_model, d_head, d_ff, n_heads) ⇒ GPT2Block

Returns a new instance of GPT2Block.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/toy/models/gpt2.rb', line 30

# Allocates every parameter of one block with its default starting value.
#
# Layer-norm gammas are filled with 1.0 and betas with 0.0; every bias array
# is filled with 0.0; each weight is a fresh Mat built with the two sizes
# shown (Mat is defined outside this view — presumably rows, cols; confirm).
#
# @param d_model [Integer] length of the layer-norm, b_o and b_ff2 arrays;
#   first Mat size for the Q/K/V, output and ff1 weights
# @param d_head [Integer] length of each per-head bias array; second Mat size
#   for each per-head Q/K/V weight
# @param d_ff [Integer] length of b_ff1; second Mat size of w_ff1 and first
#   Mat size of w_ff2
# @param n_heads [Integer] number of per-head entries to create.
#   NOTE(review): the first entry is created before the loop, so a value
#   below 1 still yields one entry per array — confirm this is intended.
def initialize(d_model, d_head, d_ff, n_heads)
  @ln1_gamma = Array.new(d_model, 1.0)
  @ln1_beta  = Array.new(d_model, 0.0)
  @ln2_gamma = Array.new(d_model, 1.0)
  @ln2_beta  = Array.new(d_model, 0.0)

  # Per-head literal-seed pattern (Array<Mat>, Array<Array<Float>>).
  # Each per-head array starts as a one-element literal (head 0) and is then
  # grown with push for the remaining heads.
  # NOTE(review): presumably the literal makes the element type evident to
  # tooling — confirm before replacing this with Array.new(n_heads) { ... }.
  @w_q = [Mat.new(d_model, d_head)]
  @w_k = [Mat.new(d_model, d_head)]
  @w_v = [Mat.new(d_model, d_head)]
  @b_q = [Array.new(d_head, 0.0)]
  @b_k = [Array.new(d_head, 0.0)]
  @b_v = [Array.new(d_head, 0.0)]
  # Head 0 already exists, so the counter starts at 1.
  h = 1
  while h < n_heads
    @w_q.push(Mat.new(d_model, d_head))
    @w_k.push(Mat.new(d_model, d_head))
    @w_v.push(Mat.new(d_model, d_head))
    @b_q.push(Array.new(d_head, 0.0))
    @b_k.push(Array.new(d_head, 0.0))
    @b_v.push(Array.new(d_head, 0.0))
    h += 1
  end

  # Parameters that are not split per head: one Mat / one bias array each.
  @w_o   = Mat.new(d_model, d_model)
  @w_ff1 = Mat.new(d_model, d_ff)
  @w_ff2 = Mat.new(d_ff,    d_model)
  @b_o   = Array.new(d_model, 0.0)
  @b_ff1 = Array.new(d_ff,   0.0)
  @b_ff2 = Array.new(d_model, 0.0)
end

Instance Attribute Details

#b_ff1 ⇒ Object

Returns the value of attribute b_ff1.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @b_ff1; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_ff elements, all 0.0.
def b_ff1
  @b_ff1
end

#b_ff2 ⇒ Object

Returns the value of attribute b_ff2.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @b_ff2; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_model elements, all 0.0.
def b_ff2
  @b_ff2
end

#b_k ⇒ Object

Returns the value of attribute b_k.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @b_k; returns the stored object itself, not a copy.
# The constructor builds it as an array with one entry per head (always at
# least one), each entry an array of d_head elements, all 0.0.
def b_k
  @b_k
end

#b_o ⇒ Object

Returns the value of attribute b_o.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @b_o; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_model elements, all 0.0.
def b_o
  @b_o
end

#b_q ⇒ Object

Returns the value of attribute b_q.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @b_q; returns the stored object itself, not a copy.
# The constructor builds it as an array with one entry per head (always at
# least one), each entry an array of d_head elements, all 0.0.
def b_q
  @b_q
end

#b_v ⇒ Object

Returns the value of attribute b_v.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @b_v; returns the stored object itself, not a copy.
# The constructor builds it as an array with one entry per head (always at
# least one), each entry an array of d_head elements, all 0.0.
def b_v
  @b_v
end

#ln1_beta ⇒ Object

Returns the value of attribute ln1_beta.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @ln1_beta; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_model elements, all 0.0.
def ln1_beta
  @ln1_beta
end

#ln1_gamma ⇒ Object

Returns the value of attribute ln1_gamma.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @ln1_gamma; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_model elements, all 1.0.
def ln1_gamma
  @ln1_gamma
end

#ln2_beta ⇒ Object

Returns the value of attribute ln2_beta.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @ln2_beta; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_model elements, all 0.0.
def ln2_beta
  @ln2_beta
end

#ln2_gamma ⇒ Object

Returns the value of attribute ln2_gamma.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @ln2_gamma; returns the stored object itself, not a copy.
# The constructor sets it to an array of d_model elements, all 1.0.
def ln2_gamma
  @ln2_gamma
end

#w_ff1 ⇒ Object

Returns the value of attribute w_ff1.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @w_ff1; returns the stored object itself, not a copy.
# The constructor sets it to Mat.new(d_model, d_ff).
def w_ff1
  @w_ff1
end

#w_ff2 ⇒ Object

Returns the value of attribute w_ff2.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @w_ff2; returns the stored object itself, not a copy.
# The constructor sets it to Mat.new(d_ff, d_model).
def w_ff2
  @w_ff2
end

#w_k ⇒ Object

Returns the value of attribute w_k.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @w_k; returns the stored object itself, not a copy.
# The constructor builds it as an array with one entry per head (always at
# least one), each entry a separate Mat.new(d_model, d_head).
def w_k
  @w_k
end

#w_o ⇒ Object

Returns the value of attribute w_o.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @w_o; returns the stored object itself, not a copy.
# The constructor sets it to Mat.new(d_model, d_model).
def w_o
  @w_o
end

#w_q ⇒ Object

Returns the value of attribute w_q.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @w_q; returns the stored object itself, not a copy.
# The constructor builds it as an array with one entry per head (always at
# least one), each entry a separate Mat.new(d_model, d_head).
def w_q
  @w_q
end

#w_v ⇒ Object

Returns the value of attribute w_v.



26
27
28
# File 'lib/toy/models/gpt2.rb', line 26

# Reader for @w_v; returns the stored object itself, not a copy.
# The constructor builds it as an array with one entry per head (always at
# least one), each entry a separate Mat.new(d_model, d_head).
def w_v
  @w_v
end