Class: Ignis::AI::Transformer::ModernModel

Inherits:
NN::Module
  • Object
show all
Defined in:
lib/nnw/ai/transformer/modern.rb

Overview

Llama/Qwen/SmolLM-style decoder LM: token embedding → N ModernBlocks → final RMSNorm → LM head. No learned position embedding — RoPE supplies it.

Instance Attribute Summary collapse

Attributes inherited from NN::Module

#training

Instance Method Summary collapse

Methods inherited from NN::Module

#call, #eval!, #load_state_dict, #named_parameters, #num_parameters, #parameters, #state_dict, #to, #train!, #zero_grad!

Constructor Details

#initialize(vocab_size:, embed_dim:, num_heads:, num_kv_heads:, num_layers:, ff_dim:, max_seq_len:, rope_base: 10000.0, rope_scaling: nil, head_dim: nil, eps: 1e-6, device_id: 0) ⇒ ModernModel

Returns a new instance of ModernModel.

Parameters:

  • vocab_size (Integer)
  • embed_dim (Integer)
  • num_heads (Integer)
  • num_kv_heads (Integer)
  • num_layers (Integer)
  • ff_dim (Integer)
  • max_seq_len (Integer)
  • rope_base (Float) (defaults to: 10000.0)
  • rope_scaling (Object, nil) (defaults to: nil)
  • head_dim (Integer, nil) (defaults to: nil)
  • eps (Float) (defaults to: 1e-6)
  • device_id (Integer) (defaults to: 0)


147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/nnw/ai/transformer/modern.rb', line 147

# Assembles the decoder stack and registers every piece as a submodule, in
# this order: token embedding, `num_layers` ModernBlocks ("blocks.0",
# "blocks.1", ...), the final RMSNorm, and a bias-free linear LM head.
#
# @param vocab_size [Integer] rows of the embedding table / width of the head output
# @param embed_dim [Integer] model width shared by embedding, blocks, norm and head
# @param num_heads [Integer] passed positionally to each ModernBlock
# @param num_kv_heads [Integer] forwarded to each ModernBlock
# @param num_layers [Integer] how many ModernBlocks to build
# @param ff_dim [Integer] forwarded to each ModernBlock
# @param max_seq_len [Integer] stored on the instance; not otherwise used here
# @param rope_base [Float] forwarded to each ModernBlock
# @param rope_scaling [Object, nil] forwarded untouched to each ModernBlock
#   (expected shape is defined by ModernBlock, not visible here)
# @param head_dim [Integer, nil] optional per-head width override
# @param eps [Float] epsilon shared by the blocks and the final RMSNorm
# @param device_id [Integer] handed to every layer constructor
# @return [ModernModel]
def initialize(vocab_size:, embed_dim:, num_heads:, num_kv_heads:, num_layers:,
               ff_dim:, max_seq_len:, rope_base: 10000.0, rope_scaling: nil,
               head_dim: nil, eps: 1e-6, device_id: 0)
  super()

  # Hyper-parameters kept on the instance.
  @vocab_size   = vocab_size
  @embed_dim    = embed_dim
  @num_heads    = num_heads
  @num_kv_heads = num_kv_heads
  @num_layers   = num_layers
  @max_seq_len  = max_seq_len
  @device_id    = device_id
  # Without an explicit override, use the integer quotient embed_dim / num_heads.
  @head_dim     = head_dim || (embed_dim / num_heads)

  embedding = NN::Embedding.new(vocab_size, embed_dim, device_id: device_id)
  @token_embedding = register_module("token_embedding", embedding)

  # Keyword options common to every block. Note that the caller's head_dim is
  # forwarded exactly as given (possibly nil), not the resolved @head_dim.
  block_opts = {
    num_kv_heads: num_kv_heads,
    ff_dim: ff_dim,
    rope_base: rope_base,
    rope_scaling: rope_scaling,
    head_dim: head_dim,
    eps: eps,
    device_id: device_id
  }
  @blocks = num_layers.times.map do |layer_idx|
    register_module("blocks.#{layer_idx}", ModernBlock.new(embed_dim, num_heads, **block_opts))
  end

  final_norm = NN::RMSNorm.new(embed_dim, eps: eps, device_id: device_id)
  @norm = register_module("norm", final_norm)

  lm_head = NN::Linear.new(embed_dim, vocab_size, bias: false, device_id: device_id)
  @head = register_module("head", lm_head)
end

Instance Attribute Details

#embed_dim ⇒ Integer (readonly)

Returns:

  • (Integer)


135
136
137
# File 'lib/nnw/ai/transformer/modern.rb', line 135

# Reader for the model width stored by the constructor from its `embed_dim:` argument.
#
# @return [Integer]
def embed_dim
  @embed_dim
end

#max_seq_len ⇒ Integer (readonly)

Returns:

  • (Integer)


135
136
137
# File 'lib/nnw/ai/transformer/modern.rb', line 135

# Reader for the value stored by the constructor from its `max_seq_len:` argument.
#
# @return [Integer]
def max_seq_len
  @max_seq_len
end

#num_heads ⇒ Integer (readonly)

Returns:

  • (Integer)


135
136
137
# File 'lib/nnw/ai/transformer/modern.rb', line 135

# Reader for the head count stored by the constructor from its `num_heads:` argument
# (reported as `q_heads` by #to_s).
#
# @return [Integer]
def num_heads
  @num_heads
end

#num_kv_heads ⇒ Integer (readonly)

Returns:

  • (Integer)


135
136
137
# File 'lib/nnw/ai/transformer/modern.rb', line 135

# Reader for the value stored by the constructor from its `num_kv_heads:` argument
# (reported as `kv_heads` by #to_s).
#
# @return [Integer]
def num_kv_heads
  @num_kv_heads
end

#num_layers ⇒ Integer (readonly)

Returns:

  • (Integer)


135
136
137
# File 'lib/nnw/ai/transformer/modern.rb', line 135

# Reader for the number of ModernBlocks requested at construction (`num_layers:`).
#
# @return [Integer]
def num_layers
  @num_layers
end

#vocab_size ⇒ Integer (readonly)

Returns:

  • (Integer)


135
136
137
# File 'lib/nnw/ai/transformer/modern.rb', line 135

# Reader for the vocabulary size stored by the constructor from its `vocab_size:` argument.
#
# @return [Integer]
def vocab_size
  @vocab_size
end

Instance Method Details

#forward(input_ids, mask: nil) ⇒ Tensor

Returns logits [seq, vocab].

Parameters:

  • input_ids (Tensor)

    token indices [seq] (int32)

  • mask (Tensor, nil) (defaults to: nil)

    unused (attention is causal)

Returns:

  • (Tensor)

    logits [seq, vocab]



176
177
178
179
180
181
# File 'lib/nnw/ai/transformer/modern.rb', line 176

# Runs the full decoder stack: embed the tokens, thread the activations
# through every block in order, apply the final norm, then project with the
# LM head.
#
# @param input_ids [Tensor] token indices [seq] (int32)
# @param mask [Tensor, nil] accepted but never read by this method
# @return [Tensor] logits [seq, vocab]
def forward(input_ids, mask: nil)
  # [seq, embed]; no position term is added here (RoPE inside attention supplies it).
  embedded = @token_embedding.call(input_ids)
  # Fold the activations through the blocks; with no blocks this is just `embedded`.
  hidden = @blocks.reduce(embedded) { |state, block| block.call(state) }
  @head.call(@norm.call(hidden))
end

#to_s ⇒ String

Returns:

  • (String)


184
185
186
187
# File 'lib/nnw/ai/transformer/modern.rb', line 184

# One-line summary of the model's main hyper-parameters.
#
# @return [String] e.g. "ModernModel(vocab=32000, embed=512, q_heads=8, kv_heads=2, layers=6)"
def to_s
  fields = [
    "vocab=#{@vocab_size}",
    "embed=#{@embed_dim}",
    "q_heads=#{@num_heads}",
    "kv_heads=#{@num_kv_heads}",
    "layers=#{@num_layers}"
  ]
  "ModernModel(#{fields.join(', ')})"
end