Class: Arch

Inherits:
Object
  • Object
show all
Defined in:
lib/toy/models/arch.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(family, name, vocab_size, d_model, n_layers, n_heads_q, n_heads_kv, d_head, d_ff, max_position, untied_lm_head, qkv_bias, qk_norm, swa_window, rope_freq_base, rope_freq_scale, rope_partial_factor, norm_kind, norm_eps, ffn_kind, ffn_bias, moe, n_experts, n_experts_used, n_shared_experts, expert_gating, tokenizer_kind, bos_id, eos_id, pad_id, unk_id, add_bos_by_default, embed_scale) ⇒ Arch

Returns a new instance of Arch.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/toy/models/arch.rb', line 69

# Store every architecture field exactly as given. All 33 arguments are
# positional and required; nothing is validated or defaulted here.
# Arguments are grouped below by the part of the model they describe.
def initialize(family, name,
               vocab_size, d_model, n_layers, n_heads_q, n_heads_kv, d_head, d_ff,
               max_position, untied_lm_head,
               qkv_bias, qk_norm, swa_window,
               rope_freq_base, rope_freq_scale, rope_partial_factor,
               norm_kind, norm_eps,
               ffn_kind, ffn_bias,
               moe, n_experts, n_experts_used, n_shared_experts, expert_gating,
               tokenizer_kind, bos_id, eos_id, pad_id, unk_id, add_bos_by_default,
               embed_scale)
  # Identity
  @family = family
  @name = name

  # Dimensions
  @vocab_size = vocab_size
  @d_model = d_model
  @n_layers = n_layers
  @n_heads_q = n_heads_q
  @n_heads_kv = n_heads_kv
  @d_head = d_head
  @d_ff = d_ff
  @max_position = max_position
  @untied_lm_head = untied_lm_head

  # Attention
  @qkv_bias = qkv_bias
  @qk_norm = qk_norm
  @swa_window = swa_window

  # RoPE
  @rope_freq_base = rope_freq_base
  @rope_freq_scale = rope_freq_scale
  @rope_partial_factor = rope_partial_factor

  # Norm
  @norm_kind = norm_kind
  @norm_eps = norm_eps

  # FFN
  @ffn_kind = ffn_kind
  @ffn_bias = ffn_bias

  # MoE
  @moe = moe
  @n_experts = n_experts
  @n_experts_used = n_experts_used
  @n_shared_experts = n_shared_experts
  @expert_gating = expert_gating

  # Tokenizer
  @tokenizer_kind = tokenizer_kind
  @bos_id = bos_id
  @eos_id = eos_id
  @pad_id = pad_id
  @unk_id = unk_id
  @add_bos_by_default = add_bos_by_default

  # Embedding scale
  @embed_scale = embed_scale
end

Instance Attribute Details

#add_bos_by_defaultObject (readonly)

Returns the value of attribute add_bos_by_default.



63
64
65
# File 'lib/toy/models/arch.rb', line 63

# Read-only accessor for @add_bos_by_default, stored by #initialize from the
# constructor argument of the same name.
def add_bos_by_default
  @add_bos_by_default
end

#bos_idObject (readonly)

Returns the value of attribute bos_id.



59
60
61
# File 'lib/toy/models/arch.rb', line 59

# Read-only accessor for @bos_id. Arch.from_gguf fills it from the
# "tokenizer.ggml.bos_token_id" metadata key.
def bos_id
  @bos_id
end

#d_ffObject (readonly)

Returns the value of attribute d_ff.



28
29
30
# File 'lib/toy/models/arch.rb', line 28

# Read-only accessor for @d_ff. Arch.from_gguf fills it from the
# "<arch>.feed_forward_length" metadata key.
def d_ff
  @d_ff
end

#d_headObject (readonly)

Returns the value of attribute d_head.



27
28
29
# File 'lib/toy/models/arch.rb', line 27

# Read-only accessor for @d_head, stored by #initialize from the `d_head`
# constructor argument.
def d_head
  @d_head
end

#d_modelObject (readonly)

Returns the value of attribute d_model.



23
24
25
# File 'lib/toy/models/arch.rb', line 23

# Read-only accessor for @d_model. Arch.from_gguf fills it from the
# "<arch>.embedding_length" metadata key.
def d_model
  @d_model
end

#embed_scaleObject (readonly)

Embed scale (some models multiply token_embd by sqrt(d_model); Llama-family does not).



67
68
69
# File 'lib/toy/models/arch.rb', line 67

# Read-only accessor for @embed_scale. Some models multiply token_embd by
# sqrt(d_model); Llama-family does not.
def embed_scale
  @embed_scale
end

#eos_idObject (readonly)

Returns the value of attribute eos_id.



60
61
62
# File 'lib/toy/models/arch.rb', line 60

# Read-only accessor for @eos_id. Arch.from_gguf fills it from the
# "tokenizer.ggml.eos_token_id" metadata key.
def eos_id
  @eos_id
end

#expert_gatingObject (readonly)

:softmax | :sigmoid



55
56
57
# File 'lib/toy/models/arch.rb', line 55

# Read-only accessor for @expert_gating: :softmax or :sigmoid.
def expert_gating
  @expert_gating
end

#familyObject (readonly)

Identity — :qwen2, :llama, :smollm. The label comes from tensor-presence detection (NOT general.architecture: our converter writes “llama” for every model, so it’s unreliable).



18
19
20
# File 'lib/toy/models/arch.rb', line 18

# Read-only accessor for @family, the identity label (:qwen2, :llama,
# :smollm). The label comes from tensor-presence detection rather than
# general.architecture, which is unreliable because our converter writes
# "llama" for every model.
def family
  @family
end

#ffn_biasObject (readonly)

Returns the value of attribute ffn_bias.



48
49
50
# File 'lib/toy/models/arch.rb', line 48

# Read-only accessor for @ffn_bias, stored by #initialize from the `ffn_bias`
# constructor argument.
def ffn_bias
  @ffn_bias
end

#ffn_kindObject (readonly)

FFN



47
48
49
# File 'lib/toy/models/arch.rb', line 47

# Read-only accessor for @ffn_kind, the FFN variant symbol (Arch.from_gguf
# always passes :swiglu).
def ffn_kind
  @ffn_kind
end

#max_positionObject (readonly)

Returns the value of attribute max_position.



29
30
31
# File 'lib/toy/models/arch.rb', line 29

# Read-only accessor for @max_position. Arch.from_gguf fills it from
# "<arch>.context_length", defaulting to 8192 when that key is missing.
def max_position
  @max_position
end

#moeObject (readonly)

MoE (zeros / false when not MoE)



51
52
53
# File 'lib/toy/models/arch.rb', line 51

# Read-only accessor for @moe, the mixture-of-experts flag. The MoE fields
# are zeros / false when the model is not MoE.
def moe
  @moe
end

#n_expertsObject (readonly)

Returns the value of attribute n_experts.



52
53
54
# File 'lib/toy/models/arch.rb', line 52

# Read-only accessor for @n_experts, stored by #initialize from the
# `n_experts` constructor argument (0 when the model is not MoE).
def n_experts
  @n_experts
end

#n_experts_usedObject (readonly)

Returns the value of attribute n_experts_used.



53
54
55
# File 'lib/toy/models/arch.rb', line 53

# Read-only accessor for @n_experts_used, stored by #initialize from the
# `n_experts_used` constructor argument (0 when the model is not MoE).
def n_experts_used
  @n_experts_used
end

#n_heads_kvObject (readonly)

Returns the value of attribute n_heads_kv.



26
27
28
# File 'lib/toy/models/arch.rb', line 26

# Read-only accessor for @n_heads_kv, stored by #initialize from the
# `n_heads_kv` constructor argument. Compared against @n_heads_q by #gqa?.
def n_heads_kv
  @n_heads_kv
end

#n_heads_qObject (readonly)

Returns the value of attribute n_heads_q.



25
26
27
# File 'lib/toy/models/arch.rb', line 25

# Read-only accessor for @n_heads_q. Arch.from_gguf fills it from the
# "<arch>.attention.head_count" metadata key.
def n_heads_q
  @n_heads_q
end

#n_layersObject (readonly)

Returns the value of attribute n_layers.



24
25
26
# File 'lib/toy/models/arch.rb', line 24

# Read-only accessor for @n_layers. Arch.from_gguf fills it from the
# "<arch>.block_count" metadata key.
def n_layers
  @n_layers
end

#n_shared_expertsObject (readonly)

Returns the value of attribute n_shared_experts.



54
55
56
# File 'lib/toy/models/arch.rb', line 54

# Read-only accessor for @n_shared_experts, stored by #initialize from the
# `n_shared_experts` constructor argument (Arch.from_gguf always passes 0).
def n_shared_experts
  @n_shared_experts
end

#nameObject (readonly)

Returns the value of attribute name.



19
20
21
# File 'lib/toy/models/arch.rb', line 19

# Read-only accessor for @name. Arch.from_gguf passes the GGUF path here.
def name
  @name
end

#norm_epsObject (readonly)

Returns the value of attribute norm_eps.



44
45
46
# File 'lib/toy/models/arch.rb', line 44

# Read-only accessor for @norm_eps. Arch.from_gguf fills it from the
# "<arch>.attention.layer_norm_rms_epsilon" metadata key.
def norm_eps
  @norm_eps
end

#norm_kindObject (readonly)

Norm



43
44
45
# File 'lib/toy/models/arch.rb', line 43

# Read-only accessor for @norm_kind, the normalisation variant symbol
# (Arch.from_gguf always passes :rms).
def norm_kind
  @norm_kind
end

#pad_idObject (readonly)

Returns the value of attribute pad_id.



61
62
63
# File 'lib/toy/models/arch.rb', line 61

# Read-only accessor for @pad_id. Arch.from_gguf fills it from the
# "tokenizer.ggml.padding_token_id" metadata key.
def pad_id
  @pad_id
end

#qk_normObject (readonly)

Returns the value of attribute qk_norm.



34
35
36
# File 'lib/toy/models/arch.rb', line 34

# Read-only accessor for @qk_norm, stored by #initialize from the `qk_norm`
# constructor argument (Arch.from_gguf always passes false).
def qk_norm
  @qk_norm
end

#qkv_biasObject (readonly)

Attention



33
34
35
# File 'lib/toy/models/arch.rb', line 33

# Read-only accessor for @qkv_bias (attention section). Arch.from_gguf sets
# it when a "blk.0.attn_q.bias" or "blk.0.attn_q.head_0.bias" tensor exists.
def qkv_bias
  @qkv_bias
end

#rope_freq_baseObject (readonly)

RoPE



38
39
40
# File 'lib/toy/models/arch.rb', line 38

# Read-only accessor for @rope_freq_base (RoPE section). Arch.from_gguf
# fills it from the "<arch>.rope.freq_base" metadata key.
def rope_freq_base
  @rope_freq_base
end

#rope_freq_scaleObject (readonly)

Returns the value of attribute rope_freq_scale.



39
40
41
# File 'lib/toy/models/arch.rb', line 39

# Read-only accessor for @rope_freq_scale, stored by #initialize from the
# `rope_freq_scale` constructor argument (Arch.from_gguf always passes 1.0).
def rope_freq_scale
  @rope_freq_scale
end

#rope_partial_factorObject (readonly)

1.0 default; 0.5 for GLM/Phi



40
41
42
# File 'lib/toy/models/arch.rb', line 40

# Read-only accessor for @rope_partial_factor: 1.0 by default, 0.5 for
# GLM/Phi.
def rope_partial_factor
  @rope_partial_factor
end

#swa_windowObject (readonly)

nil when no sliding-window



35
36
37
# File 'lib/toy/models/arch.rb', line 35

# Read-only accessor for @swa_window; nil when the model has no sliding
# window (see #swa?).
def swa_window
  @swa_window
end

#tokenizer_kindObject (readonly)

Tokenizer (Phase 0: GGUF metadata or nil)



58
59
60
# File 'lib/toy/models/arch.rb', line 58

# Read-only accessor for @tokenizer_kind. Arch.from_gguf passes
# :gguf_embedded when the file carries a non-empty "tokenizer.ggml.tokens"
# array and :external otherwise.
def tokenizer_kind
  @tokenizer_kind
end

#unk_idObject (readonly)

Returns the value of attribute unk_id.



62
63
64
# File 'lib/toy/models/arch.rb', line 62

# Read-only accessor for @unk_id. Arch.from_gguf fills it from the
# "tokenizer.ggml.unknown_token_id" metadata key.
def unk_id
  @unk_id
end

#untied_lm_headObject (readonly)

Returns the value of attribute untied_lm_head.



30
31
32
# File 'lib/toy/models/arch.rb', line 30

# Read-only accessor for @untied_lm_head. Arch.from_gguf sets it when the
# file contains an "output.weight" tensor.
def untied_lm_head
  @untied_lm_head
end

#vocab_sizeObject (readonly)

Dimensions



22
23
24
# File 'lib/toy/models/arch.rb', line 22

# Read-only accessor for @vocab_size (first of the dimension fields).
# Arch.from_gguf reads "<arch>.vocab_size" and falls back to the length of
# the "tokenizer.ggml.tokens" array when that key is missing.
def vocab_size
  @vocab_size
end

Class Method Details

.from_gguf(path) ⇒ Object

Detect the architecture family by reading the GGUF and inspecting what’s there. The general.architecture key is unreliable (our converter writes “llama” for every model), so we use tensor presence + RoPE freq_base as the actual signal.



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/toy/models/arch.rb', line 159

# Build an Arch from the GGUF file at `path`.
#
# general.architecture is not trusted (our converter writes "llama" for
# every model), so the family label is decided from tensor presence.
# Scalar metadata is read under an arch prefix ("llama", "olmoe" or
# "gemma2") chosen by probing "<prefix>.embedding_length".
#
# The TinyNN integer getters are treated as returning a negative value for
# a missing key; that is what the `< 0` / `> 0` checks below rely on.
#
# @param path [String] GGUF file handed to TinyNN.tnn_gguf_load
# @return [Arch, nil] nil (after printing a message) when the file cannot
#   be opened, or when embedding_length / attention.head_count are missing
#   or zero so that no head dimension can be derived
def self.from_gguf(path)
  handle = TinyNN.tnn_gguf_load(path)
  if handle == nil
    puts "Arch.from_gguf: failed to open " + path
    return nil
  end

  # M2.3: support multiple arch prefixes (llama.* OR olmoe.* OR …).
  # Probe embedding_length (present in every arch); whichever resolves
  # wins. vocab_size isn't reliable for probing — some archs (OLMoE) omit
  # it and rely on the tokenizer.ggml.tokens array length.
  arch_prefix = "llama"
  if TinyNN.tnn_gguf_get_u32(handle, "llama.embedding_length") < 0
    if TinyNN.tnn_gguf_get_u32(handle, "olmoe.embedding_length") >= 0
      arch_prefix = "olmoe"
    elsif TinyNN.tnn_gguf_get_u32(handle, "gemma2.embedding_length") >= 0
      arch_prefix = "gemma2"
    end
  end

  # Token-array length is needed twice (vocab fallback + tokenizer kind);
  # read it once.
  vocab_n = TinyNN.tnn_gguf_arr_n(handle, "tokenizer.ggml.tokens")
  vocab   = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".vocab_size")
  if vocab < 0
    vocab = vocab_n
  end

  d_model  = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".embedding_length")
  d_ff     = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".feed_forward_length")
  n_q      = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".attention.head_count")
  n_kv     = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".attention.head_count_kv")
  n_layers = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".block_count")
  ctx      = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".context_length")
  if ctx < 0
    ctx = 8192   # default if metadata missing
  end

  # Without these two values d_head would be a division by zero or a
  # negative number, so refuse the file instead of returning a bogus Arch.
  # The handle must still be released on this path.
  if d_model <= 0 || n_q <= 0
    TinyNN.tnn_gguf_free(handle)
    puts "Arch.from_gguf: missing embedding_length / attention.head_count in " + path
    return nil
  end

  # head_count_kv is optional in GGUF; absent means "same as head_count"
  # (no GQA), not a negative head count.
  if n_kv <= 0
    n_kv = n_q
  end

  rope_base = TinyNN.tnn_gguf_get_f32(handle, arch_prefix + ".rope.freq_base")
  rms_eps   = TinyNN.tnn_gguf_get_f32(handle, arch_prefix + ".attention.layer_norm_rms_epsilon")

  # Prefer the explicit per-head size when the file carries one (gemma2's
  # head size is not d_model / n_q); otherwise derive it as before.
  key_len = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".attention.key_length")
  d_head  = key_len > 0 ? key_len : d_model / n_q

  # Tensor-presence flags. Per-head bias (toy from-scratch ckpts, #153)
  # carries blk.0.attn_q.head_0.bias instead of the fused name.
  has_qkv_bias = (TinyNN.tnn_gguf_find_index(handle, "blk.0.attn_q.bias") >= 0) ||
                 (TinyNN.tnn_gguf_find_index(handle, "blk.0.attn_q.head_0.bias") >= 0)
  untied       = TinyNN.tnn_gguf_find_index(handle, "output.weight") >= 0

  # M2.3 MoE detection — same sentinel as detect_smollm2_flags.
  is_moe     = TinyNN.tnn_gguf_find_index(handle, "blk.0.ffn_gate_inp.weight") >= 0
  moe_n_exp  = 0
  moe_n_used = 0
  if is_moe
    # Expert counts live under the same arch prefix as every other scalar.
    ne_v = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".expert_count")
    nu_v = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".expert_used_count")
    # This method used to read only the hard-coded "llama." keys; keep them
    # as a fallback so any file that resolved before still resolves.
    if arch_prefix != "llama"
      if ne_v <= 0
        ne_v = TinyNN.tnn_gguf_get_u32(handle, "llama.expert_count")
      end
      if nu_v <= 0
        nu_v = TinyNN.tnn_gguf_get_u32(handle, "llama.expert_used_count")
      end
    end
    moe_n_exp  = ne_v > 0 ? ne_v : 0
    moe_n_used = nu_v > 0 ? nu_v : 0
  end

  # Tokenizer metadata (most current GGUFs in this repo don't embed
  # it — our converter skips it. Read anyway for forward-compat).
  bos = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.bos_token_id")
  eos = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.eos_token_id")
  pad = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.padding_token_id")
  unk = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.unknown_token_id")
  tok_kind = :external
  if vocab_n > 0
    tok_kind = :gguf_embedded
  end

  # Family detection. The current set of models all share the Llama-family
  # graph; the only structural delta we care about is QKV bias.
  family = :llama
  if has_qkv_bias
    family = :qwen2
  end

  # Everything needed has been copied out of the file.
  TinyNN.tnn_gguf_free(handle)

  # Arch.new positional args: family, name, vocab, d_model, n_layers,
  # n_q, n_kv, d_head, d_ff, max_pos, untied, qkv_bias, qk_norm,
  # swa_window, rope_freq_base, rope_scale, rope_partial, norm_kind,
  # norm_eps, ffn_kind, ffn_bias, moe, n_experts, n_experts_used,
  # n_shared_experts, expert_gating, tokenizer_kind, bos, eos, pad,
  # unk, add_bos, embed_scale.
  Arch.new(family, path,
           vocab, d_model, n_layers, n_q, n_kv, d_head, d_ff,
           ctx, untied,
           has_qkv_bias, false, nil,
           rope_base, 1.0, 1.0,
           :rms, rms_eps,
           :swiglu, false,
           is_moe, moe_n_exp, moe_n_used, 0, :softmax,
           tok_kind, bos, eos, pad, unk, false,
           1.0)
end

.load_or_fail(path, cmd) ⇒ Object

Load an arch from `path` or FAIL LOUD. Every infer/eval runner repeated the same `from_gguf` + nil-check + exit; this folds it. `cmd` is the runner’s name for the error prefix (“toy-infer” / “toy-eval”). Returns the Arch (never nil — exits 1 on failure).



145
146
147
148
149
150
151
152
153
# File 'lib/toy/models/arch.rb', line 145

# Load the Arch for `path`, or print an error and terminate the process.
#
# @param path [String] GGUF file passed through to Arch.from_gguf
# @param cmd [String] runner name used as the error-message prefix
# @return [Arch] never nil; on failure the process exits with status 1
def self.load_or_fail(path, cmd)
  arch = Arch.from_gguf(path)
  # Happy path first: anything other than nil is handed straight back.
  if arch != nil
    return arch
  end

  puts cmd + ": could not load " + path +
       " — set GGUF= to a valid file (see `toy list`)."
  exit 1
end

Instance Method Details

#gqa?Boolean

Returns:

  • (Boolean)


118
119
120
# File 'lib/toy/models/arch.rb', line 118

# True when there are fewer KV heads than query heads.
#
# @return [Boolean]
def gqa?
  kv_heads = @n_heads_kv
  kv_heads < @n_heads_q
end

#moe?Boolean

Returns:

  • (Boolean)


114
115
116
# File 'lib/toy/models/arch.rb', line 114

# Predicate form of the MoE flag: returns @moe exactly as it was stored by
# #initialize, without coercing it to true/false.
def moe?
  @moe
end

#summaryObject

Pretty one-line summary for log lines / startup.



127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/toy/models/arch.rb', line 127

# One-line, human-readable description for log lines / startup banners.
#
# @return [String] "Arch(<family>, vocab=…, d=…, L=…, n_q=…, n_kv=…,
#   d_ff=…, qkv_bias=…, rope_base=…, <norm_kind> eps=…, <ffn_kind>)"
def summary
  # Each entry is one comma-separated field, in display order.
  fields = [
    @family.to_s,
    "vocab=" + @vocab_size.to_s,
    "d=" + @d_model.to_s,
    "L=" + @n_layers.to_s,
    "n_q=" + @n_heads_q.to_s,
    "n_kv=" + @n_heads_kv.to_s,
    "d_ff=" + @d_ff.to_s,
    "qkv_bias=" + @qkv_bias.to_s,
    "rope_base=" + @rope_freq_base.to_s,
    @norm_kind.to_s + " eps=" + @norm_eps.to_s,
    @ffn_kind.to_s
  ]
  "Arch(" + fields.join(", ") + ")"
end

#swa?Boolean

Returns:

  • (Boolean)


122
123
124
# File 'lib/toy/models/arch.rb', line 122

# True when a sliding-window size is set, i.e. @swa_window is not nil.
#
# @return [Boolean]
def swa?
  return false if @swa_window == nil
  true
end