Class: Arch

Inherits:

Object

Object
Arch

show all

Defined in: lib/toy/models/arch.rb

Instance Attribute Summary collapse

#add_bos_by_default ⇒ Object readonly

Returns the value of attribute add_bos_by_default.
#bos_id ⇒ Object readonly

Returns the value of attribute bos_id.
#d_ff ⇒ Object readonly

Returns the value of attribute d_ff.
#d_head ⇒ Object readonly

Returns the value of attribute d_head.
#d_model ⇒ Object readonly

Returns the value of attribute d_model.
#embed_scale ⇒ Object readonly

Embed scale (some models multiply token_embd by sqrt(d_model); Llama-family does not).
#eos_id ⇒ Object readonly

Returns the value of attribute eos_id.
#expert_gating ⇒ Object readonly

:softmax | :sigmoid.
#family ⇒ Object readonly

Identity — :qwen2, :llama, :smollm.
#ffn_bias ⇒ Object readonly

Returns the value of attribute ffn_bias.
#ffn_kind ⇒ Object readonly

FFN.
#max_position ⇒ Object readonly

Returns the value of attribute max_position.
#moe ⇒ Object readonly

MoE (zeros / false when not MoE).
#n_experts ⇒ Object readonly

Returns the value of attribute n_experts.
#n_experts_used ⇒ Object readonly

Returns the value of attribute n_experts_used.
#n_heads_kv ⇒ Object readonly

Returns the value of attribute n_heads_kv.
#n_heads_q ⇒ Object readonly

Returns the value of attribute n_heads_q.
#n_layers ⇒ Object readonly

Returns the value of attribute n_layers.
#n_shared_experts ⇒ Object readonly

Returns the value of attribute n_shared_experts.
#name ⇒ Object readonly

Returns the value of attribute name.
#norm_eps ⇒ Object readonly

Returns the value of attribute norm_eps.
#norm_kind ⇒ Object readonly

Norm.
#pad_id ⇒ Object readonly

Returns the value of attribute pad_id.
#qk_norm ⇒ Object readonly

Returns the value of attribute qk_norm.
#qkv_bias ⇒ Object readonly

Attention.
#rope_freq_base ⇒ Object readonly

RoPE.
#rope_freq_scale ⇒ Object readonly

Returns the value of attribute rope_freq_scale.
#rope_partial_factor ⇒ Object readonly

1.0 default; 0.5 for GLM/Phi.
#swa_window ⇒ Object readonly

nil when no sliding-window.
#tokenizer_kind ⇒ Object readonly

Tokenizer (Phase 0: GGUF metadata or nil).
#unk_id ⇒ Object readonly

Returns the value of attribute unk_id.
#untied_lm_head ⇒ Object readonly

Returns the value of attribute untied_lm_head.
#vocab_size ⇒ Object readonly

Dimensions.

Class Method Summary collapse

.from_gguf(path) ⇒ Object

Detect the architecture family by reading the GGUF and inspecting what’s there.
.load_or_fail(path, cmd) ⇒ Object

Load an arch from `path` or FAIL LOUD.

Instance Method Summary collapse

#gqa? ⇒ Boolean
#initialize(family, name, vocab_size, d_model, n_layers, n_heads_q, n_heads_kv, d_head, d_ff, max_position, untied_lm_head, qkv_bias, qk_norm, swa_window, rope_freq_base, rope_freq_scale, rope_partial_factor, norm_kind, norm_eps, ffn_kind, ffn_bias, moe, n_experts, n_experts_used, n_shared_experts, expert_gating, tokenizer_kind, bos_id, eos_id, pad_id, unk_id, add_bos_by_default, embed_scale) ⇒ Arch constructor

A new instance of Arch.
#moe? ⇒ Boolean
#summary ⇒ Object

Pretty one-line summary for log lines / startup.
#swa? ⇒ Boolean

Constructor Details

#initialize(family, name, vocab_size, d_model, n_layers, n_heads_q, n_heads_kv, d_head, d_ff, max_position, untied_lm_head, qkv_bias, qk_norm, swa_window, rope_freq_base, rope_freq_scale, rope_partial_factor, norm_kind, norm_eps, ffn_kind, ffn_bias, moe, n_experts, n_experts_used, n_shared_experts, expert_gating, tokenizer_kind, bos_id, eos_id, pad_id, unk_id, add_bos_by_default, embed_scale) ⇒ `Arch`

Returns a new instance of Arch.

# File 'lib/toy/models/arch.rb', line 69

# Stores every architecture descriptor verbatim; no validation or
# derivation happens here. All 33 arguments are positional and required,
# in the order listed below (grouped the same way as the readers).
def initialize(family, name,
               vocab_size, d_model, n_layers, n_heads_q, n_heads_kv, d_head, d_ff,
               max_position, untied_lm_head,
               qkv_bias, qk_norm, swa_window,
               rope_freq_base, rope_freq_scale, rope_partial_factor,
               norm_kind, norm_eps,
               ffn_kind, ffn_bias,
               moe, n_experts, n_experts_used, n_shared_experts, expert_gating,
               tokenizer_kind, bos_id, eos_id, pad_id, unk_id, add_bos_by_default,
               embed_scale)
  # Identity
  @family = family
  @name = name

  # Dimensions
  @vocab_size = vocab_size
  @d_model = d_model
  @n_layers = n_layers
  @n_heads_q = n_heads_q
  @n_heads_kv = n_heads_kv
  @d_head = d_head
  @d_ff = d_ff
  @max_position = max_position
  @untied_lm_head = untied_lm_head

  # Attention
  @qkv_bias = qkv_bias
  @qk_norm = qk_norm
  @swa_window = swa_window

  # RoPE
  @rope_freq_base = rope_freq_base
  @rope_freq_scale = rope_freq_scale
  @rope_partial_factor = rope_partial_factor

  # Norm
  @norm_kind = norm_kind
  @norm_eps = norm_eps

  # FFN
  @ffn_kind = ffn_kind
  @ffn_bias = ffn_bias

  # MoE
  @moe = moe
  @n_experts = n_experts
  @n_experts_used = n_experts_used
  @n_shared_experts = n_shared_experts
  @expert_gating = expert_gating

  # Tokenizer
  @tokenizer_kind = tokenizer_kind
  @bos_id = bos_id
  @eos_id = eos_id
  @pad_id = pad_id
  @unk_id = unk_id
  @add_bos_by_default = add_bos_by_default

  # Embedding scale
  @embed_scale = embed_scale
end

Instance Attribute Details

#add_bos_by_default ⇒ `Object` (readonly)

Returns the value of attribute add_bos_by_default.



63
64
65

# File 'lib/toy/models/arch.rb', line 63

# Reader for the add_bos_by_default constructor argument.
# Arch.from_gguf always passes false for it.
def add_bos_by_default
  @add_bos_by_default
end

#bos_id ⇒ `Object` (readonly)

Returns the value of attribute bos_id.



59
60
61

# File 'lib/toy/models/arch.rb', line 59

# Reader for the BOS token id. Arch.from_gguf fills it from the
# "tokenizer.ggml.bos_token_id" GGUF key.
# NOTE(review): presumably negative when the key is absent — confirm.
def bos_id
  @bos_id
end

#d_ff ⇒ `Object` (readonly)

Returns the value of attribute d_ff.



28
29
30

# File 'lib/toy/models/arch.rb', line 28

# Reader for the feed-forward width. Arch.from_gguf fills it from the
# "<arch>.feed_forward_length" GGUF key.
def d_ff
  @d_ff
end

#d_head ⇒ `Object` (readonly)

Returns the value of attribute d_head.



27
28
29

# File 'lib/toy/models/arch.rb', line 27

# Reader for the per-head dimension. Arch.from_gguf does not read this
# from metadata; it computes it as d_model / n_heads_q.
def d_head
  @d_head
end

#d_model ⇒ `Object` (readonly)

Returns the value of attribute d_model.



23
24
25

# File 'lib/toy/models/arch.rb', line 23

# Reader for the model width. Arch.from_gguf fills it from the
# "<arch>.embedding_length" GGUF key.
def d_model
  @d_model
end

#embed_scale ⇒ `Object` (readonly)

Embed scale (some models multiply token_embd by sqrt(d_model); Llama-family does not).



67
68
69

# File 'lib/toy/models/arch.rb', line 67

# Reader for the embedding scale (some models multiply token_embd by
# sqrt(d_model); Llama-family does not). Arch.from_gguf passes 1.0.
def embed_scale
  @embed_scale
end

#eos_id ⇒ `Object` (readonly)

Returns the value of attribute eos_id.



60
61
62

# File 'lib/toy/models/arch.rb', line 60

# Reader for the EOS token id. Arch.from_gguf fills it from the
# "tokenizer.ggml.eos_token_id" GGUF key.
# NOTE(review): presumably negative when the key is absent — confirm.
def eos_id
  @eos_id
end

#expert_gating ⇒ `Object` (readonly)

:softmax | :sigmoid



55
56
57

# File 'lib/toy/models/arch.rb', line 55

# Reader for the expert gating function, :softmax or :sigmoid.
# Arch.from_gguf always passes :softmax.
def expert_gating
  @expert_gating
end

#family ⇒ `Object` (readonly)

Identity — :qwen2, :llama, :smollm. The label comes from tensor-presence detection (NOT general.architecture: our converter writes “llama” for every model, so it’s unreliable).



18
19
20

# File 'lib/toy/models/arch.rb', line 18

# Reader for the architecture family symbol. Arch.from_gguf derives it
# from tensor presence: :qwen2 when a Q-projection bias tensor exists
# in block 0, otherwise :llama.
def family
  @family
end

#ffn_bias ⇒ `Object` (readonly)

Returns the value of attribute ffn_bias.



48
49
50

# File 'lib/toy/models/arch.rb', line 48

# Reader for the FFN-bias flag. Arch.from_gguf always passes false.
def ffn_bias
  @ffn_bias
end

#ffn_kind ⇒ `Object` (readonly)

FFN



47
48
49

# File 'lib/toy/models/arch.rb', line 47

# Reader for the FFN variant symbol. Arch.from_gguf always passes :swiglu.
def ffn_kind
  @ffn_kind
end

#max_position ⇒ `Object` (readonly)

Returns the value of attribute max_position.



29
30
31

# File 'lib/toy/models/arch.rb', line 29

# Reader for the maximum context length. Arch.from_gguf fills it from
# "<arch>.context_length", defaulting to 8192 when that key is missing.
def max_position
  @max_position
end

#moe ⇒ `Object` (readonly)

MoE (zeros / false when not MoE)



51
52
53

# File 'lib/toy/models/arch.rb', line 51

# Reader for the mixture-of-experts flag. Arch.from_gguf sets it to true
# when the "blk.0.ffn_gate_inp.weight" tensor is present.
def moe
  @moe
end

#n_experts ⇒ `Object` (readonly)

Returns the value of attribute n_experts.



52
53
54

# File 'lib/toy/models/arch.rb', line 52

# Reader for the total expert count. Arch.from_gguf leaves it at 0
# when the model is not MoE.
def n_experts
  @n_experts
end

#n_experts_used ⇒ `Object` (readonly)

Returns the value of attribute n_experts_used.



53
54
55

# File 'lib/toy/models/arch.rb', line 53

# Reader for the experts-used count. Arch.from_gguf leaves it at 0
# when the model is not MoE.
def n_experts_used
  @n_experts_used
end

#n_heads_kv ⇒ `Object` (readonly)

Returns the value of attribute n_heads_kv.



26
27
28

# File 'lib/toy/models/arch.rb', line 26

# Reader for the key/value head count. Arch.from_gguf fills it from the
# "<arch>.attention.head_count_kv" GGUF key.
def n_heads_kv
  @n_heads_kv
end

#n_heads_q ⇒ `Object` (readonly)

Returns the value of attribute n_heads_q.



25
26
27

# File 'lib/toy/models/arch.rb', line 25

# Reader for the query head count. Arch.from_gguf fills it from the
# "<arch>.attention.head_count" GGUF key.
def n_heads_q
  @n_heads_q
end

#n_layers ⇒ `Object` (readonly)

Returns the value of attribute n_layers.



24
25
26

# File 'lib/toy/models/arch.rb', line 24

# Reader for the layer count. Arch.from_gguf fills it from the
# "<arch>.block_count" GGUF key.
def n_layers
  @n_layers
end

#n_shared_experts ⇒ `Object` (readonly)

Returns the value of attribute n_shared_experts.



54
55
56

# File 'lib/toy/models/arch.rb', line 54

# Reader for the shared-expert count. Arch.from_gguf always passes 0.
def n_shared_experts
  @n_shared_experts
end

#name ⇒ `Object` (readonly)

Returns the value of attribute name.



19
20
21

# File 'lib/toy/models/arch.rb', line 19

# Reader for the model name. Arch.from_gguf passes the GGUF file path here.
def name
  @name
end

#norm_eps ⇒ `Object` (readonly)

Returns the value of attribute norm_eps.



44
45
46

# File 'lib/toy/models/arch.rb', line 44

# Reader for the normalization epsilon. Arch.from_gguf fills it from the
# "<arch>.attention.layer_norm_rms_epsilon" GGUF key.
def norm_eps
  @norm_eps
end

#norm_kind ⇒ `Object` (readonly)

Norm



43
44
45

# File 'lib/toy/models/arch.rb', line 43

# Reader for the normalization variant symbol. Arch.from_gguf always
# passes :rms.
def norm_kind
  @norm_kind
end

#pad_id ⇒ `Object` (readonly)

Returns the value of attribute pad_id.



61
62
63

# File 'lib/toy/models/arch.rb', line 61

# Reader for the padding token id. Arch.from_gguf fills it from the
# "tokenizer.ggml.padding_token_id" GGUF key.
# NOTE(review): presumably negative when the key is absent — confirm.
def pad_id
  @pad_id
end

#qk_norm ⇒ `Object` (readonly)

Returns the value of attribute qk_norm.



34
35
36

# File 'lib/toy/models/arch.rb', line 34

# Reader for the QK-norm flag. Arch.from_gguf always passes false.
def qk_norm
  @qk_norm
end

#qkv_bias ⇒ `Object` (readonly)

Attention



33
34
35

# File 'lib/toy/models/arch.rb', line 33

# Reader for the QKV-bias flag. Arch.from_gguf sets it to true when either
# "blk.0.attn_q.bias" or "blk.0.attn_q.head_0.bias" is present in the file.
def qkv_bias
  @qkv_bias
end

#rope_freq_base ⇒ `Object` (readonly)

RoPE



38
39
40

# File 'lib/toy/models/arch.rb', line 38

# Reader for the RoPE frequency base. Arch.from_gguf fills it from the
# "<arch>.rope.freq_base" GGUF key.
def rope_freq_base
  @rope_freq_base
end

#rope_freq_scale ⇒ `Object` (readonly)

Returns the value of attribute rope_freq_scale.



39
40
41

# File 'lib/toy/models/arch.rb', line 39

# Reader for the RoPE frequency scale. Arch.from_gguf always passes 1.0.
def rope_freq_scale
  @rope_freq_scale
end

#rope_partial_factor ⇒ `Object` (readonly)

1.0 default; 0.5 for GLM/Phi



40
41
42

# File 'lib/toy/models/arch.rb', line 40

# Reader for the partial-RoPE factor: 1.0 by default, 0.5 for GLM/Phi.
# Arch.from_gguf always passes 1.0.
def rope_partial_factor
  @rope_partial_factor
end

#swa_window ⇒ `Object` (readonly)

nil when no sliding-window



35
36
37

# File 'lib/toy/models/arch.rb', line 35

# Reader for the sliding-window attention size; nil when the model has no
# sliding window. Arch.from_gguf always passes nil.
def swa_window
  @swa_window
end

#tokenizer_kind ⇒ `Object` (readonly)

Tokenizer (Phase 0: GGUF metadata or nil)



58
59
60

# File 'lib/toy/models/arch.rb', line 58

# Reader for the tokenizer source. Arch.from_gguf sets :gguf_embedded when
# the "tokenizer.ggml.tokens" array is non-empty, otherwise :external.
def tokenizer_kind
  @tokenizer_kind
end

#unk_id ⇒ `Object` (readonly)

Returns the value of attribute unk_id.



62
63
64

# File 'lib/toy/models/arch.rb', line 62

# Reader for the unknown-token id. Arch.from_gguf fills it from the
# "tokenizer.ggml.unknown_token_id" GGUF key.
# NOTE(review): presumably negative when the key is absent — confirm.
def unk_id
  @unk_id
end

#untied_lm_head ⇒ `Object` (readonly)

Returns the value of attribute untied_lm_head.



30
31
32

# File 'lib/toy/models/arch.rb', line 30

# Reader for the untied-LM-head flag. Arch.from_gguf sets it to true when
# an "output.weight" tensor is present in the file.
def untied_lm_head
  @untied_lm_head
end

#vocab_size ⇒ `Object` (readonly)

Dimensions



22
23
24

# File 'lib/toy/models/arch.rb', line 22

# Reader for the vocabulary size. Arch.from_gguf fills it from
# "<arch>.vocab_size", falling back to the length of the
# "tokenizer.ggml.tokens" array when that key is missing.
def vocab_size
  @vocab_size
end

Class Method Details

.from_gguf(path) ⇒ `Object`

Detect the architecture family by reading the GGUF and inspecting what’s there. The general.architecture key is unreliable (our converter writes “llama” for every model), so we use tensor presence + RoPE freq_base as the actual signal.

# File 'lib/toy/models/arch.rb', line 159

# Build an Arch from the scalar metadata and tensor names of the GGUF at
# `path`.
#
# The metadata key prefix is probed via "<prefix>.embedding_length"
# ("llama", then "olmoe", then "gemma2"); every arch-scoped key, including
# the MoE expert counts, is then read under that same prefix. Throughout,
# a negative result from a TinyNN u32/index lookup is treated as "absent".
#
# Returns the new Arch, or nil (after printing a message) when the file
# cannot be opened or lacks a usable embedding_length / head_count. The
# GGUF handle is freed on every path after a successful open.
def self.from_gguf(path)
  handle = TinyNN.tnn_gguf_load(path)
  if handle == nil
    puts "Arch.from_gguf: failed to open " + path
    return nil
  end

  # Llama-family GGUF keys are the canonical scalar metadata (the
  # converter writes "llama.*" for SmolLM2/TinyLlama/Qwen2.5/Llama3
  # alike). Read once and reuse.
  # M2.3: support multiple arch prefixes (llama.* OR olmoe.* OR …).
  # Probe embedding_length (present in every arch); whichever
  # resolves wins. vocab_size isn't reliable — some archs (OLMoE)
  # omit it and rely on the tokenizer.ggml.tokens array length.
  arch_prefix = "llama"
  if TinyNN.tnn_gguf_get_u32(handle, "llama.embedding_length") < 0
    if TinyNN.tnn_gguf_get_u32(handle, "olmoe.embedding_length") >= 0
      arch_prefix = "olmoe"
    elsif TinyNN.tnn_gguf_get_u32(handle, "gemma2.embedding_length") >= 0
      arch_prefix = "gemma2"
    end
  end

  # Token-array length: used both as the vocab_size fallback and to decide
  # whether the tokenizer is embedded, so read it once.
  vocab_n = TinyNN.tnn_gguf_arr_n(handle, "tokenizer.ggml.tokens")
  vocab   = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".vocab_size")
  if vocab < 0
    vocab = vocab_n
  end
  d_model  = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".embedding_length")
  d_ff     = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".feed_forward_length")
  n_q      = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".attention.head_count")
  n_kv     = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".attention.head_count_kv")
  n_layers = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".block_count")
  ctx      = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".context_length")
  if ctx < 0
    ctx = 8192   # default if metadata missing
  end

  # d_head is derived by division, so refuse files where either operand is
  # missing or zero instead of dividing by 0 / -1 with the handle still open.
  if d_model <= 0 || n_q <= 0
    TinyNN.tnn_gguf_free(handle)
    puts "Arch.from_gguf: missing embedding_length / attention.head_count in " + path
    return nil
  end

  rope_base = TinyNN.tnn_gguf_get_f32(handle, arch_prefix + ".rope.freq_base")
  rms_eps   = TinyNN.tnn_gguf_get_f32(handle, arch_prefix + ".attention.layer_norm_rms_epsilon")
  d_head    = d_model / n_q

  # Tensor-presence flags. Per-head bias (toy from-scratch ckpts, #153)
  # carries blk.0.attn_q.head_0.bias instead of the fused name.
  has_qkv_bias = (TinyNN.tnn_gguf_find_index(handle, "blk.0.attn_q.bias") >= 0) ||
                 (TinyNN.tnn_gguf_find_index(handle, "blk.0.attn_q.head_0.bias") >= 0)
  untied       = TinyNN.tnn_gguf_find_index(handle, "output.weight") >= 0
  # M2.3 MoE detection — same sentinel as detect_smollm2_flags.
  is_moe       = TinyNN.tnn_gguf_find_index(handle, "blk.0.ffn_gate_inp.weight") >= 0
  moe_n_exp    = 0
  moe_n_used   = 0
  if is_moe
    # Expert counts live under the detected arch prefix (e.g.
    # "olmoe.expert_count"), not unconditionally under "llama.".
    ne_v = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".expert_count")
    nu_v = TinyNN.tnn_gguf_get_u32(handle, arch_prefix + ".expert_used_count")
    moe_n_exp  = ne_v > 0 ? ne_v : 0
    moe_n_used = nu_v > 0 ? nu_v : 0
  end

  # Tokenizer metadata (most current GGUFs in this repo don't embed
  # it — our converter skips it. Read anyway for forward-compat).
  bos = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.bos_token_id")
  eos = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.eos_token_id")
  pad = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.padding_token_id")
  unk = TinyNN.tnn_gguf_get_u32(handle, "tokenizer.ggml.unknown_token_id")
  tok_kind = :external
  if vocab_n > 0
    tok_kind = :gguf_embedded
  end

  # Family detection. The current set of models all share the
  # Llama-family graph; the only structural delta we care about is
  # QKV bias.
  family = :llama
  if has_qkv_bias
    family = :qwen2
  end

  TinyNN.tnn_gguf_free(handle)

  # Arch.new positional args: family, name, vocab, d_model, n_layers,
  # n_q, n_kv, d_head, d_ff, max_pos, untied, qkv_bias, qk_norm,
  # swa_window, rope_freq_base, rope_scale, rope_partial, norm_kind,
  # norm_eps, ffn_kind, ffn_bias, moe, n_experts, n_experts_used,
  # n_shared_experts, expert_gating, tokenizer_kind, bos, eos, pad,
  # unk, add_bos, embed_scale.
  Arch.new(family, path,
           vocab, d_model, n_layers, n_q, n_kv, d_head, d_ff,
           ctx, untied,
           has_qkv_bias, false, nil,
           rope_base, 1.0, 1.0,
           :rms, rms_eps,
           :swiglu, false,
           is_moe, moe_n_exp, moe_n_used, 0, :softmax,
           tok_kind, bos, eos, pad, unk, false,
           1.0)
end

.load_or_fail(path, cmd) ⇒ `Object`

Load an arch from `path` or FAIL LOUD. Every infer/eval runner repeated the same `from_gguf` + nil-check + exit; this folds it. `cmd` is the runner’s name for the error prefix (“toy-infer” / “toy-eval”). Returns the Arch (never nil — exits 1 on failure).

# File 'lib/toy/models/arch.rb', line 145

# Loads the Arch for the GGUF at `path`, or prints a diagnostic prefixed
# with `cmd` (the calling runner's name) and terminates with exit status 1.
# Never returns nil.
def self.load_or_fail(path, cmd)
  arch = Arch.from_gguf(path)
  return arch unless arch == nil

  puts cmd + ": could not load " + path +
       " — set GGUF= to a valid file (see `toy list`)."
  exit 1
end

Instance Method Details

#gqa? ⇒ `Boolean`

Returns:

(Boolean)



118
119
120

# File 'lib/toy/models/arch.rb', line 118

# True when there are more query heads than key/value heads.
def gqa?
  @n_heads_q > @n_heads_kv
end

#moe? ⇒ `Boolean`

Returns:

(Boolean)



114
115
116

# File 'lib/toy/models/arch.rb', line 114

# Predicate form of the moe attribute: returns the stored @moe value as-is.
def moe?
  @moe
end

#summary ⇒ `Object`

Pretty one-line summary for log lines / startup.

# File 'lib/toy/models/arch.rb', line 127

# One-line, human-readable description for log lines / startup, e.g.
# "Arch(llama, vocab=..., d=..., L=..., ..., rms eps=..., swiglu)".
def summary
  fields = [
    @family.to_s,
    "vocab=" + @vocab_size.to_s,
    "d=" + @d_model.to_s,
    "L=" + @n_layers.to_s,
    "n_q=" + @n_heads_q.to_s,
    "n_kv=" + @n_heads_kv.to_s,
    "d_ff=" + @d_ff.to_s,
    "qkv_bias=" + @qkv_bias.to_s,
    "rope_base=" + @rope_freq_base.to_s,
    @norm_kind.to_s + " eps=" + @norm_eps.to_s,
    @ffn_kind.to_s
  ]
  "Arch(" + fields.join(", ") + ")"
end

#swa? ⇒ `Boolean`

Returns:

(Boolean)



122
123
124

# File 'lib/toy/models/arch.rb', line 122

# True when a sliding-window size is set (swa_window is not nil).
def swa?
  !@swa_window.nil?
end

Class: Arch

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

Instance Attribute Details

#add_bos_by_default ⇒ Object (readonly)

#bos_id ⇒ Object (readonly)

#d_ff ⇒ Object (readonly)

#d_head ⇒ Object (readonly)

#d_model ⇒ Object (readonly)

#embed_scale ⇒ Object (readonly)

#eos_id ⇒ Object (readonly)

#expert_gating ⇒ Object (readonly)

#family ⇒ Object (readonly)

#ffn_bias ⇒ Object (readonly)

#ffn_kind ⇒ Object (readonly)

#max_position ⇒ Object (readonly)

#moe ⇒ Object (readonly)

#n_experts ⇒ Object (readonly)

#n_experts_used ⇒ Object (readonly)

#n_heads_kv ⇒ Object (readonly)

#n_heads_q ⇒ Object (readonly)

#n_layers ⇒ Object (readonly)

#n_shared_experts ⇒ Object (readonly)

#name ⇒ Object (readonly)

#norm_eps ⇒ Object (readonly)

#norm_kind ⇒ Object (readonly)

#pad_id ⇒ Object (readonly)

#qk_norm ⇒ Object (readonly)

#qkv_bias ⇒ Object (readonly)

#rope_freq_base ⇒ Object (readonly)

#rope_freq_scale ⇒ Object (readonly)

#rope_partial_factor ⇒ Object (readonly)

#swa_window ⇒ Object (readonly)

#tokenizer_kind ⇒ Object (readonly)

#unk_id ⇒ Object (readonly)

#untied_lm_head ⇒ Object (readonly)

#vocab_size ⇒ Object (readonly)

Class Method Details

.from_gguf(path) ⇒ Object

.load_or_fail(path, cmd) ⇒ Object

Instance Method Details

#gqa? ⇒ Boolean

#moe? ⇒ Boolean

#summary ⇒ Object

#swa? ⇒ Boolean

#add_bos_by_default ⇒ `Object` (readonly)

#bos_id ⇒ `Object` (readonly)

#d_ff ⇒ `Object` (readonly)

#d_head ⇒ `Object` (readonly)

#d_model ⇒ `Object` (readonly)

#embed_scale ⇒ `Object` (readonly)

#eos_id ⇒ `Object` (readonly)

#expert_gating ⇒ `Object` (readonly)

#family ⇒ `Object` (readonly)

#ffn_bias ⇒ `Object` (readonly)

#ffn_kind ⇒ `Object` (readonly)

#max_position ⇒ `Object` (readonly)

#moe ⇒ `Object` (readonly)

#n_experts ⇒ `Object` (readonly)

#n_experts_used ⇒ `Object` (readonly)

#n_heads_kv ⇒ `Object` (readonly)

#n_heads_q ⇒ `Object` (readonly)

#n_layers ⇒ `Object` (readonly)

#n_shared_experts ⇒ `Object` (readonly)

#name ⇒ `Object` (readonly)

#norm_eps ⇒ `Object` (readonly)

#norm_kind ⇒ `Object` (readonly)

#pad_id ⇒ `Object` (readonly)

#qk_norm ⇒ `Object` (readonly)

#qkv_bias ⇒ `Object` (readonly)

#rope_freq_base ⇒ `Object` (readonly)

#rope_freq_scale ⇒ `Object` (readonly)

#rope_partial_factor ⇒ `Object` (readonly)

#swa_window ⇒ `Object` (readonly)

#tokenizer_kind ⇒ `Object` (readonly)

#unk_id ⇒ `Object` (readonly)

#untied_lm_head ⇒ `Object` (readonly)

#vocab_size ⇒ `Object` (readonly)

.from_gguf(path) ⇒ `Object`

.load_or_fail(path, cmd) ⇒ `Object`

#gqa? ⇒ `Boolean`

#moe? ⇒ `Boolean`

#summary ⇒ `Object`

#swa? ⇒ `Boolean`