Class: SmolLM2KVBlockFFICuda

Inherits:
Object
  • Object
show all
Defined in:
lib/toy/llm/engine/llama_kv_engine_cuda.rb

Overview

Per-block persistent tensors for the SmolLM2 KV cache.

Q is split per query head (n_heads of them). K, V, and their persistent buffers are split per KV head (n_kv of them).

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ SmolLM2KVBlockFFICuda

Returns a new instance of SmolLM2KVBlockFFICuda.



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 83

# Builds an empty block: every tensor slot starts out as a null handle
# obtained from TinyNNCuda.tnn_null_ptr. Slots kept as arrays start as a
# one-element array wrapping such a handle. tnn_null_ptr is called once per
# slot, in the order the slots are listed below.
#
# NOTE(review): the one-element arrays look like placeholders that are
# replaced once the head counts are known -- confirm against the loader.
#
# @return [SmolLM2KVBlockFFICuda] a new instance of SmolLM2KVBlockFFICuda
def initialize
  null_handle = -> { TinyNNCuda.tnn_null_ptr }

  # Assigns a fresh null handle to each named slot; +wrap+ selects whether
  # the handle is stored bare or inside a new one-element array.
  fill = lambda do |slots, wrap|
    slots.each do |slot|
      handle = null_handle.call
      instance_variable_set(slot, wrap ? [handle] : handle)
    end
  end

  norm_slots = %i[
    @t_rn1_gamma @t_rn2_gamma
    @t_q_norm_gamma @t_k_norm_gamma
    @t_post_attn_norm_gamma @t_post_ffn_norm_gamma
  ]

  attn_array_slots = [
    :@t_w_q,
    :@t_w_k,
    :@t_w_v,
    :@t_b_q, # per-Q-head bias (Qwen2.x)
    :@t_b_k, # per-KV-head bias
    :@t_b_v, # per-KV-head bias (1-D [d_head])
    :@t_K,
    :@t_V
  ]

  dense_slots = %i[
    @t_w_o @t_w_gate @t_w_up @t_w_down
    @t_w_router @t_w_gate_exps @t_w_up_exps @t_w_down_exps
  ]

  lora_array_slots = %i[
    @t_w_lora_a_q @t_w_lora_b_q
    @t_w_lora_a_q_m @t_w_lora_a_q_v
    @t_w_lora_b_q_m @t_w_lora_b_q_v
  ]

  fill.call(norm_slots, false)
  fill.call(attn_array_slots, true)
  fill.call(dense_slots, false)
  fill.call(lora_array_slots, true)
end

Instance Attribute Details

#t_b_k ⇒ Object

Returns the value of attribute t_b_k.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_b_k+ (annotated "per-KV-head bias" where it is initialized).
#
# @return [Array] the value held in +@t_b_k+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_b_k = @t_b_k

#t_b_q ⇒ Object

Returns the value of attribute t_b_q.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_b_q+ (annotated "per-Q-head bias (Qwen2.x)" where it is
# initialized).
#
# @return [Array] the value held in +@t_b_q+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_b_q = @t_b_q

#t_b_v ⇒ Object

Returns the value of attribute t_b_v.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_b_v+ (annotated "per-KV-head bias (1-D [d_head])" where it
# is initialized).
#
# @return [Array] the value held in +@t_b_v+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_b_v = @t_b_v

#t_K ⇒ Object

Returns the value of attribute t_K.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_K+.
#
# @return [Array] the value held in +@t_K+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_K = @t_K

#t_k_norm_gamma ⇒ Object

Returns the value of attribute t_k_norm_gamma.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_k_norm_gamma+.
#
# @return [Object] the value held in +@t_k_norm_gamma+; #initialize sets it
#   to TinyNNCuda.tnn_null_ptr
def t_k_norm_gamma = @t_k_norm_gamma

#t_post_attn_norm_gamma ⇒ Object

Returns the value of attribute t_post_attn_norm_gamma.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_post_attn_norm_gamma+.
#
# @return [Object] the value held in +@t_post_attn_norm_gamma+; #initialize
#   sets it to TinyNNCuda.tnn_null_ptr
def t_post_attn_norm_gamma = @t_post_attn_norm_gamma

#t_post_ffn_norm_gamma ⇒ Object

Returns the value of attribute t_post_ffn_norm_gamma.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_post_ffn_norm_gamma+.
#
# @return [Object] the value held in +@t_post_ffn_norm_gamma+; #initialize
#   sets it to TinyNNCuda.tnn_null_ptr
def t_post_ffn_norm_gamma = @t_post_ffn_norm_gamma

#t_q_norm_gamma ⇒ Object

Returns the value of attribute t_q_norm_gamma.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_q_norm_gamma+.
#
# @return [Object] the value held in +@t_q_norm_gamma+; #initialize sets it
#   to TinyNNCuda.tnn_null_ptr
def t_q_norm_gamma = @t_q_norm_gamma

#t_rn1_gamma ⇒ Object

Returns the value of attribute t_rn1_gamma.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_rn1_gamma+.
#
# @return [Object] the value held in +@t_rn1_gamma+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_rn1_gamma = @t_rn1_gamma

#t_rn2_gamma ⇒ Object

Returns the value of attribute t_rn2_gamma.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_rn2_gamma+.
#
# @return [Object] the value held in +@t_rn2_gamma+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_rn2_gamma = @t_rn2_gamma

#t_V ⇒ Object

Returns the value of attribute t_V.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_V+.
#
# @return [Array] the value held in +@t_V+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_V = @t_V

#t_w_down ⇒ Object

Returns the value of attribute t_w_down.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_down+.
#
# @return [Object] the value held in +@t_w_down+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_w_down = @t_w_down

#t_w_down_exps ⇒ Object

Returns the value of attribute t_w_down_exps.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_down_exps+.
#
# @return [Object] the value held in +@t_w_down_exps+; #initialize sets it
#   to TinyNNCuda.tnn_null_ptr
def t_w_down_exps = @t_w_down_exps

#t_w_gate ⇒ Object

Returns the value of attribute t_w_gate.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_gate+.
#
# @return [Object] the value held in +@t_w_gate+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_w_gate = @t_w_gate

#t_w_gate_exps ⇒ Object

Returns the value of attribute t_w_gate_exps.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_gate_exps+.
#
# @return [Object] the value held in +@t_w_gate_exps+; #initialize sets it
#   to TinyNNCuda.tnn_null_ptr
def t_w_gate_exps = @t_w_gate_exps

#t_w_k ⇒ Object

Returns the value of attribute t_w_k.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_k+.
#
# @return [Array] the value held in +@t_w_k+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_k = @t_w_k

#t_w_lora_a_q ⇒ Object

Returns the value of attribute t_w_lora_a_q.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_lora_a_q+.
#
# @return [Array] the value held in +@t_w_lora_a_q+; #initialize seeds it
#   with a one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_lora_a_q = @t_w_lora_a_q

#t_w_lora_a_q_m ⇒ Object

Returns the value of attribute t_w_lora_a_q_m.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_lora_a_q_m+.
#
# @return [Array] the value held in +@t_w_lora_a_q_m+; #initialize seeds it
#   with a one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_lora_a_q_m = @t_w_lora_a_q_m

#t_w_lora_a_q_v ⇒ Object

Returns the value of attribute t_w_lora_a_q_v.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_lora_a_q_v+.
#
# @return [Array] the value held in +@t_w_lora_a_q_v+; #initialize seeds it
#   with a one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_lora_a_q_v = @t_w_lora_a_q_v

#t_w_lora_b_q ⇒ Object

Returns the value of attribute t_w_lora_b_q.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_lora_b_q+.
#
# @return [Array] the value held in +@t_w_lora_b_q+; #initialize seeds it
#   with a one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_lora_b_q = @t_w_lora_b_q

#t_w_lora_b_q_m ⇒ Object

Returns the value of attribute t_w_lora_b_q_m.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_lora_b_q_m+.
#
# @return [Array] the value held in +@t_w_lora_b_q_m+; #initialize seeds it
#   with a one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_lora_b_q_m = @t_w_lora_b_q_m

#t_w_lora_b_q_v ⇒ Object

Returns the value of attribute t_w_lora_b_q_v.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_lora_b_q_v+.
#
# @return [Array] the value held in +@t_w_lora_b_q_v+; #initialize seeds it
#   with a one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_lora_b_q_v = @t_w_lora_b_q_v

#t_w_o ⇒ Object

Returns the value of attribute t_w_o.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_o+.
#
# @return [Object] the value held in +@t_w_o+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_w_o = @t_w_o

#t_w_q ⇒ Object

Returns the value of attribute t_w_q.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_q+.
#
# @return [Array] the value held in +@t_w_q+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_q = @t_w_q

#t_w_router ⇒ Object

Returns the value of attribute t_w_router.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_router+.
#
# @return [Object] the value held in +@t_w_router+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_w_router = @t_w_router

#t_w_up ⇒ Object

Returns the value of attribute t_w_up.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_up+.
#
# @return [Object] the value held in +@t_w_up+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_w_up = @t_w_up

#t_w_up_exps ⇒ Object

Returns the value of attribute t_w_up_exps.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_up_exps+.
#
# @return [Object] the value held in +@t_w_up_exps+; #initialize sets it to
#   TinyNNCuda.tnn_null_ptr
def t_w_up_exps = @t_w_up_exps

#t_w_v ⇒ Object

Returns the value of attribute t_w_v.



41
42
43
# File 'lib/toy/llm/engine/llama_kv_engine_cuda.rb', line 41

# Reader for +@t_w_v+.
#
# @return [Array] the value held in +@t_w_v+; #initialize seeds it with a
#   one-element array wrapping TinyNNCuda.tnn_null_ptr
def t_w_v = @t_w_v