Module: LlamaCpp

Defined in: lib/llama_cpp.rb,
lib/llama_cpp/version.rb,
ext/llama_cpp/llama_cpp.c

Overview

llama_cpp.rb provides Ruby bindings for llama.cpp.

Defined Under Namespace

Classes: LlamaAdapterLora, LlamaBatch, LlamaChatMessage, LlamaContext, LlamaContextParams, LlamaKvCacheView, LlamaKvCacheViewCell, LlamaLogitBias, LlamaModel, LlamaModelKvOverride, LlamaModelParams, LlamaModelQuantizeParams, LlamaPerfContextData, LlamaPerfSamplerData, LlamaSampler, LlamaSamplerChainParams, LlamaTokenData, LlamaTokenDataArray, LlamaVocab

Constant Summary

VERSION = The version of llama_cpp.rb that is installed.

'0.18.2'

LLAMA_CPP_VERSION = The supported version of llama.cpp.

'b4793'

LLAMA_DEFAULT_SEED =

rb_str_new2(tmp)

LLAMA_TOKEN_NULL =

INT2NUM(LLAMA_TOKEN_NULL)

LLAMA_FILE_MAGIC_GGLA =

rb_str_new2(tmp)

LLAMA_FILE_MAGIC_GGSN =

rb_str_new2(tmp)

LLAMA_FILE_MAGIC_GGSQ =

rb_str_new2(tmp)

LLAMA_SESSION_MAGIC =

rb_str_new2(tmp)

LLAMA_SESSION_VERSION =

INT2NUM(LLAMA_SESSION_VERSION)

LLAMA_STATE_SEQ_MAGIC =

rb_str_new2(tmp)

LLAMA_STATE_SEQ_VERSION =

INT2NUM(LLAMA_STATE_SEQ_VERSION)

LLAMA_VOCAB_TYPE_NONE =

INT2NUM(LLAMA_VOCAB_TYPE_NONE)

LLAMA_VOCAB_TYPE_SPM =

INT2NUM(LLAMA_VOCAB_TYPE_SPM)

LLAMA_VOCAB_TYPE_BPE =

INT2NUM(LLAMA_VOCAB_TYPE_BPE)

LLAMA_VOCAB_TYPE_WPM =

INT2NUM(LLAMA_VOCAB_TYPE_WPM)

LLAMA_VOCAB_TYPE_UGM =

INT2NUM(LLAMA_VOCAB_TYPE_UGM)

LLAMA_VOCAB_TYPE_RWKV =

INT2NUM(LLAMA_VOCAB_TYPE_RWKV)

LLAMA_VOCAB_PRE_TYPE_DEFAULT =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEFAULT)

LLAMA_VOCAB_PRE_TYPE_LLAMA3 =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_LLAMA3)

LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM)

LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER)

LLAMA_VOCAB_PRE_TYPE_FALCON =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_FALCON)

LLAMA_VOCAB_PRE_TYPE_MPT =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_MPT)

LLAMA_VOCAB_PRE_TYPE_STARCODER =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_STARCODER)

LLAMA_VOCAB_PRE_TYPE_GPT2 =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT2)

LLAMA_VOCAB_PRE_TYPE_REFACT =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_REFACT)

LLAMA_VOCAB_PRE_TYPE_COMMAND_R =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_COMMAND_R)

LLAMA_VOCAB_PRE_TYPE_STABLELM2 =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_STABLELM2)

LLAMA_VOCAB_PRE_TYPE_QWEN2 =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_QWEN2)

LLAMA_VOCAB_PRE_TYPE_OLMO =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO)

LLAMA_VOCAB_PRE_TYPE_DBRX =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX)

LLAMA_VOCAB_PRE_TYPE_SMAUG =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG)

LLAMA_VOCAB_PRE_TYPE_PORO =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO)

LLAMA_VOCAB_PRE_TYPE_CHATGLM3 =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3)

LLAMA_VOCAB_PRE_TYPE_CHATGLM4 =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4)

LLAMA_VOCAB_PRE_TYPE_VIKING =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING)

LLAMA_VOCAB_PRE_TYPE_JAIS =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS)

LLAMA_VOCAB_PRE_TYPE_TEKKEN =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_TEKKEN)

LLAMA_VOCAB_PRE_TYPE_SMOLLM =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMOLLM)

LLAMA_VOCAB_PRE_TYPE_CODESHELL =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_CODESHELL)

LLAMA_VOCAB_PRE_TYPE_BLOOM =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_BLOOM)

LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH)

LLAMA_VOCAB_PRE_TYPE_EXAONE =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_EXAONE)

LLAMA_VOCAB_PRE_TYPE_CHAMELEON =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHAMELEON)

LLAMA_VOCAB_PRE_TYPE_MINERVA =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_MINERVA)

LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM)

LLAMA_VOCAB_PRE_TYPE_GPT4O =

INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT4O)

LLAMA_ROPE_TYPE_NONE =

INT2NUM(LLAMA_ROPE_TYPE_NONE)

LLAMA_ROPE_TYPE_NORM =

INT2NUM(LLAMA_ROPE_TYPE_NORM)

LLAMA_ROPE_TYPE_NEOX =

INT2NUM(LLAMA_ROPE_TYPE_NEOX)

LLAMA_ROPE_TYPE_MROPE =

INT2NUM(LLAMA_ROPE_TYPE_MROPE)

LLAMA_ROPE_TYPE_VISION =

INT2NUM(LLAMA_ROPE_TYPE_VISION)

LLAMA_TOKEN_TYPE_UNDEFINED =

INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED)

LLAMA_TOKEN_TYPE_NORMAL =

INT2NUM(LLAMA_TOKEN_TYPE_NORMAL)

LLAMA_TOKEN_TYPE_UNKNOWN =

INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN)

LLAMA_TOKEN_TYPE_CONTROL =

INT2NUM(LLAMA_TOKEN_TYPE_CONTROL)

LLAMA_TOKEN_TYPE_USER_DEFINED =

INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED)

LLAMA_TOKEN_TYPE_UNUSED =

INT2NUM(LLAMA_TOKEN_TYPE_UNUSED)

LLAMA_TOKEN_TYPE_BYTE =

INT2NUM(LLAMA_TOKEN_TYPE_BYTE)

LLAMA_TOKEN_ATTR_UNDEFINED =

INT2NUM(LLAMA_TOKEN_ATTR_UNDEFINED)

LLAMA_TOKEN_ATTR_UNKNOWN =

INT2NUM(LLAMA_TOKEN_ATTR_UNKNOWN)

LLAMA_TOKEN_ATTR_UNUSED =

INT2NUM(LLAMA_TOKEN_ATTR_UNUSED)

LLAMA_TOKEN_ATTR_NORMAL =

INT2NUM(LLAMA_TOKEN_ATTR_NORMAL)

LLAMA_TOKEN_ATTR_CONTROL =

INT2NUM(LLAMA_TOKEN_ATTR_CONTROL)

LLAMA_TOKEN_ATTR_USER_DEFINED =

INT2NUM(LLAMA_TOKEN_ATTR_USER_DEFINED)

LLAMA_TOKEN_ATTR_BYTE =

INT2NUM(LLAMA_TOKEN_ATTR_BYTE)

LLAMA_TOKEN_ATTR_NORMALIZED =

INT2NUM(LLAMA_TOKEN_ATTR_NORMALIZED)

LLAMA_TOKEN_ATTR_LSTRIP =

INT2NUM(LLAMA_TOKEN_ATTR_LSTRIP)

LLAMA_TOKEN_ATTR_RSTRIP =

INT2NUM(LLAMA_TOKEN_ATTR_RSTRIP)

LLAMA_TOKEN_ATTR_SINGLE_WORD =

INT2NUM(LLAMA_TOKEN_ATTR_SINGLE_WORD)

LLAMA_FTYPE_ALL_F32 =

INT2NUM(LLAMA_FTYPE_ALL_F32)

LLAMA_FTYPE_MOSTLY_F16 =

INT2NUM(LLAMA_FTYPE_MOSTLY_F16)

LLAMA_FTYPE_MOSTLY_Q4_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0)

LLAMA_FTYPE_MOSTLY_Q4_1 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1)

LLAMA_FTYPE_MOSTLY_Q8_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q8_0)

LLAMA_FTYPE_MOSTLY_Q5_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0)

LLAMA_FTYPE_MOSTLY_Q5_1 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1)

LLAMA_FTYPE_MOSTLY_Q2_K =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K)

LLAMA_FTYPE_MOSTLY_Q3_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_S)

LLAMA_FTYPE_MOSTLY_Q3_K_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_M)

LLAMA_FTYPE_MOSTLY_Q3_K_L =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_L)

LLAMA_FTYPE_MOSTLY_Q4_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_K_S)

LLAMA_FTYPE_MOSTLY_Q4_K_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_K_M)

LLAMA_FTYPE_MOSTLY_Q5_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_S)

LLAMA_FTYPE_MOSTLY_Q5_K_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M)

LLAMA_FTYPE_MOSTLY_Q6_K =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K)

LLAMA_FTYPE_MOSTLY_IQ2_XXS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS)

LLAMA_FTYPE_MOSTLY_IQ2_XS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS)

LLAMA_FTYPE_MOSTLY_Q2_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S)

LLAMA_FTYPE_MOSTLY_IQ3_XS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS)

LLAMA_FTYPE_MOSTLY_IQ3_XXS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS)

LLAMA_FTYPE_MOSTLY_IQ1_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S)

LLAMA_FTYPE_MOSTLY_IQ4_NL =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL)

LLAMA_FTYPE_MOSTLY_IQ3_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_S)

LLAMA_FTYPE_MOSTLY_IQ3_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_M)

LLAMA_FTYPE_MOSTLY_IQ2_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_S)

LLAMA_FTYPE_MOSTLY_IQ2_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_M)

LLAMA_FTYPE_MOSTLY_IQ4_XS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_XS)

LLAMA_FTYPE_MOSTLY_IQ1_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_M)

LLAMA_FTYPE_MOSTLY_BF16 =

INT2NUM(LLAMA_FTYPE_MOSTLY_BF16)

LLAMA_FTYPE_MOSTLY_TQ1_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0)

LLAMA_FTYPE_MOSTLY_TQ2_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0)

LLAMA_FTYPE_GUESSED =

INT2NUM(LLAMA_FTYPE_GUESSED)

LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED)

LLAMA_ROPE_SCALING_TYPE_NONE =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE)

LLAMA_ROPE_SCALING_TYPE_LINEAR =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR)

LLAMA_ROPE_SCALING_TYPE_YARN =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN)

LLAMA_ROPE_SCALING_TYPE_LONGROPE =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_LONGROPE)

LLAMA_ROPE_SCALING_TYPE_MAX_VALUE =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE)

LLAMA_POOLING_TYPE_UNSPECIFIED =

INT2NUM(LLAMA_POOLING_TYPE_UNSPECIFIED)

LLAMA_POOLING_TYPE_NONE =

INT2NUM(LLAMA_POOLING_TYPE_NONE)

LLAMA_POOLING_TYPE_MEAN =

INT2NUM(LLAMA_POOLING_TYPE_MEAN)

LLAMA_POOLING_TYPE_CLS =

INT2NUM(LLAMA_POOLING_TYPE_CLS)

LLAMA_POOLING_TYPE_LAST =

INT2NUM(LLAMA_POOLING_TYPE_LAST)

LLAMA_POOLING_TYPE_RANK =

INT2NUM(LLAMA_POOLING_TYPE_RANK)

LLAMA_ATTENTION_TYPE_UNSPECIFIED =

INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED)

LLAMA_ATTENTION_TYPE_CAUSAL =

INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL)

LLAMA_ATTENTION_TYPE_NON_CAUSAL =

INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL)

LLAMA_SPLIT_MODE_NONE =

INT2NUM(LLAMA_SPLIT_MODE_NONE)

LLAMA_SPLIT_MODE_LAYER =

INT2NUM(LLAMA_SPLIT_MODE_LAYER)

LLAMA_SPLIT_MODE_ROW =

INT2NUM(LLAMA_SPLIT_MODE_ROW)

LLAMA_KV_OVERRIDE_TYPE_INT =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT)

LLAMA_KV_OVERRIDE_TYPE_FLOAT =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT)

LLAMA_KV_OVERRIDE_TYPE_BOOL =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL)

LLAMA_KV_OVERRIDE_TYPE_STR =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_STR)

Class Method Summary

.generate(context, prompt, n_predict: 128) ⇒ String

Generates sentences following the given prompt for operation check.
.ggml_backend_load_all ⇒ NilClass
.llama_adapter_lora_free(adapter) ⇒ NilClass
.llama_adapter_lora_init(model, path_lora) ⇒ LlamaAdapterLora
.llama_backend_free ⇒ NilClass
.llama_backend_init ⇒ NilClass
.llama_batch_free(batch) ⇒ NilClass
.llama_batch_get_one(tokens) ⇒ LlamaBatch
.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ LlamaBatch
.llama_clear_adapter_lora(context) ⇒ NilClass
.llama_decode(context, batch) ⇒ Integer
.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ String
.llama_encode(context, batch) ⇒ Integer
.llama_free(context) ⇒ NilClass
.llama_get_kv_cache_token_count(context) ⇒ Integer
.llama_get_kv_cache_used_cells(context) ⇒ Integer
.llama_get_model(context) ⇒ LlamaModel
.llama_init_from_model(model, params) ⇒ LlamaContext
.llama_kv_cache_can_shift?(context) ⇒ Boolean
.llama_kv_cache_clear(context) ⇒ NilClass
.llama_kv_cache_defrag(context) ⇒ NilClass
.llama_kv_cache_seq_add(context, seq_id, p0, p1, delta) ⇒ NilClass
.llama_kv_cache_seq_cp(context, seq_id_src, seq_id_dst, p0, p1) ⇒ NilClass
.llama_kv_cache_seq_div(context, seq_id, p0, p1, d) ⇒ NilClass
.llama_kv_cache_seq_keep(context, seq_id) ⇒ NilClass
.llama_kv_cache_seq_pos_max(context, seq_id) ⇒ Integer
.llama_kv_cache_seq_rm(context, seq_id, p0, p1) ⇒ Boolean
.llama_kv_cache_update(context) ⇒ NilClass
.llama_kv_cache_view_free(view) ⇒ NilClass
.llama_kv_cache_view_init(context, n_seq_max) ⇒ LlamaKvCacheView
.llama_kv_cache_view_update(context, view) ⇒ NilClass
.llama_max_devices ⇒ Integer
.llama_model_decoder_start_token(model) ⇒ Integer
.llama_model_desc(model) ⇒ String
.llama_model_free(model) ⇒ NilClass
.llama_model_get_vocab(model) ⇒ LlamaVocab
.llama_model_has_decoder(model) ⇒ Boolean
.llama_model_has_encoder?(model) ⇒ Boolean
.llama_model_is_recurrent?(model) ⇒ Boolean
.llama_model_load_from_file(path_model) ⇒ LlamaModel
.llama_model_load_from_splits(paths, params) ⇒ LlamaModel
.llama_model_n_ctx_train(model) ⇒ Integer
.llama_model_n_embd(model) ⇒ Integer
.llama_model_n_head(model) ⇒ Integer
.llama_model_n_head_kv(model) ⇒ Integer
.llama_model_n_layer(model) ⇒ Integer
.llama_model_n_params(model) ⇒ Integer
.llama_model_quantize(fname_inp, fname_out, params) ⇒ Boolean
.llama_model_rope_freq_scale_train(model) ⇒ Float
.llama_model_rope_type(model) ⇒ Integer
.llama_model_size(model) ⇒ Integer
.llama_n_batch(context) ⇒ Integer
.llama_n_ctx(context) ⇒ Integer
.llama_n_seq_max(context) ⇒ Integer
.llama_n_threads(context) ⇒ Integer
.llama_n_threads_batch(context) ⇒ Integer
.llama_n_ubatch(context) ⇒ Integer
.llama_numa_init(numa) ⇒ NilClass
.llama_perf_context(context) ⇒ LlamaPerfContextData
.llama_perf_context_print(context) ⇒ NilClass
.llama_perf_context_reset(context) ⇒ NilClass
.llama_perf_sampler(chain) ⇒ LlamaPerfSamplerData
.llama_perf_sampler_print(chain) ⇒ NilClass
.llama_perf_sampler_reset(chain) ⇒ NilClass
.llama_pooling_type(context) ⇒ Integer
.llama_print_system_info ⇒ String
.llama_rm_adapter_lora(context, adapter) ⇒ Integer
.llama_sampler_accept(sampler, token) ⇒ NilClass
.llama_sampler_apply(sampler, cur_p) ⇒ NilClass
.llama_sampler_chain_add(chain, smpl) ⇒ NilClass
.llama_sampler_chain_get(chain, i) ⇒ LlamaSampler
.llama_sampler_chain_init(params) ⇒ LlamaSampler
.llama_sampler_chain_n(chain) ⇒ Integer
.llama_sampler_chain_remove(chain, i) ⇒ LlamaSampler
.llama_sampler_clone(sampler) ⇒ LlamaSampler
.llama_sampler_free(sampler) ⇒ NilClass
.llama_sampler_get_seed(sampler) ⇒ Integer
.llama_sampler_init_dist(seed) ⇒ LlamaSampler
.llama_sampler_init_grammar(vocab, grammar_str, grammar_root) ⇒ LlamaSampler
.llama_sampler_init_greedy ⇒ LlamaSampler
.llama_sampler_init_infill(vocab) ⇒ LlamaSampler
.llama_sampler_init_logit_bias(n_vocab, n_logit_bias, logit_bias) ⇒ LlamaSampler
.llama_sampler_init_min_p(p, min_keep) ⇒ LlamaSampler
.llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m) ⇒ LlamaSampler
.llama_sampler_init_mirostat_v2(seed, tau, eta) ⇒ LlamaSampler
.llama_sampler_init_penalties(penalty_last_n, penalty_repeat, penalty_freq, penalty_present) ⇒ LlamaSampler
.llama_sampler_init_temp(t) ⇒ LlamaSampler
.llama_sampler_init_temp_ext(t, delta, exponent) ⇒ LlamaSampler
.llama_sampler_init_top_k(k) ⇒ LlamaSampler
.llama_sampler_init_top_n_sigma(n) ⇒ LlamaSampler
.llama_sampler_init_top_p(p, min_keep) ⇒ LlamaSampler
.llama_sampler_init_typical(p, min_keep) ⇒ LlamaSampler
.llama_sampler_init_xtc(p, t, min_keep, seed) ⇒ LlamaSampler
.llama_sampler_name(sampler) ⇒ String
.llama_sampler_reset(sampler) ⇒ NilClass
.llama_sampler_sample(sampler, context, idx) ⇒ Integer
.llama_set_adapter_lora(context, adapter, scale) ⇒ Integer
.llama_set_causal_attn(context, causal_attn) ⇒ NilClass
.llama_set_embeddings(context, embeddings) ⇒ NilClass
.llama_set_n_threads(context, n_threads, n_threads_batch) ⇒ NilClass
.llama_state_get_size(context) ⇒ Integer
.llama_state_seq_get_size(context, seq_id) ⇒ Integer
.llama_supports_gpu_offload? ⇒ Boolean
.llama_supports_mlock? ⇒ Boolean
.llama_supports_mmap? ⇒ Boolean
.llama_supports_rpc? ⇒ Boolean
.llama_synchronize(context) ⇒ NilClass
.llama_time_us ⇒ Integer
.llama_token_to_piece(vocab, token, lstrip, special) ⇒ String
.llama_tokenize(vocab, text, tokens, n_tokens_max, add_special, parse_special) ⇒ Integer
.llama_vocab_bos(vocab) ⇒ Integer
.llama_vocab_eos(vocab) ⇒ Integer
.llama_vocab_eot(vocab) ⇒ Integer
.llama_vocab_fim_mid(vocab) ⇒ Integer
.llama_vocab_fim_pad(vocab) ⇒ Integer
.llama_vocab_fim_pre(vocab) ⇒ Integer
.llama_vocab_fim_rep(vocab) ⇒ Integer
.llama_vocab_fim_sep(vocab) ⇒ Integer
.llama_vocab_fim_suf(vocab) ⇒ Integer
.llama_vocab_get_add_bos ⇒ Boolean
.llama_vocab_get_add_eos(vocab) ⇒ Boolean
.llama_vocab_get_attr(vocab, token) ⇒ Integer
.llama_vocab_get_score(vocab, token) ⇒ Float
.llama_vocab_get_text(vocab, token) ⇒ String
.llama_vocab_is_control?(vocab, token) ⇒ Boolean
.llama_vocab_is_eog?(vocab, token) ⇒ Boolean
.llama_vocab_n_tokens(vocab) ⇒ Integer
.llama_vocab_nl(vocab) ⇒ Integer
.llama_vocab_pad(vocab) ⇒ Integer
.llama_vocab_sep(vocab) ⇒ Integer
.llama_vocab_type(vocab) ⇒ Integer

Class Method Details

.generate(context, prompt, n_predict: 128) ⇒ `String`

Generates sentences following the given prompt for operation check.

Parameters:

context (LlamaCpp::LlamaContext) —

The context to use.
prompt (String) —

The prompt to start generation with.
n_predict (Integer) (defaults to: 128) —

The number of tokens to predict.

Returns:

(String)

Raises:

(ArgumentError)

# File 'lib/llama_cpp.rb', line 16

# Generates text that continues +prompt+, using a greedy sampler; intended as a quick operation check.
#
# The given +context+ is only used to look up its model; decoding runs on a
# separate context created inside this method.
#
# @param context [LlamaCpp::LlamaContext] context whose model is used
# @param prompt [String] text to start generation with
# @param n_predict [Integer] number of tokens to predict
# @return [String] the concatenated pieces of the sampled tokens
# @raise [ArgumentError] if +context+ or +prompt+ has the wrong type
# @raise [RuntimeError] if the prompt cannot be tokenized
def generate(context, prompt, n_predict: 128) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  raise ArgumentError, 'context must be a LlamaContext' unless context.is_a?(LlamaCpp::LlamaContext)
  raise ArgumentError, 'prompt must be a String' unless prompt.is_a?(String)

  model = LlamaCpp.llama_get_model(context)
  vocab = LlamaCpp.llama_model_get_vocab(model)

  # First call with an empty buffer: the negated return value is used as the prompt token count.
  n_prompt = -LlamaCpp.llama_tokenize(vocab, prompt, [], 0, true, true)

  # Second call fills the token array.
  prompt_tokens = []
  tokenize_status = LlamaCpp.llama_tokenize(vocab, prompt, prompt_tokens, n_prompt, true, true)
  raise 'Failed to tokenize the prompt' if tokenize_status.negative?

  # Dedicated context sized for the prompt plus the requested continuation.
  ctx_params = LlamaCpp::LlamaContextParams.new
  ctx_params.n_ctx = n_prompt + n_predict - 1
  ctx_params.n_batch = n_prompt
  ctx_params.no_perf = false
  ctx = LlamaCpp.llama_init_from_model(model, ctx_params)

  # Sampler chain containing only the greedy sampler.
  chain_params = LlamaCpp::LlamaSamplerChainParams.new
  chain_params.no_perf = false
  smpl = LlamaCpp.llama_sampler_chain_init(chain_params)
  LlamaCpp.llama_sampler_chain_add(smpl, LlamaCpp.llama_sampler_init_greedy)

  batch = LlamaCpp.llama_batch_get_one(prompt_tokens)
  n_pos = 0
  pieces = []
  until n_pos + batch.n_tokens >= n_prompt + n_predict
    # Stop on the first decode call that does not return 0.
    break unless LlamaCpp.llama_decode(ctx, batch).zero?

    n_pos += batch.n_tokens

    sampled = LlamaCpp.llama_sampler_sample(smpl, ctx, -1)
    break if llama_vocab_is_eog?(vocab, sampled)

    pieces << llama_token_to_piece(vocab, sampled, 0, true)

    # Feed the sampled token back as the next single-token batch.
    batch = LlamaCpp.llama_batch_get_one([sampled])
  end

  pieces.join
end

.ggml_backend_load_all ⇒ `NilClass`

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 24

/* Implements LlamaCpp.ggml_backend_load_all: calls ggml_backend_load_all() and returns nil. */
static VALUE rb_ggml_backend_load_all(VALUE self) {
  (void)self; /* receiver is not used */
  ggml_backend_load_all();
  return Qnil;
}

.llama_adapter_lora_free(adapter) ⇒ `NilClass`

Parameters:

adapter (LlamaAdapterLora)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1756

/*
 * Implements LlamaCpp.llama_adapter_lora_free(adapter).
 *
 * Frees the native adapter held by the wrapper, if any, and clears the
 * pointer so that a repeated call does nothing. Returns nil.
 * Raises ArgumentError when adapter is not a LlamaAdapterLora.
 */
static VALUE rb_llama_adapter_lora_free(VALUE self, VALUE adapter) {
  llama_adapter_lora_wrapper* wrapper;

  if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora"); /* does not return */
  }

  wrapper = get_llama_adapter_lora_wrapper(adapter);
  if (wrapper->adapter != NULL) {
    llama_adapter_lora_free(wrapper->adapter);
    wrapper->adapter = NULL;
  }

  RB_GC_GUARD(adapter);
  return Qnil;
}

.llama_adapter_lora_init(model, path_lora) ⇒ `LlamaAdapterLora`

Parameters:

model (LlamaModel)
path_lora (String)

Returns:

(LlamaAdapterLora)

# File 'ext/llama_cpp/llama_cpp.c', line 1666

/*
 * Implements LlamaCpp.llama_adapter_lora_init(model, path_lora).
 *
 * Passes the model and path to llama_adapter_lora_init() and wraps the
 * returned pointer in a new LlamaAdapterLora object; the pointer is stored
 * as returned, without a NULL check.
 * Raises ArgumentError when model is not a LlamaModel or path_lora is not a String.
 */
static VALUE rb_llama_adapter_lora_init(VALUE self, VALUE model, VALUE path_lora) {
  llama_model_wrapper* mw;
  llama_adapter_lora_wrapper* aw;
  const char* path;

  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel"); /* does not return */
  }
  if (!RB_TYPE_P(path_lora, T_STRING)) {
    rb_raise(rb_eArgError, "path_lora must be a String"); /* does not return */
  }

  mw = get_llama_model_wrapper(model);
  /* Converted before the allocation below, so a conversion error cannot leak the wrapper. */
  path = StringValueCStr(path_lora);

  aw = (llama_adapter_lora_wrapper*)ruby_xmalloc(sizeof(llama_adapter_lora_wrapper));
  aw->adapter = llama_adapter_lora_init(mw->model, path);

  RB_GC_GUARD(model);
  RB_GC_GUARD(path_lora);
  return TypedData_Wrap_Struct(rb_cLlamaAdapterLora, &llama_adapter_lora_wrapper_data_type, aw);
}

.llama_backend_free ⇒ `NilClass`

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1095

/* Implements LlamaCpp.llama_backend_free: calls llama_backend_free() and returns nil. */
static VALUE rb_llama_backend_free(VALUE self) {
  (void)self; /* receiver is not used */
  llama_backend_free();
  return Qnil;
}

.llama_backend_init ⇒ `NilClass`

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1086

/* Implements LlamaCpp.llama_backend_init: calls llama_backend_init() and returns nil. */
static VALUE rb_llama_backend_init(VALUE self) {
  (void)self; /* receiver is not used */
  llama_backend_init();
  return Qnil;
}

.llama_batch_free(batch) ⇒ `NilClass`

Parameters:

batch (LlamaBatch)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2314

/*
 * Implements LlamaCpp.llama_batch_free(batch).
 *
 * Releases the buffers owned by the wrapped llama_batch and then clears the
 * wrapped struct, so the Ruby object no longer carries dangling pointers.
 * Returns nil. Raises ArgumentError when batch is not a LlamaBatch.
 */
static VALUE rb_llama_batch_free(VALUE self, VALUE batch) {
  if (!rb_obj_is_kind_of(batch, rb_cLlamaBatch)) {
    rb_raise(rb_eArgError, "batch must be a LlamaBatch");
    return Qnil;
  }
  llama_batch* batch_ = get_llama_batch(batch);
  llama_batch_free(*batch_);
  /*
   * Reset the struct held by the Ruby object (the previous code only set the
   * local pointer to NULL, which had no effect). With all members zeroed, a
   * second llama_batch_free on this object sees only NULL pointers.
   * NOTE(review): this relies on llama_batch_free skipping NULL members, and
   * whether the wrapper's GC free function calls llama_batch_free again is
   * not visible here - confirm both against llama.cpp and llama_batch_type.
   */
  *batch_ = (llama_batch){0};
  RB_GC_GUARD(batch);
  return Qnil;
}

.llama_batch_get_one(tokens) ⇒ `LlamaBatch`

Parameters:

tokens (Array<Integer>)

Returns:

(LlamaBatch)

# File 'ext/llama_cpp/llama_cpp.c', line 2259

/*
 * Implements LlamaCpp.llama_batch_get_one(tokens).
 *
 * Copies the Integer elements of tokens into a newly allocated llama_token
 * buffer and wraps a llama_batch that points at that buffer.
 *
 * Returns a LlamaBatch, or nil when tokens is empty.
 * Raises ArgumentError when tokens is not an Array or contains a non-Integer,
 * and RangeError (from NUM2INT) when an element does not fit in a C int.
 */
static VALUE rb_llama_batch_get_one(VALUE self, VALUE tokens) {
  if (!RB_TYPE_P(tokens, T_ARRAY)) {
    rb_raise(rb_eArgError, "tokens must be an Array");
    return Qnil;
  }
  const size_t n_tokens = RARRAY_LEN(tokens);
  if (n_tokens == 0) {
    return Qnil;
  }
  /*
   * Validate every element before allocating anything: both the type check
   * and NUM2INT can raise, and raising after ruby_xmalloc would leak the
   * token buffer.
   */
  for (size_t i = 0; i < n_tokens; i++) {
    VALUE token = rb_ary_entry(tokens, (long)i);
    if (!RB_INTEGER_TYPE_P(token)) {
      rb_raise(rb_eArgError, "tokens must be an Array of Integers");
      return Qnil;
    }
    (void)NUM2INT(token); /* raises RangeError here, while nothing is allocated yet */
  }
  llama_token* tokens_ = (llama_token*)ruby_xmalloc(sizeof(llama_token) * n_tokens);
  for (size_t i = 0; i < n_tokens; i++) {
    tokens_[i] = NUM2INT(rb_ary_entry(tokens, (long)i));
  }
  llama_batch* batch = (llama_batch*)ruby_xmalloc(sizeof(llama_batch));
  /* Build the batch with a NULL token pointer, then attach the copied buffer. */
  *batch = llama_batch_get_one(NULL, (int32_t)n_tokens);
  batch->token = tokens_;
  return TypedData_Wrap_Struct(rb_cLlamaBatch, &llama_batch_type, batch);
}

.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ `LlamaBatch`

Parameters:

n_tokens (Integer)
embd (Integer)
n_seq_max (Integer)

Returns:

(LlamaBatch)

# File 'ext/llama_cpp/llama_cpp.c', line 2291

/*
 * Implements LlamaCpp.llama_batch_init(n_tokens, embd, n_seq_max).
 *
 * Wraps the llama_batch returned by llama_batch_init() in a new LlamaBatch.
 * Raises ArgumentError when an argument is not an Integer, and RangeError
 * (from NUM2INT) when an argument does not fit in a C int.
 */
static VALUE rb_llama_batch_init(VALUE self, VALUE n_tokens, VALUE embd, VALUE n_seq_max) {
  if (!RB_INTEGER_TYPE_P(n_tokens)) {
    rb_raise(rb_eArgError, "n_tokens must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(embd)) {
    rb_raise(rb_eArgError, "embd must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(n_seq_max)) {
    rb_raise(rb_eArgError, "n_seq_max must be an Integer");
    return Qnil;
  }
  /*
   * Convert before allocating: NUM2INT raises RangeError for out-of-range
   * Integers, and raising after ruby_xmalloc would leak the struct.
   */
  const int n_tokens_ = NUM2INT(n_tokens);
  const int embd_ = NUM2INT(embd);
  const int n_seq_max_ = NUM2INT(n_seq_max);
  llama_batch* batch = (llama_batch*)ruby_xmalloc(sizeof(llama_batch));
  *batch = llama_batch_init(n_tokens_, embd_, n_seq_max_);
  return TypedData_Wrap_Struct(rb_cLlamaBatch, &llama_batch_type, batch);
}

.llama_clear_adapter_lora(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1740

/*
 * Implements LlamaCpp.llama_clear_adapter_lora(context).
 *
 * Calls llama_clear_adapter_lora() on the wrapped context and returns nil.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_clear_adapter_lora(VALUE self, VALUE ctx) {
  llama_context_wrapper* cw;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  llama_clear_adapter_lora(cw->context);

  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_decode(context, batch) ⇒ `Integer`

Parameters:

context (LlamaContext)
batch (LlamaBatch)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2355

/*
 * Implements LlamaCpp.llama_decode(context, batch).
 *
 * Runs llama_decode() on the wrapped context with the wrapped batch (passed
 * by value) and returns its int32_t result as an Integer.
 * Raises ArgumentError when ctx is not a LlamaContext or batch is not a LlamaBatch.
 */
static VALUE rb_llama_decode(VALUE self, VALUE ctx, VALUE batch) {
  llama_context_wrapper* cw;
  llama_batch* native_batch;
  int32_t status;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }
  if (!rb_obj_is_kind_of(batch, rb_cLlamaBatch)) {
    rb_raise(rb_eArgError, "batch must be a LlamaBatch"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  native_batch = get_llama_batch(batch);
  status = llama_decode(cw->context, *native_batch);

  RB_GC_GUARD(ctx);
  RB_GC_GUARD(batch);
  return INT2NUM(status);
}

.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ `String`

Parameters:

vocab (LlamaVocab)
tokens (Array<Integer>)
remove_special (Boolean)
unparse_special (Boolean)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 2936

/*
 * Implements LlamaCpp.llama_detokenize(vocab, tokens, remove_special, unparse_special).
 *
 * Converts the token ids in tokens to text with llama_detokenize(). The
 * first attempt uses a buffer of max(n_tokens, 1024) bytes; when the call
 * returns a negative value, the buffer is reallocated to the negated value
 * and the call is repeated once.
 *
 * Returns a UTF-8 String, or nil when tokens is empty.
 * Raises ArgumentError when vocab is not a LlamaVocab or tokens is not an
 * Array, whatever NUM2INT raises for an element that cannot be converted,
 * and RuntimeError when the second attempt also fails.
 */
static VALUE rb_llama_detokenize(VALUE self, VALUE vocab, VALUE tokens, VALUE remove_special, VALUE unparse_special) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_TYPE_P(tokens, T_ARRAY)) {
    rb_raise(rb_eArgError, "tokens must be an Array");
    return Qnil;
  }

  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t n_tokens = (int32_t)RARRAY_LEN(tokens);
  if (n_tokens == 0) {
    return Qnil;
  }

  /*
   * Dry-run the conversions before allocating: NUM2INT raises for elements
   * that are not convertible or out of range, and raising after
   * ruby_xmalloc would leak the token buffer.
   */
  for (int32_t i = 0; i < n_tokens; i++) {
    (void)NUM2INT(rb_ary_entry(tokens, i));
  }

  llama_token* tokens_ = (llama_token*)ruby_xmalloc(sizeof(llama_token) * n_tokens);
  for (int32_t i = 0; i < n_tokens; i++) {
    tokens_[i] = NUM2INT(rb_ary_entry(tokens, i));
  }
  const int32_t text_len_max = n_tokens > 1024 ? n_tokens : 1024;
  char* text = (char*)ruby_xmalloc(sizeof(char) * text_len_max);
  const bool remove_special_ = RTEST(remove_special) ? true : false;
  const bool unparse_special_ = RTEST(unparse_special) ? true : false;

  int32_t n_chars = llama_detokenize(vocab_wrapper->vocab, tokens_, n_tokens, text, text_len_max, remove_special_, unparse_special_);

  if (n_chars < 0) {
    /* Buffer too small: retry once with the size taken from the negated return value. */
    const int32_t required = -n_chars;
    ruby_xfree(text);
    text = (char*)ruby_xmalloc(sizeof(char) * required);
    n_chars = llama_detokenize(vocab_wrapper->vocab, tokens_, n_tokens, text, required, remove_special_, unparse_special_);
    /*
     * Only a negative result is a failure. The buffer is never
     * NUL-terminated by this code, so it must not be measured with strlen.
     */
    if (n_chars < 0) {
      ruby_xfree(tokens_);
      ruby_xfree(text);
      rb_raise(rb_eRuntimeError, "Failed to detokenize");
      return Qnil;
    }
  }

  /* Build the String from the explicit length; the buffer has no terminator. */
  VALUE ret = rb_utf8_str_new(text, n_chars);
  ruby_xfree(tokens_);
  ruby_xfree(text);
  RB_GC_GUARD(vocab);

  return ret;
}

.llama_encode(context, batch) ⇒ `Integer`

Parameters:

context (LlamaContext)
batch (LlamaBatch)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2332

/*
 * Implements LlamaCpp.llama_encode(context, batch).
 *
 * Runs llama_encode() on the wrapped context with the wrapped batch (passed
 * by value) and returns its int32_t result as an Integer.
 * Raises ArgumentError when ctx is not a LlamaContext or batch is not a LlamaBatch.
 */
static VALUE rb_llama_encode(VALUE self, VALUE ctx, VALUE batch) {
  llama_context_wrapper* cw;
  llama_batch* native_batch;
  int32_t status;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }
  if (!rb_obj_is_kind_of(batch, rb_cLlamaBatch)) {
    rb_raise(rb_eArgError, "batch must be a LlamaBatch"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  native_batch = get_llama_batch(batch);
  status = llama_encode(cw->context, *native_batch);

  RB_GC_GUARD(ctx);
  RB_GC_GUARD(batch);
  return INT2NUM(status);
}

.llama_free(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1218

/*
 * Implements LlamaCpp.llama_free(context).
 *
 * Frees the native context held by the wrapper, if any, and clears the
 * pointer so that a repeated call does nothing. Returns nil.
 * Raises ArgumentError when context is not a LlamaContext.
 */
static VALUE rb_llama_free(VALUE self, VALUE context) {
  if (!rb_obj_is_kind_of(context, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "context must be a LlamaContext");
    return Qnil;
  }
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(context);
  if (context_wrapper->context != NULL) {
    llama_free(context_wrapper->context);
    context_wrapper->context = NULL;
  }
  /* Keep the Ruby object reachable while its wrapper is in use, as the sibling wrappers do. */
  RB_GC_GUARD(context);
  return Qnil;
}

.llama_get_kv_cache_token_count(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1945

/*
 * Implements LlamaCpp.llama_get_kv_cache_token_count(context).
 *
 * Returns the int32_t result of llama_get_kv_cache_token_count() for the
 * wrapped context as an Integer.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_get_kv_cache_token_count(VALUE self, VALUE ctx) {
  llama_context_wrapper* cw;
  int32_t count;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  count = llama_get_kv_cache_token_count(cw->context);

  RB_GC_GUARD(ctx);
  return INT2NUM(count);
}

.llama_get_kv_cache_used_cells(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1961

/*
 * Implements LlamaCpp.llama_get_kv_cache_used_cells(context).
 *
 * Returns the int32_t result of llama_get_kv_cache_used_cells() for the
 * wrapped context as an Integer.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_get_kv_cache_used_cells(VALUE self, VALUE ctx) {
  llama_context_wrapper* cw;
  int32_t used;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  used = llama_get_kv_cache_used_cells(cw->context);

  RB_GC_GUARD(ctx);
  return INT2NUM(used);
}

.llama_get_model(context) ⇒ `LlamaModel`

Parameters:

context (LlamaContext)

Returns:

(LlamaModel)

# File 'ext/llama_cpp/llama_cpp.c', line 1340

/*
 * Implements LlamaCpp.llama_get_model(context).
 *
 * Wraps the model pointer returned by llama_get_model() for the wrapped
 * context in a new LlamaModel object.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_get_model(VALUE self, VALUE ctx) {
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    /* Message names the actual class, matching the other context wrappers. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
    return Qnil;
  }
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
  llama_model_wrapper* model_wrapper = (llama_model_wrapper*)ruby_xmalloc(sizeof(llama_model_wrapper));
  /* The cast drops the qualifier of the pointer returned by llama_get_model. */
  model_wrapper->model = (struct llama_model*)llama_get_model(context_wrapper->context);
  /* NOTE(review): presumably marks the wrapper as not owning the model so it is not freed twice - confirm in the wrapper's free function. */
  model_wrapper->copied = true;
  RB_GC_GUARD(ctx);
  return TypedData_Wrap_Struct(rb_cLlamaModel, &llama_model_wrapper_data_type, model_wrapper);
}

.llama_init_from_model(model, params) ⇒ `LlamaContext`

Parameters:

model (LlamaModel)
params (LlamaContextParams)

Returns:

(LlamaContext)

# File 'ext/llama_cpp/llama_cpp.c', line 1195

/*
 * Implements LlamaCpp.llama_init_from_model(model, params).
 *
 * Calls llama_init_from_model() with the wrapped model and a copy of the
 * wrapped context parameters, and wraps the returned pointer in a new
 * LlamaContext; the pointer is stored as returned, without a NULL check.
 * Raises ArgumentError when model is not a LlamaModel or params is not a
 * LlamaContextParams.
 */
static VALUE rb_llama_init_from_model(VALUE self, VALUE model, VALUE params) {
  llama_model_wrapper* mw;
  struct llama_context_params* cparams;
  llama_context_wrapper* cw;

  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel"); /* does not return */
  }
  if (!rb_obj_is_kind_of(params, rb_cLlamaContextParams)) {
    rb_raise(rb_eArgError, "params must be a LlamaContextParams"); /* does not return */
  }

  mw = get_llama_model_wrapper(model);
  cparams = get_llama_context_params(params);

  cw = (llama_context_wrapper*)ruby_xmalloc(sizeof(llama_context_wrapper));
  cw->context = llama_init_from_model(mw->model, *cparams);

  RB_GC_GUARD(model);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaContext, &llama_context_wrapper_data_type, cw);
}

.llama_kv_cache_can_shift?(context) ⇒ `Boolean`

Parameters:

context (LlamaContext)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2206

/*
 * Implements LlamaCpp.llama_kv_cache_can_shift?(context).
 *
 * Returns true or false according to llama_kv_cache_can_shift() for the
 * wrapped context.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_kv_cache_can_shift(VALUE self, VALUE ctx) {
  llama_context_wrapper* cw;
  bool can_shift;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  can_shift = llama_kv_cache_can_shift(cw->context);

  RB_GC_GUARD(ctx);
  if (can_shift) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_kv_cache_clear(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1977

/*
 * Implements LlamaCpp.llama_kv_cache_clear(context).
 *
 * Calls llama_kv_cache_clear() on the wrapped context and returns nil.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_kv_cache_clear(VALUE self, VALUE ctx) {
  llama_context_wrapper* cw;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  llama_kv_cache_clear(cw->context);

  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_defrag(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2174

/*
 * Implements LlamaCpp.llama_kv_cache_defrag(context).
 *
 * Calls llama_kv_cache_defrag() on the wrapped context and returns nil.
 * Raises ArgumentError when ctx is not a LlamaContext.
 */
static VALUE rb_llama_kv_cache_defrag(VALUE self, VALUE ctx) {
  llama_context_wrapper* cw;

  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }

  cw = get_llama_context_wrapper(ctx);
  llama_kv_cache_defrag(cw->context);

  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_seq_add(context, seq_id, p0, p1, delta) ⇒ `NilClass`

Parameters:

context (LlamaContext)
seq_id (Integer)
p0 (Integer)
p1 (Integer)
delta (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2085

/*
 * Implements LlamaCpp.llama_kv_cache_seq_add(context, seq_id, p0, p1, delta).
 *
 * Forwards the four Integer arguments, converted with NUM2INT, to
 * llama_kv_cache_seq_add() on the wrapped context and returns nil.
 * Raises ArgumentError when ctx is not a LlamaContext or any other argument
 * is not an Integer.
 */
static VALUE rb_llama_kv_cache_seq_add(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
  llama_context_wrapper* cw;

  /* Argument checks run in parameter order; rb_raise does not return. */
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(delta)) {
    rb_raise(rb_eArgError, "delta must be an Integer");
  }

  cw = get_llama_context_wrapper(ctx);
  llama_kv_cache_seq_add(
    cw->context,
    NUM2INT(seq_id),
    NUM2INT(p0),
    NUM2INT(p1),
    NUM2INT(delta)
  );

  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_seq_cp(context, seq_id_src, seq_id_dst, p0, p1) ⇒ `NilClass`

Parameters:

context (LlamaContext)
seq_id_src (Integer)
seq_id_dst (Integer)
p0 (Integer)
p1 (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2028

/*
 * Ruby binding for llama_kv_cache_seq_cp().
 *
 * ctx must be a LlamaContext and seq_id_src, seq_id_dst, p0 and p1 must be
 * Integers; the first argument that fails its check raises ArgumentError.
 * The integers are converted with NUM2INT and forwarded together with the
 * wrapped context. Always returns nil.
 */
static VALUE rb_llama_kv_cache_seq_cp(VALUE self, VALUE ctx, VALUE seq_id_src, VALUE seq_id_dst, VALUE p0, VALUE p1) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id_src)) {
    rb_raise(rb_eArgError, "seq_id_src must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(seq_id_dst)) {
    rb_raise(rb_eArgError, "seq_id_dst must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }

  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int src_ = NUM2INT(seq_id_src);
  const int dst_ = NUM2INT(seq_id_dst);
  const int p0_ = NUM2INT(p0);
  const int p1_ = NUM2INT(p1);
  llama_kv_cache_seq_cp(wrapper->context, src_, dst_, p0_, p1_);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_seq_div(context, seq_id, p0, p1, d) ⇒ `NilClass`

Parameters:

context (LlamaContext)
seq_id (Integer)
p0 (Integer)
p1 (Integer)
d (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2121

/*
 * Ruby binding for llama_kv_cache_seq_div().
 *
 * ctx must be a LlamaContext and seq_id, p0, p1 and d must be Integers; the
 * first argument that fails its check raises ArgumentError. The integers are
 * converted with NUM2INT and forwarded together with the wrapped context.
 * Always returns nil.
 */
static VALUE rb_llama_kv_cache_seq_div(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(d)) {
    rb_raise(rb_eArgError, "d must be an Integer");
  }

  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int seq_id_ = NUM2INT(seq_id);
  const int p0_ = NUM2INT(p0);
  const int p1_ = NUM2INT(p1);
  const int d_ = NUM2INT(d);
  llama_kv_cache_seq_div(wrapper->context, seq_id_, p0_, p1_, d_);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_seq_keep(context, seq_id) ⇒ `NilClass`

Parameters:

context (LlamaContext)
seq_id (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2061

/*
 * Ruby binding for llama_kv_cache_seq_keep().
 *
 * ctx must be a LlamaContext and seq_id an Integer; otherwise ArgumentError
 * is raised. seq_id is converted with NUM2INT. Always returns nil.
 */
static VALUE rb_llama_kv_cache_seq_keep(VALUE self, VALUE ctx, VALUE seq_id) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }

  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int seq_id_ = NUM2INT(seq_id);
  llama_kv_cache_seq_keep(wrapper->context, seq_id_);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_seq_pos_max(context, seq_id) ⇒ `Integer`

Parameters:

context (LlamaContext)
seq_id (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2154

/*
 * Ruby binding for llama_kv_cache_seq_pos_max().
 *
 * ctx must be a LlamaContext and seq_id an Integer; otherwise ArgumentError
 * is raised. Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_kv_cache_seq_pos_max(VALUE self, VALUE ctx, VALUE seq_id) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }

  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int seq_id_ = NUM2INT(seq_id);
  const int32_t max_pos = llama_kv_cache_seq_pos_max(wrapper->context, seq_id_);
  RB_GC_GUARD(ctx);
  return INT2NUM(max_pos);
}

.llama_kv_cache_seq_rm(context, seq_id, p0, p1) ⇒ `Boolean`

Parameters:

context (LlamaContext)
seq_id (Integer)
p0 (Integer)
p1 (Integer)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1996

/*
 * Ruby binding for llama_kv_cache_seq_rm().
 *
 * ctx must be a LlamaContext and seq_id, p0 and p1 must be Integers; the
 * first argument that fails its check raises ArgumentError. Returns true or
 * false according to the boolean result of the native call.
 */
static VALUE rb_llama_kv_cache_seq_rm(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }

  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int seq_id_ = NUM2INT(seq_id);
  const int p0_ = NUM2INT(p0);
  const int p1_ = NUM2INT(p1);
  const bool removed = llama_kv_cache_seq_rm(wrapper->context, seq_id_, p0_, p1_);
  RB_GC_GUARD(ctx);
  if (removed) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_kv_cache_update(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2190

/*
 * Ruby binding for llama_kv_cache_update().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Always returns nil.
 */
static VALUE rb_llama_kv_cache_update(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  llama_kv_cache_update(wrapper->context);
  /* Keep ctx referenced until the native call has finished. */
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_kv_cache_view_free(view) ⇒ `NilClass`

Parameters:

view (LlamaKvCacheView)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1905

/*
 * Ruby binding for llama_kv_cache_view_free().
 *
 * view must be a LlamaKvCacheView; any other object raises ArgumentError.
 * The struct held by the Ruby object is passed to llama_kv_cache_view_free();
 * the struct itself stays attached to the Ruby object. Always returns nil.
 */
static VALUE rb_llama_kv_cache_view_free(VALUE self, VALUE view) {
  if (!rb_obj_is_kind_of(view, rb_cLlamaKvCacheView)) {
    rb_raise(rb_eArgError, "view must be a LlamaKvCacheView");
    return Qnil;
  }
  struct llama_kv_cache_view* view_ = get_llama_kv_cache_view(view);
  llama_kv_cache_view_free(view_);
  /* The former `view_ = NULL;` only cleared this local copy and had no
   * effect on the Ruby object, so it has been dropped. */
  RB_GC_GUARD(view);
  return Qnil;
}

.llama_kv_cache_view_init(context, n_seq_max) ⇒ `LlamaKvCacheView`

Parameters:

context (LlamaContext)
n_seq_max (Integer)

Returns:

(LlamaKvCacheView)

# File 'ext/llama_cpp/llama_cpp.c', line 1884

/*
 * Ruby binding for llama_kv_cache_view_init().
 *
 * ctx must be a LlamaContext and n_seq_max an Integer; otherwise
 * ArgumentError is raised. The view returned by the native call is copied
 * into a ruby_xmalloc'ed struct and wrapped in a new LlamaKvCacheView.
 */
static VALUE rb_llama_kv_cache_view_init(VALUE self, VALUE ctx, VALUE n_seq_max) {
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(n_seq_max)) {
    rb_raise(rb_eArgError, "n_seq_max must be an Integer");
    return Qnil;
  }
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
  /* Convert before allocating: NUM2UINT can raise for out-of-range values,
   * and raising after ruby_xmalloc would leak the buffer. */
  const unsigned int n_seq_max_ = NUM2UINT(n_seq_max);
  struct llama_kv_cache_view* data = (struct llama_kv_cache_view*)ruby_xmalloc(sizeof(struct llama_kv_cache_view));
  *data = llama_kv_cache_view_init(context_wrapper->context, n_seq_max_);
  RB_GC_GUARD(ctx);
  return TypedData_Wrap_Struct(rb_cLlamaKvCacheView, &llama_kv_cache_view_type, data);
}

.llama_kv_cache_view_update(context, view) ⇒ `NilClass`

Parameters:

context (LlamaContext)
view (LlamaKvCacheView)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1923

/*
 * Ruby binding for llama_kv_cache_view_update().
 *
 * ctx must be a LlamaContext and view a LlamaKvCacheView; otherwise
 * ArgumentError is raised. Always returns nil.
 */
static VALUE rb_llama_kv_cache_view_update(VALUE self, VALUE ctx, VALUE view) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RTEST(rb_obj_is_kind_of(view, rb_cLlamaKvCacheView))) {
    rb_raise(rb_eArgError, "view must be a LlamaKvCacheView");
  }

  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  struct llama_kv_cache_view* native_view = get_llama_kv_cache_view(view);
  llama_kv_cache_view_update(wrapper->context, native_view);
  /* Keep both Ruby objects referenced until the native call has finished. */
  RB_GC_GUARD(ctx);
  RB_GC_GUARD(view);
  return Qnil;
}

.llama_max_devices ⇒ `Integer`

Returns:

(Integer)



# File 'ext/llama_cpp/llama_cpp.c', line 1243

/*
 * Ruby binding for llama_max_devices().
 *
 * Takes no arguments and returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_max_devices(VALUE self) {
  const size_t n_devices = llama_max_devices();
  return SIZET2NUM(n_devices);
}

.llama_model_decoder_start_token(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1607

/*
 * Ruby binding for llama_model_decoder_start_token().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_decoder_start_token(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int token = llama_model_decoder_start_token(wrapper->model);
  return INT2NUM(token);
}

.llama_model_desc(model) ⇒ `String`

Parameters:

model (LlamaModel)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 1520

/*
 * Ruby binding for llama_model_desc().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * The description is written into a 128-byte stack buffer and returned as a
 * UTF-8 Ruby String.
 */
static VALUE rb_llama_model_desc(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  char desc[128];
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  llama_model_desc(wrapper->model, desc, sizeof(desc));
  RB_GC_GUARD(model);
  /* NOTE(review): relies on llama_model_desc() NUL-terminating desc - confirm. */
  return rb_utf8_str_new_cstr(desc);
}

.llama_model_free(model) ⇒ `NilClass`

Parameters:

model (LlamaModel)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1119

/*
 * Ruby binding for llama_model_free().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Frees the wrapped native model if one is present and clears the wrapper's
 * pointer, so a second call does nothing. Always returns nil.
 */
static VALUE rb_llama_model_free(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  if (wrapper->model == NULL) {
    /* Nothing to release. */
    return Qnil;
  }
  llama_model_free(wrapper->model);
  wrapper->model = NULL;
  return Qnil;
}

.llama_model_get_vocab(model) ⇒ `LlamaVocab`

Parameters:

model (LlamaModel)

Returns:

(LlamaVocab)

# File 'ext/llama_cpp/llama_cpp.c', line 1372

/*
 * Ruby binding for llama_model_get_vocab().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Allocates a llama_vocab_wrapper, stores the vocab pointer obtained from
 * the model in it, and returns it wrapped in a new LlamaVocab.
 */
static VALUE rb_llama_model_get_vocab(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* owner = get_llama_model_wrapper(model);
  llama_vocab_wrapper* wrapped = (llama_vocab_wrapper*)ruby_xmalloc(sizeof(llama_vocab_wrapper));
  /* NOTE(review): `copied` presumably marks the vocab as borrowed from the
   * model so the wrapper's free function leaves it alone - confirm. */
  wrapped->copied = true;
  wrapped->vocab = (struct llama_vocab*)llama_model_get_vocab(owner->model);
  RB_GC_GUARD(model);
  return TypedData_Wrap_Struct(rb_cLlamaVocab, &llama_vocab_wrapper_data_type, wrapped);
}

.llama_model_has_decoder(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1593

/*
 * Ruby binding for llama_model_has_decoder().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns true or false according to the native result.
 */
static VALUE rb_llama_model_has_decoder(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  if (llama_model_has_decoder(wrapper->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_has_encoder?(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1579

/*
 * Ruby binding for llama_model_has_encoder().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns true or false according to the native result.
 */
static VALUE rb_llama_model_has_encoder(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  if (llama_model_has_encoder(wrapper->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_is_recurrent?(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1621

/*
 * Ruby binding for llama_model_is_recurrent().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns true or false according to the native result.
 */
static VALUE rb_llama_model_is_recurrent(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  if (llama_model_is_recurrent(wrapper->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_load_from_file(path_model, params) ⇒ `LlamaModel`

Parameters:

path_model (String)
params (LlamaModelParams)

Returns:

(LlamaModel)

# File 'ext/llama_cpp/llama_cpp.c', line 1138

/*
 * Ruby binding for llama_model_load_from_file().
 *
 * path_model must be a String and params a LlamaModelParams; otherwise
 * ArgumentError is raised. The pointer returned by the native loader is
 * stored in a freshly allocated llama_model_wrapper, which is returned
 * wrapped in a new LlamaModel.
 *
 * NOTE(review): the loader's result is stored without a NULL check, so a
 * failed load presumably yields a LlamaModel holding a NULL model - confirm
 * how callers are expected to detect that.
 */
static VALUE rb_llama_model_load_from_file(VALUE self, VALUE path_model, VALUE params) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RB_TYPE_P(path_model, T_STRING)) {
    rb_raise(rb_eArgError, "path_model must be a String");
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaModelParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaModelParams");
  }

  const char* c_path = StringValueCStr(path_model);
  struct llama_model_params* native_params = get_llama_model_params(params);
  llama_model_wrapper* wrapper = (llama_model_wrapper*)ruby_xmalloc(sizeof(llama_model_wrapper));
  /* The params struct is passed by value. */
  wrapper->model = llama_model_load_from_file(c_path, *native_params);
  /* c_path and native_params point into these Ruby objects; keep them
   * referenced until loading is done. */
  RB_GC_GUARD(path_model);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaModel, &llama_model_wrapper_data_type, wrapper);
}

.llama_model_load_from_splits(paths, params) ⇒ `LlamaModel`

Parameters:

paths (Array<String>)
params (LlamaModelParams)

Returns:

(LlamaModel)

# File 'ext/llama_cpp/llama_cpp.c', line 1162

/*
 * Ruby binding for llama_model_load_from_splits().
 *
 * paths must be an Array whose elements are all Strings, and params must be
 * a LlamaModelParams; otherwise ArgumentError is raised. The C strings of
 * the path elements are collected into a stack-allocated array and handed
 * to the native loader. Its result is stored in a freshly allocated
 * llama_model_wrapper, which is returned wrapped in a new LlamaModel.
 */
static VALUE rb_llama_model_load_from_splits(VALUE self, VALUE paths, VALUE params) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RB_TYPE_P(paths, T_ARRAY)) {
    rb_raise(rb_eArgError, "paths must be an Array");
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaModelParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaModelParams");
  }

  size_t n_splits = RARRAY_LEN(paths);
  const char** c_paths = ALLOCA_N(const char*, n_splits);
  for (size_t idx = 0; idx < n_splits; idx++) {
    VALUE entry = rb_ary_entry(paths, idx);
    if (!RB_TYPE_P(entry, T_STRING)) {
      rb_raise(rb_eArgError, "paths must be an Array of Strings");
    }
    c_paths[idx] = StringValueCStr(entry);
  }

  struct llama_model_params* native_params = get_llama_model_params(params);
  llama_model_wrapper* wrapper = (llama_model_wrapper*)ruby_xmalloc(sizeof(llama_model_wrapper));
  /* The params struct is passed by value. */
  wrapper->model = llama_model_load_from_splits(c_paths, n_splits, *native_params);
  /* c_paths points into the Strings held by paths; keep the Ruby objects
   * referenced until loading is done. */
  RB_GC_GUARD(paths);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaModel, &llama_model_wrapper_data_type, wrapper);
}

.llama_model_n_ctx_train(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1404

/*
 * Ruby binding for llama_model_n_ctx_train().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_n_ctx_train(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int n_ctx_train = llama_model_n_ctx_train(wrapper->model);
  return INT2NUM(n_ctx_train);
}

.llama_model_n_embd(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1418

/*
 * Ruby binding for llama_model_n_embd().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_n_embd(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int n_embd = llama_model_n_embd(wrapper->model);
  return INT2NUM(n_embd);
}

.llama_model_n_head(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1446

/*
 * Ruby binding for llama_model_n_head().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_n_head(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int n_head = llama_model_n_head(wrapper->model);
  return INT2NUM(n_head);
}

.llama_model_n_head_kv(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1460

/*
 * Ruby binding for llama_model_n_head_kv().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_n_head_kv(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int n_head_kv = llama_model_n_head_kv(wrapper->model);
  return INT2NUM(n_head_kv);
}

.llama_model_n_layer(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1432

/*
 * Ruby binding for llama_model_n_layer().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_n_layer(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int n_layer = llama_model_n_layer(wrapper->model);
  return INT2NUM(n_layer);
}

.llama_model_n_params(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1565

/*
 * Ruby binding for llama_model_n_params().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_n_params(VALUE self, VALUE model) {
  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
    return Qnil;
  }
  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
  /* llama.h declares the result as uint64_t. ULL2NUM keeps all 64 bits even
   * where `unsigned long` is only 32 bits wide (e.g. 64-bit Windows), where
   * ULONG2NUM would silently truncate. */
  return ULL2NUM(llama_model_n_params(model_wrapper->model));
}

.llama_model_quantize(fname_inp, fname_out, params) ⇒ `Boolean`

Parameters:

fname_inp (String)
fname_out (String)
params (LlamaModelQuantizeParams)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1637

/*
 * Ruby binding for llama_model_quantize().
 *
 * fname_inp and fname_out must be Strings and params a
 * LlamaModelQuantizeParams; the first argument that fails its check raises
 * ArgumentError. Returns true when the native call returns 0, false
 * otherwise.
 */
static VALUE rb_llama_model_quantize(VALUE self, VALUE fname_inp, VALUE fname_out, VALUE params) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RB_TYPE_P(fname_inp, T_STRING)) {
    rb_raise(rb_eArgError, "fname_inp must be a String");
  }
  if (!RB_TYPE_P(fname_out, T_STRING)) {
    rb_raise(rb_eArgError, "fname_out must be a String");
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaModelQuantizeParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaModelQuantizeParams");
  }

  const char* src_path = StringValueCStr(fname_inp);
  const char* dst_path = StringValueCStr(fname_out);
  const llama_model_quantize_params* native_params = get_llama_model_quantize_params(params);
  const uint32_t status = llama_model_quantize(src_path, dst_path, native_params);
  /* The C pointers above point into these Ruby objects; keep them
   * referenced until the native call has finished. */
  RB_GC_GUARD(fname_inp);
  RB_GC_GUARD(fname_out);
  RB_GC_GUARD(params);
  if (status != 0) {
    return Qfalse;
  }
  return Qtrue;
}

.llama_model_rope_freq_scale_train(model) ⇒ `Float`

Parameters:

model (LlamaModel)

Returns:

(Float)

# File 'ext/llama_cpp/llama_cpp.c', line 1474

/*
 * Ruby binding for llama_model_rope_freq_scale_train().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Float.
 */
static VALUE rb_llama_model_rope_freq_scale_train(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const double freq_scale = llama_model_rope_freq_scale_train(wrapper->model);
  return DBL2NUM(freq_scale);
}

.llama_model_rope_type(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1390

/*
 * Ruby binding for llama_model_rope_type().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_rope_type(VALUE self, VALUE model) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapper = get_llama_model_wrapper(model);
  const int rope_type = llama_model_rope_type(wrapper->model);
  return INT2NUM(rope_type);
}

.llama_model_size(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1537

/*
 * Ruby binding for llama_model_size().
 *
 * model must be a LlamaModel; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_model_size(VALUE self, VALUE model) {
  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
    return Qnil;
  }
  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
  /* llama.h declares the result as uint64_t. ULL2NUM keeps all 64 bits even
   * where `unsigned long` is only 32 bits wide (e.g. 64-bit Windows), where
   * ULONG2NUM would silently truncate. */
  return ULL2NUM(llama_model_size(model_wrapper->model));
}

.llama_n_batch(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1298

/*
 * Ruby binding for llama_n_batch().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_n_batch(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const unsigned int n_batch = llama_n_batch(wrapper->context);
  return UINT2NUM(n_batch);
}

.llama_n_ctx(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1284

/*
 * Ruby binding for llama_n_ctx().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_n_ctx(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const unsigned int n_ctx = llama_n_ctx(wrapper->context);
  return UINT2NUM(n_ctx);
}

.llama_n_seq_max(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1326

/*
 * Ruby binding for llama_n_seq_max().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_n_seq_max(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const unsigned int n_seq_max = llama_n_seq_max(wrapper->context);
  return UINT2NUM(n_seq_max);
}

.llama_n_threads(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2403

/*
 * Ruby binding for llama_n_threads().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_n_threads(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int32_t thread_count = llama_n_threads(wrapper->context);
  /* Keep ctx referenced until the native call has finished. */
  RB_GC_GUARD(ctx);
  return INT2NUM(thread_count);
}

.llama_n_threads_batch(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2419

/*
 * Ruby binding for llama_n_threads_batch().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_n_threads_batch(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int32_t thread_count = llama_n_threads_batch(wrapper->context);
  /* Keep ctx referenced until the native call has finished. */
  RB_GC_GUARD(ctx);
  return INT2NUM(thread_count);
}

.llama_n_ubatch(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1312

/*
 * Ruby binding for llama_n_ubatch().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_n_ubatch(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const unsigned int n_ubatch = llama_n_ubatch(wrapper->context);
  return UINT2NUM(n_ubatch);
}

.llama_numa_init(numa) ⇒ `NilClass`

Parameters:

numa (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1105

/*
 * Ruby binding for llama_numa_init().
 *
 * numa must be an Integer; any other object raises ArgumentError. The value
 * is converted with NUM2INT and cast to enum ggml_numa_strategy without any
 * range check. Always returns nil.
 */
static VALUE rb_llama_numa_init(VALUE self, VALUE numa) {
  if (!RB_INTEGER_TYPE_P(numa)) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "numa must be an Integer");
  }
  const enum ggml_numa_strategy strategy = (enum ggml_numa_strategy)NUM2INT(numa);
  llama_numa_init(strategy);
  return Qnil;
}

.llama_perf_context(context) ⇒ `LlamaPerfContextData`

Parameters:

context (LlamaContext)

Returns:

(LlamaPerfContextData)

# File 'ext/llama_cpp/llama_cpp.c', line 3729

/*
 * Ruby binding for llama_perf_context().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * The struct returned by the native call is copied into a ruby_xmalloc'ed
 * buffer and wrapped in a new LlamaPerfContextData.
 */
static VALUE rb_llama_perf_context(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  struct llama_perf_context_data* snapshot =
    (struct llama_perf_context_data*)ruby_xmalloc(sizeof(struct llama_perf_context_data));
  *snapshot = llama_perf_context(wrapper->context);
  RB_GC_GUARD(ctx);
  return TypedData_Wrap_Struct(rb_cLlamaPerfContextData, &llama_perf_context_data_type, snapshot);
}

.llama_perf_context_print(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3746

/*
 * Ruby binding for llama_perf_context_print().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Always returns nil.
 */
static VALUE rb_llama_perf_context_print(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  llama_perf_context_print(wrapper->context);
  /* Keep ctx referenced until the native call has finished. */
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_perf_context_reset(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3778

/*
 * Ruby binding for llama_perf_context_reset().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Always returns nil.
 */
static VALUE rb_llama_perf_context_reset(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  llama_perf_context_reset(wrapper->context);
  /* Keep ctx referenced until the native call has finished. */
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_perf_sampler(chain) ⇒ `LlamaPerfSamplerData`

Parameters:

chain (LlamaSampler)

Returns:

(LlamaPerfSamplerData)

# File 'ext/llama_cpp/llama_cpp.c', line 3794

/*
 * Ruby binding for llama_perf_sampler().
 *
 * chain must be a LlamaSampler; any other object raises ArgumentError.
 * The struct returned by the native call is copied into a ruby_xmalloc'ed
 * buffer and wrapped in a new LlamaPerfSamplerData.
 */
static VALUE rb_llama_perf_sampler(VALUE self, VALUE chain) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  struct llama_perf_sampler_data* snapshot =
    (struct llama_perf_sampler_data*)ruby_xmalloc(sizeof(struct llama_perf_sampler_data));
  *snapshot = llama_perf_sampler(native_chain);
  RB_GC_GUARD(chain);
  return TypedData_Wrap_Struct(rb_cLlamaPerfSamplerData, &llama_perf_sampler_data_type, snapshot);
}

.llama_perf_sampler_print(chain) ⇒ `NilClass`

Parameters:

chain (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3762

/*
 * Ruby binding for llama_perf_sampler_print().
 *
 * chain must be a LlamaSampler; any other object raises ArgumentError.
 * Always returns nil.
 */
static VALUE rb_llama_perf_sampler_print(VALUE self, VALUE chain) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  llama_perf_sampler_print(native_chain);
  /* Keep chain referenced until the native call has finished. */
  RB_GC_GUARD(chain);
  return Qnil;
}

.llama_perf_sampler_reset(chain) ⇒ `NilClass`

Parameters:

chain (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3811

/*
 * Ruby binding for llama_perf_sampler_reset().
 *
 * chain must be a LlamaSampler; any other object raises ArgumentError.
 * Always returns nil.
 */
static VALUE rb_llama_perf_sampler_reset(VALUE self, VALUE chain) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  llama_perf_sampler_reset(native_chain);
  /* Keep chain referenced until the native call has finished. */
  RB_GC_GUARD(chain);
  return Qnil;
}

.llama_pooling_type(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1358

/*
 * Ruby binding for llama_pooling_type().
 *
 * ctx must be a LlamaContext; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_pooling_type(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapper = get_llama_context_wrapper(ctx);
  const int pooling = llama_pooling_type(wrapper->context);
  return INT2NUM(pooling);
}

.llama_print_system_info ⇒ `String`

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 3607

/*
 * Ruby binding for llama_print_system_info().
 *
 * Takes no arguments and returns the C string produced by the native call
 * as a UTF-8 Ruby String.
 */
static VALUE rb_llama_print_system_info(VALUE self) {
  return rb_utf8_str_new_cstr(llama_print_system_info());
}

.llama_rm_adapter_lora(context, adapter) ⇒ `Integer`

Parameters:

context (LlamaContext)
adapter (LlamaAdapterLora)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1718

/*
 * Ruby binding for llama_rm_adapter_lora().
 *
 * ctx must be a LlamaContext and adapter a LlamaAdapterLora; otherwise
 * ArgumentError is raised. Returns the native int32_t result as a Ruby
 * Integer.
 */
static VALUE rb_llama_rm_adapter_lora(VALUE self, VALUE ctx, VALUE adapter) {
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
    return Qnil;
  }
  if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
    return Qnil;
  }
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
  llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
  const int32_t res = llama_rm_adapter_lora(context_wrapper->context, adapter_wrapper->adapter);
  RB_GC_GUARD(ctx);
  RB_GC_GUARD(adapter);
  /* C -> Ruby conversion. The previous NUM2INT(res) ran the Ruby -> C
   * conversion on a plain C integer and returned a raw int as a VALUE. */
  return INT2NUM(res);
}

.llama_sampler_accept(sampler, token) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)
token (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3038

/*
 * Ruby binding for llama_sampler_accept().
 *
 * sampler must be a LlamaSampler and token an Integer; otherwise
 * ArgumentError is raised. token is converted with NUM2INT. Always returns
 * nil.
 */
static VALUE rb_llama_sampler_accept(VALUE self, VALUE sampler, VALUE token) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  struct llama_sampler* native_sampler = get_llama_sampler(sampler);
  const llama_token accepted = NUM2INT(token);
  llama_sampler_accept(native_sampler, accepted);
  RB_GC_GUARD(sampler);
  return Qnil;
}

.llama_sampler_apply(sampler, cur_p) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)
cur_p (LlamaTokenDataArray)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3060

/*
 * Ruby binding for llama_sampler_apply().
 *
 * sampler must be a LlamaSampler and cur_p a LlamaTokenDataArray; otherwise
 * ArgumentError is raised. Always returns nil.
 */
static VALUE rb_llama_sampler_apply(VALUE self, VALUE sampler, VALUE cur_p) {
  if (!rb_obj_is_kind_of(sampler, rb_cLlamaSampler)) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
    return Qnil;
  }
  if (!rb_obj_is_kind_of(cur_p, rb_cLlamaTokenDataArray)) {
    /* The message previously said "sampler", naming the wrong argument. */
    rb_raise(rb_eArgError, "cur_p must be a LlamaTokenDataArray");
    return Qnil;
  }
  struct llama_sampler* sampler_ = get_llama_sampler(sampler);
  llama_token_data_array* cur_p_ = get_llama_token_data_array(cur_p);
  llama_sampler_apply(sampler_, cur_p_);
  RB_GC_GUARD(sampler);
  RB_GC_GUARD(cur_p);
  return Qnil;
}

.llama_sampler_chain_add(chain, smpl) ⇒ `NilClass`

Parameters:

chain (LlamaSampler)
smpl (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3148

/*
 * Ruby binding for llama_sampler_chain_add().
 *
 * chain and smpl must both be LlamaSampler objects; otherwise ArgumentError
 * is raised. Always returns nil.
 */
static VALUE rb_llama_sampler_chain_add(VALUE self, VALUE chain, VALUE smpl) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  if (!RTEST(rb_obj_is_kind_of(smpl, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "smpl must be a LlamaSampler");
  }

  struct llama_sampler* native_chain = get_llama_sampler(chain);
  struct llama_sampler* native_smpl = get_llama_sampler(smpl);
  llama_sampler_chain_add(native_chain, native_smpl);
  /* Keep both Ruby objects referenced until the native call has finished. */
  RB_GC_GUARD(chain);
  RB_GC_GUARD(smpl);
  return Qnil;
}

.llama_sampler_chain_get(chain, i) ⇒ `LlamaSampler`

Parameters:

chain (LlamaSampler)
i (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3171

/*
 * Ruby binding for llama_sampler_chain_get().
 *
 * chain must be a LlamaSampler and i an Integer; otherwise ArgumentError is
 * raised. The pointer returned by the native call is wrapped in a new
 * LlamaSampler.
 *
 * NOTE(review): the returned sampler presumably still belongs to the chain;
 * confirm that llama_sampler_data_type's free function does not release it.
 */
static VALUE rb_llama_sampler_chain_get(VALUE self, VALUE chain, VALUE i) {
  /* rb_raise() never returns, so a failed check ends the call right there. */
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  if (!RB_INTEGER_TYPE_P(i)) {
    rb_raise(rb_eArgError, "i must be an Integer");
  }

  struct llama_sampler* native_chain = get_llama_sampler(chain);
  const int index = NUM2INT(i);
  struct llama_sampler* member = llama_sampler_chain_get(native_chain, index);
  RB_GC_GUARD(chain);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, member);
}

.llama_sampler_chain_init(params) ⇒ `LlamaSampler`

Parameters:

params (LlamaSamplerChainParams)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3131

/*
 * Ruby binding for llama_sampler_chain_init().
 *
 * params must be a LlamaSamplerChainParams; any other object raises
 * ArgumentError. The params struct is passed by value and the resulting
 * sampler is wrapped in a new LlamaSampler.
 */
static VALUE rb_llama_sampler_chain_init(VALUE self, VALUE params) {
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaSamplerChainParams))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "params must be a LlamaSamplerChainParams");
  }
  llama_sampler_chain_params* native_params = get_llama_sampler_chain_params(params);
  struct llama_sampler* chain = llama_sampler_chain_init(*native_params);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, chain);
}

.llama_sampler_chain_n(chain) ⇒ `Integer`

Parameters:

chain (LlamaSampler)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3191

/*
 * Ruby binding for llama_sampler_chain_n().
 *
 * chain must be a LlamaSampler; any other object raises ArgumentError.
 * Returns the native result as a Ruby Integer.
 */
static VALUE rb_llama_sampler_chain_n(VALUE self, VALUE chain) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    /* rb_raise() does not return. */
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  const int count = llama_sampler_chain_n(native_chain);
  /* Keep chain referenced until the native call has finished. */
  RB_GC_GUARD(chain);
  return INT2NUM(count);
}

.llama_sampler_chain_remove(chain, i) ⇒ `LlamaSampler`

Parameters:

chain (LlamaSampler)
i (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3208

/**
 * Ruby binding for llama_sampler_chain_remove.
 *
 * @param self   receiver (unused)
 * @param chain  must be a LlamaSampler, otherwise ArgumentError is raised
 * @param i      must be an Integer, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the pointer returned by
 *         llama_sampler_chain_remove
 */
static VALUE rb_llama_sampler_chain_remove(VALUE self, VALUE chain, VALUE i) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  if (!RB_INTEGER_TYPE_P(i)) {
    rb_raise(rb_eArgError, "i must be an Integer");
  }
  struct llama_sampler* owner = get_llama_sampler(chain);
  const int index = NUM2INT(i);
  struct llama_sampler* removed = llama_sampler_chain_remove(owner, index);
  RB_GC_GUARD(chain);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, removed);
}

.llama_sampler_clone(sampler) ⇒ `LlamaSampler`

Parameters:

sampler (LlamaSampler)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3098

/**
 * Ruby binding for llama_sampler_clone.
 *
 * @param self     receiver (unused)
 * @param sampler  must be a LlamaSampler, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the pointer returned by
 *         llama_sampler_clone
 */
static VALUE rb_llama_sampler_clone(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  struct llama_sampler* source = get_llama_sampler(sampler);
  struct llama_sampler* copy = llama_sampler_clone(source);
  RB_GC_GUARD(sampler);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, copy);
}

.llama_sampler_free(sampler) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3114

/**
 * Ruby binding for llama_sampler_free.
 *
 * Releases the native sampler wrapped by `sampler` and clears the pointer
 * stored in the Ruby object so the object no longer refers to freed memory.
 * (Previously only a local copy of the pointer was set to NULL, which left
 * the Ruby object holding a dangling pointer.)
 *
 * @param self     receiver (unused)
 * @param sampler  must be a LlamaSampler, otherwise ArgumentError is raised
 * @return nil
 */
static VALUE rb_llama_sampler_free(VALUE self, VALUE sampler) {
  if (!rb_obj_is_kind_of(sampler, rb_cLlamaSampler)) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
    return Qnil;
  }
  struct llama_sampler* sampler_ = get_llama_sampler(sampler);
  llama_sampler_free(sampler_);
  /* Clear the pointer held by the wrapper object itself, not just the local. */
  RTYPEDDATA_DATA(sampler) = NULL;
  RB_GC_GUARD(sampler);
  return Qnil;
}

.llama_sampler_get_seed(sampler) ⇒ `Integer`

Parameters:

sampler (LlamaSampler)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3564

/**
 * Ruby binding for llama_sampler_get_seed.
 *
 * @param self  receiver (unused)
 * @param smpl  must be a LlamaSampler, otherwise ArgumentError is raised
 * @return Integer built from the unsigned 32-bit value
 *         llama_sampler_get_seed returns
 */
static VALUE rb_llama_sampler_get_seed(VALUE self, VALUE smpl) {
  if (!RTEST(rb_obj_is_kind_of(smpl, rb_cLlamaSampler))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "smpl must be a LlamaSampler");
  }
  struct llama_sampler* native_sampler = get_llama_sampler(smpl);
  const uint32_t seed_value = llama_sampler_get_seed(native_sampler);
  RB_GC_GUARD(smpl);
  return UINT2NUM(seed_value);
}

.llama_sampler_init_dist(seed) ⇒ `LlamaSampler`

Parameters:

seed (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3237

/**
 * Ruby binding for llama_sampler_init_dist.
 *
 * @param self  receiver (unused)
 * @param seed  must be an Integer, otherwise ArgumentError is raised;
 *              converted with NUM2UINT
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_dist(VALUE self, VALUE seed) {
  if (!RB_INTEGER_TYPE_P(seed)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  const unsigned int seed_value = NUM2UINT(seed);
  struct llama_sampler* created = llama_sampler_init_dist(seed_value);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_grammar(vocab, grammar_str, grammar_root) ⇒ `LlamaSampler`

Parameters:

vocab (LlamaVocab)
grammar_str (String)
grammar_root (String)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3463

/**
 * Ruby binding for llama_sampler_init_grammar.
 *
 * @param self          receiver (unused)
 * @param vocab         must be a LlamaVocab, otherwise ArgumentError is raised
 * @param grammar_str   must be a String, otherwise ArgumentError is raised
 * @param grammar_root  must be a String, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_grammar(VALUE self, VALUE vocab, VALUE grammar_str, VALUE grammar_root) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_TYPE_P(grammar_str, T_STRING)) {
    rb_raise(rb_eArgError, "grammar_str must be a String");
  }
  if (!RB_TYPE_P(grammar_root, T_STRING)) {
    rb_raise(rb_eArgError, "grammar_root must be a String");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const char* grammar_text = StringValueCStr(grammar_str);
  const char* root_rule = StringValueCStr(grammar_root);
  struct llama_sampler* created = llama_sampler_init_grammar(wrapper->vocab, grammar_text, root_rule);
  /* The C strings point into the Ruby objects; keep all three alive until here. */
  RB_GC_GUARD(vocab);
  RB_GC_GUARD(grammar_str);
  RB_GC_GUARD(grammar_root);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_greedy ⇒ `LlamaSampler`

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3227

/**
 * Ruby binding for llama_sampler_init_greedy.
 *
 * @param self  receiver (unused)
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_greedy(VALUE self) {
  struct llama_sampler* created = llama_sampler_init_greedy();
  VALUE wrapped = TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
  return wrapped;
}

.llama_sampler_init_infill(vocab) ⇒ `LlamaSampler`

Parameters:

vocab (LlamaVocab)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3548

/**
 * Ruby binding for llama_sampler_init_infill.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_infill(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  struct llama_sampler* created = llama_sampler_init_infill(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_logit_bias(n_vocab, n_logit_bias, logit_bias) ⇒ `LlamaSampler`

Parameters:

n_vocab (Integer)
n_logit_bias (Integer)
logit_bias (LlamaLogitBias)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3522

/**
 * Ruby binding for llama_sampler_init_logit_bias.
 *
 * @param self          receiver (unused)
 * @param n_vocab       must be an Integer, otherwise ArgumentError is raised
 * @param n_logit_bias  must be an Integer, otherwise ArgumentError is raised
 * @param logit_bias    must be a LlamaLogitBias, otherwise ArgumentError is
 *                      raised
 * @return a new LlamaSampler object wrapping the created sampler
 *
 * NOTE(review): a pointer obtained from a single LlamaLogitBias object is
 * passed together with the caller-supplied count n_logit_bias; confirm how
 * counts other than 1 are meant to be supported.
 */
static VALUE rb_llama_sampler_init_logit_bias(VALUE self, VALUE n_vocab, VALUE n_logit_bias, VALUE logit_bias) {
  if (!RB_INTEGER_TYPE_P(n_vocab)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "n_vocab must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(n_logit_bias)) {
    rb_raise(rb_eArgError, "n_logit_bias must be an Integer");
  }
  if (!RTEST(rb_obj_is_kind_of(logit_bias, rb_cLlamaLogitBias))) {
    rb_raise(rb_eArgError, "logit_bias must be a LlamaLogitBias");
  }
  const int32_t vocab_size = NUM2INT(n_vocab);
  const int32_t bias_count = NUM2INT(n_logit_bias);
  const llama_logit_bias* bias_entries = get_llama_logit_bias(logit_bias);
  struct llama_sampler* created = llama_sampler_init_logit_bias(vocab_size, bias_count, bias_entries);
  RB_GC_GUARD(logit_bias);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_min_p(p, min_keep) ⇒ `LlamaSampler`

Parameters:

p (Float)
min_keep (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3285

/**
 * Ruby binding for llama_sampler_init_min_p.
 *
 * @param self      receiver (unused)
 * @param p         must be a Float, otherwise ArgumentError is raised
 * @param min_keep  must be an Integer, otherwise ArgumentError is raised;
 *                  converted with NUM2SIZET
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_min_p(VALUE self, VALUE p, VALUE min_keep) {
  if (!RB_FLOAT_TYPE_P(p)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  const double threshold = NUM2DBL(p);
  const size_t keep_at_least = NUM2SIZET(min_keep);
  struct llama_sampler* created = llama_sampler_init_min_p(threshold, keep_at_least);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m) ⇒ `LlamaSampler`

Parameters:

n_vocab (Integer)
seed (Integer)
tau (Float)
eta (Float)
m (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3407

/**
 * Ruby binding for llama_sampler_init_mirostat.
 *
 * @param self     receiver (unused)
 * @param n_vocab  must be an Integer, otherwise ArgumentError is raised
 * @param seed     must be an Integer, otherwise ArgumentError is raised;
 *                 converted with NUM2UINT
 * @param tau      must be a Float, otherwise ArgumentError is raised
 * @param eta      must be a Float, otherwise ArgumentError is raised
 * @param m        must be an Integer, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_mirostat(VALUE self, VALUE n_vocab, VALUE seed, VALUE tau, VALUE eta, VALUE m) {
  if (!RB_INTEGER_TYPE_P(n_vocab)) {
    rb_raise(rb_eArgError, "n_vocab must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
    return Qnil;
  }
  if (!RB_FLOAT_TYPE_P(tau)) {
    rb_raise(rb_eArgError, "tau must be a Float");
    return Qnil;
  }
  if (!RB_FLOAT_TYPE_P(eta)) {
    rb_raise(rb_eArgError, "eta must be a Float");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(m)) {
    /* Message had a stray trailing space; now matches the other type errors. */
    rb_raise(rb_eArgError, "m must be an Integer");
    return Qnil;
  }
  struct llama_sampler* sampler = llama_sampler_init_mirostat(NUM2INT(n_vocab), NUM2UINT(seed), NUM2DBL(tau), NUM2DBL(eta), NUM2INT(m));
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
}

.llama_sampler_init_mirostat_v2(seed, tau, eta) ⇒ `LlamaSampler`

Parameters:

seed (Integer)
tau (Float)
eta (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3439

/**
 * Ruby binding for llama_sampler_init_mirostat_v2.
 *
 * @param self  receiver (unused)
 * @param seed  must be an Integer, otherwise ArgumentError is raised;
 *              converted with NUM2UINT
 * @param tau   must be a Float, otherwise ArgumentError is raised
 * @param eta   must be a Float, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_mirostat_v2(VALUE self, VALUE seed, VALUE tau, VALUE eta) {
  if (!RB_INTEGER_TYPE_P(seed)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  if (!RB_FLOAT_TYPE_P(tau)) {
    rb_raise(rb_eArgError, "tau must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(eta)) {
    rb_raise(rb_eArgError, "eta must be a Float");
  }
  const unsigned int seed_value = NUM2UINT(seed);
  const double tau_value = NUM2DBL(tau);
  const double eta_value = NUM2DBL(eta);
  struct llama_sampler* created = llama_sampler_init_mirostat_v2(seed_value, tau_value, eta_value);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_penalties(penalty_last_n, penalty_repeat, penalty_freq, penalty_present) ⇒ `LlamaSampler`

Parameters:

penalty_last_n (Integer)
penalty_repeat (Float)
penalty_freq (Float)
penalty_present (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3494

/**
 * Ruby binding for llama_sampler_init_penalties.
 *
 * @param self             receiver (unused)
 * @param penalty_last_n   must be an Integer, otherwise ArgumentError is raised
 * @param penalty_repeat   must be a Float, otherwise ArgumentError is raised
 * @param penalty_freq     must be a Float, otherwise ArgumentError is raised
 * @param penalty_present  must be a Float, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_penalties(VALUE self, VALUE penalty_last_n, VALUE penalty_repeat, VALUE penalty_freq, VALUE penalty_present) {
  if (!RB_INTEGER_TYPE_P(penalty_last_n)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "penalty_last_n must be an Integer");
  }
  if (!RB_FLOAT_TYPE_P(penalty_repeat)) {
    rb_raise(rb_eArgError, "penalty_repeat must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(penalty_freq)) {
    rb_raise(rb_eArgError, "penalty_freq must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(penalty_present)) {
    rb_raise(rb_eArgError, "penalty_present must be a Float");
  }
  const int last_n = NUM2INT(penalty_last_n);
  const double repeat = NUM2DBL(penalty_repeat);
  const double freq = NUM2DBL(penalty_freq);
  const double present = NUM2DBL(penalty_present);
  struct llama_sampler* created = llama_sampler_init_penalties(last_n, repeat, freq, present);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_temp(t) ⇒ `LlamaSampler`

Parameters:

t (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3322

/**
 * Ruby binding for llama_sampler_init_temp.
 *
 * @param self  receiver (unused)
 * @param t     must be a Float, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_temp(VALUE self, VALUE t) {
  if (!RB_FLOAT_TYPE_P(t)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "t must be a Float");
  }
  const double temperature = NUM2DBL(t);
  struct llama_sampler* created = llama_sampler_init_temp(temperature);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_temp_ext(t, delta, exponent) ⇒ `LlamaSampler`

Parameters:

t (Float)
delta (Float)
exponent (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3338

/**
 * Ruby binding for llama_sampler_init_temp_ext.
 *
 * @param self      receiver (unused)
 * @param t         must be a Float, otherwise ArgumentError is raised
 * @param delta     must be a Float, otherwise ArgumentError is raised
 * @param exponent  must be a Float, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_temp_ext(VALUE self, VALUE t, VALUE delta, VALUE exponent) {
  if (!RB_FLOAT_TYPE_P(t)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "t must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(delta)) {
    rb_raise(rb_eArgError, "delta must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(exponent)) {
    rb_raise(rb_eArgError, "exponent must be a Float");
  }
  const double temperature = NUM2DBL(t);
  const double delta_value = NUM2DBL(delta);
  const double exponent_value = NUM2DBL(exponent);
  struct llama_sampler* created = llama_sampler_init_temp_ext(temperature, delta_value, exponent_value);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_top_k(k) ⇒ `LlamaSampler`

Parameters:

k (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3251

/**
 * Ruby binding for llama_sampler_init_top_k.
 *
 * @param self  receiver (unused)
 * @param k     must be an Integer, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_top_k(VALUE self, VALUE k) {
  if (!RB_INTEGER_TYPE_P(k)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "k must be an Integer");
  }
  const int top_k = NUM2INT(k);
  struct llama_sampler* created = llama_sampler_init_top_k(top_k);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_top_n_sigma(n) ⇒ `LlamaSampler`

Parameters:

n (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3389

/**
 * Ruby binding for llama_sampler_init_top_n_sigma.
 *
 * @param self  receiver (unused)
 * @param n     must be a Float, otherwise ArgumentError is raised
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_top_n_sigma(VALUE self, VALUE n) {
  if (!RB_FLOAT_TYPE_P(n)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "n must be a Float");
  }
  const double sigma_count = NUM2DBL(n);
  struct llama_sampler* created = llama_sampler_init_top_n_sigma(sigma_count);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_top_p(p, min_keep) ⇒ `LlamaSampler`

Parameters:

p (Float)
min_keep (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3266

/**
 * Ruby binding for llama_sampler_init_top_p.
 *
 * @param self      receiver (unused)
 * @param p         must be a Float, otherwise ArgumentError is raised
 * @param min_keep  must be an Integer, otherwise ArgumentError is raised;
 *                  converted with NUM2SIZET
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_top_p(VALUE self, VALUE p, VALUE min_keep) {
  if (!RB_FLOAT_TYPE_P(p)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  const double threshold = NUM2DBL(p);
  const size_t keep_at_least = NUM2SIZET(min_keep);
  struct llama_sampler* created = llama_sampler_init_top_p(threshold, keep_at_least);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_typical(p, min_keep) ⇒ `LlamaSampler`

Parameters:

p (Float)
min_keep (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3304

/**
 * Ruby binding for llama_sampler_init_typical.
 *
 * @param self      receiver (unused)
 * @param p         must be a Float, otherwise ArgumentError is raised
 * @param min_keep  must be an Integer, otherwise ArgumentError is raised;
 *                  converted with NUM2SIZET
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_typical(VALUE self, VALUE p, VALUE min_keep) {
  if (!RB_FLOAT_TYPE_P(p)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  const double threshold = NUM2DBL(p);
  const size_t keep_at_least = NUM2SIZET(min_keep);
  struct llama_sampler* created = llama_sampler_init_typical(threshold, keep_at_least);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_xtc(p, t, min_keep, seed) ⇒ `LlamaSampler`

Parameters:

p (Float)
t (Float)
min_keep (Integer)
seed (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3363

/**
 * Ruby binding for llama_sampler_init_xtc.
 *
 * @param self      receiver (unused)
 * @param p         must be a Float, otherwise ArgumentError is raised
 * @param t         must be a Float, otherwise ArgumentError is raised
 * @param min_keep  must be an Integer, otherwise ArgumentError is raised;
 *                  converted with NUM2SIZET
 * @param seed      must be an Integer, otherwise ArgumentError is raised;
 *                  converted with NUM2UINT
 * @return a new LlamaSampler object wrapping the created sampler
 */
static VALUE rb_llama_sampler_init_xtc(VALUE self, VALUE p, VALUE t, VALUE min_keep, VALUE seed) {
  if (!RB_FLOAT_TYPE_P(p)) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(t)) {
    rb_raise(rb_eArgError, "t must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  const double probability = NUM2DBL(p);
  const double threshold = NUM2DBL(t);
  const size_t keep_at_least = NUM2SIZET(min_keep);
  const unsigned int seed_value = NUM2UINT(seed);
  struct llama_sampler* created = llama_sampler_init_xtc(probability, threshold, keep_at_least, seed_value);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_name(sampler) ⇒ `String`

Parameters:

sampler (LlamaSampler)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 3020

/**
 * Ruby binding for llama_sampler_name.
 *
 * @param self     receiver (unused)
 * @param sampler  must be a LlamaSampler, otherwise ArgumentError is raised
 * @return a UTF-8 String copied from the C string llama_sampler_name returns
 */
static VALUE rb_llama_sampler_name(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  struct llama_sampler* native_sampler = get_llama_sampler(sampler);
  const char* label = llama_sampler_name(native_sampler);
  VALUE result = rb_utf8_str_new_cstr(label);
  RB_GC_GUARD(sampler);
  return result;
}

.llama_sampler_reset(sampler) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3082

/**
 * Ruby binding for llama_sampler_reset.
 *
 * @param self     receiver (unused)
 * @param sampler  must be a LlamaSampler, otherwise ArgumentError is raised
 * @return nil
 */
static VALUE rb_llama_sampler_reset(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  struct llama_sampler* native_sampler = get_llama_sampler(sampler);
  llama_sampler_reset(native_sampler);
  RB_GC_GUARD(sampler);
  return Qnil;
}

.llama_sampler_sample(sampler, context, idx) ⇒ `Integer`

Parameters:

sampler (LlamaSampler)
context (LlamaContext)
idx (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3582

/**
 * Ruby binding for llama_sampler_sample.
 *
 * @param self  receiver (unused)
 * @param smpl  must be a LlamaSampler, otherwise ArgumentError is raised
 * @param ctx   must be a LlamaContext, otherwise ArgumentError is raised
 * @param idx   must be an Integer, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_sampler_sample returns
 */
static VALUE rb_llama_sampler_sample(VALUE self, VALUE smpl, VALUE ctx, VALUE idx) {
  if (!RTEST(rb_obj_is_kind_of(smpl, rb_cLlamaSampler))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "smpl must be a LlamaSampler");
  }
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(idx)) {
    rb_raise(rb_eArgError, "idx must be an Integer");
  }
  struct llama_sampler* native_sampler = get_llama_sampler(smpl);
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  const int position = NUM2INT(idx);
  const int32_t sampled = llama_sampler_sample(native_sampler, ctx_wrapper->context, position);
  /* Both Ruby objects must outlive the native call above. */
  RB_GC_GUARD(smpl);
  RB_GC_GUARD(ctx);
  return INT2NUM(sampled);
}

.llama_set_adapter_lora(context, adapter, scale) ⇒ `Integer`

Parameters:

context (LlamaContext)
adapter (LlamaAdapterLora)
scale (Float)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1691

/**
 * Ruby binding for llama_set_adapter_lora.
 *
 * @param self     receiver (unused)
 * @param ctx      must be a LlamaContext, otherwise ArgumentError is raised
 * @param adapter  must be a LlamaAdapterLora, otherwise ArgumentError is raised
 * @param scale    must be a Float, otherwise ArgumentError is raised; it is
 *                 narrowed to a C float before the call
 * @return Integer built from the status code llama_set_adapter_lora returns
 */
static VALUE rb_llama_set_adapter_lora(VALUE self, VALUE ctx, VALUE adapter, VALUE scale) {
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
    return Qnil;
  }
  if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
    return Qnil;
  }
  if (!RB_FLOAT_TYPE_P(scale)) {
    rb_raise(rb_eArgError, "scale must be a Float");
    return Qnil;
  }
  llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
  const int32_t res = llama_set_adapter_lora(context_wrapper->context, adapter_wrapper->adapter, (float)NUM2DBL(scale));
  RB_GC_GUARD(ctx);
  RB_GC_GUARD(adapter);
  /* res is a C integer: convert it TO a Ruby Integer (INT2NUM). The previous
   * NUM2INT treated the raw C value as a Ruby object reference. */
  return INT2NUM(res);
}

.llama_set_causal_attn(context, causal_attn) ⇒ `NilClass`

Parameters:

context (LlamaContext)
causal_attn (Boolean)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2454

/**
 * Ruby binding for llama_set_causal_attn.
 *
 * @param self         receiver (unused)
 * @param ctx          must be a LlamaContext, otherwise ArgumentError is raised
 * @param causal_attn  any Ruby value; its truthiness (RTEST) is passed on
 * @return nil
 */
static VALUE rb_llama_set_causal_attn(VALUE self, VALUE ctx, VALUE causal_attn) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  bool enable = false;
  if (RTEST(causal_attn)) {
    enable = true;
  }
  llama_set_causal_attn(ctx_wrapper->context, enable);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_set_embeddings(context, embeddings) ⇒ `NilClass`

Parameters:

context (LlamaContext)
embeddings (Boolean)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2436

/**
 * Ruby binding for llama_set_embeddings.
 *
 * @param self        receiver (unused)
 * @param ctx         must be a LlamaContext, otherwise ArgumentError is raised
 * @param embeddings  any Ruby value; its truthiness (RTEST) is passed on
 * @return nil
 */
static VALUE rb_llama_set_embeddings(VALUE self, VALUE ctx, VALUE embeddings) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  bool enable = false;
  if (RTEST(embeddings)) {
    enable = true;
  }
  llama_set_embeddings(ctx_wrapper->context, enable);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_set_n_threads(context, n_threads, n_threads_batch) ⇒ `NilClass`

Parameters:

context (LlamaContext)
n_threads (Integer)
n_threads_batch (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2379

/**
 * Ruby binding for llama_set_n_threads.
 *
 * @param self             receiver (unused)
 * @param ctx              must be a LlamaContext, otherwise ArgumentError is
 *                         raised
 * @param n_threads        must be an Integer, otherwise ArgumentError is raised
 * @param n_threads_batch  must be an Integer, otherwise ArgumentError is raised
 * @return nil
 */
static VALUE rb_llama_set_n_threads(VALUE self, VALUE ctx, VALUE n_threads, VALUE n_threads_batch) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(n_threads)) {
    rb_raise(rb_eArgError, "n_threads must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(n_threads_batch)) {
    rb_raise(rb_eArgError, "n_threads_batch must be an Integer");
  }
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  const int thread_count = NUM2INT(n_threads);
  const int batch_thread_count = NUM2INT(n_threads_batch);
  llama_set_n_threads(ctx_wrapper->context, thread_count, batch_thread_count);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_state_get_size(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2222

/**
 * Ruby binding for llama_state_get_size.
 *
 * @param self  receiver (unused)
 * @param ctx   must be a LlamaContext, otherwise ArgumentError is raised
 * @return Integer built from the size_t value llama_state_get_size returns
 */
static VALUE rb_llama_state_get_size(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  const size_t state_bytes = llama_state_get_size(ctx_wrapper->context);
  RB_GC_GUARD(ctx);
  return SIZET2NUM(state_bytes);
}

.llama_state_seq_get_size(context, seq_id) ⇒ `Integer`

Parameters:

context (LlamaContext)
seq_id (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2239

/**
 * Ruby binding for llama_state_seq_get_size.
 *
 * @param self    receiver (unused)
 * @param ctx     must be a LlamaContext, otherwise ArgumentError is raised
 * @param seq_id  must be an Integer, otherwise ArgumentError is raised
 * @return Integer built from the size_t value llama_state_seq_get_size returns
 */
static VALUE rb_llama_state_seq_get_size(VALUE self, VALUE ctx, VALUE seq_id) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  const int sequence = NUM2INT(seq_id);
  const size_t state_bytes = llama_state_seq_get_size(ctx_wrapper->context, sequence);
  RB_GC_GUARD(ctx);
  return SIZET2NUM(state_bytes);
}

.llama_supports_gpu_offload? ⇒ `Boolean`

Returns:

(Boolean)



1267
1268
1269

# File 'ext/llama_cpp/llama_cpp.c', line 1267

/**
 * Ruby binding for llama_supports_gpu_offload.
 *
 * @param self  receiver (unused)
 * @return true when llama_supports_gpu_offload reports a non-zero value,
 *         false otherwise
 */
static VALUE rb_llama_supports_gpu_offload(VALUE self) {
  if (llama_supports_gpu_offload()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_supports_mlock? ⇒ `Boolean`

Returns:

(Boolean)



1259
1260
1261

# File 'ext/llama_cpp/llama_cpp.c', line 1259

/**
 * Ruby binding for llama_supports_mlock.
 *
 * @param self  receiver (unused)
 * @return true when llama_supports_mlock reports a non-zero value,
 *         false otherwise
 */
static VALUE rb_llama_supports_mlock(VALUE self) {
  if (llama_supports_mlock()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_supports_mmap? ⇒ `Boolean`

Returns:

(Boolean)



1251
1252
1253

# File 'ext/llama_cpp/llama_cpp.c', line 1251

/**
 * Ruby binding for llama_supports_mmap.
 *
 * @param self  receiver (unused)
 * @return true when llama_supports_mmap reports a non-zero value,
 *         false otherwise
 */
static VALUE rb_llama_supports_mmap(VALUE self) {
  if (llama_supports_mmap()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_supports_rpc? ⇒ `Boolean`

Returns:

(Boolean)



1275
1276
1277

# File 'ext/llama_cpp/llama_cpp.c', line 1275

/**
 * Ruby binding for llama_supports_rpc.
 *
 * @param self  receiver (unused)
 * @return true when llama_supports_rpc reports a non-zero value,
 *         false otherwise
 */
static VALUE rb_llama_supports_rpc(VALUE self) {
  if (llama_supports_rpc()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_synchronize(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2471

/**
 * Ruby binding for llama_synchronize.
 *
 * @param self  receiver (unused)
 * @param ctx   must be a LlamaContext, otherwise ArgumentError is raised
 * @return nil
 */
static VALUE rb_llama_synchronize(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* ctx_wrapper = get_llama_context_wrapper(ctx);
  llama_synchronize(ctx_wrapper->context);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_time_us ⇒ `Integer`

Returns:

(Integer)



1235
1236
1237

# File 'ext/llama_cpp/llama_cpp.c', line 1235

/**
 * Ruby binding for llama_time_us.
 *
 * @param self  receiver (unused)
 * @return Integer built from the value llama_time_us returns
 *
 * The value is converted with LL2NUM rather than LONG2NUM so that it is not
 * truncated on platforms where `long` is narrower than 64 bits.
 * NOTE(review): assumes llama_time_us returns a 64-bit integer -- confirm
 * against llama.h.
 */
static VALUE rb_llama_time_us(VALUE self) {
  return LL2NUM(llama_time_us());
}

.llama_token_to_piece(vocab, token, lstrip, special) ⇒ `String`

Parameters:

vocab (LlamaVocab)
token (Integer)
lstrip (Integer)
special (Boolean)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 2889

/**
 * Ruby binding for llama_token_to_piece: renders a single token as a String.
 *
 * The native call is first tried with an 8-byte buffer. A negative result is
 * treated as the negated buffer size that is required, and the call is
 * repeated once with a buffer of exactly that size.
 *
 * The resulting String is built from the byte count reported by the native
 * call. The buffer is never NUL-terminated by this function, so it must not
 * be read as a C string.
 *
 * @param self     receiver (unused)
 * @param vocab    must be a LlamaVocab, otherwise ArgumentError is raised
 * @param token    must be an Integer, otherwise ArgumentError is raised
 * @param lstrip   must be an Integer, otherwise ArgumentError is raised
 * @param special  any Ruby value; its truthiness (RTEST) is passed on
 * @return a UTF-8 String holding the bytes written by llama_token_to_piece
 * @raise RuntimeError ("Failed to convert") when the retry does not produce
 *        exactly the number of bytes requested by the first call
 */
static VALUE rb_llama_token_to_piece(VALUE self, VALUE vocab, VALUE token, VALUE lstrip, VALUE special) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(lstrip)) {
    rb_raise(rb_eArgError, "lstrip must be an Integer");
    return Qnil;
  }

  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  llama_token token_ = NUM2INT(token);
  const int32_t lstrip_ = NUM2INT(lstrip);
  const bool special_ = RTEST(special) ? true : false;

  int32_t buf_size = 8;
  char *buf = (char*)ruby_xmalloc(sizeof(char) * buf_size);
  int32_t n_chars = llama_token_to_piece(vocab_wrapper->vocab, token_, buf, buf_size, lstrip_, special_);

  if (n_chars < 0) {
    /* Initial buffer was too small: retry with the size the call asked for. */
    buf_size = -n_chars;
    ruby_xfree(buf);
    buf = (char*)ruby_xmalloc(sizeof(char) * buf_size);
    n_chars = llama_token_to_piece(vocab_wrapper->vocab, token_, buf, buf_size, lstrip_, special_);
    if (n_chars != buf_size) {
      ruby_xfree(buf);
      rb_raise(rb_eRuntimeError, "Failed to convert");
      return Qnil;
    }
  }

  /* Use the explicit length: buf holds n_chars bytes and no terminator. */
  VALUE ret = rb_utf8_str_new(buf, n_chars);
  ruby_xfree(buf);
  RB_GC_GUARD(vocab);

  return ret;
}

.llama_tokenize(vocab, text, tokens, n_tokens_max, add_special, parse_special) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)
text (String)
tokens (Array<Integer>)
n_tokens_max (Integer)
add_special (Boolean)
parse_special (Boolean)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2821

/**
 * Ruby binding for llama_tokenize.
 *
 * Tokenizes `text` into a temporary stack buffer of `n_tokens_max` entries
 * and, on success, resizes `tokens` to the number of tokens produced and
 * stores them in it.
 *
 * @param self           receiver (unused)
 * @param vocab          must be a LlamaVocab, otherwise ArgumentError is raised
 * @param text           must be a non-empty String, otherwise ArgumentError is
 *                       raised
 * @param tokens         must be an Array; existing entries within the first
 *                       n_tokens_max positions must be Integers, otherwise
 *                       ArgumentError is raised. Overwritten on success.
 * @param n_tokens_max   must be an Integer, otherwise ArgumentError is raised;
 *                       when it is <= 0 no buffer is allocated and NULL is
 *                       passed to llama_tokenize
 * @param add_special    any Ruby value; its truthiness (RTEST) is passed on
 * @param parse_special  any Ruby value; its truthiness (RTEST) is passed on
 * @return Integer built from the value llama_tokenize returns; when it is
 *         negative, `tokens` is left untouched
 */
static VALUE rb_llama_tokenize(VALUE self, VALUE vocab, VALUE text, VALUE tokens, VALUE n_tokens_max, VALUE add_special, VALUE parse_special) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_TYPE_P(text, T_STRING)) {
    rb_raise(rb_eArgError, "text must be a String");
    return Qnil;
  }
  if (!RB_TYPE_P(tokens, T_ARRAY)) {
    rb_raise(rb_eArgError, "tokens must be an Array");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(n_tokens_max)) {
    rb_raise(rb_eArgError, "n_tokens_max must be an Integer");
    return Qnil;
  }

  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  const char* text_ = StringValueCStr(text);
  const int32_t text_len = (int32_t)strlen(text_);
  int32_t n_tokens_max_ = NUM2INT(n_tokens_max);
  const bool add_special_ = RTEST(add_special) ? true : false;
  const bool parse_special_ = RTEST(parse_special) ? true : false;

  if (text_len <= 0) {
    rb_raise(rb_eArgError, "text must not be empty");
    return Qnil;
  }

  /* Size the buffer from the converted C integer. The Ruby VALUE
   * `n_tokens_max` is an encoded object reference, not the count. */
  llama_token* tokens_ = n_tokens_max_ <= 0 ? NULL : ALLOCA_N(llama_token, n_tokens_max_);
  const int32_t sz_tokens = (int32_t)RARRAY_LEN(tokens);

  /* Copy (and type-check) whatever the caller already placed in `tokens`. */
  for (int32_t i = 0; i < n_tokens_max_; i++) {
    if (i >= sz_tokens) break;
    VALUE token = rb_ary_entry(tokens, i);
    if (!RB_INTEGER_TYPE_P(token)) {
      rb_raise(rb_eArgError, "tokens must be an Array of Integers");
      return Qnil;
    }
    tokens_[i] = NUM2INT(token);
  }

  const int32_t n_tokens = llama_tokenize(vocab_wrapper->vocab, text_, text_len, tokens_, n_tokens_max_, add_special_, parse_special_);

  if (n_tokens < 0) {
    /* Negative result is handed back to the caller unchanged. */
    return INT2NUM(n_tokens);
  }

  rb_ary_resize(tokens, n_tokens);
  for (int i = 0; i < n_tokens; i++) {
    rb_ary_store(tokens, i, INT2NUM(tokens_[i]));
  }

  RB_GC_GUARD(vocab);
  RB_GC_GUARD(text);

  return INT2NUM(n_tokens);
}

.llama_vocab_bos(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2592

/**
 * Ruby binding for llama_vocab_bos.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_bos returns
 */
static VALUE rb_llama_vocab_bos(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_bos(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_eos(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2608

/**
 * Ruby binding for llama_vocab_eos.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_eos returns
 */
static VALUE rb_llama_vocab_eos(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_eos(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_eot(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2624

/**
 * Ruby binding for llama_vocab_eot.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_eot returns
 */
static VALUE rb_llama_vocab_eot(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_eot(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_fim_mid(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2752

/**
 * Ruby binding for llama_vocab_fim_mid.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_fim_mid returns
 */
static VALUE rb_llama_vocab_fim_mid(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_fim_mid(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_fim_pad(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2768

/**
 * Ruby binding for llama_vocab_fim_pad.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_fim_pad returns
 */
static VALUE rb_llama_vocab_fim_pad(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_fim_pad(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_fim_pre(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2720

/**
 * Ruby binding for llama_vocab_fim_pre.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_fim_pre returns
 */
static VALUE rb_llama_vocab_fim_pre(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_fim_pre(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_fim_rep(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2784

/**
 * Ruby binding for llama_vocab_fim_rep.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_fim_rep returns
 */
static VALUE rb_llama_vocab_fim_rep(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_fim_rep(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_fim_sep(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2800

/**
 * Ruby binding for llama_vocab_fim_sep.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_fim_sep returns
 */
static VALUE rb_llama_vocab_fim_sep(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_fim_sep(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_fim_suf(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2736

/**
 * Ruby binding for llama_vocab_fim_suf.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return Integer built from the token value llama_vocab_fim_suf returns
 */
static VALUE rb_llama_vocab_fim_suf(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t token_id = llama_vocab_fim_suf(wrapper->vocab);
  RB_GC_GUARD(vocab);
  return INT2NUM(token_id);
}

.llama_vocab_get_add_bos(vocab) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2688

/**
 * Ruby binding for llama_vocab_get_add_bos.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return true or false, mirroring the flag llama_vocab_get_add_bos returns
 */
static VALUE rb_llama_vocab_get_add_bos(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const bool add_bos = llama_vocab_get_add_bos(wrapper->vocab);
  RB_GC_GUARD(vocab);
  if (add_bos) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_vocab_get_add_eos(vocab) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2704

/**
 * Ruby binding for llama_vocab_get_add_eos.
 *
 * @param self   receiver (unused)
 * @param vocab  must be a LlamaVocab, otherwise ArgumentError is raised
 * @return true or false, mirroring the flag llama_vocab_get_add_eos returns
 */
static VALUE rb_llama_vocab_get_add_eos(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    /* rb_raise does not return, so no explicit return follows it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const bool add_eos = llama_vocab_get_add_eos(wrapper->vocab);
  RB_GC_GUARD(vocab);
  if (add_eos) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_vocab_get_attr(vocab, token) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2530

/*
 * LlamaCpp.llama_vocab_get_attr(vocab, token) -> Integer
 *
 * Returns the value of llama_vocab_get_attr() for `token` in the vocabulary
 * wrapped by `vocab`, converted to a Ruby Integer.
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_get_attr(VALUE self, VALUE vocab, VALUE token) {
  /* Validate arguments in order: vocab first, then token. */
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  const int32_t attr_bits = llama_vocab_get_attr(wrapper->vocab, token_id);
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return INT2NUM(attr_bits);
}

.llama_vocab_get_score(vocab, token) ⇒ `Float`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Float)

# File 'ext/llama_cpp/llama_cpp.c', line 2509

/*
 * LlamaCpp.llama_vocab_get_score(vocab, token) -> Float
 *
 * Returns the value of llama_vocab_get_score() for `token` in the vocabulary
 * wrapped by `vocab`, converted to a Ruby Float.
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_get_score(VALUE self, VALUE vocab, VALUE token) {
  /* Validate arguments in order: vocab first, then token. */
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  /* Held as a float first, then widened by DBL2NUM. */
  const float token_score = llama_vocab_get_score(wrapper->vocab, token_id);
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return DBL2NUM(token_score);
}

.llama_vocab_get_text(vocab, token) ⇒ `String`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 2488

/*
 * LlamaCpp.llama_vocab_get_text(vocab, token) -> String
 *
 * Returns the C string produced by llama_vocab_get_text() for `token` in the
 * vocabulary wrapped by `vocab`, copied into a new UTF-8 Ruby String.
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_get_text(VALUE self, VALUE vocab, VALUE token) {
  /* Validate arguments in order: vocab first, then token. */
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  const char* token_text = llama_vocab_get_text(wrapper->vocab, token_id);
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return rb_utf8_str_new_cstr(token_text);
}

.llama_vocab_is_control?(vocab, token) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2572

/*
 * LlamaCpp.llama_vocab_is_control?(vocab, token) -> Boolean
 *
 * Returns true when llama_vocab_is_control() reports `token` as a control
 * token in the vocabulary wrapped by `vocab`, false otherwise.
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_is_control(VALUE self, VALUE vocab, VALUE token) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    /* The message names the actual parameter (`vocab`), matching the other
     * llama_vocab_* bindings; it previously said "model". */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
    return Qnil;
  }
  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  const bool is_control = llama_vocab_is_control(vocab_wrapper->vocab, NUM2INT(token));
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return is_control ? Qtrue : Qfalse;
}

.llama_vocab_is_eog?(vocab, token) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2551

/*
 * LlamaCpp.llama_vocab_is_eog?(vocab, token) -> Boolean
 *
 * Returns true when llama_vocab_is_eog() reports a truthy result for `token`
 * in the vocabulary wrapped by `vocab`, false otherwise.
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_is_eog(VALUE self, VALUE vocab, VALUE token) {
  /* Validate arguments in order: vocab first, then token. */
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  VALUE result = Qfalse;
  if (llama_vocab_is_eog(wrapper->vocab, token_id)) {
    result = Qtrue;
  }
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_n_tokens(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1504

/*
 * LlamaCpp.llama_vocab_n_tokens(vocab) -> Integer
 *
 * Returns the value of llama_vocab_n_tokens() for the vocabulary wrapped by
 * `vocab`, converted to a Ruby Integer.
 * Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_n_tokens(VALUE self, VALUE vocab) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    /* rb_raise() does not return to this function. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int token_count = llama_vocab_n_tokens(wrapper->vocab);
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return INT2NUM(token_count);
}

.llama_vocab_nl(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2656

/*
 * LlamaCpp.llama_vocab_nl(vocab) -> Integer
 *
 * Calls llama_vocab_nl() on the vocabulary wrapped by `vocab` and returns the
 * resulting token id as a Ruby Integer.
 * Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_nl(VALUE self, VALUE vocab) {
  if (rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
    const int32_t nl_id = llama_vocab_nl(wrapper->vocab);
    /* Guard `vocab` so the object stays referenced past the native call. */
    RB_GC_GUARD(vocab);
    return INT2NUM(nl_id);
  }

  /* Wrong argument type: report it to the Ruby caller. */
  rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  return Qnil;
}

.llama_vocab_pad(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2672

/*
 * LlamaCpp.llama_vocab_pad(vocab) -> Integer
 *
 * Calls llama_vocab_pad() on the vocabulary wrapped by `vocab` and returns
 * the resulting token id as a Ruby Integer.
 * Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_pad(VALUE self, VALUE vocab) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    /* rb_raise() does not return to this function. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t pad_id = llama_vocab_pad(wrapper->vocab);
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return INT2NUM(pad_id);
}

.llama_vocab_sep(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2640

/*
 * LlamaCpp.llama_vocab_sep(vocab) -> Integer
 *
 * Calls llama_vocab_sep() on the vocabulary wrapped by `vocab` and returns
 * the resulting token id as a Ruby Integer.
 * Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_sep(VALUE self, VALUE vocab) {
  if (rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
    const int32_t sep_id = llama_vocab_sep(wrapper->vocab);
    /* Guard `vocab` so the object stays referenced past the native call. */
    RB_GC_GUARD(vocab);
    return INT2NUM(sep_id);
  }

  /* Wrong argument type: report it to the Ruby caller. */
  rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  return Qnil;
}

.llama_vocab_type(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1488

/*
 * LlamaCpp.llama_vocab_type(vocab) -> Integer
 *
 * Returns the value of llama_vocab_type() for the vocabulary wrapped by
 * `vocab`, converted to a Ruby Integer.
 * Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_type(VALUE self, VALUE vocab) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    /* rb_raise() does not return to this function. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapper = get_llama_vocab_wrapper(vocab);
  const int type_code = llama_vocab_type(wrapper->vocab);
  /* Guard `vocab` so the object stays referenced past the native call. */
  RB_GC_GUARD(vocab);
  return INT2NUM(type_code);
}

Module: LlamaCpp

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.generate(context, prompt, n_predict: 128) ⇒ String

.ggml_backend_load_all ⇒ NilClass

.llama_adapter_lora_free(adapter) ⇒ NilClass

.llama_adapter_lora_init(model, path_lora) ⇒ LlamaAdapterLora

.llama_backend_free ⇒ NilClass

.llama_backend_init ⇒ NilClass

.llama_batch_free(batch) ⇒ NilClass

.llama_batch_get_one(tokens) ⇒ LlamaBatch

.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ LlamaBatch

.llama_clear_adapter_lora(context) ⇒ NilClass

.llama_decode(context, batch) ⇒ Integer

.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ String

.llama_encode(context, batch) ⇒ Integer

.llama_free(context) ⇒ NilClass

.llama_get_kv_cache_token_count(context) ⇒ Integer

.llama_get_kv_cache_used_cells(context) ⇒ Integer

.llama_get_model(context) ⇒ LlamaModel

.llama_init_from_model(model, params) ⇒ LlamaContext

.llama_kv_cache_can_shift?(context) ⇒ Boolean

.llama_kv_cache_clear(context) ⇒ NilClass

.llama_kv_cache_defrag(context) ⇒ NilClass

.llama_kv_cache_seq_add(context, seq_id, p0, p1, delta) ⇒ NilClass

.llama_kv_cache_seq_cp(context, seq_id_src, seq_id_dst, p0, p1) ⇒ NilClass

.llama_kv_cache_seq_div(context, seq_id, p0, p1, d) ⇒ NilClass

.llama_kv_cache_seq_keep(context, seq_id) ⇒ NilClass

.llama_kv_cache_seq_pos_max(context, seq_id) ⇒ Integer

.llama_kv_cache_seq_rm(context, seq_id, p0, p1) ⇒ Boolean

.llama_kv_cache_update(context) ⇒ NilClass

.llama_kv_cache_view_free(view) ⇒ NilClass

.llama_kv_cache_view_init(context, n_seq_max) ⇒ LlamaKvCacheView

.llama_kv_cache_view_update(context, view) ⇒ NilClass

.llama_max_devices ⇒ Integer

.llama_model_decoder_start_token(model) ⇒ Integer

.llama_model_desc(model) ⇒ String

.llama_model_free(model) ⇒ NilClass

.llama_model_get_vocab(model) ⇒ LlamaVocab

.llama_model_has_decoder(model) ⇒ Boolean

.llama_model_has_encoder?(model) ⇒ Boolean

.llama_model_is_recurrent?(model) ⇒ Boolean

.llama_model_load_from_file(path_model) ⇒ LlamaModel

.llama_model_load_from_splits(paths, params) ⇒ LlamaModel

.llama_model_n_ctx_train(model) ⇒ Integer

.llama_model_n_embd(model) ⇒ Integer

.llama_model_n_head(model) ⇒ Integer

.llama_model_n_head_kv(model) ⇒ Integer

.llama_model_n_layer(model) ⇒ Integer

.llama_model_n_params(model) ⇒ Integer

.llama_model_quantize(fname_inp, fname_out, params) ⇒ Boolean

.llama_model_rope_freq_scale_train(model) ⇒ Float

.llama_model_rope_type(model) ⇒ Integer

.llama_model_size(model) ⇒ Integer

.llama_n_batch(context) ⇒ Integer

.llama_n_ctx(context) ⇒ Integer

.llama_n_seq_max(context) ⇒ Integer

.llama_n_threads(context) ⇒ Integer

.llama_n_threads_batch(context) ⇒ Integer

.llama_n_ubatch(context) ⇒ Integer

.llama_numa_init(numa) ⇒ NilClass

.llama_perf_context(context) ⇒ LlamaPerfContextData

.llama_perf_context_print(context) ⇒ NilClass

.llama_perf_context_reset(context) ⇒ NilClass

.llama_perf_sampler(chain) ⇒ LlamaPerfSamplerData

.llama_perf_sampler_print(chain) ⇒ NilClass

.llama_perf_sampler_reset(chain) ⇒ NilClass

.llama_pooling_type(context) ⇒ Integer

.llama_print_system_info ⇒ String

.llama_rm_adapter_lora(context, adapter) ⇒ Integer

.llama_sampler_accept(sampler, token) ⇒ NilClass

.llama_sampler_apply(sampler, cur_p) ⇒ NilClass

.llama_sampler_chain_add(chain, smpl) ⇒ NilClass

.llama_sampler_chain_get(chain, i) ⇒ LlamaSampler

.llama_sampler_chain_init(params) ⇒ LlamaSampler

.llama_sampler_chain_n(chain) ⇒ Integer

.llama_sampler_chain_remove(chain, i) ⇒ LlamaSampler

.generate(context, prompt, n_predict: 128) ⇒ `String`

.ggml_backend_load_all ⇒ `NilClass`

.llama_adapter_lora_free(adapter) ⇒ `NilClass`

.llama_adapter_lora_init(model, path_lora) ⇒ `LlamaAdapterLora`

.llama_backend_free ⇒ `NilClass`

.llama_backend_init ⇒ `NilClass`

.llama_batch_free(batch) ⇒ `NilClass`

.llama_batch_get_one(tokens) ⇒ `LlamaBatch`

.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ `LlamaBatch`

.llama_clear_adapter_lora(context) ⇒ `NilClass`

.llama_decode(context, batch) ⇒ `Integer`

.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ `String`

.llama_encode(context, batch) ⇒ `Integer`

.llama_free(context) ⇒ `NilClass`

.llama_get_kv_cache_token_count(context) ⇒ `Integer`

.llama_get_kv_cache_used_cells(context) ⇒ `Integer`

.llama_get_model(context) ⇒ `LlamaModel`

.llama_init_from_model(model, params) ⇒ `LlamaContext`

.llama_kv_cache_can_shift?(context) ⇒ `Boolean`

.llama_kv_cache_clear(context) ⇒ `NilClass`

.llama_kv_cache_defrag(context) ⇒ `NilClass`

.llama_kv_cache_seq_add(context, seq_id, p0, p1, delta) ⇒ `NilClass`

.llama_kv_cache_seq_cp(context, seq_id_src, seq_id_dst, p0, p1) ⇒ `NilClass`

.llama_kv_cache_seq_div(context, seq_id, p0, p1, d) ⇒ `NilClass`

.llama_kv_cache_seq_keep(context, seq_id) ⇒ `NilClass`

.llama_kv_cache_seq_pos_max(context, seq_id) ⇒ `Integer`

.llama_kv_cache_seq_rm(context, seq_id, p0, p1) ⇒ `Boolean`

.llama_kv_cache_update(context) ⇒ `NilClass`

.llama_kv_cache_view_free(view) ⇒ `NilClass`

.llama_kv_cache_view_init(context, n_seq_max) ⇒ `LlamaKvCacheView`

.llama_kv_cache_view_update(context, view) ⇒ `NilClass`

.llama_max_devices ⇒ `Integer`

.llama_model_decoder_start_token(model) ⇒ `Integer`

.llama_model_desc(model) ⇒ `String`

.llama_model_free(model) ⇒ `NilClass`

.llama_model_get_vocab(model) ⇒ `LlamaVocab`

.llama_model_has_decoder(model) ⇒ `Boolean`

.llama_model_has_encoder?(model) ⇒ `Boolean`

.llama_model_is_recurrent?(model) ⇒ `Boolean`

.llama_model_load_from_file(path_model) ⇒ `LlamaModel`

.llama_model_load_from_splits(paths, params) ⇒ `LlamaModel`

.llama_model_n_ctx_train(model) ⇒ `Integer`

.llama_model_n_embd(model) ⇒ `Integer`

.llama_model_n_head(model) ⇒ `Integer`

.llama_model_n_head_kv(model) ⇒ `Integer`

.llama_model_n_layer(model) ⇒ `Integer`

.llama_model_n_params(model) ⇒ `Integer`

.llama_model_quantize(fname_inp, fname_out, params) ⇒ `Boolean`

.llama_model_rope_freq_scale_train(model) ⇒ `Float`

.llama_model_rope_type(model) ⇒ `Integer`

.llama_model_size(model) ⇒ `Integer`

.llama_n_batch(context) ⇒ `Integer`

.llama_n_ctx(context) ⇒ `Integer`

.llama_n_seq_max(context) ⇒ `Integer`

.llama_n_threads(context) ⇒ `Integer`

.llama_n_threads_batch(context) ⇒ `Integer`

.llama_n_ubatch(context) ⇒ `Integer`

.llama_numa_init(numa) ⇒ `NilClass`

.llama_perf_context(context) ⇒ `LlamaPerfContextData`

.llama_perf_context_print(context) ⇒ `NilClass`

.llama_perf_context_reset(context) ⇒ `NilClass`

.llama_perf_sampler(chain) ⇒ `LlamaPerfSamplerData`

.llama_perf_sampler_print(chain) ⇒ `NilClass`

.llama_perf_sampler_reset(chain) ⇒ `NilClass`

.llama_pooling_type(context) ⇒ `Integer`

.llama_print_system_info ⇒ `String`

.llama_rm_adapter_lora(context, adapter) ⇒ `Integer`

.llama_sampler_accept(sampler, token) ⇒ `NilClass`

.llama_sampler_apply(sampler, cur_p) ⇒ `NilClass`

.llama_sampler_chain_add(chain, smpl) ⇒ `NilClass`

.llama_sampler_chain_get(chain, i) ⇒ `LlamaSampler`

.llama_sampler_chain_init(params) ⇒ `LlamaSampler`

.llama_sampler_chain_n(chain) ⇒ `Integer`

.llama_sampler_chain_remove(chain, i) ⇒ `LlamaSampler`

.llama_sampler_clone(sampler) ⇒ `LlamaSampler`

.llama_sampler_free(sampler) ⇒ `NilClass`

.llama_sampler_get_seed(sampler) ⇒ `Integer`

.llama_sampler_init_dist(seed) ⇒ `LlamaSampler`

.llama_sampler_init_grammar(vocab, grammar_str, grammar_root) ⇒ `LlamaSampler`

.llama_sampler_init_greedy ⇒ `LlamaSampler`