Module: LlamaCpp

Defined in: lib/llama_cpp.rb,
lib/llama_cpp/version.rb,
ext/llama_cpp/llama_cpp.c

Overview

llama_cpp.rb provides Ruby bindings for llama.cpp.

Defined Under Namespace

Classes: LlamaAdapterLora, LlamaBatch, LlamaChatMessage, LlamaContext, LlamaContextParams, LlamaLogitBias, LlamaMemoryT, LlamaModel, LlamaModelImatrixData, LlamaModelKvOverride, LlamaModelParams, LlamaModelQuantizeParams, LlamaModelTensorBuftOverride, LlamaModelTensorOverride, LlamaPerfContextData, LlamaPerfSamplerData, LlamaSampler, LlamaSamplerChainParams, LlamaTokenData, LlamaTokenDataArray, LlamaVocab

Constant Summary collapse

VERSION = The version of llama_cpp.rb you install.

'0.25.0'

LLAMA_CPP_VERSION = The supported version of llama.cpp.

'b8920'

LLAMA_DEFAULT_SEED =

rb_str_new2(tmp)

LLAMA_TOKEN_NULL =

INT2NUM(LLAMA_TOKEN_NULL)

LLAMA_STATE_SEQ_FLAGS_SWA_ONLY =

INT2NUM(LLAMA_STATE_SEQ_FLAGS_SWA_ONLY)

LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY =

INT2NUM(LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY)

LLAMA_FILE_MAGIC_GGLA =

rb_str_new2(tmp)

LLAMA_FILE_MAGIC_GGSN =

rb_str_new2(tmp)

LLAMA_FILE_MAGIC_GGSQ =

rb_str_new2(tmp)

LLAMA_SESSION_MAGIC =

rb_str_new2(tmp)

LLAMA_SESSION_VERSION =

INT2NUM(LLAMA_SESSION_VERSION)

LLAMA_STATE_SEQ_MAGIC =

rb_str_new2(tmp)

LLAMA_STATE_SEQ_VERSION =

INT2NUM(LLAMA_STATE_SEQ_VERSION)

LLAMA_VOCAB_TYPE_NONE =

INT2NUM(LLAMA_VOCAB_TYPE_NONE)

LLAMA_VOCAB_TYPE_SPM =

INT2NUM(LLAMA_VOCAB_TYPE_SPM)

LLAMA_VOCAB_TYPE_BPE =

INT2NUM(LLAMA_VOCAB_TYPE_BPE)

LLAMA_VOCAB_TYPE_WPM =

INT2NUM(LLAMA_VOCAB_TYPE_WPM)

LLAMA_VOCAB_TYPE_UGM =

INT2NUM(LLAMA_VOCAB_TYPE_UGM)

LLAMA_VOCAB_TYPE_RWKV =

INT2NUM(LLAMA_VOCAB_TYPE_RWKV)

LLAMA_VOCAB_TYPE_PLAMO2 =

INT2NUM(LLAMA_VOCAB_TYPE_PLAMO2)

LLAMA_ROPE_TYPE_NONE =

INT2NUM(LLAMA_ROPE_TYPE_NONE)

LLAMA_ROPE_TYPE_NORM =

INT2NUM(LLAMA_ROPE_TYPE_NORM)

LLAMA_ROPE_TYPE_NEOX =

INT2NUM(LLAMA_ROPE_TYPE_NEOX)

LLAMA_ROPE_TYPE_MROPE =

INT2NUM(LLAMA_ROPE_TYPE_MROPE)

LLAMA_ROPE_TYPE_IMROPE =

INT2NUM(LLAMA_ROPE_TYPE_IMROPE)

LLAMA_ROPE_TYPE_VISION =

INT2NUM(LLAMA_ROPE_TYPE_VISION)

LLAMA_TOKEN_TYPE_UNDEFINED =

INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED)

LLAMA_TOKEN_TYPE_NORMAL =

INT2NUM(LLAMA_TOKEN_TYPE_NORMAL)

LLAMA_TOKEN_TYPE_UNKNOWN =

INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN)

LLAMA_TOKEN_TYPE_CONTROL =

INT2NUM(LLAMA_TOKEN_TYPE_CONTROL)

LLAMA_TOKEN_TYPE_USER_DEFINED =

INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED)

LLAMA_TOKEN_TYPE_UNUSED =

INT2NUM(LLAMA_TOKEN_TYPE_UNUSED)

LLAMA_TOKEN_TYPE_BYTE =

INT2NUM(LLAMA_TOKEN_TYPE_BYTE)

LLAMA_TOKEN_ATTR_UNDEFINED =

INT2NUM(LLAMA_TOKEN_ATTR_UNDEFINED)

LLAMA_TOKEN_ATTR_UNKNOWN =

INT2NUM(LLAMA_TOKEN_ATTR_UNKNOWN)

LLAMA_TOKEN_ATTR_UNUSED =

INT2NUM(LLAMA_TOKEN_ATTR_UNUSED)

LLAMA_TOKEN_ATTR_NORMAL =

INT2NUM(LLAMA_TOKEN_ATTR_NORMAL)

LLAMA_TOKEN_ATTR_CONTROL =

INT2NUM(LLAMA_TOKEN_ATTR_CONTROL)

LLAMA_TOKEN_ATTR_USER_DEFINED =

INT2NUM(LLAMA_TOKEN_ATTR_USER_DEFINED)

LLAMA_TOKEN_ATTR_BYTE =

INT2NUM(LLAMA_TOKEN_ATTR_BYTE)

LLAMA_TOKEN_ATTR_NORMALIZED =

INT2NUM(LLAMA_TOKEN_ATTR_NORMALIZED)

LLAMA_TOKEN_ATTR_LSTRIP =

INT2NUM(LLAMA_TOKEN_ATTR_LSTRIP)

LLAMA_TOKEN_ATTR_RSTRIP =

INT2NUM(LLAMA_TOKEN_ATTR_RSTRIP)

LLAMA_TOKEN_ATTR_SINGLE_WORD =

INT2NUM(LLAMA_TOKEN_ATTR_SINGLE_WORD)

LLAMA_FTYPE_ALL_F32 =

INT2NUM(LLAMA_FTYPE_ALL_F32)

LLAMA_FTYPE_MOSTLY_F16 =

INT2NUM(LLAMA_FTYPE_MOSTLY_F16)

LLAMA_FTYPE_MOSTLY_Q4_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0)

LLAMA_FTYPE_MOSTLY_Q4_1 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1)

LLAMA_FTYPE_MOSTLY_Q8_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q8_0)

LLAMA_FTYPE_MOSTLY_Q5_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0)

LLAMA_FTYPE_MOSTLY_Q5_1 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1)

LLAMA_FTYPE_MOSTLY_Q2_K =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K)

LLAMA_FTYPE_MOSTLY_Q3_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_S)

LLAMA_FTYPE_MOSTLY_Q3_K_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_M)

LLAMA_FTYPE_MOSTLY_Q3_K_L =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_L)

LLAMA_FTYPE_MOSTLY_Q4_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_K_S)

LLAMA_FTYPE_MOSTLY_Q4_K_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_K_M)

LLAMA_FTYPE_MOSTLY_Q5_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_S)

LLAMA_FTYPE_MOSTLY_Q5_K_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M)

LLAMA_FTYPE_MOSTLY_Q6_K =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K)

LLAMA_FTYPE_MOSTLY_IQ2_XXS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS)

LLAMA_FTYPE_MOSTLY_IQ2_XS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS)

LLAMA_FTYPE_MOSTLY_Q2_K_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S)

LLAMA_FTYPE_MOSTLY_IQ3_XS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS)

LLAMA_FTYPE_MOSTLY_IQ3_XXS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS)

LLAMA_FTYPE_MOSTLY_IQ1_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S)

LLAMA_FTYPE_MOSTLY_IQ4_NL =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL)

LLAMA_FTYPE_MOSTLY_IQ3_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_S)

LLAMA_FTYPE_MOSTLY_IQ3_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_M)

LLAMA_FTYPE_MOSTLY_IQ2_S =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_S)

LLAMA_FTYPE_MOSTLY_IQ2_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_M)

LLAMA_FTYPE_MOSTLY_IQ4_XS =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_XS)

LLAMA_FTYPE_MOSTLY_IQ1_M =

INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_M)

LLAMA_FTYPE_MOSTLY_BF16 =

INT2NUM(LLAMA_FTYPE_MOSTLY_BF16)

LLAMA_FTYPE_MOSTLY_TQ1_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0)

LLAMA_FTYPE_MOSTLY_TQ2_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0)

LLAMA_FTYPE_MOSTLY_MXFP4_MOE =

INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE)

LLAMA_FTYPE_MOSTLY_NVFP4 =

INT2NUM(LLAMA_FTYPE_MOSTLY_NVFP4)

LLAMA_FTYPE_MOSTLY_Q1_0 =

INT2NUM(LLAMA_FTYPE_MOSTLY_Q1_0)

LLAMA_FTYPE_GUESSED =

INT2NUM(LLAMA_FTYPE_GUESSED)

LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED)

LLAMA_ROPE_SCALING_TYPE_NONE =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE)

LLAMA_ROPE_SCALING_TYPE_LINEAR =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR)

LLAMA_ROPE_SCALING_TYPE_YARN =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN)

LLAMA_ROPE_SCALING_TYPE_LONGROPE =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_LONGROPE)

LLAMA_ROPE_SCALING_TYPE_MAX_VALUE =

INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE)

LLAMA_POOLING_TYPE_UNSPECIFIED =

INT2NUM(LLAMA_POOLING_TYPE_UNSPECIFIED)

LLAMA_POOLING_TYPE_NONE =

INT2NUM(LLAMA_POOLING_TYPE_NONE)

LLAMA_POOLING_TYPE_MEAN =

INT2NUM(LLAMA_POOLING_TYPE_MEAN)

LLAMA_POOLING_TYPE_CLS =

INT2NUM(LLAMA_POOLING_TYPE_CLS)

LLAMA_POOLING_TYPE_LAST =

INT2NUM(LLAMA_POOLING_TYPE_LAST)

LLAMA_POOLING_TYPE_RANK =

INT2NUM(LLAMA_POOLING_TYPE_RANK)

LLAMA_ATTENTION_TYPE_UNSPECIFIED =

INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED)

LLAMA_ATTENTION_TYPE_CAUSAL =

INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL)

LLAMA_ATTENTION_TYPE_NON_CAUSAL =

INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL)

LLAMA_FLASH_ATTN_TYPE_AUTO =

INT2NUM(LLAMA_FLASH_ATTN_TYPE_AUTO)

LLAMA_FLASH_ATTN_TYPE_DISABLED =

INT2NUM(LLAMA_FLASH_ATTN_TYPE_DISABLED)

LLAMA_FLASH_ATTN_TYPE_ENABLED =

INT2NUM(LLAMA_FLASH_ATTN_TYPE_ENABLED)

LLAMA_SPLIT_MODE_NONE =

INT2NUM(LLAMA_SPLIT_MODE_NONE)

LLAMA_SPLIT_MODE_LAYER =

INT2NUM(LLAMA_SPLIT_MODE_LAYER)

LLAMA_SPLIT_MODE_ROW =

INT2NUM(LLAMA_SPLIT_MODE_ROW)

LLAMA_SPLIT_MODE_TENSOR =

INT2NUM(LLAMA_SPLIT_MODE_TENSOR)

LLAMA_KV_OVERRIDE_TYPE_INT =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT)

LLAMA_KV_OVERRIDE_TYPE_FLOAT =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT)

LLAMA_KV_OVERRIDE_TYPE_BOOL =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL)

LLAMA_KV_OVERRIDE_TYPE_STR =

INT2NUM(LLAMA_KV_OVERRIDE_TYPE_STR)

LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE)

LLAMA_MODEL_META_KEY_SAMPLING_TOP_K =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_TOP_K)

LLAMA_MODEL_META_KEY_SAMPLING_TOP_P =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_TOP_P)

LLAMA_MODEL_META_KEY_SAMPLING_MIN_P =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_MIN_P)

LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY)

LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD)

LLAMA_MODEL_META_KEY_SAMPLING_TEMP =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_TEMP)

LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N)

LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT)

LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT)

LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU)

LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA =

INT2NUM(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA)

Class Method Summary collapse

.generate(context, prompt, n_predict: 128) ⇒ String

Generates text that continues the given prompt; intended as a quick operation check.
.ggml_backend_load_all ⇒ NilClass
.llama_adapter_get_alora_n_invocation_tokens(adapter) ⇒ Integer
.llama_adapter_lora_free(adapter) ⇒ NilClass
.llama_adapter_lora_init(model, path_lora) ⇒ LlamaAdapterLora
.llama_adapter_meta_count(adapter) ⇒ Integer
.llama_backend_free ⇒ NilClass
.llama_backend_init ⇒ NilClass
.llama_batch_free(batch) ⇒ NilClass
.llama_batch_get_one(tokens) ⇒ LlamaBatch
.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ LlamaBatch
.llama_decode(context, batch) ⇒ Integer
.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ String
.llama_encode(context, batch) ⇒ Integer
.llama_flash_attn_type_name(flash_attn_type) ⇒ String
.llama_free(context) ⇒ NilClass
.llama_get_memory(context) ⇒ LlamaMemoryT
.llama_get_model(context) ⇒ LlamaModel
.llama_init_from_model(model, params) ⇒ LlamaContext
.llama_max_devices ⇒ Integer
.llama_max_parallel_sequences ⇒ Integer
.llama_max_tensor_buft_overrides ⇒ Integer
.llama_memory_can_shift?(memory) ⇒ Boolean

llama_memory_can_shift.
.llama_memory_clear(memory, data) ⇒ Object

llama_memory_clear.
.llama_memory_seq_add(memory, seq_id, p0, p1, delta) ⇒ Object

llama_memory_seq_add.
.llama_memory_seq_cp(memory, seq_id_src, seq_id_dst, p0, p1) ⇒ Object

llama_memory_seq_cp.
.llama_memory_seq_div(memory, seq_id, p0, p1, d) ⇒ Object

llama_memory_seq_div.
.llama_memory_seq_keep(memory, seq_id) ⇒ Object

llama_memory_seq_keep.
.llama_memory_seq_pos_max(memory, seq_id) ⇒ Object

llama_memory_seq_pos_max.
.llama_memory_seq_pos_min(memory, seq_id) ⇒ Object

llama_memory_seq_pos_min.
.llama_memory_seq_rm(memory, seq_id, p0, p1) ⇒ Object

llama_memory_seq_rm.
.llama_model_cls_label(model, id) ⇒ Integer
.llama_model_decoder_start_token(model) ⇒ Integer
.llama_model_desc(model) ⇒ String
.llama_model_free(model) ⇒ NilClass
.llama_model_get_vocab(model) ⇒ LlamaVocab
.llama_model_has_decoder?(model) ⇒ Boolean
.llama_model_has_encoder?(model) ⇒ Boolean
.llama_model_is_diffusion?(model) ⇒ Boolean
.llama_model_is_hybrid?(model) ⇒ Boolean
.llama_model_is_recurrent?(model) ⇒ Boolean
.llama_model_load_from_file(path_model) ⇒ LlamaModel
.llama_model_load_from_splits(paths, params) ⇒ LlamaModel
.llama_model_meta_count(model) ⇒ Integer
.llama_model_meta_key_str(key) ⇒ String
.llama_model_n_cls_out(model) ⇒ Integer
.llama_model_n_ctx_train(model) ⇒ Integer
.llama_model_n_embd(model) ⇒ Integer
.llama_model_n_embd_inp(model) ⇒ Integer
.llama_model_n_embd_out(model) ⇒ Integer
.llama_model_n_head(model) ⇒ Integer
.llama_model_n_head_kv(model) ⇒ Integer
.llama_model_n_layer(model) ⇒ Integer
.llama_model_n_params(model) ⇒ Integer
.llama_model_n_swa(model) ⇒ Integer
.llama_model_quantize(fname_inp, fname_out, params) ⇒ Boolean
.llama_model_rope_freq_scale_train(model) ⇒ Float
.llama_model_rope_type(model) ⇒ Integer
.llama_model_save_to_file(model, path_model) ⇒ NilClass
.llama_model_size(model) ⇒ Integer
.llama_n_batch(context) ⇒ Integer
.llama_n_ctx(context) ⇒ Integer
.llama_n_ctx_seq(context) ⇒ Integer
.llama_n_seq_max(context) ⇒ Integer
.llama_n_threads(context) ⇒ Integer
.llama_n_threads_batch(context) ⇒ Integer
.llama_n_ubatch(context) ⇒ Integer
.llama_numa_init(numa) ⇒ NilClass
.llama_perf_context(context) ⇒ LlamaPerfContextData
.llama_perf_context_print(context) ⇒ NilClass
.llama_perf_context_reset(context) ⇒ NilClass
.llama_perf_sampler(chain) ⇒ LlamaPerfSamplerData
.llama_perf_sampler_print(chain) ⇒ NilClass
.llama_perf_sampler_reset(chain) ⇒ NilClass
.llama_pooling_type(context) ⇒ Integer
.llama_print_system_info ⇒ String
.llama_sampler_accept(sampler, token) ⇒ NilClass
.llama_sampler_apply(sampler, cur_p) ⇒ NilClass
.llama_sampler_chain_add(chain, smpl) ⇒ NilClass
.llama_sampler_chain_get(chain, i) ⇒ LlamaSampler
.llama_sampler_chain_init(params) ⇒ LlamaSampler
.llama_sampler_chain_n(chain) ⇒ Integer
.llama_sampler_chain_remove(chain, i) ⇒ LlamaSampler
.llama_sampler_clone(sampler) ⇒ LlamaSampler
.llama_sampler_free(sampler) ⇒ NilClass
.llama_sampler_get_seed(sampler) ⇒ Integer
.llama_sampler_init_adaptive_p(target, decay, seed) ⇒ LlamaSampler
.llama_sampler_init_dist(seed) ⇒ LlamaSampler
.llama_sampler_init_grammar(vocab, grammar_str, grammar_root) ⇒ LlamaSampler
.llama_sampler_init_greedy ⇒ LlamaSampler
.llama_sampler_init_infill(vocab) ⇒ LlamaSampler
.llama_sampler_init_logit_bias(n_vocab, n_logit_bias, logit_bias) ⇒ LlamaSampler
.llama_sampler_init_min_p(p, min_keep) ⇒ LlamaSampler
.llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m) ⇒ LlamaSampler
.llama_sampler_init_mirostat_v2(seed, tau, eta) ⇒ LlamaSampler
.llama_sampler_init_penalties(penalty_last_n, penalty_repeat, penalty_freq, penalty_present) ⇒ LlamaSampler
.llama_sampler_init_temp(t) ⇒ LlamaSampler
.llama_sampler_init_temp_ext(t, delta, exponent) ⇒ LlamaSampler
.llama_sampler_init_top_k(k) ⇒ LlamaSampler
.llama_sampler_init_top_n_sigma(n) ⇒ LlamaSampler
.llama_sampler_init_top_p(p, min_keep) ⇒ LlamaSampler
.llama_sampler_init_typical(p, min_keep) ⇒ LlamaSampler
.llama_sampler_init_xtc(p, t, min_keep, seed) ⇒ LlamaSampler
.llama_sampler_name(sampler) ⇒ String
.llama_sampler_reset(sampler) ⇒ NilClass
.llama_sampler_sample(sampler, context, idx) ⇒ Integer
.llama_set_adapters_lora(context, adapters, scales) ⇒ Integer
.llama_set_causal_attn(context, causal_attn) ⇒ NilClass
.llama_set_embeddings(context, embeddings) ⇒ NilClass
.llama_set_n_threads(context, n_threads, n_threads_batch) ⇒ NilClass
.llama_set_warmup(context, warmup) ⇒ NilClass
.llama_state_get_size(context) ⇒ Integer
.llama_state_seq_get_size(context, seq_id) ⇒ Integer
.llama_state_seq_get_size_ext(context, seq_id, flags) ⇒ Integer
.llama_supports_gpu_offload? ⇒ Boolean
.llama_supports_mlock? ⇒ Boolean
.llama_supports_mmap? ⇒ Boolean
.llama_supports_rpc? ⇒ Boolean
.llama_synchronize(context) ⇒ NilClass
.llama_time_us ⇒ Integer
.llama_token_to_piece(vocab, token, lstrip, special) ⇒ String
.llama_tokenize(vocab, text, tokens, n_tokens_max, add_special, parse_special) ⇒ Integer
.llama_vocab_bos(vocab) ⇒ Integer
.llama_vocab_eos(vocab) ⇒ Integer
.llama_vocab_eot(vocab) ⇒ Integer
.llama_vocab_fim_mid(vocab) ⇒ Integer
.llama_vocab_fim_pad(vocab) ⇒ Integer
.llama_vocab_fim_pre(vocab) ⇒ Integer
.llama_vocab_fim_rep(vocab) ⇒ Integer
.llama_vocab_fim_sep(vocab) ⇒ Integer
.llama_vocab_fim_suf(vocab) ⇒ Integer
.llama_vocab_get_add_bos(vocab) ⇒ Boolean
.llama_vocab_get_add_eos(vocab) ⇒ Boolean
.llama_vocab_get_add_sep(vocab) ⇒ Boolean
.llama_vocab_get_attr(vocab, token) ⇒ Integer
.llama_vocab_get_score(vocab, token) ⇒ Float
.llama_vocab_get_text(vocab, token) ⇒ String
.llama_vocab_is_control?(vocab, token) ⇒ Boolean
.llama_vocab_is_eog?(vocab, token) ⇒ Boolean
.llama_vocab_mask(vocab) ⇒ Integer
.llama_vocab_n_tokens(vocab) ⇒ Integer
.llama_vocab_nl(vocab) ⇒ Integer
.llama_vocab_pad(vocab) ⇒ Integer
.llama_vocab_sep(vocab) ⇒ Integer
.llama_vocab_type(vocab) ⇒ Integer

Class Method Details

.generate(context, prompt, n_predict: 128) ⇒ `String`

Generates text that continues the given prompt; intended as a quick operation check.

Parameters:

context (LlamaCpp::LlamaContext) —

The context to use.
prompt (String) —

The prompt to start generation with.
n_predict (Integer) (defaults to: 128) —

The number of tokens to predict.

Returns:

(String)

Raises:

(ArgumentError)

# File 'lib/llama_cpp.rb', line 18

# Greedy text generation used as a quick operation check.
#
# The model and vocabulary are taken from +context+, but decoding runs on a
# separate context created here and sized from the prompt length and +n_predict+.
#
# @param context [LlamaCpp::LlamaContext] context whose model is used
# @param prompt [String] text to continue
# @param n_predict [Integer] value added to the prompt token count to bound the decode loop
# @return [String] the generated pieces joined together
# @raise [ArgumentError] if +context+ or +prompt+ has the wrong type
# @raise [RuntimeError] if the prompt cannot be tokenized
def generate(context, prompt, n_predict: 128) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  raise ArgumentError, 'context must be a LlamaContext' unless context.is_a?(LlamaCpp::LlamaContext)
  raise ArgumentError, 'prompt must be a String' unless prompt.is_a?(String)

  llm = LlamaCpp.llama_get_model(context)
  vocabulary = LlamaCpp.llama_model_get_vocab(llm)

  # First pass with an empty buffer; the negated result is used as the token count
  # (presumably llama_tokenize reports the required size as a negative number).
  token_count = -LlamaCpp.llama_tokenize(vocabulary, prompt, [], 0, true, true)

  tokens = []
  tokenize_status = LlamaCpp.llama_tokenize(vocabulary, prompt, tokens, token_count, true, true)
  raise 'Failed to tokenize the prompt' if tokenize_status.negative?

  context_params = LlamaCpp::LlamaContextParams.new
  context_params.n_ctx = token_count + n_predict - 1
  context_params.n_batch = token_count
  context_params.no_perf = false

  decoding_ctx = LlamaCpp.llama_init_from_model(llm, context_params)

  chain_params = LlamaCpp::LlamaSamplerChainParams.new
  chain_params.no_perf = false
  sampler = LlamaCpp.llama_sampler_chain_init(chain_params)
  LlamaCpp.llama_sampler_chain_add(sampler, LlamaCpp.llama_sampler_init_greedy)

  pending = LlamaCpp.llama_batch_get_one(tokens)

  limit = token_count + n_predict
  consumed = 0
  pieces = []
  while consumed + pending.n_tokens < limit
    # Stop as soon as decoding reports a non-zero status.
    break unless LlamaCpp.llama_decode(decoding_ctx, pending).zero?

    consumed += pending.n_tokens

    next_token = LlamaCpp.llama_sampler_sample(sampler, decoding_ctx, -1)
    break if LlamaCpp.llama_vocab_is_eog?(vocabulary, next_token)

    pieces << LlamaCpp.llama_token_to_piece(vocabulary, next_token, 0, true)

    # Feed the sampled token back in as the next single-token batch.
    pending = LlamaCpp.llama_batch_get_one([next_token])
  end

  pieces.join
end

.ggml_backend_load_all ⇒ `NilClass`

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 27

/* Ruby-facing wrapper: invokes ggml_backend_load_all() and always answers nil. */
static VALUE rb_ggml_backend_load_all(VALUE self) {
  (void)self; /* the receiver is not consulted */
  ggml_backend_load_all();
  return Qnil;
}

.llama_adapter_get_alora_n_invocation_tokens(adapter) ⇒ `Integer`

Parameters:

adapter (LlamaAdapterLora)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2184

/*
 * Ruby binding for llama_adapter_get_alora_n_invocation_tokens.
 *
 * adapter: must be a LlamaAdapterLora, otherwise ArgumentError is raised.
 * Returns the value reported by llama.cpp as a Ruby Integer.
 */
static VALUE rb_llama_adapter_get_alora_n_invocation_tokens(VALUE self, VALUE adapter) {
  if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
    return Qnil;
  }
  llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
  /* Convert through unsigned long long: ULONG2NUM would narrow the value on
     platforms where unsigned long is 32 bits (e.g. 64-bit Windows).
     NOTE(review): llama.h is expected to declare the result as uint64_t - confirm. */
  const unsigned long long n_tokens =
    (unsigned long long)llama_adapter_get_alora_n_invocation_tokens(adapter_wrapper->adapter);
  return ULL2NUM(n_tokens);
}

.llama_adapter_lora_free(adapter) ⇒ `NilClass`

Parameters:

adapter (LlamaAdapterLora)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2165

/*
 * Frees the native adapter held by a LlamaAdapterLora object.
 *
 * Raises ArgumentError when `adapter` is not a LlamaAdapterLora. The stored
 * pointer is reset to NULL after freeing, so a later call finds nothing to
 * free. Always returns nil.
 */
static VALUE rb_llama_adapter_lora_free(VALUE self, VALUE adapter) {
  if (!RTEST(rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora))) {
    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora"); /* does not return */
  }
  llama_adapter_lora_wrapper* const wrapper = get_llama_adapter_lora_wrapper(adapter);
  if (wrapper->adapter) {
    llama_adapter_lora_free(wrapper->adapter);
    wrapper->adapter = NULL;
  }
  RB_GC_GUARD(adapter);
  return Qnil;
}

.llama_adapter_lora_init(model, path_lora) ⇒ `LlamaAdapterLora`

Parameters:

model (LlamaModel)
path_lora (String)

Returns:

(LlamaAdapterLora)

# File 'ext/llama_cpp/llama_cpp.c', line 2069

/*
 * Creates a LlamaAdapterLora by calling llama_adapter_lora_init with the
 * native model and the given path.
 *
 * model:     must be a LlamaModel (ArgumentError otherwise).
 * path_lora: must be a String (ArgumentError otherwise).
 * Returns a new LlamaAdapterLora wrapping whatever llama_adapter_lora_init returned.
 */
static VALUE rb_llama_adapter_lora_init(VALUE self, VALUE model, VALUE path_lora) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel"); /* does not return */
  }
  if (!RB_TYPE_P(path_lora, T_STRING)) {
    rb_raise(rb_eArgError, "path_lora must be a String"); /* does not return */
  }
  llama_model_wrapper* const owner = get_llama_model_wrapper(model);
  const char* const lora_path = StringValueCStr(path_lora);
  llama_adapter_lora_wrapper* const wrapper =
    (llama_adapter_lora_wrapper*)ruby_xmalloc(sizeof(llama_adapter_lora_wrapper));
  wrapper->adapter = llama_adapter_lora_init(owner->model, lora_path);
  /* Keep both Ruby objects alive until the native call has finished with them. */
  RB_GC_GUARD(model);
  RB_GC_GUARD(path_lora);
  return TypedData_Wrap_Struct(rb_cLlamaAdapterLora, &llama_adapter_lora_wrapper_data_type, wrapper);
}

.llama_adapter_meta_count(adapter) ⇒ `Integer`

Parameters:

adapter (LlamaAdapterLora)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2092

/*
 * Ruby binding for llama_adapter_meta_count.
 *
 * adapter: must be a LlamaAdapterLora (ArgumentError otherwise).
 * Returns the native result converted with INT2NUM.
 */
static VALUE rb_llama_adapter_meta_count(VALUE self, VALUE adapter) {
  if (!RTEST(rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora))) {
    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora"); /* does not return */
  }
  llama_adapter_lora_wrapper* const wrapper = get_llama_adapter_lora_wrapper(adapter);
  return INT2NUM(llama_adapter_meta_count(wrapper->adapter));
}

.llama_backend_free ⇒ `NilClass`

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1312

/* Ruby-facing wrapper: calls llama_backend_free() and always answers nil. */
static VALUE rb_llama_backend_free(VALUE self) {
  (void)self; /* the receiver is not consulted */
  llama_backend_free();
  return Qnil;
}

.llama_backend_init ⇒ `NilClass`

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1303

/* Ruby-facing wrapper: calls llama_backend_init() and always answers nil. */
static VALUE rb_llama_backend_init(VALUE self) {
  (void)self; /* the receiver is not consulted */
  llama_backend_init();
  return Qnil;
}

.llama_batch_free(batch) ⇒ `NilClass`

Parameters:

batch (LlamaBatch)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2549

/*
 * Releases the buffers owned by a LlamaBatch via llama_batch_free.
 *
 * batch: must be a LlamaBatch (ArgumentError otherwise).
 * Always returns nil.
 */
static VALUE rb_llama_batch_free(VALUE self, VALUE batch) {
  if (!rb_obj_is_kind_of(batch, rb_cLlamaBatch)) {
    rb_raise(rb_eArgError, "batch must be a LlamaBatch");
    return Qnil;
  }
  llama_batch* batch_ = get_llama_batch(batch);
  llama_batch_free(*batch_);
  /* llama_batch_free receives the struct by value, so the wrapped struct would
     otherwise keep its now-dangling buffer pointers. Zero it so that freeing the
     same LlamaBatch again, or reading it afterwards, does not touch released
     memory. (Assigning NULL to the local pointer, as before, had no effect.) */
  *batch_ = (llama_batch){0};
  RB_GC_GUARD(batch);
  return Qnil;
}

.llama_batch_get_one(tokens) ⇒ `LlamaBatch`

Parameters:

tokens (Array<Integer>)

Returns:

(LlamaBatch)

# File 'ext/llama_cpp/llama_cpp.c', line 2494

/*
 * Builds a LlamaBatch from a Ruby Array of Integer token ids.
 *
 * tokens: must be an Array (ArgumentError otherwise) whose elements are all
 *         Integers (ArgumentError otherwise).
 * Returns nil for an empty Array; otherwise a LlamaBatch whose `token` field
 * points at a freshly allocated copy of the ids.
 */
static VALUE rb_llama_batch_get_one(VALUE self, VALUE tokens) {
  if (!RB_TYPE_P(tokens, T_ARRAY)) {
    rb_raise(rb_eArgError, "tokens must be an Array"); /* does not return */
  }
  const size_t count = RARRAY_LEN(tokens);
  if (count == 0) {
    return Qnil;
  }
  llama_token* const ids = (llama_token*)ruby_xmalloc(sizeof(llama_token) * count);
  size_t idx = 0;
  while (idx < count) {
    const VALUE entry = rb_ary_entry(tokens, idx);
    if (!RB_INTEGER_TYPE_P(entry)) {
      /* Release the partially filled buffer before raising. */
      ruby_xfree(ids);
      rb_raise(rb_eArgError, "tokens must be an Array of Integers"); /* does not return */
    }
    ids[idx] = NUM2INT(entry);
    idx++;
  }
  llama_batch* const wrapped = (llama_batch*)ruby_xmalloc(sizeof(llama_batch));
  /* The native call is given NULL for the token buffer; the copied ids are attached afterwards. */
  *wrapped = llama_batch_get_one(NULL, (int32_t)count);
  wrapped->token = ids;
  return TypedData_Wrap_Struct(rb_cLlamaBatch, &llama_batch_type, wrapped);
}

.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ `LlamaBatch`

Parameters:

n_tokens (Integer)
embd (Integer)
n_seq_max (Integer)

Returns:

(LlamaBatch)

# File 'ext/llama_cpp/llama_cpp.c', line 2526

/*
 * Allocates a LlamaBatch through llama_batch_init.
 *
 * n_tokens, embd, n_seq_max: each must be an Integer (ArgumentError otherwise)
 * that fits in a C int (NUM2INT raises RangeError otherwise).
 * Returns a new LlamaBatch wrapping the struct produced by llama_batch_init.
 */
static VALUE rb_llama_batch_init(VALUE self, VALUE n_tokens, VALUE embd, VALUE n_seq_max) {
  if (!RB_INTEGER_TYPE_P(n_tokens)) {
    rb_raise(rb_eArgError, "n_tokens must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(embd)) {
    rb_raise(rb_eArgError, "embd must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(n_seq_max)) {
    rb_raise(rb_eArgError, "n_seq_max must be an Integer");
    return Qnil;
  }
  /* Convert before allocating: NUM2INT raises for values outside the int range,
     and raising after ruby_xmalloc would leak the wrapper allocation. */
  const int32_t n_tokens_ = NUM2INT(n_tokens);
  const int32_t embd_ = NUM2INT(embd);
  const int32_t n_seq_max_ = NUM2INT(n_seq_max);
  llama_batch* batch = (llama_batch*)ruby_xmalloc(sizeof(llama_batch));
  *batch = llama_batch_init(n_tokens_, embd_, n_seq_max_);
  return TypedData_Wrap_Struct(rb_cLlamaBatch, &llama_batch_type, batch);
}

.llama_decode(context, batch) ⇒ `Integer`

Parameters:

context (LlamaContext)
batch (LlamaBatch)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2590

/*
 * Ruby binding for llama_decode.
 *
 * ctx:   must be a LlamaContext (ArgumentError otherwise).
 * batch: must be a LlamaBatch (ArgumentError otherwise).
 * Returns the native status code as a Ruby Integer.
 */
static VALUE rb_llama_decode(VALUE self, VALUE ctx, VALUE batch) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }
  if (!RTEST(rb_obj_is_kind_of(batch, rb_cLlamaBatch))) {
    rb_raise(rb_eArgError, "batch must be a LlamaBatch"); /* does not return */
  }
  llama_context_wrapper* const wrapper = get_llama_context_wrapper(ctx);
  llama_batch* const native_batch = get_llama_batch(batch);
  const int32_t status = llama_decode(wrapper->context, *native_batch);
  /* Keep both Ruby objects alive across the native call. */
  RB_GC_GUARD(ctx);
  RB_GC_GUARD(batch);
  return INT2NUM(status);
}

.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ `String`

Parameters:

vocab (LlamaVocab)
tokens (Array<Integer>)
remove_special (Boolean)
unparse_special (Boolean)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 3221

/*
 * Ruby binding for llama_detokenize: converts an Array of token ids to text.
 *
 * vocab:           must be a LlamaVocab (ArgumentError otherwise).
 * tokens:          must be an Array (ArgumentError otherwise); elements are
 *                  converted with NUM2INT.
 * remove_special,
 * unparse_special: interpreted by Ruby truthiness and passed through.
 *
 * Returns nil for an empty Array, otherwise a UTF-8 String holding exactly the
 * bytes llama_detokenize reported. Raises RuntimeError when detokenization
 * still fails after retrying with the buffer size requested by the first call.
 */
static VALUE rb_llama_detokenize(VALUE self, VALUE vocab, VALUE tokens, VALUE remove_special, VALUE unparse_special) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_TYPE_P(tokens, T_ARRAY)) {
    rb_raise(rb_eArgError, "tokens must be an Array");
    return Qnil;
  }

  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  const int32_t n_tokens = (int32_t)RARRAY_LEN(tokens);
  if (n_tokens == 0) {
    return Qnil;
  }
  llama_token* tokens_ = (llama_token*)ruby_xmalloc(sizeof(llama_token) * n_tokens);
  for (int32_t i = 0; i < n_tokens; i++) {
    tokens_[i] = NUM2INT(rb_ary_entry(tokens, i));
  }
  /* Initial guess for the output size; the retry below handles larger results. */
  int32_t text_capacity = n_tokens > 1024 ? n_tokens : 1024;
  char* text = (char*)ruby_xmalloc(sizeof(char) * text_capacity);
  const bool remove_special_ = RTEST(remove_special) ? true : false;
  const bool unparse_special_ = RTEST(unparse_special) ? true : false;

  int32_t n_chars = llama_detokenize(vocab_wrapper->vocab, tokens_, n_tokens, text, text_capacity, remove_special_, unparse_special_);

  if (n_chars < 0) {
    /* A negative result is treated as the negated number of bytes required:
       retry once with a buffer of exactly that size. */
    text_capacity = -n_chars;
    ruby_xfree(text);
    text = (char*)ruby_xmalloc(sizeof(char) * text_capacity);
    n_chars = llama_detokenize(vocab_wrapper->vocab, tokens_, n_tokens, text, text_capacity, remove_special_, unparse_special_);
  }

  if (n_chars < 0 || n_chars > text_capacity) {
    ruby_xfree(tokens_);
    ruby_xfree(text);
    rb_raise(rb_eRuntimeError, "Failed to detokenize");
    return Qnil;
  }

  /* Nothing here writes a NUL terminator into the buffer, so build the String
     from the explicit byte count instead of treating the buffer as a C string. */
  VALUE ret = rb_utf8_str_new(text, n_chars);
  ruby_xfree(tokens_);
  ruby_xfree(text);
  RB_GC_GUARD(vocab);

  return ret;
}

.llama_encode(context, batch) ⇒ `Integer`

Parameters:

context (LlamaContext)
batch (LlamaBatch)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2567

/*
 * Ruby binding for llama_encode.
 *
 * ctx:   must be a LlamaContext (ArgumentError otherwise).
 * batch: must be a LlamaBatch (ArgumentError otherwise).
 * Returns the native status code as a Ruby Integer.
 */
static VALUE rb_llama_encode(VALUE self, VALUE ctx, VALUE batch) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }
  if (!RTEST(rb_obj_is_kind_of(batch, rb_cLlamaBatch))) {
    rb_raise(rb_eArgError, "batch must be a LlamaBatch"); /* does not return */
  }
  llama_context_wrapper* const wrapper = get_llama_context_wrapper(ctx);
  llama_batch* const native_batch = get_llama_batch(batch);
  const int32_t status = llama_encode(wrapper->context, *native_batch);
  /* Keep both Ruby objects alive across the native call. */
  RB_GC_GUARD(ctx);
  RB_GC_GUARD(batch);
  return INT2NUM(status);
}

.llama_flash_attn_type_name(flash_attn_type) ⇒ `String`

Parameters:

flash_attn_type (Integer)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 4142

/*
 * Ruby binding for llama_flash_attn_type_name.
 *
 * flash_attn_type: must be an Integer (ArgumentError otherwise); it is cast to
 *                  enum llama_flash_attn_type.
 * Returns the native name as a UTF-8 String.
 */
static VALUE rb_llama_flash_attn_type_name(VALUE self, VALUE flash_attn_type) {
  if (!RB_INTEGER_TYPE_P(flash_attn_type)) {
    rb_raise(rb_eArgError, "flash_attn_type must be an Integer"); /* does not return */
  }
  const enum llama_flash_attn_type kind = (enum llama_flash_attn_type)NUM2INT(flash_attn_type);
  return rb_utf8_str_new_cstr(llama_flash_attn_type_name(kind));
}

.llama_free(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1458

/*
 * Frees the native context held by a LlamaContext object.
 *
 * Raises ArgumentError when `context` is not a LlamaContext. The stored pointer
 * is reset to NULL after freeing, so a later call finds nothing to free.
 * Always returns nil.
 */
static VALUE rb_llama_free(VALUE self, VALUE context) {
  if (!RTEST(rb_obj_is_kind_of(context, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "context must be a LlamaContext"); /* does not return */
  }
  llama_context_wrapper* const wrapper = get_llama_context_wrapper(context);
  if (wrapper->context) {
    llama_free(wrapper->context);
    wrapper->context = NULL;
  }
  return Qnil;
}

.llama_get_memory(context) ⇒ `LlamaMemoryT`

Parameters:

context (LlamaContext)

Returns:

(LlamaMemoryT)

# File 'ext/llama_cpp/llama_cpp.c', line 2413

/*
 * Ruby binding for llama_get_memory.
 *
 * ctx: must be a LlamaContext (ArgumentError otherwise).
 * Returns a new LlamaMemoryT object wrapping the handle obtained from the context.
 */
static VALUE rb_llama_get_memory(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext"); /* does not return */
  }
  llama_context_wrapper* const owner = get_llama_context_wrapper(ctx);
  const llama_memory_t handle = llama_get_memory(owner->context);
  llama_memory_t_wrapper* const wrapper =
    (llama_memory_t_wrapper*)ruby_xmalloc(sizeof(llama_memory_t_wrapper));
  wrapper->memory = handle;
  RB_GC_GUARD(ctx);
  return TypedData_Wrap_Struct(rb_cLlamaMemoryT, &llama_memory_t_wrapper_data_type, wrapper);
}

.llama_get_model(context) ⇒ `LlamaModel`

Parameters:

context (LlamaContext)

Returns:

(LlamaModel)

# File 'ext/llama_cpp/llama_cpp.c', line 1610

/*
 * Ruby binding for llama_get_model.
 *
 * ctx: must be a LlamaContext (ArgumentError otherwise).
 * Returns a new LlamaModel object wrapping the model pointer obtained from the
 * context. The wrapper is flagged as `copied`.
 */
static VALUE rb_llama_get_model(VALUE self, VALUE ctx) {
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    /* Message names the actual class, matching the other ctx checks in this file. */
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
    return Qnil;
  }
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
  llama_model_wrapper* model_wrapper = (llama_model_wrapper*)ruby_xmalloc(sizeof(llama_model_wrapper));
  model_wrapper->model = (struct llama_model*)llama_get_model(context_wrapper->context);
  /* NOTE(review): presumably `copied` tells the wrapper's free function not to
     release a model this wrapper does not own - confirm against the data type. */
  model_wrapper->copied = true;
  RB_GC_GUARD(ctx);
  return TypedData_Wrap_Struct(rb_cLlamaModel, &llama_model_wrapper_data_type, model_wrapper);
}

.llama_init_from_model(model, params) ⇒ `LlamaContext`

Parameters:

model (LlamaModel)
params (LlamaContextParams)

Returns:

(LlamaContext)

# File 'ext/llama_cpp/llama_cpp.c', line 1435

/*
 * Ruby binding for llama_init_from_model.
 *
 * model:  must be a LlamaModel (ArgumentError otherwise).
 * params: must be a LlamaContextParams (ArgumentError otherwise); passed by value.
 * Returns a new LlamaContext wrapping whatever llama_init_from_model returned.
 */
static VALUE rb_llama_init_from_model(VALUE self, VALUE model, VALUE params) {
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel"); /* does not return */
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaContextParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaContextParams"); /* does not return */
  }
  llama_model_wrapper* const owner = get_llama_model_wrapper(model);
  struct llama_context_params* const native_params = get_llama_context_params(params);
  llama_context_wrapper* const wrapper =
    (llama_context_wrapper*)ruby_xmalloc(sizeof(llama_context_wrapper));
  wrapper->context = llama_init_from_model(owner->model, *native_params);
  /* Keep both Ruby objects alive until the native call has finished with them. */
  RB_GC_GUARD(model);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaContext, &llama_context_wrapper_data_type, wrapper);
}

.llama_max_devices ⇒ `Integer`

Returns:

(Integer)



# File 'ext/llama_cpp/llama_cpp.c', line 1483

/* Ruby-facing wrapper: returns llama_max_devices() converted with SIZET2NUM. */
static VALUE rb_llama_max_devices(VALUE self) {
  const size_t n_devices = llama_max_devices();
  return SIZET2NUM(n_devices);
}

.llama_max_parallel_sequences ⇒ `Integer`

Returns:

(Integer)



# File 'ext/llama_cpp/llama_cpp.c', line 1491

/* Ruby-facing wrapper: returns llama_max_parallel_sequences() converted with SIZET2NUM. */
static VALUE rb_llama_max_parallel_sequences(VALUE self) {
  const size_t n_sequences = llama_max_parallel_sequences();
  return SIZET2NUM(n_sequences);
}

.llama_max_tensor_buft_overrides ⇒ `Integer`

Returns:

(Integer)



# File 'ext/llama_cpp/llama_cpp.c', line 1499

/* Ruby-facing wrapper: returns llama_max_tensor_buft_overrides() converted with SIZET2NUM. */
static VALUE rb_llama_max_tensor_buft_overrides(VALUE self) {
  const size_t n_overrides = llama_max_tensor_buft_overrides();
  return SIZET2NUM(n_overrides);
}

.llama_memory_can_shift?(memory) ⇒ `Boolean`

llama_memory_can_shift

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2397

/*
 * Ruby binding for llama_memory_can_shift.
 *
 * memory: must be a LlamaMemoryT (ArgumentError otherwise).
 * Returns true or false according to the native result.
 */
static VALUE rb_llama_memory_can_shift(VALUE self, VALUE memory) {
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT"); /* does not return */
  }
  llama_memory_t_wrapper* const wrapper = get_llama_memory_t_wrapper(memory);
  const bool shiftable = llama_memory_can_shift(wrapper->memory);
  RB_GC_GUARD(memory);
  if (shiftable) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_memory_clear(memory, data) ⇒ `Object`

llama_memory_clear

# File 'ext/llama_cpp/llama_cpp.c', line 2237

/*
 * Ruby binding for llama_memory_clear.
 *
 * memory: must be a LlamaMemoryT (ArgumentError otherwise).
 * data:   interpreted by Ruby truthiness and forwarded as the native bool flag.
 * Always returns nil.
 */
static VALUE rb_llama_memory_clear(VALUE self, VALUE memory, VALUE data) {
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT"); /* does not return */
  }
  llama_memory_t_wrapper* const wrapper = get_llama_memory_t_wrapper(memory);
  const bool clear_data = RTEST(data);
  llama_memory_clear(wrapper->memory, clear_data);
  RB_GC_GUARD(memory);
  return Qnil;
}

.llama_memory_seq_add(memory, seq_id, p0, p1, delta) ⇒ `Object`

llama_memory_seq_add

# File 'ext/llama_cpp/llama_cpp.c', line 2313

static VALUE rb_llama_memory_seq_add(VALUE self, VALUE memory, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
  /* Binding for llama_memory_seq_add. Arguments are validated in declaration
     order (memory, seq_id, p0, p1, delta); the first invalid one raises
     ArgumentError. Always returns nil. */
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(delta)) {
    rb_raise(rb_eArgError, "delta must be an Integer");
  }
  llama_memory_t_wrapper* mem = get_llama_memory_t_wrapper(memory);
  /* Convert every Ruby Integer to a C int before making the native call. */
  const int seq_id_ = NUM2INT(seq_id);
  const int p0_ = NUM2INT(p0);
  const int p1_ = NUM2INT(p1);
  const int delta_ = NUM2INT(delta);
  llama_memory_seq_add(mem->memory, seq_id_, p0_, p1_, delta_);
  RB_GC_GUARD(memory);
  return Qnil;
}

.llama_memory_seq_cp(memory, seq_id_src, seq_id_dst, p0, p1) ⇒ `Object`

llama_memory_seq_cp

# File 'ext/llama_cpp/llama_cpp.c', line 2271

static VALUE rb_llama_memory_seq_cp(VALUE self, VALUE memory, VALUE seq_id_src, VALUE seq_id_dst, VALUE p0, VALUE p1) {
  /* Binding for llama_memory_seq_cp. Arguments are validated in declaration
     order (memory, seq_id_src, seq_id_dst, p0, p1); the first invalid one
     raises ArgumentError. Always returns nil. */
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
  }
  if (!RB_INTEGER_TYPE_P(seq_id_src)) {
    rb_raise(rb_eArgError, "seq_id_src must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(seq_id_dst)) {
    rb_raise(rb_eArgError, "seq_id_dst must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }
  llama_memory_t_wrapper* mem = get_llama_memory_t_wrapper(memory);
  /* Convert every Ruby Integer to a C int before making the native call. */
  const int src_ = NUM2INT(seq_id_src);
  const int dst_ = NUM2INT(seq_id_dst);
  const int p0_ = NUM2INT(p0);
  const int p1_ = NUM2INT(p1);
  llama_memory_seq_cp(mem->memory, src_, dst_, p0_, p1_);
  RB_GC_GUARD(memory);
  return Qnil;
}

.llama_memory_seq_div(memory, seq_id, p0, p1, d) ⇒ `Object`

llama_memory_seq_div

# File 'ext/llama_cpp/llama_cpp.c', line 2340

static VALUE rb_llama_memory_seq_div(VALUE self, VALUE memory, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
  /* Binding for llama_memory_seq_div. Arguments are validated in declaration
     order (memory, seq_id, p0, p1, d); the first invalid one raises
     ArgumentError. Always returns nil. */
  if (!rb_obj_is_kind_of(memory, rb_cLlamaMemoryT)) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(d)) {
    rb_raise(rb_eArgError, "d must be an Integer");
    return Qnil;
  }
  const int d_ = NUM2INT(d);
  /* llama.h describes this call as an integer division of positions by `d`.
     A zero divisor would be undefined behaviour in native code (typically a
     SIGFPE that kills the Ruby process), so reject it here instead. */
  if (d_ == 0) {
    rb_raise(rb_eArgError, "d must not be zero");
    return Qnil;
  }
  llama_memory_t_wrapper* memory_wrapper = get_llama_memory_t_wrapper(memory);
  llama_memory_seq_div(memory_wrapper->memory, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), d_);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(memory);
  return Qnil;
}

.llama_memory_seq_keep(memory, seq_id) ⇒ `Object`

llama_memory_seq_keep

# File 'ext/llama_cpp/llama_cpp.c', line 2298

static VALUE rb_llama_memory_seq_keep(VALUE self, VALUE memory, VALUE seq_id) {
  /* Binding for llama_memory_seq_keep. Raises ArgumentError unless `memory`
     is a LlamaMemoryT and `seq_id` is an Integer. Always returns nil. */
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  llama_memory_t_wrapper* mem = get_llama_memory_t_wrapper(memory);
  const int seq_id_ = NUM2INT(seq_id);
  llama_memory_seq_keep(mem->memory, seq_id_);
  RB_GC_GUARD(memory);
  return Qnil;
}

.llama_memory_seq_pos_max(memory, seq_id) ⇒ `Object`

llama_memory_seq_pos_max

# File 'ext/llama_cpp/llama_cpp.c', line 2382

static VALUE rb_llama_memory_seq_pos_max(VALUE self, VALUE memory, VALUE seq_id) {
  /* Binding for llama_memory_seq_pos_max; the native result is returned as an
     Integer. Raises ArgumentError unless `memory` is a LlamaMemoryT and
     `seq_id` is an Integer. */
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  llama_memory_t_wrapper* mem = get_llama_memory_t_wrapper(memory);
  const int seq_id_ = NUM2INT(seq_id);
  const llama_pos result = llama_memory_seq_pos_max(mem->memory, seq_id_);
  RB_GC_GUARD(memory);
  return INT2NUM(result);
}

.llama_memory_seq_pos_min(memory, seq_id) ⇒ `Object`

llama_memory_seq_pos_min

# File 'ext/llama_cpp/llama_cpp.c', line 2367

static VALUE rb_llama_memory_seq_pos_min(VALUE self, VALUE memory, VALUE seq_id) {
  /* Binding for llama_memory_seq_pos_min; the native result is returned as an
     Integer. Raises ArgumentError unless `memory` is a LlamaMemoryT and
     `seq_id` is an Integer. */
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  llama_memory_t_wrapper* mem = get_llama_memory_t_wrapper(memory);
  const int seq_id_ = NUM2INT(seq_id);
  const llama_pos result = llama_memory_seq_pos_min(mem->memory, seq_id_);
  RB_GC_GUARD(memory);
  return INT2NUM(result);
}

.llama_memory_seq_rm(memory, seq_id, p0, p1) ⇒ `Object`

llama_memory_seq_rm

# File 'ext/llama_cpp/llama_cpp.c', line 2248

static VALUE rb_llama_memory_seq_rm(VALUE self, VALUE memory, VALUE seq_id, VALUE p0, VALUE p1) {
  /* Binding for llama_memory_seq_rm; returns the native boolean result as
     true/false. Arguments are validated in declaration order and the first
     invalid one raises ArgumentError. */
  if (!RTEST(rb_obj_is_kind_of(memory, rb_cLlamaMemoryT))) {
    rb_raise(rb_eArgError, "memory must be a LlamaMemoryT");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p0)) {
    rb_raise(rb_eArgError, "p0 must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(p1)) {
    rb_raise(rb_eArgError, "p1 must be an Integer");
  }
  llama_memory_t_wrapper* mem = get_llama_memory_t_wrapper(memory);
  const int seq_id_ = NUM2INT(seq_id);
  const int p0_ = NUM2INT(p0);
  const int p1_ = NUM2INT(p1);
  VALUE result = Qfalse;
  if (llama_memory_seq_rm(mem->memory, seq_id_, p0_, p1_)) {
    result = Qtrue;
  }
  RB_GC_GUARD(memory);
  return result;
}

.llama_model_cls_label(model, id) ⇒ `String`

Parameters:

model (LlamaModel)
id (Integer)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 1815

static VALUE rb_llama_model_cls_label(VALUE self, VALUE model, VALUE id) {
  /* Binding for llama_model_cls_label: returns the label as a UTF-8 String,
     or nil when the native call yields no label. Raises ArgumentError unless
     `model` is a LlamaModel and `id` is an Integer. */
  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(id)) {
    rb_raise(rb_eArgError, "id must be an Integer");
    return Qnil;
  }
  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
  const char* str = llama_model_cls_label(model_wrapper->model, NUM2UINT(id));
  /* llama.h documents a NULL return when no label is provided; passing NULL
     to rb_utf8_str_new_cstr would dereference it, so map it to nil. */
  VALUE label = (str != NULL) ? rb_utf8_str_new_cstr(str) : Qnil;
  /* Guard after the copy: `str` may point into memory owned by the model. */
  RB_GC_GUARD(model);
  return label;
}

.llama_model_decoder_start_token(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1982

static VALUE rb_llama_model_decoder_start_token(VALUE self, VALUE model) {
  /* Returns llama_model_decoder_start_token for the wrapped model as an
     Integer. Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE token = INT2NUM(llama_model_decoder_start_token(wrapped->model));
  return token;
}

.llama_model_desc(model) ⇒ `String`

Parameters:

model (LlamaModel)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 1895

static VALUE rb_llama_model_desc(VALUE self, VALUE model) {
  /* Returns the text that llama_model_desc writes into a 128-byte stack
     buffer, as a UTF-8 String. Raises ArgumentError unless `model` is a
     LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  char desc[128];
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  llama_model_desc(wrapped->model, desc, sizeof desc);
  RB_GC_GUARD(model);
  return rb_utf8_str_new_cstr(desc);
}

.llama_model_free(model) ⇒ `NilClass`

Parameters:

model (LlamaModel)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1336

static VALUE rb_llama_model_free(VALUE self, VALUE model) {
  /* Frees the native model held by the wrapper and clears the pointer, so a
     second call is a no-op. Always returns nil; raises ArgumentError unless
     `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  if (wrapped->model == NULL) {
    /* Already released (or never loaded): nothing to do. */
    return Qnil;
  }
  llama_model_free(wrapped->model);
  wrapped->model = NULL;
  return Qnil;
}

.llama_model_get_vocab(model) ⇒ `LlamaVocab`

Parameters:

model (LlamaModel)

Returns:

(LlamaVocab)

# File 'ext/llama_cpp/llama_cpp.c', line 1642

static VALUE rb_llama_model_get_vocab(VALUE self, VALUE model) {
  /* Wraps the pointer returned by llama_model_get_vocab in a new LlamaVocab
     object whose wrapper has `copied` set to true. Raises ArgumentError
     unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  llama_vocab_wrapper* vocab_holder = (llama_vocab_wrapper*)ruby_xmalloc(sizeof(*vocab_holder));
  vocab_holder->copied = true;
  /* The cast drops whatever qualifier the native accessor returns with. */
  vocab_holder->vocab = (struct llama_vocab*)llama_model_get_vocab(wrapped->model);
  RB_GC_GUARD(model);
  return TypedData_Wrap_Struct(rb_cLlamaVocab, &llama_vocab_wrapper_data_type, vocab_holder);
}

.llama_model_has_decoder(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1968

static VALUE rb_llama_model_has_decoder(VALUE self, VALUE model) {
  /* Returns true/false from llama_model_has_decoder.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  if (llama_model_has_decoder(wrapped->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_has_encoder?(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1954

static VALUE rb_llama_model_has_encoder(VALUE self, VALUE model) {
  /* Returns true/false from llama_model_has_encoder.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  if (llama_model_has_encoder(wrapped->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_is_diffusion?(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2024

static VALUE rb_llama_model_is_diffusion(VALUE self, VALUE model) {
  /* Returns true/false from llama_model_is_diffusion.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  if (llama_model_is_diffusion(wrapped->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_is_hybrid?(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2010

static VALUE rb_llama_model_is_hybrid(VALUE self, VALUE model) {
  /* Returns true/false from llama_model_is_hybrid.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  if (llama_model_is_hybrid(wrapped->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_is_recurrent?(model) ⇒ `Boolean`

Parameters:

model (LlamaModel)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 1996

static VALUE rb_llama_model_is_recurrent(VALUE self, VALUE model) {
  /* Returns true/false from llama_model_is_recurrent.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  if (llama_model_is_recurrent(wrapped->model)) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_load_from_file(path_model, params) ⇒ `LlamaModel`

Parameters:

path_model (String)
params (LlamaModelParams)

Returns:

(LlamaModel)

# File 'ext/llama_cpp/llama_cpp.c', line 1355

static VALUE rb_llama_model_load_from_file(VALUE self, VALUE path_model, VALUE params) {
  /* Calls llama_model_load_from_file with the given path and a by-value copy
     of the wrapped params, and returns the result wrapped in a LlamaModel.
     The native return value is stored without being checked.
     Raises ArgumentError unless `path_model` is a String and `params` is a
     LlamaModelParams. */
  if (!RB_TYPE_P(path_model, T_STRING)) {
    rb_raise(rb_eArgError, "path_model must be a String");
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaModelParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaModelParams");
  }
  const char* c_path = StringValueCStr(path_model);
  struct llama_model_params* c_params = get_llama_model_params(params);
  llama_model_wrapper* holder = (llama_model_wrapper*)ruby_xmalloc(sizeof(*holder));
  holder->model = llama_model_load_from_file(c_path, *c_params);
  /* Both arguments lend out interior pointers; keep them alive until here. */
  RB_GC_GUARD(path_model);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaModel, &llama_model_wrapper_data_type, holder);
}

.llama_model_load_from_splits(paths, params) ⇒ `LlamaModel`

Parameters:

paths (Array<String>)
params (LlamaModelParams)

Returns:

(LlamaModel)

# File 'ext/llama_cpp/llama_cpp.c', line 1379

static VALUE rb_llama_model_load_from_splits(VALUE self, VALUE paths, VALUE params) {
  /* Calls llama_model_load_from_splits with the C strings of every element of
     `paths` and a by-value copy of the wrapped params, and returns the result
     wrapped in a LlamaModel. The native return value is stored without being
     checked. Raises ArgumentError unless `paths` is an Array of Strings and
     `params` is a LlamaModelParams. */
  if (!RB_TYPE_P(paths, T_ARRAY)) {
    rb_raise(rb_eArgError, "paths must be an Array");
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaModelParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaModelParams");
  }
  size_t count = RARRAY_LEN(paths);
  /* Stack-allocated pointer table; entries point into the Ruby strings. */
  const char** c_paths = ALLOCA_N(const char*, count);
  size_t idx = 0;
  while (idx < count) {
    VALUE entry = rb_ary_entry(paths, idx);
    if (!RB_TYPE_P(entry, T_STRING)) {
      rb_raise(rb_eArgError, "paths must be an Array of Strings");
    }
    c_paths[idx] = StringValueCStr(entry);
    idx++;
  }
  struct llama_model_params* c_params = get_llama_model_params(params);
  llama_model_wrapper* holder = (llama_model_wrapper*)ruby_xmalloc(sizeof(*holder));
  holder->model = llama_model_load_from_splits(c_paths, count, *c_params);
  /* Keep the array (and its strings) and the params alive through the call. */
  RB_GC_GUARD(paths);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaModel, &llama_model_wrapper_data_type, holder);
}

.llama_model_meta_count(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1867

static VALUE rb_llama_model_meta_count(VALUE self, VALUE model) {
  /* Returns llama_model_meta_count for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE count = INT2NUM(llama_model_meta_count(wrapped->model));
  return count;
}

.llama_model_meta_key_str(key) ⇒ `String`

Parameters:

key (Integer) —

(must be one of the LlamaCpp::LLAMA_MODEL_META_KEY_* constants)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 1881

static VALUE rb_llama_model_meta_key_str(VALUE self, VALUE key) {
  /* Binding for llama_model_meta_key_str: returns the key name as a UTF-8
     String, or nil when the native call returns NULL. Raises ArgumentError
     unless `key` is an Integer. */
  if (!RB_INTEGER_TYPE_P(key)) {
    rb_raise(rb_eArgError, "key must be an Integer");
    return Qnil;
  }
  const char* key_str = llama_model_meta_key_str(NUM2INT(key));
  /* Any Integer is accepted here, so the value may not be a known key.
     NOTE(review): llama.cpp is understood to return NULL for unknown keys;
     rb_utf8_str_new_cstr(NULL) would crash, so map NULL to nil. */
  if (key_str == NULL) {
    return Qnil;
  }
  return rb_utf8_str_new_cstr(key_str);
}

.llama_model_n_cls_out(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1800

static VALUE rb_llama_model_n_cls_out(VALUE self, VALUE model) {
  /* Returns llama_model_n_cls_out for the wrapped model as an Integer
     (unsigned conversion). Raises ArgumentError unless `model` is a
     LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_cls_out = UINT2NUM(llama_model_n_cls_out(wrapped->model));
  return n_cls_out;
}

.llama_model_n_ctx_train(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1674

static VALUE rb_llama_model_n_ctx_train(VALUE self, VALUE model) {
  /* Returns llama_model_n_ctx_train for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_ctx_train = INT2NUM(llama_model_n_ctx_train(wrapped->model));
  return n_ctx_train;
}

.llama_model_n_embd(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1688

static VALUE rb_llama_model_n_embd(VALUE self, VALUE model) {
  /* Returns llama_model_n_embd for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_embd = INT2NUM(llama_model_n_embd(wrapped->model));
  return n_embd;
}

.llama_model_n_embd_inp(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1702

static VALUE rb_llama_model_n_embd_inp(VALUE self, VALUE model) {
  /* Returns llama_model_n_embd_inp for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_embd_inp = INT2NUM(llama_model_n_embd_inp(wrapped->model));
  return n_embd_inp;
}

.llama_model_n_embd_out(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1716

static VALUE rb_llama_model_n_embd_out(VALUE self, VALUE model) {
  /* Returns llama_model_n_embd_out for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_embd_out = INT2NUM(llama_model_n_embd_out(wrapped->model));
  return n_embd_out;
}

.llama_model_n_head(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1744

static VALUE rb_llama_model_n_head(VALUE self, VALUE model) {
  /* Returns llama_model_n_head for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_head = INT2NUM(llama_model_n_head(wrapped->model));
  return n_head;
}

.llama_model_n_head_kv(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1758

static VALUE rb_llama_model_n_head_kv(VALUE self, VALUE model) {
  /* Returns llama_model_n_head_kv for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_head_kv = INT2NUM(llama_model_n_head_kv(wrapped->model));
  return n_head_kv;
}

.llama_model_n_layer(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1730

static VALUE rb_llama_model_n_layer(VALUE self, VALUE model) {
  /* Returns llama_model_n_layer for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_layer = INT2NUM(llama_model_n_layer(wrapped->model));
  return n_layer;
}

.llama_model_n_params(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1940

static VALUE rb_llama_model_n_params(VALUE self, VALUE model) {
  /* Returns llama_model_n_params for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
    return Qnil;
  }
  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
  /* The native count is a 64-bit value (uint64_t in llama.h). ULONG2NUM takes
     `unsigned long`, which is only 32 bits on LLP64 (Windows) and 32-bit
     targets and would truncate counts above ~4.29 billion; ULL2NUM converts
     the full width everywhere. */
  return ULL2NUM(llama_model_n_params(model_wrapper->model));
}

.llama_model_n_swa(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1772

static VALUE rb_llama_model_n_swa(VALUE self, VALUE model) {
  /* Returns llama_model_n_swa for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE n_swa = INT2NUM(llama_model_n_swa(wrapped->model));
  return n_swa;
}

.llama_model_quantize(fname_inp, fname_out, params) ⇒ `Boolean`

Parameters:

fname_inp (String)
fname_out (String)
params (LlamaModelQuantizeParams)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2040

static VALUE rb_llama_model_quantize(VALUE self, VALUE fname_inp, VALUE fname_out, VALUE params) {
  /* Binding for llama_model_quantize. Returns true when the native call
     returns 0 and false for any other value. Raises ArgumentError unless both
     file names are Strings and `params` is a LlamaModelQuantizeParams. */
  if (!RB_TYPE_P(fname_inp, T_STRING)) {
    rb_raise(rb_eArgError, "fname_inp must be a String");
  }
  if (!RB_TYPE_P(fname_out, T_STRING)) {
    rb_raise(rb_eArgError, "fname_out must be a String");
  }
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaModelQuantizeParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaModelQuantizeParams");
  }
  const char* src_path = StringValueCStr(fname_inp);
  const char* dst_path = StringValueCStr(fname_out);
  const llama_model_quantize_params* c_params = get_llama_model_quantize_params(params);
  const uint32_t status = llama_model_quantize(src_path, dst_path, c_params);
  /* All three arguments lend out interior pointers; keep them alive to here. */
  RB_GC_GUARD(fname_inp);
  RB_GC_GUARD(fname_out);
  RB_GC_GUARD(params);
  if (status == 0) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_model_rope_freq_scale_train(model) ⇒ `Float`

Parameters:

model (LlamaModel)

Returns:

(Float)

# File 'ext/llama_cpp/llama_cpp.c', line 1786

static VALUE rb_llama_model_rope_freq_scale_train(VALUE self, VALUE model) {
  /* Returns llama_model_rope_freq_scale_train for the wrapped model as a
     Float. Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE freq_scale = DBL2NUM(llama_model_rope_freq_scale_train(wrapped->model));
  return freq_scale;
}

.llama_model_rope_type(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1660

static VALUE rb_llama_model_rope_type(VALUE self, VALUE model) {
  /* Returns llama_model_rope_type for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  VALUE rope_type = INT2NUM(llama_model_rope_type(wrapped->model));
  return rope_type;
}

.llama_model_save_to_file(model, path_model) ⇒ `NilClass`

Parameters:

model (LlamaModel)
path_model (String)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1412

static VALUE rb_llama_model_save_to_file(VALUE self, VALUE model, VALUE path_model) {
  /* Binding for llama_model_save_to_file. Always returns nil. Raises
     ArgumentError unless `model` is a LlamaModel and `path_model` is a
     String. */
  if (!RTEST(rb_obj_is_kind_of(model, rb_cLlamaModel))) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
  }
  if (!RB_TYPE_P(path_model, T_STRING)) {
    rb_raise(rb_eArgError, "path_model must be a String");
  }
  llama_model_wrapper* wrapped = get_llama_model_wrapper(model);
  const char* c_path = StringValueCStr(path_model);
  llama_model_save_to_file(wrapped->model, c_path);
  /* Keep both Ruby objects alive until the native call has returned. */
  RB_GC_GUARD(model);
  RB_GC_GUARD(path_model);
  return Qnil;
}

.llama_model_size(model) ⇒ `Integer`

Parameters:

model (LlamaModel)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1912

static VALUE rb_llama_model_size(VALUE self, VALUE model) {
  /* Returns llama_model_size for the wrapped model as an Integer.
     Raises ArgumentError unless `model` is a LlamaModel. */
  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
    rb_raise(rb_eArgError, "model must be a LlamaModel");
    return Qnil;
  }
  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
  /* The native size is a 64-bit value (uint64_t in llama.h). ULONG2NUM takes
     `unsigned long`, which is only 32 bits on LLP64 (Windows) and 32-bit
     targets and would truncate sizes of 4 GiB or more; ULL2NUM converts the
     full width everywhere. */
  return ULL2NUM(llama_model_size(model_wrapper->model));
}

.llama_n_batch(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1568

static VALUE rb_llama_n_batch(VALUE self, VALUE ctx) {
  /* Returns llama_n_batch for the wrapped context as an Integer (unsigned
     conversion). Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  VALUE n_batch = UINT2NUM(llama_n_batch(wrapped->context));
  return n_batch;
}

.llama_n_ctx(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1540

static VALUE rb_llama_n_ctx(VALUE self, VALUE ctx) {
  /* Returns llama_n_ctx for the wrapped context as an Integer (unsigned
     conversion). Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  VALUE n_ctx = UINT2NUM(llama_n_ctx(wrapped->context));
  return n_ctx;
}

.llama_n_ctx_seq(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1554

static VALUE rb_llama_n_ctx_seq(VALUE self, VALUE ctx) {
  /* Returns llama_n_ctx_seq for the wrapped context as an Integer (unsigned
     conversion). Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  VALUE n_ctx_seq = UINT2NUM(llama_n_ctx_seq(wrapped->context));
  return n_ctx_seq;
}

.llama_n_seq_max(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1596

static VALUE rb_llama_n_seq_max(VALUE self, VALUE ctx) {
  /* Returns llama_n_seq_max for the wrapped context as an Integer (unsigned
     conversion). Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  VALUE n_seq_max = UINT2NUM(llama_n_seq_max(wrapped->context));
  return n_seq_max;
}

.llama_n_threads(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2638

static VALUE rb_llama_n_threads(VALUE self, VALUE ctx) {
  /* Returns llama_n_threads for the wrapped context as an Integer.
     Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  const int32_t count = llama_n_threads(wrapped->context);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(ctx);
  return INT2NUM(count);
}

.llama_n_threads_batch(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2654

static VALUE rb_llama_n_threads_batch(VALUE self, VALUE ctx) {
  /* Returns llama_n_threads_batch for the wrapped context as an Integer.
     Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  const int32_t count = llama_n_threads_batch(wrapped->context);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(ctx);
  return INT2NUM(count);
}

.llama_n_ubatch(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1582

static VALUE rb_llama_n_ubatch(VALUE self, VALUE ctx) {
  /* Returns llama_n_ubatch for the wrapped context as an Integer (unsigned
     conversion). Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  VALUE n_ubatch = UINT2NUM(llama_n_ubatch(wrapped->context));
  return n_ubatch;
}

.llama_numa_init(numa) ⇒ `NilClass`

Parameters:

numa (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 1322

static VALUE rb_llama_numa_init(VALUE self, VALUE numa) {
  /* Binding for llama_numa_init; the Integer is cast to
     enum ggml_numa_strategy without range checking. Always returns nil.
     Raises ArgumentError unless `numa` is an Integer. */
  if (!RB_INTEGER_TYPE_P(numa)) {
    rb_raise(rb_eArgError, "numa must be an Integer");
  }
  const enum ggml_numa_strategy strategy = (enum ggml_numa_strategy)NUM2INT(numa);
  llama_numa_init(strategy);
  return Qnil;
}

.llama_perf_context(context) ⇒ `LlamaPerfContextData`

Parameters:

context (LlamaContext)

Returns:

(LlamaPerfContextData)

# File 'ext/llama_cpp/llama_cpp.c', line 4044

static VALUE rb_llama_perf_context(VALUE self, VALUE ctx) {
  /* Copies the struct returned by llama_perf_context into a freshly
     allocated buffer and returns it wrapped in a LlamaPerfContextData.
     Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  struct llama_perf_context_data* snapshot =
    (struct llama_perf_context_data*)ruby_xmalloc(sizeof(*snapshot));
  *snapshot = llama_perf_context(wrapped->context);
  RB_GC_GUARD(ctx);
  return TypedData_Wrap_Struct(rb_cLlamaPerfContextData, &llama_perf_context_data_type, snapshot);
}

.llama_perf_context_print(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 4061

static VALUE rb_llama_perf_context_print(VALUE self, VALUE ctx) {
  /* Binding for llama_perf_context_print. Always returns nil.
     Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  llama_perf_context_print(wrapped->context);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_perf_context_reset(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 4093

static VALUE rb_llama_perf_context_reset(VALUE self, VALUE ctx) {
  /* Binding for llama_perf_context_reset. Always returns nil.
     Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  llama_perf_context_reset(wrapped->context);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_perf_sampler(chain) ⇒ `LlamaPerfSamplerData`

Parameters:

chain (LlamaSampler)

Returns:

(LlamaPerfSamplerData)

# File 'ext/llama_cpp/llama_cpp.c', line 4109

static VALUE rb_llama_perf_sampler(VALUE self, VALUE chain) {
  /* Copies the struct returned by llama_perf_sampler into a freshly
     allocated buffer and returns it wrapped in a LlamaPerfSamplerData.
     Raises ArgumentError unless `chain` is a LlamaSampler. */
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  struct llama_perf_sampler_data* snapshot =
    (struct llama_perf_sampler_data*)ruby_xmalloc(sizeof(*snapshot));
  *snapshot = llama_perf_sampler(native_chain);
  RB_GC_GUARD(chain);
  return TypedData_Wrap_Struct(rb_cLlamaPerfSamplerData, &llama_perf_sampler_data_type, snapshot);
}

.llama_perf_sampler_print(chain) ⇒ `NilClass`

Parameters:

chain (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 4077

static VALUE rb_llama_perf_sampler_print(VALUE self, VALUE chain) {
  /* Binding for llama_perf_sampler_print. Always returns nil.
     Raises ArgumentError unless `chain` is a LlamaSampler. */
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  llama_perf_sampler_print(native_chain);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(chain);
  return Qnil;
}

.llama_perf_sampler_reset(chain) ⇒ `NilClass`

Parameters:

chain (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 4126

static VALUE rb_llama_perf_sampler_reset(VALUE self, VALUE chain) {
  /* Binding for llama_perf_sampler_reset. Always returns nil.
     Raises ArgumentError unless `chain` is a LlamaSampler. */
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  llama_perf_sampler_reset(native_chain);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(chain);
  return Qnil;
}

.llama_pooling_type(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1628

static VALUE rb_llama_pooling_type(VALUE self, VALUE ctx) {
  /* Returns llama_pooling_type for the wrapped context as an Integer.
     Raises ArgumentError unless `ctx` is a LlamaContext. */
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped = get_llama_context_wrapper(ctx);
  VALUE pooling = INT2NUM(llama_pooling_type(wrapped->context));
  return pooling;
}

.llama_print_system_info ⇒ `String`

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 3916

static VALUE rb_llama_print_system_info(VALUE self) {
  /* Returns the C string from llama_print_system_info() as a UTF-8 String. */
  return rb_utf8_str_new_cstr(llama_print_system_info());
}

.llama_sampler_accept(sampler, token) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)
token (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3323

static VALUE rb_llama_sampler_accept(VALUE self, VALUE sampler, VALUE token) {
  /* Binding for llama_sampler_accept. Always returns nil. Raises
     ArgumentError unless `sampler` is a LlamaSampler and `token` is an
     Integer. */
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }
  struct llama_sampler* native_sampler = get_llama_sampler(sampler);
  const llama_token accepted = NUM2INT(token);
  llama_sampler_accept(native_sampler, accepted);
  /* Keep the Ruby object alive while its native handle is in use. */
  RB_GC_GUARD(sampler);
  return Qnil;
}

.llama_sampler_apply(sampler, cur_p) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)
cur_p (LlamaTokenDataArray)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3345

/*
 * LlamaCpp.llama_sampler_apply(sampler, cur_p) -> nil
 *
 * Calls llama_sampler_apply() with the native sampler wrapped by `sampler`
 * and the native token data array wrapped by `cur_p`.
 *
 * Raises ArgumentError if `sampler` is not a LlamaSampler or `cur_p` is not
 * a LlamaTokenDataArray.
 */
static VALUE rb_llama_sampler_apply(VALUE self, VALUE sampler, VALUE cur_p) {
  if (!rb_obj_is_kind_of(sampler, rb_cLlamaSampler)) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
    return Qnil;
  }
  if (!rb_obj_is_kind_of(cur_p, rb_cLlamaTokenDataArray)) {
    /* The message previously blamed `sampler` for a bad `cur_p`. */
    rb_raise(rb_eArgError, "cur_p must be a LlamaTokenDataArray");
    return Qnil;
  }
  struct llama_sampler* sampler_ = get_llama_sampler(sampler);
  llama_token_data_array* cur_p_ = get_llama_token_data_array(cur_p);
  llama_sampler_apply(sampler_, cur_p_);
  /* Keep both Ruby objects reachable until the native call has finished. */
  RB_GC_GUARD(sampler);
  RB_GC_GUARD(cur_p);
  return Qnil;
}

.llama_sampler_chain_add(chain, smpl) ⇒ `NilClass`

Parameters:

chain (LlamaSampler)
smpl (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3433

/*
 * LlamaCpp.llama_sampler_chain_add(chain, smpl) -> nil
 *
 * Validates that both arguments are LlamaSampler objects and forwards the
 * wrapped pointers to llama_sampler_chain_add(). Raises ArgumentError when
 * either argument has the wrong class.
 *
 * NOTE(review): after this call the `smpl` Ruby object still wraps the same
 * native pointer that was handed to the chain; confirm how ownership is
 * meant to be shared between the two.
 */
static VALUE rb_llama_sampler_chain_add(VALUE self, VALUE chain, VALUE smpl) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  if (!RTEST(rb_obj_is_kind_of(smpl, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "smpl must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  struct llama_sampler* native_smpl = get_llama_sampler(smpl);
  llama_sampler_chain_add(native_chain, native_smpl);
  RB_GC_GUARD(chain);
  RB_GC_GUARD(smpl);
  return Qnil;
}

.llama_sampler_chain_get(chain, i) ⇒ `LlamaSampler`

Parameters:

chain (LlamaSampler)
i (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3456

/*
 * LlamaCpp.llama_sampler_chain_get(chain, i) -> LlamaSampler
 *
 * Looks up the sampler at index `i` of `chain` via llama_sampler_chain_get()
 * and wraps the returned pointer in a new LlamaSampler object.
 * Raises ArgumentError if `chain` is not a LlamaSampler or `i` is not an
 * Integer.
 *
 * NOTE(review): the returned object wraps a pointer obtained from the chain
 * without removing it; verify that llama_sampler_data_type does not free it
 * a second time.
 */
static VALUE rb_llama_sampler_chain_get(VALUE self, VALUE chain, VALUE i) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  if (!RB_INTEGER_TYPE_P(i)) {
    rb_raise(rb_eArgError, "i must be an Integer");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  const int index = NUM2INT(i);
  struct llama_sampler* member = llama_sampler_chain_get(native_chain, index);
  RB_GC_GUARD(chain);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, member);
}

.llama_sampler_chain_init(params) ⇒ `LlamaSampler`

Parameters:

params (LlamaSamplerChainParams)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3416

/*
 * LlamaCpp.llama_sampler_chain_init(params) -> LlamaSampler
 *
 * Passes a copy of the native parameters wrapped by `params` to
 * llama_sampler_chain_init() and wraps the resulting sampler.
 * Raises ArgumentError if `params` is not a LlamaSamplerChainParams.
 */
static VALUE rb_llama_sampler_chain_init(VALUE self, VALUE params) {
  if (!RTEST(rb_obj_is_kind_of(params, rb_cLlamaSamplerChainParams))) {
    rb_raise(rb_eArgError, "params must be a LlamaSamplerChainParams");
  }
  llama_sampler_chain_params* chain_params = get_llama_sampler_chain_params(params);
  struct llama_sampler* chain = llama_sampler_chain_init(*chain_params);
  RB_GC_GUARD(params);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, chain);
}

.llama_sampler_chain_n(chain) ⇒ `Integer`

Parameters:

chain (LlamaSampler)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3476

/*
 * LlamaCpp.llama_sampler_chain_n(chain) -> Integer
 *
 * Returns the value of llama_sampler_chain_n() for the sampler wrapped by
 * `chain`. Raises ArgumentError if `chain` is not a LlamaSampler.
 */
static VALUE rb_llama_sampler_chain_n(VALUE self, VALUE chain) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  const int count = llama_sampler_chain_n(native_chain);
  RB_GC_GUARD(chain);
  return INT2NUM(count);
}

.llama_sampler_chain_remove(chain, i) ⇒ `LlamaSampler`

Parameters:

chain (LlamaSampler)
i (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3493

/*
 * LlamaCpp.llama_sampler_chain_remove(chain, i) -> LlamaSampler
 *
 * Calls llama_sampler_chain_remove() for index `i` of `chain` and wraps the
 * pointer it returns in a new LlamaSampler object.
 * Raises ArgumentError if `chain` is not a LlamaSampler or `i` is not an
 * Integer.
 */
static VALUE rb_llama_sampler_chain_remove(VALUE self, VALUE chain, VALUE i) {
  if (!RTEST(rb_obj_is_kind_of(chain, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "chain must be a LlamaSampler");
  }
  if (!RB_INTEGER_TYPE_P(i)) {
    rb_raise(rb_eArgError, "i must be an Integer");
  }
  struct llama_sampler* native_chain = get_llama_sampler(chain);
  const int index = NUM2INT(i);
  struct llama_sampler* removed = llama_sampler_chain_remove(native_chain, index);
  RB_GC_GUARD(chain);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, removed);
}

.llama_sampler_clone(sampler) ⇒ `LlamaSampler`

Parameters:

sampler (LlamaSampler)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3383

/*
 * LlamaCpp.llama_sampler_clone(sampler) -> LlamaSampler
 *
 * Wraps the pointer returned by llama_sampler_clone() for the sampler held
 * by `sampler`. Raises ArgumentError if `sampler` is not a LlamaSampler.
 */
static VALUE rb_llama_sampler_clone(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  struct llama_sampler* source = get_llama_sampler(sampler);
  struct llama_sampler* copy = llama_sampler_clone(source);
  RB_GC_GUARD(sampler);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, copy);
}

.llama_sampler_free(sampler) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3399

/*
 * LlamaCpp.llama_sampler_free(sampler) -> nil
 *
 * Calls llama_sampler_free() on the native sampler wrapped by `sampler`.
 * Raises ArgumentError if `sampler` is not a LlamaSampler.
 *
 * NOTE(review): only the native object is released here; the pointer stored
 * inside the Ruby object is not cleared by this function. Confirm how
 * llama_sampler_data_type treats that pointer when the wrapper is collected.
 */
static VALUE rb_llama_sampler_free(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  llama_sampler_free(get_llama_sampler(sampler));
  RB_GC_GUARD(sampler);
  return Qnil;
}

.llama_sampler_get_seed(sampler) ⇒ `Integer`

Parameters:

sampler (LlamaSampler)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3873

/*
 * LlamaCpp.llama_sampler_get_seed(sampler) -> Integer
 *
 * Returns the unsigned 32-bit value reported by llama_sampler_get_seed()
 * for the sampler wrapped by `smpl`.
 * Raises ArgumentError if `smpl` is not a LlamaSampler.
 */
static VALUE rb_llama_sampler_get_seed(VALUE self, VALUE smpl) {
  if (!RTEST(rb_obj_is_kind_of(smpl, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "smpl must be a LlamaSampler");
  }
  struct llama_sampler* native = get_llama_sampler(smpl);
  const uint32_t value = llama_sampler_get_seed(native);
  RB_GC_GUARD(smpl);
  return UINT2NUM(value);
}

.llama_sampler_init_adaptive_p(target, decay, seed) ⇒ `LlamaSampler`

Parameters:

target (Float)
decay (Float)
seed (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3807

/*
 * LlamaCpp.llama_sampler_init_adaptive_p(target, decay, seed) -> LlamaSampler
 *
 * Requires `target` and `decay` to be Float and `seed` to be an Integer,
 * then wraps the sampler returned by llama_sampler_init_adaptive_p().
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_adaptive_p(VALUE self, VALUE target, VALUE decay, VALUE seed) {
  if (!RB_FLOAT_TYPE_P(target)) {
    rb_raise(rb_eArgError, "target must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(decay)) {
    rb_raise(rb_eArgError, "decay must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  const double target_ = NUM2DBL(target);
  const double decay_ = NUM2DBL(decay);
  const unsigned int seed_ = NUM2UINT(seed);
  struct llama_sampler* created = llama_sampler_init_adaptive_p(target_, decay_, seed_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_dist(seed) ⇒ `LlamaSampler`

Parameters:

seed (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3522

/*
 * LlamaCpp.llama_sampler_init_dist(seed) -> LlamaSampler
 *
 * Converts `seed` with NUM2UINT and wraps the sampler returned by
 * llama_sampler_init_dist(). Raises ArgumentError if `seed` is not an
 * Integer.
 */
static VALUE rb_llama_sampler_init_dist(VALUE self, VALUE seed) {
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  const unsigned int seed_ = NUM2UINT(seed);
  struct llama_sampler* created = llama_sampler_init_dist(seed_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_grammar(vocab, grammar_str, grammar_root) ⇒ `LlamaSampler`

Parameters:

vocab (LlamaVocab)
grammar_str (String)
grammar_root (String)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3748

/*
 * LlamaCpp.llama_sampler_init_grammar(vocab, grammar_str, grammar_root) -> LlamaSampler
 *
 * Validates the argument classes, extracts C strings from the two String
 * arguments with StringValueCStr, and wraps whatever
 * llama_sampler_init_grammar() returns.
 * Raises ArgumentError if `vocab` is not a LlamaVocab or either grammar
 * argument is not a String.
 */
static VALUE rb_llama_sampler_init_grammar(VALUE self, VALUE vocab, VALUE grammar_str, VALUE grammar_root) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_TYPE_P(grammar_str, T_STRING)) {
    rb_raise(rb_eArgError, "grammar_str must be a String");
  }
  if (!RB_TYPE_P(grammar_root, T_STRING)) {
    rb_raise(rb_eArgError, "grammar_root must be a String");
  }
  llama_vocab_wrapper* wrapped_vocab = get_llama_vocab_wrapper(vocab);
  const char* grammar_text = StringValueCStr(grammar_str);
  const char* root_rule = StringValueCStr(grammar_root);
  struct llama_sampler* created = llama_sampler_init_grammar(wrapped_vocab->vocab, grammar_text, root_rule);
  /* The C strings point into the Ruby Strings, so keep those alive too. */
  RB_GC_GUARD(vocab);
  RB_GC_GUARD(grammar_str);
  RB_GC_GUARD(grammar_root);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_greedy ⇒ `LlamaSampler`

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3512

/*
 * LlamaCpp.llama_sampler_init_greedy -> LlamaSampler
 *
 * Wraps the sampler returned by llama_sampler_init_greedy().
 */
static VALUE rb_llama_sampler_init_greedy(VALUE self) {
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, llama_sampler_init_greedy());
}

.llama_sampler_init_infill(vocab) ⇒ `LlamaSampler`

Parameters:

vocab (LlamaVocab)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3857

/*
 * LlamaCpp.llama_sampler_init_infill(vocab) -> LlamaSampler
 *
 * Wraps the sampler returned by llama_sampler_init_infill() for the native
 * vocab held by `vocab`. Raises ArgumentError if `vocab` is not a
 * LlamaVocab.
 */
static VALUE rb_llama_sampler_init_infill(VALUE self, VALUE vocab) {
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  llama_vocab_wrapper* wrapped_vocab = get_llama_vocab_wrapper(vocab);
  struct llama_sampler* created = llama_sampler_init_infill(wrapped_vocab->vocab);
  RB_GC_GUARD(vocab);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_logit_bias(n_vocab, n_logit_bias, logit_bias) ⇒ `LlamaSampler`

Parameters:

n_vocab (Integer)
n_logit_bias (Integer)
logit_bias (LlamaLogitBias)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3831

/*
 * LlamaCpp.llama_sampler_init_logit_bias(n_vocab, n_logit_bias, logit_bias) -> LlamaSampler
 *
 * Converts the two Integer arguments with NUM2INT, fetches the native
 * struct wrapped by `logit_bias`, and wraps the sampler returned by
 * llama_sampler_init_logit_bias().
 * Raises ArgumentError if the Integers are not Integers or `logit_bias` is
 * not a LlamaLogitBias.
 *
 * NOTE(review): a count (`n_logit_bias`) is forwarded together with the
 * pointer of a single wrapped LlamaLogitBias; confirm what happens for
 * counts greater than one.
 */
static VALUE rb_llama_sampler_init_logit_bias(VALUE self, VALUE n_vocab, VALUE n_logit_bias, VALUE logit_bias) {
  if (!RB_INTEGER_TYPE_P(n_vocab)) {
    rb_raise(rb_eArgError, "n_vocab must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(n_logit_bias)) {
    rb_raise(rb_eArgError, "n_logit_bias must be an Integer");
  }
  if (!RTEST(rb_obj_is_kind_of(logit_bias, rb_cLlamaLogitBias))) {
    rb_raise(rb_eArgError, "logit_bias must be a LlamaLogitBias");
  }
  const int32_t vocab_size = NUM2INT(n_vocab);
  const int32_t bias_count = NUM2INT(n_logit_bias);
  const llama_logit_bias* bias = get_llama_logit_bias(logit_bias);
  struct llama_sampler* created = llama_sampler_init_logit_bias(vocab_size, bias_count, bias);
  RB_GC_GUARD(logit_bias);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_min_p(p, min_keep) ⇒ `LlamaSampler`

Parameters:

p (Float)
min_keep (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3570

/*
 * LlamaCpp.llama_sampler_init_min_p(p, min_keep) -> LlamaSampler
 *
 * Requires `p` to be a Float and `min_keep` to be an Integer, then wraps
 * the sampler returned by llama_sampler_init_min_p().
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_min_p(VALUE self, VALUE p, VALUE min_keep) {
  if (!RB_FLOAT_TYPE_P(p)) {
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  const double p_ = NUM2DBL(p);
  const size_t min_keep_ = NUM2SIZET(min_keep);
  struct llama_sampler* created = llama_sampler_init_min_p(p_, min_keep_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m) ⇒ `LlamaSampler`

Parameters:

n_vocab (Integer)
seed (Integer)
tau (Float)
eta (Float)
m (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3692

/*
 * LlamaCpp.llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m) -> LlamaSampler
 *
 * Requires `n_vocab`, `seed` and `m` to be Integers and `tau` and `eta` to
 * be Floats, then wraps the sampler returned by
 * llama_sampler_init_mirostat().
 *
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_mirostat(VALUE self, VALUE n_vocab, VALUE seed, VALUE tau, VALUE eta, VALUE m) {
  if (!RB_INTEGER_TYPE_P(n_vocab)) {
    rb_raise(rb_eArgError, "n_vocab must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
    return Qnil;
  }
  if (!RB_FLOAT_TYPE_P(tau)) {
    rb_raise(rb_eArgError, "tau must be a Float");
    return Qnil;
  }
  if (!RB_FLOAT_TYPE_P(eta)) {
    rb_raise(rb_eArgError, "eta must be a Float");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(m)) {
    /* Message no longer carries a stray trailing space. */
    rb_raise(rb_eArgError, "m must be an Integer");
    return Qnil;
  }
  struct llama_sampler* sampler = llama_sampler_init_mirostat(NUM2INT(n_vocab), NUM2UINT(seed), NUM2DBL(tau), NUM2DBL(eta), NUM2INT(m));
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
}

.llama_sampler_init_mirostat_v2(seed, tau, eta) ⇒ `LlamaSampler`

Parameters:

seed (Integer)
tau (Float)
eta (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3724

/*
 * LlamaCpp.llama_sampler_init_mirostat_v2(seed, tau, eta) -> LlamaSampler
 *
 * Requires `seed` to be an Integer and `tau` and `eta` to be Floats, then
 * wraps the sampler returned by llama_sampler_init_mirostat_v2().
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_mirostat_v2(VALUE self, VALUE seed, VALUE tau, VALUE eta) {
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  if (!RB_FLOAT_TYPE_P(tau)) {
    rb_raise(rb_eArgError, "tau must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(eta)) {
    rb_raise(rb_eArgError, "eta must be a Float");
  }
  const unsigned int seed_ = NUM2UINT(seed);
  const double tau_ = NUM2DBL(tau);
  const double eta_ = NUM2DBL(eta);
  struct llama_sampler* created = llama_sampler_init_mirostat_v2(seed_, tau_, eta_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_penalties(penalty_last_n, penalty_repeat, penalty_freq, penalty_present) ⇒ `LlamaSampler`

Parameters:

penalty_last_n (Integer)
penalty_repeat (Float)
penalty_freq (Float)
penalty_present (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3779

/*
 * LlamaCpp.llama_sampler_init_penalties(penalty_last_n, penalty_repeat, penalty_freq, penalty_present) -> LlamaSampler
 *
 * Requires `penalty_last_n` to be an Integer and the three penalty values
 * to be Floats, then wraps the sampler returned by
 * llama_sampler_init_penalties(). Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_penalties(VALUE self, VALUE penalty_last_n, VALUE penalty_repeat, VALUE penalty_freq, VALUE penalty_present) {
  if (!RB_INTEGER_TYPE_P(penalty_last_n)) {
    rb_raise(rb_eArgError, "penalty_last_n must be an Integer");
  }
  if (!RB_FLOAT_TYPE_P(penalty_repeat)) {
    rb_raise(rb_eArgError, "penalty_repeat must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(penalty_freq)) {
    rb_raise(rb_eArgError, "penalty_freq must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(penalty_present)) {
    rb_raise(rb_eArgError, "penalty_present must be a Float");
  }
  const int last_n = NUM2INT(penalty_last_n);
  const double repeat = NUM2DBL(penalty_repeat);
  const double freq = NUM2DBL(penalty_freq);
  const double present = NUM2DBL(penalty_present);
  struct llama_sampler* created = llama_sampler_init_penalties(last_n, repeat, freq, present);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_temp(t) ⇒ `LlamaSampler`

Parameters:

t (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3607

/*
 * LlamaCpp.llama_sampler_init_temp(t) -> LlamaSampler
 *
 * Wraps the sampler returned by llama_sampler_init_temp() for the Float
 * `t`. Raises ArgumentError if `t` is not a Float.
 */
static VALUE rb_llama_sampler_init_temp(VALUE self, VALUE t) {
  if (!RB_FLOAT_TYPE_P(t)) {
    rb_raise(rb_eArgError, "t must be a Float");
  }
  const double temp = NUM2DBL(t);
  struct llama_sampler* created = llama_sampler_init_temp(temp);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_temp_ext(t, delta, exponent) ⇒ `LlamaSampler`

Parameters:

t (Float)
delta (Float)
exponent (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3623

/*
 * LlamaCpp.llama_sampler_init_temp_ext(t, delta, exponent) -> LlamaSampler
 *
 * Requires all three arguments to be Floats and wraps the sampler returned
 * by llama_sampler_init_temp_ext(). Raises ArgumentError on a type
 * mismatch.
 */
static VALUE rb_llama_sampler_init_temp_ext(VALUE self, VALUE t, VALUE delta, VALUE exponent) {
  if (!RB_FLOAT_TYPE_P(t)) {
    rb_raise(rb_eArgError, "t must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(delta)) {
    rb_raise(rb_eArgError, "delta must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(exponent)) {
    rb_raise(rb_eArgError, "exponent must be a Float");
  }
  const double temp = NUM2DBL(t);
  const double delta_ = NUM2DBL(delta);
  const double exponent_ = NUM2DBL(exponent);
  struct llama_sampler* created = llama_sampler_init_temp_ext(temp, delta_, exponent_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_top_k(k) ⇒ `LlamaSampler`

Parameters:

k (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3536

/*
 * LlamaCpp.llama_sampler_init_top_k(k) -> LlamaSampler
 *
 * Converts `k` with NUM2INT and wraps the sampler returned by
 * llama_sampler_init_top_k(). Raises ArgumentError if `k` is not an
 * Integer.
 */
static VALUE rb_llama_sampler_init_top_k(VALUE self, VALUE k) {
  if (!RB_INTEGER_TYPE_P(k)) {
    rb_raise(rb_eArgError, "k must be an Integer");
  }
  const int k_ = NUM2INT(k);
  struct llama_sampler* created = llama_sampler_init_top_k(k_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_top_n_sigma(n) ⇒ `LlamaSampler`

Parameters:

n (Float)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3674

/*
 * LlamaCpp.llama_sampler_init_top_n_sigma(n) -> LlamaSampler
 *
 * Wraps the sampler returned by llama_sampler_init_top_n_sigma() for the
 * Float `n`. Raises ArgumentError if `n` is not a Float.
 */
static VALUE rb_llama_sampler_init_top_n_sigma(VALUE self, VALUE n) {
  if (!RB_FLOAT_TYPE_P(n)) {
    rb_raise(rb_eArgError, "n must be a Float");
  }
  const double n_ = NUM2DBL(n);
  struct llama_sampler* created = llama_sampler_init_top_n_sigma(n_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_top_p(p, min_keep) ⇒ `LlamaSampler`

Parameters:

p (Float)
min_keep (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3551

/*
 * LlamaCpp.llama_sampler_init_top_p(p, min_keep) -> LlamaSampler
 *
 * Requires `p` to be a Float and `min_keep` to be an Integer, then wraps
 * the sampler returned by llama_sampler_init_top_p().
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_top_p(VALUE self, VALUE p, VALUE min_keep) {
  if (!RB_FLOAT_TYPE_P(p)) {
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  const double p_ = NUM2DBL(p);
  const size_t min_keep_ = NUM2SIZET(min_keep);
  struct llama_sampler* created = llama_sampler_init_top_p(p_, min_keep_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_typical(p, min_keep) ⇒ `LlamaSampler`

Parameters:

p (Float)
min_keep (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3589

/*
 * LlamaCpp.llama_sampler_init_typical(p, min_keep) -> LlamaSampler
 *
 * Requires `p` to be a Float and `min_keep` to be an Integer, then wraps
 * the sampler returned by llama_sampler_init_typical().
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_typical(VALUE self, VALUE p, VALUE min_keep) {
  if (!RB_FLOAT_TYPE_P(p)) {
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  const double p_ = NUM2DBL(p);
  const size_t min_keep_ = NUM2SIZET(min_keep);
  struct llama_sampler* created = llama_sampler_init_typical(p_, min_keep_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_init_xtc(p, t, min_keep, seed) ⇒ `LlamaSampler`

Parameters:

p (Float)
t (Float)
min_keep (Integer)
seed (Integer)

Returns:

(LlamaSampler)

# File 'ext/llama_cpp/llama_cpp.c', line 3648

/*
 * LlamaCpp.llama_sampler_init_xtc(p, t, min_keep, seed) -> LlamaSampler
 *
 * Requires `p` and `t` to be Floats and `min_keep` and `seed` to be
 * Integers, then wraps the sampler returned by llama_sampler_init_xtc().
 * Raises ArgumentError on a type mismatch.
 */
static VALUE rb_llama_sampler_init_xtc(VALUE self, VALUE p, VALUE t, VALUE min_keep, VALUE seed) {
  if (!RB_FLOAT_TYPE_P(p)) {
    rb_raise(rb_eArgError, "p must be a Float");
  }
  if (!RB_FLOAT_TYPE_P(t)) {
    rb_raise(rb_eArgError, "t must be a Float");
  }
  if (!RB_INTEGER_TYPE_P(min_keep)) {
    rb_raise(rb_eArgError, "min_keep must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(seed)) {
    rb_raise(rb_eArgError, "seed must be an Integer");
  }
  const double p_ = NUM2DBL(p);
  const double t_ = NUM2DBL(t);
  const size_t min_keep_ = NUM2SIZET(min_keep);
  const unsigned int seed_ = NUM2UINT(seed);
  struct llama_sampler* created = llama_sampler_init_xtc(p_, t_, min_keep_, seed_);
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, created);
}

.llama_sampler_name(sampler) ⇒ `String`

Parameters:

sampler (LlamaSampler)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 3305

/*
 * LlamaCpp.llama_sampler_name(sampler) -> String
 *
 * Copies the C string returned by llama_sampler_name() into a UTF-8 Ruby
 * String. Raises ArgumentError if `sampler` is not a LlamaSampler.
 */
static VALUE rb_llama_sampler_name(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  struct llama_sampler* native = get_llama_sampler(sampler);
  VALUE name_str = rb_utf8_str_new_cstr(llama_sampler_name(native));
  /* Guard after the copy so the sampler outlives the returned C string use. */
  RB_GC_GUARD(sampler);
  return name_str;
}

.llama_sampler_reset(sampler) ⇒ `NilClass`

Parameters:

sampler (LlamaSampler)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 3367

/*
 * LlamaCpp.llama_sampler_reset(sampler) -> nil
 *
 * Calls llama_sampler_reset() on the native sampler wrapped by `sampler`.
 * Raises ArgumentError if `sampler` is not a LlamaSampler.
 */
static VALUE rb_llama_sampler_reset(VALUE self, VALUE sampler) {
  if (!RTEST(rb_obj_is_kind_of(sampler, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "sampler must be a LlamaSampler");
  }
  struct llama_sampler* native = get_llama_sampler(sampler);
  llama_sampler_reset(native);
  RB_GC_GUARD(sampler);
  return Qnil;
}

.llama_sampler_sample(sampler, context, idx) ⇒ `Integer`

Parameters:

sampler (LlamaSampler)
context (LlamaContext)
idx (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3891

/*
 * LlamaCpp.llama_sampler_sample(sampler, context, idx) -> Integer
 *
 * Calls llama_sampler_sample() with the native sampler, the native context
 * and `idx` (converted with NUM2INT) and returns the resulting token as a
 * Ruby Integer.
 * Raises ArgumentError if `smpl` is not a LlamaSampler, `ctx` is not a
 * LlamaContext, or `idx` is not an Integer.
 */
static VALUE rb_llama_sampler_sample(VALUE self, VALUE smpl, VALUE ctx, VALUE idx) {
  if (!RTEST(rb_obj_is_kind_of(smpl, rb_cLlamaSampler))) {
    rb_raise(rb_eArgError, "smpl must be a LlamaSampler");
  }
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(idx)) {
    rb_raise(rb_eArgError, "idx must be an Integer");
  }
  struct llama_sampler* native_smpl = get_llama_sampler(smpl);
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  const int index = NUM2INT(idx);
  const int32_t sampled = llama_sampler_sample(native_smpl, wrapped_ctx->context, index);
  RB_GC_GUARD(smpl);
  RB_GC_GUARD(ctx);
  return INT2NUM(sampled);
}

.llama_set_adapters_lora(context, adapters, scales) ⇒ `Integer`

Parameters:

context (LlamaContext)
adapters (Array<LlamaAdapterLora>)
scales (Array<Float>)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2108

/*
 * LlamaCpp.llama_set_adapters_lora(context, adapters, scales) -> Integer
 *
 * Validates that `adapters` is an Array of LlamaAdapterLora and `scales` an
 * Array of Float of the same length, copies both into temporary C arrays,
 * and passes them to llama_set_adapters_lora(). The native int32_t result
 * is returned as a Ruby Integer.
 *
 * Raises ArgumentError if `ctx` is not a LlamaContext, either collection is
 * not an Array, the lengths differ, or an element has the wrong type.
 */
static VALUE rb_llama_set_adapters_lora(VALUE self, VALUE ctx, VALUE adapters, VALUE scales) {
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
    return Qnil;
  }
  if (!RB_TYPE_P(adapters, T_ARRAY)) {
    rb_raise(rb_eArgError, "adapters must be an Array");
    return Qnil;
  }
  if (!RB_TYPE_P(scales, T_ARRAY)) {
    rb_raise(rb_eArgError, "scales must be an Array");
    return Qnil;
  }
  long n_adapters = RARRAY_LEN(adapters);
  long n_scales = RARRAY_LEN(scales);
  if (n_adapters != n_scales) {
    rb_raise(rb_eArgError, "adapters and scales must have the same length");
    return Qnil;
  }
  /* Validate every element before anything is converted. */
  for (long i = 0; i < n_adapters; i++) {
    VALUE adapter = rb_ary_entry(adapters, i);
    if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
      rb_raise(rb_eArgError, "adapters must be an Array of LlamaAdapterLora");
      return Qnil;
    }
  }
  for (long i = 0; i < n_scales; i++) {
    VALUE scale = rb_ary_entry(scales, i);
    if (!RB_FLOAT_TYPE_P(scale)) {
      rb_raise(rb_eArgError, "scales must be an Array of Float");
      return Qnil;
    }
  }
  /* Temporary C arrays live on the stack for the duration of this call. */
  struct llama_adapter_lora** adapters_ = ALLOCA_N(struct llama_adapter_lora*, n_adapters);
  for (long i = 0; i < n_adapters; i++) {
    VALUE adapter = rb_ary_entry(adapters, i);
    llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
    adapters_[i] = adapter_wrapper->adapter;
  }
  float* scales_ = ALLOCA_N(float, n_scales);
  for (long i = 0; i < n_scales; i++) {
    VALUE scale = rb_ary_entry(scales, i);
    scales_[i] = (float)NUM2DBL(scale);
  }
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
  const int32_t res = llama_set_adapters_lora(context_wrapper->context, adapters_, n_adapters, scales_);
  RB_GC_GUARD(ctx);
  RB_GC_GUARD(adapters);
  RB_GC_GUARD(scales);
  /* res is a C integer: convert C -> Ruby (INT2NUM), not Ruby -> C. */
  return INT2NUM(res);
}

.llama_set_causal_attn(context, causal_attn) ⇒ `NilClass`

Parameters:

context (LlamaContext)
causal_attn (Boolean)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2689

/*
 * LlamaCpp.llama_set_causal_attn(context, causal_attn) -> nil
 *
 * Passes the truthiness of `causal_attn` (per RTEST) to
 * llama_set_causal_attn() for the context wrapped by `ctx`.
 * Raises ArgumentError if `ctx` is not a LlamaContext.
 */
static VALUE rb_llama_set_causal_attn(VALUE self, VALUE ctx, VALUE causal_attn) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  bool enabled = false;
  if (RTEST(causal_attn)) {
    enabled = true;
  }
  llama_set_causal_attn(wrapped_ctx->context, enabled);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_set_embeddings(context, embeddings) ⇒ `NilClass`

Parameters:

context (LlamaContext)
embeddings (Boolean)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2671

/*
 * LlamaCpp.llama_set_embeddings(context, embeddings) -> nil
 *
 * Passes the truthiness of `embeddings` (per RTEST) to
 * llama_set_embeddings() for the context wrapped by `ctx`.
 * Raises ArgumentError if `ctx` is not a LlamaContext.
 */
static VALUE rb_llama_set_embeddings(VALUE self, VALUE ctx, VALUE embeddings) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  bool enabled = false;
  if (RTEST(embeddings)) {
    enabled = true;
  }
  llama_set_embeddings(wrapped_ctx->context, enabled);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_set_n_threads(context, n_threads, n_threads_batch) ⇒ `NilClass`

Parameters:

context (LlamaContext)
n_threads (Integer)
n_threads_batch (Integer)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2614

/*
 * LlamaCpp.llama_set_n_threads(context, n_threads, n_threads_batch) -> nil
 *
 * Converts both thread counts with NUM2INT and forwards them to
 * llama_set_n_threads() for the context wrapped by `ctx`.
 * Raises ArgumentError if `ctx` is not a LlamaContext or either count is
 * not an Integer.
 */
static VALUE rb_llama_set_n_threads(VALUE self, VALUE ctx, VALUE n_threads, VALUE n_threads_batch) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(n_threads)) {
    rb_raise(rb_eArgError, "n_threads must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(n_threads_batch)) {
    rb_raise(rb_eArgError, "n_threads_batch must be an Integer");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  const int threads = NUM2INT(n_threads);
  const int threads_batch = NUM2INT(n_threads_batch);
  llama_set_n_threads(wrapped_ctx->context, threads, threads_batch);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_set_warmup(context, warmup) ⇒ `NilClass`

Parameters:

context (LlamaContext)
warmup (Boolean)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2707

/*
 * LlamaCpp.llama_set_warmup(context, warmup) -> nil
 *
 * Passes the truthiness of `warmup` (per RTEST) to llama_set_warmup() for
 * the context wrapped by `ctx`.
 * Raises ArgumentError if `ctx` is not a LlamaContext.
 */
static VALUE rb_llama_set_warmup(VALUE self, VALUE ctx, VALUE warmup) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  bool enabled = false;
  if (RTEST(warmup)) {
    enabled = true;
  }
  llama_set_warmup(wrapped_ctx->context, enabled);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_state_get_size(context) ⇒ `Integer`

Parameters:

context (LlamaContext)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2431

/*
 * LlamaCpp.llama_state_get_size(context) -> Integer
 *
 * Returns the size_t reported by llama_state_get_size() for the context
 * wrapped by `ctx`. Raises ArgumentError if `ctx` is not a LlamaContext.
 */
static VALUE rb_llama_state_get_size(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  const size_t n_bytes = llama_state_get_size(wrapped_ctx->context);
  RB_GC_GUARD(ctx);
  return SIZET2NUM(n_bytes);
}

.llama_state_seq_get_size(context, seq_id) ⇒ `Integer`

Parameters:

context (LlamaContext)
seq_id (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2448

/*
 * LlamaCpp.llama_state_seq_get_size(context, seq_id) -> Integer
 *
 * Returns the size_t reported by llama_state_seq_get_size() for the given
 * context and sequence id (converted with NUM2INT).
 * Raises ArgumentError if `ctx` is not a LlamaContext or `seq_id` is not an
 * Integer.
 */
static VALUE rb_llama_state_seq_get_size(VALUE self, VALUE ctx, VALUE seq_id) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  const int seq = NUM2INT(seq_id);
  const size_t n_bytes = llama_state_seq_get_size(wrapped_ctx->context, seq);
  RB_GC_GUARD(ctx);
  return SIZET2NUM(n_bytes);
}

.llama_state_seq_get_size_ext(context, seq_id, flags) ⇒ `Integer`

Parameters:

context (LlamaContext)
seq_id (Integer)
flags (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2470

/*
 * LlamaCpp.llama_state_seq_get_size_ext(context, seq_id, flags) -> Integer
 *
 * Returns the size_t reported by llama_state_seq_get_size_ext() for the
 * given context, sequence id (NUM2INT) and flags (NUM2UINT, as uint32_t).
 * Raises ArgumentError if `ctx` is not a LlamaContext or `seq_id` / `flags`
 * is not an Integer.
 */
static VALUE rb_llama_state_seq_get_size_ext(VALUE self, VALUE ctx, VALUE seq_id, VALUE flags) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  if (!RB_INTEGER_TYPE_P(seq_id)) {
    rb_raise(rb_eArgError, "seq_id must be an Integer");
  }
  if (!RB_INTEGER_TYPE_P(flags)) {
    rb_raise(rb_eArgError, "flags must be an Integer");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  const int seq = NUM2INT(seq_id);
  const uint32_t flag_bits = (uint32_t)NUM2UINT(flags);
  const size_t n_bytes = llama_state_seq_get_size_ext(wrapped_ctx->context, seq, flag_bits);
  RB_GC_GUARD(ctx);
  return SIZET2NUM(n_bytes);
}

.llama_supports_gpu_offload? ⇒ `Boolean`

Returns:

(Boolean)



1523
1524
1525

# File 'ext/llama_cpp/llama_cpp.c', line 1523

/*
 * LlamaCpp.llama_supports_gpu_offload? -> true or false
 *
 * Maps the result of llama_supports_gpu_offload() to Qtrue / Qfalse.
 */
static VALUE rb_llama_supports_gpu_offload(VALUE self) {
  if (llama_supports_gpu_offload()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_supports_mlock? ⇒ `Boolean`

Returns:

(Boolean)



1515
1516
1517

# File 'ext/llama_cpp/llama_cpp.c', line 1515

/*
 * LlamaCpp.llama_supports_mlock? -> true or false
 *
 * Maps the result of llama_supports_mlock() to Qtrue / Qfalse.
 */
static VALUE rb_llama_supports_mlock(VALUE self) {
  if (llama_supports_mlock()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_supports_mmap? ⇒ `Boolean`

Returns:

(Boolean)



1507
1508
1509

# File 'ext/llama_cpp/llama_cpp.c', line 1507

/*
 * LlamaCpp.llama_supports_mmap? -> true or false
 *
 * Maps the result of llama_supports_mmap() to Qtrue / Qfalse.
 */
static VALUE rb_llama_supports_mmap(VALUE self) {
  if (llama_supports_mmap()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_supports_rpc? ⇒ `Boolean`

Returns:

(Boolean)



1531
1532
1533

# File 'ext/llama_cpp/llama_cpp.c', line 1531

/*
 * LlamaCpp.llama_supports_rpc? -> true or false
 *
 * Maps the result of llama_supports_rpc() to Qtrue / Qfalse.
 */
static VALUE rb_llama_supports_rpc(VALUE self) {
  if (llama_supports_rpc()) {
    return Qtrue;
  }
  return Qfalse;
}

.llama_synchronize(context) ⇒ `NilClass`

Parameters:

context (LlamaContext)

Returns:

(NilClass)

# File 'ext/llama_cpp/llama_cpp.c', line 2724

/*
 * LlamaCpp.llama_synchronize(context) -> nil
 *
 * Calls llama_synchronize() on the native context wrapped by `ctx`.
 * Raises ArgumentError if `ctx` is not a LlamaContext.
 */
static VALUE rb_llama_synchronize(VALUE self, VALUE ctx) {
  if (!RTEST(rb_obj_is_kind_of(ctx, rb_cLlamaContext))) {
    rb_raise(rb_eArgError, "ctx must be a LlamaContext");
  }
  llama_context_wrapper* wrapped_ctx = get_llama_context_wrapper(ctx);
  llama_synchronize(wrapped_ctx->context);
  RB_GC_GUARD(ctx);
  return Qnil;
}

.llama_time_us ⇒ `Integer`

Returns:

(Integer)



1475
1476
1477

# File 'ext/llama_cpp/llama_cpp.c', line 1475

/*
 * LlamaCpp.llama_time_us -> Integer
 *
 * Returns the value of llama_time_us() as a Ruby Integer.
 *
 * The conversion goes through LL2NUM (long long) rather than LONG2NUM so a
 * 64-bit value is not truncated on platforms where `long` is 32 bits wide.
 */
static VALUE rb_llama_time_us(VALUE self) {
  return LL2NUM(llama_time_us());
}

.llama_token_to_piece(vocab, token, lstrip, special) ⇒ `String`

Parameters:

vocab (LlamaVocab)
token (Integer)
lstrip (Integer)
special (Boolean)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 3174

/*
 * LlamaCpp.llama_token_to_piece(vocab, token, lstrip, special) -> String
 *
 * Renders `token` as text via llama_token_to_piece(). A small stack buffer
 * is tried first; when the native call returns a negative value, its
 * magnitude is used as the required size and the call is repeated with a
 * heap buffer of that size.
 *
 * The Ruby String is built from the byte count returned by the native call,
 * because this function never writes a NUL terminator into the buffer
 * itself (reading it as a C string would run past the written bytes).
 *
 * Raises ArgumentError if `vocab` is not a LlamaVocab or `token` / `lstrip`
 * is not an Integer, and RuntimeError if the second attempt does not
 * produce the expected number of bytes.
 */
static VALUE rb_llama_token_to_piece(VALUE self, VALUE vocab, VALUE token, VALUE lstrip, VALUE special) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(lstrip)) {
    rb_raise(rb_eArgError, "lstrip must be an Integer");
    return Qnil;
  }

  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  llama_token token_ = NUM2INT(token);
  const int32_t lstrip_ = NUM2INT(lstrip);
  const bool special_ = RTEST(special) ? true : false;

  /* First attempt: most pieces fit in a few bytes, so avoid the heap. */
  char small_buf[8];
  const int32_t n_chars = llama_token_to_piece(vocab_wrapper->vocab, token_, small_buf, (int32_t)sizeof(small_buf), lstrip_, special_);

  VALUE ret;
  if (n_chars >= 0) {
    ret = rb_utf8_str_new(small_buf, (long)n_chars);
  } else {
    /* Negative result: retry with a buffer of exactly the reported size. */
    const int32_t needed = -n_chars;
    char* buf = (char*)ruby_xmalloc(sizeof(char) * (size_t)needed);
    const int32_t check = llama_token_to_piece(vocab_wrapper->vocab, token_, buf, needed, lstrip_, special_);
    if (check != needed) {
      ruby_xfree(buf);
      rb_raise(rb_eRuntimeError, "Failed to convert");
      return Qnil;
    }
    ret = rb_utf8_str_new(buf, (long)check);
    ruby_xfree(buf);
  }

  RB_GC_GUARD(vocab);

  return ret;
}

.llama_tokenize(vocab, text, tokens, n_tokens_max, add_special, parse_special) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)
text (String)
tokens (Array<Integer>)
n_tokens_max (Integer)
add_special (Boolean)
parse_special (Boolean)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3106

/*
 * Tokenizes `text` with the given vocabulary and stores the resulting token
 * ids into the caller-supplied `tokens` Array.
 *
 * Arguments:
 *   vocab         - LlamaVocab
 *   text          - non-empty String
 *   tokens        - Array; elements within the first n_tokens_max slots must
 *                   be Integers. On success it is resized to the token count
 *                   and overwritten with the produced ids.
 *   n_tokens_max  - Integer capacity passed to llama_tokenize(); when it is
 *                   <= 0 no buffer is allocated and NULL is passed.
 *   add_special   - truthy/falsy, forwarded as a bool
 *   parse_special - truthy/falsy, forwarded as a bool
 *
 * Returns the Integer result of llama_tokenize(). When that result is
 * negative it is returned as-is and `tokens` is left untouched.
 *
 * Raises ArgumentError on wrong argument types or empty text.
 */
static VALUE rb_llama_tokenize(VALUE self, VALUE vocab, VALUE text, VALUE tokens, VALUE n_tokens_max, VALUE add_special, VALUE parse_special) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_TYPE_P(text, T_STRING)) {
    rb_raise(rb_eArgError, "text must be a String");
    return Qnil;
  }
  if (!RB_TYPE_P(tokens, T_ARRAY)) {
    rb_raise(rb_eArgError, "tokens must be an Array");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(n_tokens_max)) {
    rb_raise(rb_eArgError, "n_tokens_max must be an Integer");
    return Qnil;
  }

  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  const char* text_ = StringValueCStr(text);
  const int32_t text_len = (int32_t)strlen(text_);
  int32_t n_tokens_max_ = NUM2INT(n_tokens_max);
  const bool add_special_ = RTEST(add_special) ? true : false;
  const bool parse_special_ = RTEST(parse_special) ? true : false;

  if (text_len <= 0) {
    rb_raise(rb_eArgError, "text must not be empty");
    return Qnil;
  }

  /*
   * Size the buffer from the converted C integer, not from the Ruby VALUE:
   * the raw VALUE of an Integer is a tagged word, so comparing or sizing with
   * it never detects <= 0 and requests a wrong (possibly enormous) amount of
   * stack memory.
   */
  llama_token* tokens_ = n_tokens_max_ <= 0 ? NULL : ALLOCA_N(llama_token, n_tokens_max_);
  const int32_t sz_tokens = (int32_t)RARRAY_LEN(tokens);

  /* Seed the buffer with the existing array contents (validating their type). */
  for (int32_t i = 0; i < n_tokens_max_; i++) {
    if (i >= sz_tokens) break;
    VALUE token = rb_ary_entry(tokens, i);
    if (!RB_INTEGER_TYPE_P(token)) {
      rb_raise(rb_eArgError, "tokens must be an Array of Integers");
      return Qnil;
    }
    tokens_[i] = NUM2INT(token);
  }

  const int32_t n_tokens = llama_tokenize(vocab_wrapper->vocab, text_, text_len, tokens_, n_tokens_max_, add_special_, parse_special_);

  /* Negative result: hand it back to the caller without touching `tokens`. */
  if (n_tokens < 0) {
    RB_GC_GUARD(vocab);
    RB_GC_GUARD(text);
    return INT2NUM(n_tokens);
  }

  rb_ary_resize(tokens, n_tokens);
  for (int i = 0; i < n_tokens; i++) {
    rb_ary_store(tokens, i, INT2NUM(tokens_[i]));
  }

  RB_GC_GUARD(vocab);
  RB_GC_GUARD(text);

  return INT2NUM(n_tokens);
}

.llama_vocab_bos(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2845

/*
 * Returns the result of llama_vocab_bos() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_bos(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_bos(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_eos(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2861

/*
 * Returns the result of llama_vocab_eos() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_eos(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_eos(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_eot(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2877

/*
 * Returns the result of llama_vocab_eot() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_eot(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_eot(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_fim_mid(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3037

/*
 * Returns the result of llama_vocab_fim_mid() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_fim_mid(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_fim_mid(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_fim_pad(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3053

/*
 * Returns the result of llama_vocab_fim_pad() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_fim_pad(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_fim_pad(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_fim_pre(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3005

/*
 * Returns the result of llama_vocab_fim_pre() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_fim_pre(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_fim_pre(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_fim_rep(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3069

/*
 * Returns the result of llama_vocab_fim_rep() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_fim_rep(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_fim_rep(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_fim_sep(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3085

/*
 * Returns the result of llama_vocab_fim_sep() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_fim_sep(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_fim_sep(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_fim_suf(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 3021

/*
 * Returns the result of llama_vocab_fim_suf() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_fim_suf(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_fim_suf(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_get_add_bos(vocab) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2957

/*
 * Returns true or false according to llama_vocab_get_add_bos() for the
 * wrapped vocabulary. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_get_add_bos(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE answer = Qfalse;
  if (llama_vocab_get_add_bos(wrapped->vocab)) {
    answer = Qtrue;
  }
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return answer;
}

.llama_vocab_get_add_eos(vocab) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2973

/*
 * Returns true or false according to llama_vocab_get_add_eos() for the
 * wrapped vocabulary. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_get_add_eos(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE answer = Qfalse;
  if (llama_vocab_get_add_eos(wrapped->vocab)) {
    answer = Qtrue;
  }
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return answer;
}

.llama_vocab_get_add_sep(vocab) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2989

/*
 * Returns true or false according to llama_vocab_get_add_sep() for the
 * wrapped vocabulary. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_get_add_sep(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE answer = Qfalse;
  if (llama_vocab_get_add_sep(wrapped->vocab)) {
    answer = Qtrue;
  }
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return answer;
}

.llama_vocab_get_attr(vocab, token) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2783

/*
 * Returns the result of llama_vocab_get_attr() for `token` in the wrapped
 * vocabulary as a Ruby Integer.
 *
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_get_attr(VALUE self, VALUE vocab, VALUE token) {
  /* Argument validation; rb_raise never returns. */
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  VALUE result = INT2NUM((int32_t)llama_vocab_get_attr(wrapped->vocab, token_id));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_get_score(vocab, token) ⇒ `Float`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Float)

# File 'ext/llama_cpp/llama_cpp.c', line 2762

/*
 * Returns the result of llama_vocab_get_score() for `token` in the wrapped
 * vocabulary as a Ruby Float.
 *
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_get_score(VALUE self, VALUE vocab, VALUE token) {
  /* Argument validation; rb_raise never returns. */
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  /* The native value is a float; it is widened to double for DBL2NUM. */
  const float raw_score = llama_vocab_get_score(wrapped->vocab, token_id);
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return DBL2NUM((double)raw_score);
}

.llama_vocab_get_text(vocab, token) ⇒ `String`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(String)

# File 'ext/llama_cpp/llama_cpp.c', line 2741

/*
 * Returns the C string produced by llama_vocab_get_text() for `token` in the
 * wrapped vocabulary as a UTF-8 Ruby String.
 *
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_get_text(VALUE self, VALUE vocab, VALUE token) {
  /* Argument validation; rb_raise never returns. */
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  const char* piece = llama_vocab_get_text(wrapped->vocab, token_id);
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return rb_utf8_str_new_cstr(piece);
}

.llama_vocab_is_control?(vocab, token) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2825

/*
 * Returns true or false according to llama_vocab_is_control() for `token`
 * in the wrapped vocabulary.
 *
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer. The vocab error message names the `vocab` argument, matching
 * the other vocabulary bindings.
 */
static VALUE rb_llama_vocab_is_control(VALUE self, VALUE vocab, VALUE token) {
  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
    return Qnil;
  }
  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
  const bool is_control = llama_vocab_is_control(vocab_wrapper->vocab, NUM2INT(token));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return is_control ? Qtrue : Qfalse;
}

.llama_vocab_is_eog?(vocab, token) ⇒ `Boolean`

Parameters:

vocab (LlamaVocab)
token (Integer)

Returns:

(Boolean)

# File 'ext/llama_cpp/llama_cpp.c', line 2804

/*
 * Returns true or false according to llama_vocab_is_eog() for `token` in the
 * wrapped vocabulary.
 *
 * Raises ArgumentError unless `vocab` is a LlamaVocab and `token` is an
 * Integer.
 */
static VALUE rb_llama_vocab_is_eog(VALUE self, VALUE vocab, VALUE token) {
  /* Argument validation; rb_raise never returns. */
  if (!RTEST(rb_obj_is_kind_of(vocab, rb_cLlamaVocab))) {
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }
  if (!RB_INTEGER_TYPE_P(token)) {
    rb_raise(rb_eArgError, "token must be an Integer");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  const int token_id = NUM2INT(token);
  VALUE answer = Qfalse;
  if (llama_vocab_is_eog(wrapped->vocab, token_id)) {
    answer = Qtrue;
  }
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return answer;
}

.llama_vocab_mask(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2941

/*
 * Returns the result of llama_vocab_mask() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_mask(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_mask(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_n_tokens(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1851

/*
 * Returns the result of llama_vocab_n_tokens() for the wrapped vocabulary as
 * a Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_n_tokens(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  const int count = llama_vocab_n_tokens(wrapped->vocab);
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return INT2NUM(count);
}

.llama_vocab_nl(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2909

/*
 * Returns the result of llama_vocab_nl() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_nl(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_nl(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_pad(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2925

/*
 * Returns the result of llama_vocab_pad() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_pad(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_pad(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_sep(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 2893

/*
 * Returns the result of llama_vocab_sep() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_sep(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  VALUE result = INT2NUM(llama_vocab_sep(wrapped->vocab));
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return result;
}

.llama_vocab_type(vocab) ⇒ `Integer`

Parameters:

vocab (LlamaVocab)

Returns:

(Integer)

# File 'ext/llama_cpp/llama_cpp.c', line 1835

/*
 * Returns the result of llama_vocab_type() for the wrapped vocabulary as a
 * Ruby Integer. Raises ArgumentError unless `vocab` is a LlamaVocab.
 */
static VALUE rb_llama_vocab_type(VALUE self, VALUE vocab) {
  const VALUE is_vocab = rb_obj_is_kind_of(vocab, rb_cLlamaVocab);
  if (!RTEST(is_vocab)) {
    /* rb_raise never returns, so nothing needs to follow it. */
    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
  }

  llama_vocab_wrapper* wrapped = get_llama_vocab_wrapper(vocab);
  const int type_code = llama_vocab_type(wrapped->vocab);
  /* Keep `vocab` reachable until the native call above has completed. */
  RB_GC_GUARD(vocab);
  return INT2NUM(type_code);
}

Module: LlamaCpp

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.generate(context, prompt, n_predict: 128) ⇒ String

.ggml_backend_load_all ⇒ NilClass

.llama_adapter_get_alora_n_invocation_tokens(adapter) ⇒ Integer

.llama_adapter_lora_free(adapter) ⇒ NilClass

.llama_adapter_lora_init(model, path_lora) ⇒ LlamaAdapterLora

.llama_adapter_meta_count(adapter) ⇒ Integer

.llama_backend_free ⇒ NilClass

.llama_backend_init ⇒ NilClass

.llama_batch_free(batch) ⇒ NilClass

.llama_batch_get_one(tokens) ⇒ LlamaBatch

.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ LlamaBatch

.llama_decode(context, batch) ⇒ Integer

.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ String

.llama_encode(context, batch) ⇒ Integer

.llama_flash_attn_type_name(flash_attn_type) ⇒ String

.llama_free(context) ⇒ NilClass

.llama_get_memory(context) ⇒ LlamaMemoryT

.llama_get_model(context) ⇒ LlamaModel

.llama_init_from_model(model, params) ⇒ LlamaContext

.llama_max_devices ⇒ Integer

.llama_max_parallel_sequences ⇒ Integer

.llama_max_tensor_buft_overrides ⇒ Integer

.llama_memory_can_shift?(memory) ⇒ Boolean

.llama_memory_clear(memory, data) ⇒ Object

.llama_memory_seq_add(memory, seq_id, p0, p1, delta) ⇒ Object

.llama_memory_seq_cp(memory, seq_id_src, seq_id_dst, p0, p1) ⇒ Object

.llama_memory_seq_div(memory, seq_id, p0, p1, d) ⇒ Object

.llama_memory_seq_keep(memory, seq_id) ⇒ Object

.llama_memory_seq_pos_max(memory, seq_id) ⇒ Object

.llama_memory_seq_pos_min(memory, seq_id) ⇒ Object

.llama_memory_seq_rm(memory, seq_id, p0, p1) ⇒ Object

.llama_model_cls_label(model, id) ⇒ Integer

.llama_model_decoder_start_token(model) ⇒ Integer

.llama_model_desc(model) ⇒ String

.llama_model_free(model) ⇒ NilClass

.llama_model_get_vocab(model) ⇒ LlamaVocab

.llama_model_has_decoder(model) ⇒ Boolean

.llama_model_has_encoder?(model) ⇒ Boolean

.llama_model_is_diffusion?(model) ⇒ Boolean

.llama_model_is_hybrid?(model) ⇒ Boolean

.llama_model_is_recurrent?(model) ⇒ Boolean

.llama_model_load_from_file(path_model) ⇒ LlamaModel

.llama_model_load_from_splits(paths, params) ⇒ LlamaModel

.llama_model_meta_count(model) ⇒ Integer

.llama_model_meta_key_str(key) ⇒ String

.llama_model_n_cls_out(model) ⇒ Integer

.llama_model_n_ctx_train(model) ⇒ Integer

.llama_model_n_embd(model) ⇒ Integer

.llama_model_n_embd_inp(model) ⇒ Integer

.llama_model_n_embd_out(model) ⇒ Integer

.llama_model_n_head(model) ⇒ Integer

.llama_model_n_head_kv(model) ⇒ Integer

.llama_model_n_layer(model) ⇒ Integer

.llama_model_n_params(model) ⇒ Integer

.llama_model_n_swa(model) ⇒ Integer

.llama_model_quantize(fname_inp, fname_out, params) ⇒ Boolean

.llama_model_rope_freq_scale_train(model) ⇒ Float

.llama_model_rope_type(model) ⇒ Integer

.llama_model_save_to_file(model, path_model) ⇒ NilClass

.llama_model_size(model) ⇒ Integer

.llama_n_batch(context) ⇒ Integer

.llama_n_ctx(context) ⇒ Integer

.llama_n_ctx_seq(context) ⇒ Integer

.llama_n_seq_max(context) ⇒ Integer

.llama_n_threads(context) ⇒ Integer

.llama_n_threads_batch(context) ⇒ Integer

.llama_n_ubatch(context) ⇒ Integer

.llama_numa_init(numa) ⇒ NilClass

.llama_perf_context(context) ⇒ LlamaPerfContextData

.llama_perf_context_print(context) ⇒ NilClass

.llama_perf_context_reset(context) ⇒ NilClass

.llama_perf_sampler(chain) ⇒ LlamaPerfSamplerData

.llama_perf_sampler_print(chain) ⇒ NilClass

.llama_perf_sampler_reset(chain) ⇒ NilClass

.generate(context, prompt, n_predict: 128) ⇒ `String`

.ggml_backend_load_all ⇒ `NilClass`

.llama_adapter_get_alora_n_invocation_tokens(adapter) ⇒ `Integer`

.llama_adapter_lora_free(adapter) ⇒ `NilClass`

.llama_adapter_lora_init(model, path_lora) ⇒ `LlamaAdapterLora`

.llama_adapter_meta_count(adapter) ⇒ `Integer`

.llama_backend_free ⇒ `NilClass`

.llama_backend_init ⇒ `NilClass`

.llama_batch_free(batch) ⇒ `NilClass`

.llama_batch_get_one(tokens) ⇒ `LlamaBatch`

.llama_batch_init(n_tokens, embd, n_seq_max) ⇒ `LlamaBatch`

.llama_decode(context, batch) ⇒ `Integer`

.llama_detokenize(vocab, tokens, remove_special, unparse_special) ⇒ `String`

.llama_encode(context, batch) ⇒ `Integer`

.llama_flash_attn_type_name(flash_attn_type) ⇒ `String`

.llama_free(context) ⇒ `NilClass`

.llama_get_memory(context) ⇒ `LlamaMemoryT`

.llama_get_model(context) ⇒ `LlamaModel`

.llama_init_from_model(model, params) ⇒ `LlamaContext`

.llama_max_devices ⇒ `Integer`

.llama_max_parallel_sequences ⇒ `Integer`

.llama_max_tensor_buft_overrides ⇒ `Integer`

.llama_memory_can_shift?(memory) ⇒ `Boolean`

.llama_memory_clear(memory, data) ⇒ `Object`

.llama_memory_seq_add(memory, seq_id, p0, p1, delta) ⇒ `Object`

.llama_memory_seq_cp(memory, seq_id_src, seq_id_dst, p0, p1) ⇒ `Object`

.llama_memory_seq_div(memory, seq_id, p0, p1, d) ⇒ `Object`

.llama_memory_seq_keep(memory, seq_id) ⇒ `Object`

.llama_memory_seq_pos_max(memory, seq_id) ⇒ `Object`

.llama_memory_seq_pos_min(memory, seq_id) ⇒ `Object`

.llama_memory_seq_rm(memory, seq_id, p0, p1) ⇒ `Object`

.llama_model_cls_label(model, id) ⇒ `Integer`

.llama_model_decoder_start_token(model) ⇒ `Integer`

.llama_model_desc(model) ⇒ `String`

.llama_model_free(model) ⇒ `NilClass`

.llama_model_get_vocab(model) ⇒ `LlamaVocab`

.llama_model_has_decoder(model) ⇒ `Boolean`

.llama_model_has_encoder?(model) ⇒ `Boolean`

.llama_model_is_diffusion?(model) ⇒ `Boolean`

.llama_model_is_hybrid?(model) ⇒ `Boolean`

.llama_model_is_recurrent?(model) ⇒ `Boolean`

.llama_model_load_from_file(path_model) ⇒ `LlamaModel`

.llama_model_load_from_splits(paths, params) ⇒ `LlamaModel`

.llama_model_meta_count(model) ⇒ `Integer`

.llama_model_meta_key_str(key) ⇒ `String`

.llama_model_n_cls_out(model) ⇒ `Integer`

.llama_model_n_ctx_train(model) ⇒ `Integer`

.llama_model_n_embd(model) ⇒ `Integer`

.llama_model_n_embd_inp(model) ⇒ `Integer`

.llama_model_n_embd_out(model) ⇒ `Integer`

.llama_model_n_head(model) ⇒ `Integer`

.llama_model_n_head_kv(model) ⇒ `Integer`

.llama_model_n_layer(model) ⇒ `Integer`

.llama_model_n_params(model) ⇒ `Integer`

.llama_model_n_swa(model) ⇒ `Integer`

.llama_model_quantize(fname_inp, fname_out, params) ⇒ `Boolean`

.llama_model_rope_freq_scale_train(model) ⇒ `Float`

.llama_model_rope_type(model) ⇒ `Integer`

.llama_model_save_to_file(model, path_model) ⇒ `NilClass`

.llama_model_size(model) ⇒ `Integer`

.llama_n_batch(context) ⇒ `Integer`

.llama_n_ctx(context) ⇒ `Integer`

.llama_n_ctx_seq(context) ⇒ `Integer`

.llama_n_seq_max(context) ⇒ `Integer`

.llama_n_threads(context) ⇒ `Integer`

.llama_n_threads_batch(context) ⇒ `Integer`

.llama_n_ubatch(context) ⇒ `Integer`

.llama_numa_init(numa) ⇒ `NilClass`

.llama_perf_context(context) ⇒ `LlamaPerfContextData`

.llama_perf_context_print(context) ⇒ `NilClass`

.llama_perf_context_reset(context) ⇒ `NilClass`

.llama_perf_sampler(chain) ⇒ `LlamaPerfSamplerData`

.llama_perf_sampler_print(chain) ⇒ `NilClass`

.llama_perf_sampler_reset(chain) ⇒ `NilClass`

.llama_pooling_type(context) ⇒ `Integer`

.llama_print_system_info ⇒ `String`

.llama_sampler_accept(sampler, token) ⇒ `NilClass`

.llama_sampler_apply(sampler, cur_p) ⇒ `NilClass`

.llama_sampler_chain_add(chain, smpl) ⇒ `NilClass`

.llama_sampler_chain_get(chain, i) ⇒ `LlamaSampler`