Class: LLaMACpp::Client
- Inherits: Object
- Defined in: lib/llama_cpp/client.rb
Overview
Client provides a high-level interface to the LLM model.
Instance Method Summary
- #completions(prompt, max_tokens: 64, n_keep: 10, repeat_last_n: 64, n_batch: 512, top_k: 40, top_p: 0.95, temperature: 0.80, repeat_penalty: 1.1) ⇒ String
  Generates completions for a given prompt.
- #embeddings(text) ⇒ Array<Float>
  Obtains the embedding for a given text.
- #initialize(model_path:, lora_adapter_path: nil, lora_base_path: nil, n_ctx: 512, n_parts: -1, memory_f16: false, use_mmap: true, use_mlock: false, embedding: false, n_threads: 1, seed: 0) ⇒ Client (constructor)
  Creates a new client.
Constructor Details
#initialize(model_path:, lora_adapter_path: nil, lora_base_path: nil, n_ctx: 512, n_parts: -1, memory_f16: false, use_mmap: true, use_mlock: false, embedding: false, n_threads: 1, seed: 0) ⇒ Client
Creates a new client.
# File 'lib/llama_cpp/client.rb', line 21

def initialize(model_path:, lora_adapter_path: nil, lora_base_path: nil,
               n_ctx: 512, n_parts: -1, memory_f16: false, use_mmap: true, use_mlock: false,
               embedding: false, n_threads: 1, seed: 0)
  @params = {
    model_path: model_path,
    lora_adapter_path: lora_adapter_path,
    lora_base_path: lora_base_path,
    n_ctx: n_ctx,
    n_parts: n_parts,
    memory_f16: memory_f16,
    use_mmap: use_mmap,
    use_mlock: use_mlock,
    embedding: embedding,
    n_threads: n_threads,
    seed: seed
  }
  @context_params = ContextParams.new
  @context_params.n_ctx = n_ctx
  @context_params.n_parts = n_parts
  @context_params.f16_kv = memory_f16
  @context_params.use_mmap = use_mmap
  @context_params.use_mlock = use_mlock
  @context_params.embedding = embedding
  @context_params.seed = seed
  @context = Context.new(model_path: model_path, params: @context_params)

  return unless lora_adapter_path.is_a?(String)

  if lora_base_path.is_a?(String)
    @context.apply_lora_from_file(lora_path: lora_adapter_path, base_model_path: lora_base_path, n_threads: n_threads)
  else
    @context.apply_lora_from_file(lora_path: lora_adapter_path, n_threads: n_threads)
  end
end
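A minimal usage sketch. The model path and thread count below are placeholders, not values shipped with the gem:

require 'llama_cpp'

# Build a client around a local model file; n_ctx and n_threads are optional.
client = LLaMACpp::Client.new(model_path: '/path/to/model.bin', n_ctx: 512, n_threads: 4)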
Instance Method Details
#completions(prompt, max_tokens: 64, n_keep: 10, repeat_last_n: 64, n_batch: 512, top_k: 40, top_p: 0.95, temperature: 0.80, repeat_penalty: 1.1) ⇒ String
Generates completions for a given prompt.
# File 'lib/llama_cpp/client.rb', line 70

def completions(prompt, max_tokens: 64, n_keep: 10, repeat_last_n: 64, n_batch: 512,
                top_k: 40, top_p: 0.95, temperature: 0.80, repeat_penalty: 1.1)
  embd_input = tokenize_prompt(prompt)

  n_ctx = @context.n_ctx
  raise ArgumentError, "prompt is too long #{embd_input.size} tokens, maximum is #{n_ctx - 4}" if embd_input.size > n_ctx - 4

  last_n_tokens = [0] * n_ctx

  embd = []
  n_consumed = 0
  n_past = 0
  n_remain = max_tokens
  output = []

  while n_remain != 0
    unless embd.empty?
      # If the context window would overflow, keep the first n_keep tokens
      # and re-insert half of the remaining history before evaluating.
      if n_past + embd.size > n_ctx
        n_left = n_past - n_keep
        n_past = n_keep
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end

      @context.eval(tokens: embd, n_past: n_past, n_threads: @params[:n_threads])
    end

    n_past += embd.size
    embd.clear

    if embd_input.size <= n_consumed
      # The prompt has been fully consumed: sample the next token.
      start = n_ctx - repeat_last_n
      id = @context.sample_top_p_top_k(
        last_n_tokens[start...(start + repeat_last_n)],
        top_k: top_k, top_p: top_p, temp: temperature, penalty: repeat_penalty
      )
      last_n_tokens.shift
      last_n_tokens.push(id)

      embd.push(id)
      n_remain -= 1
    else
      # Feed prompt tokens to the model in batches of up to n_batch.
      while embd_input.size > n_consumed
        embd.push(embd_input[n_consumed])
        last_n_tokens.shift
        last_n_tokens.push(embd_input[n_consumed])
        n_consumed += 1
        break if embd.size >= n_batch
      end
    end

    embd.each { |token| output << @context.token_to_str(token) }

    break if !embd.empty? && embd[-1] == LLaMACpp.token_eos
  end

  output.join.delete_prefix(" #{prompt}").strip
end
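A short sketch, assuming a client constructed as in the constructor example above; the prompt string and sampling values are illustrative:

text = client.completions('Hello, my name is', max_tokens: 32, temperature: 0.7)
puts text

Note that the returned string has the leading prompt stripped, so only the newly generated text is returned.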
#embeddings(text) ⇒ Array<Float>
Obtains the embedding for a given text.
# File 'lib/llama_cpp/client.rb', line 135

def embeddings(text)
  raise 'The embedding option is set to false' unless @params[:embedding]

  embd_input = tokenize_prompt(text)
  raise 'The result of tokenizing the input text is empty' unless embd_input.size.positive?

  @context.eval(tokens: embd_input, n_past: 0, n_threads: @params[:n_threads])
  @context.embeddings
end
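A minimal sketch, assuming the client was created with embedding: true (otherwise the method raises); the model path is a placeholder:

client = LLaMACpp::Client.new(model_path: '/path/to/model.bin', embedding: true)
vector = client.embeddings('Hello, world.')
puts vector.size  # dimensionality of the model's embedding vector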