Module: LLaMACpp
- Defined in:
- lib/llama_cpp.rb,
lib/llama_cpp/version.rb,
ext/llama_cpp/llama_cpp.cpp
Overview
llama_cpp.rb provides Ruby bindings for llama.cpp.
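A minimal usage sketch, assuming a quantized GGML model file is available (the model path and thread count below are placeholders; Context and ContextParams are the classes this module binds):

require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: './models/ggml-model-q4_0.bin', params: params)

puts LLaMACpp.generate(context, 'Hello, my name is', n_threads: 4)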
Constant Summary
- Params = ContextParams
  Class alias to match the interface of the whispercpp gem.
- VERSION = '0.0.4'
  The installed version of llama_cpp.rb.
- LLAMA_CPP_VERSION = 'master-c85e03d'
  The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_FILE_VERSION =
  The version of the llama.cpp model file format, as a String.
  rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str())
- LLAMA_FILE_MAGIC =
  The magic string identifying versioned model files.
  rb_str_new2(ss_magic.str().c_str())
- LLAMA_FILE_MAGIC_UNVERSIONED =
  The magic string identifying model files from before the format was versioned.
  rb_str_new2(ss_magic_unversioned.str().c_str())
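These constants can be read at runtime, for example to record which llama.cpp revision and file format a build was made against:

require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.0.4"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "master-c85e03d"
puts LLaMACpp::LLAMA_FILE_VERSION # model file format version, as a String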
Class Method Summary
- .generate(context, prompt, n_threads: 1) ⇒ String
  Generates text that follows the given prompt; intended as a simple operation check.
- .print_system_info ⇒ Object
  Returns the llama.cpp system information string.
- .token_bos ⇒ Object
  Returns the ID of the beginning-of-sentence (BOS) token.
- .token_eos ⇒ Object
  Returns the ID of the end-of-sentence (EOS) token.
Class Method Details
.generate(context, prompt, n_threads: 1) ⇒ String
Generates text that follows the given prompt; intended as a simple operation check.
# File 'lib/llama_cpp.rb', line 19

def generate(context, prompt, n_threads: 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
  prompt.insert(0, ' ')

  embd_input = context.tokenize(text: prompt, add_bos: true)

  n_ctx = context.n_ctx
  last_n_tokens = [0] * n_ctx

  embd = []
  n_consumed = 0
  n_keep = 10
  n_past = 0
  n_remain = 128
  repeat_last_n = 64
  output = []

  while n_remain != 0
    unless embd.empty?
      # When the context window would overflow, keep the first n_keep tokens
      # and reuse the most recent half of the remaining context.
      if n_past + embd.size > n_ctx
        n_left = n_past - n_keep
        n_past = n_keep
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end

      context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
    end

    n_past += embd.size
    embd.clear

    if embd_input.size <= n_consumed
      # The prompt is fully consumed: sample the next token with top-k/top-p
      # sampling and a repeat penalty over the last repeat_last_n tokens.
      start = n_ctx - repeat_last_n
      id = context.sample_top_p_top_k(
        last_n_tokens[start...(start + repeat_last_n)],
        top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1
      )
      last_n_tokens.shift
      last_n_tokens.push(id)
      embd.push(id)
      n_remain -= 1
    else
      # Feed the prompt tokens to the model in batches of up to 512.
      while embd_input.size > n_consumed
        embd.push(embd_input[n_consumed])
        last_n_tokens.shift
        last_n_tokens.push(embd_input[n_consumed])
        n_consumed += 1
        break if embd.size >= 512
      end
    end

    embd.each { |token| output << context.token_to_str(token) }

    break if embd[-1] == LLaMACpp.token_eos
  end

  output.join.delete_prefix(prompt).strip
end
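The sampling parameters (top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1) and the 128-token output limit are fixed, so this method suits smoke-testing a model more than production inference. A call looks like this, with context built as in the Overview sketch:

text = LLaMACpp.generate(context, 'Tell me a short story.', n_threads: 4)
puts text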
.print_system_info ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 573

static VALUE rb_llama_print_system_info(VALUE self) {
  const char* result = llama_print_system_info();
  return rb_utf8_str_new_cstr(result);
}
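Despite the name, the binding returns the information as a String rather than printing it, so callers print it themselves. The string lists compile-time features of the bundled llama.cpp, such as AVX or NEON support:

require 'llama_cpp'

puts LLaMACpp.print_system_info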
.token_bos ⇒ Object
Returns the ID of the beginning-of-sentence (BOS) token.
# File 'ext/llama_cpp/llama_cpp.cpp', line 565

static VALUE rb_llama_token_bos(VALUE self) {
  return INT2NUM(llama_token_bos());
}
.token_eos ⇒ Object
Returns the ID of the end-of-sentence (EOS) token.
# File 'ext/llama_cpp/llama_cpp.cpp', line 569

static VALUE rb_llama_token_eos(VALUE self) {
  return INT2NUM(llama_token_eos());
}
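Both functions take no arguments and return Integer token IDs; generate uses token_eos to stop decoding once the model emits it. A quick check:

require 'llama_cpp'

puts LLaMACpp.token_bos # Integer ID of the BOS token
puts LLaMACpp.token_eos # Integer ID of the EOS token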