Module: LLaMACpp
- Defined in:
- lib/llama_cpp.rb,
lib/llama_cpp/version.rb,
ext/llama_cpp/llama_cpp.cpp
Overview
llama_cpp.rb provides Ruby bindings for llama.cpp.
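A minimal usage sketch, assuming a GGML model file at the hypothetical path below and that the Context constructor accepts model_path: and params: keyword arguments:

require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')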
Constant Summary
- Params = ContextParams
  Class alias to match the interface of the whispercpp gem.
- VERSION = '0.0.5'
  The version of llama_cpp.rb you have installed.
- LLAMA_CPP_VERSION = 'master-315a95a'
  The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_FILE_VERSION = rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str())
- LLAMA_FILE_MAGIC = rb_str_new2(ss_magic.str().c_str())
- LLAMA_FILE_MAGIC_UNVERSIONED = rb_str_new2(ss_magic_unversioned.str().c_str())
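These constants can be inspected at runtime; the version values are the ones listed above, and the file-format constants are Strings built by the C extension:

require 'llama_cpp'

puts LLaMACpp::VERSION                      # => "0.0.5"
puts LLaMACpp::LLAMA_CPP_VERSION            # => "master-315a95a"
puts LLaMACpp::LLAMA_FILE_MAGIC             # magic number of the versioned model file format
puts LLaMACpp::LLAMA_FILE_MAGIC_UNVERSIONED # magic number of the legacy, unversioned format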
Class Method Summary
- .generate(context, prompt, n_threads: 1) ⇒ String
  Generates text that follows the given prompt; intended as a simple operation check.
- .mlock_supported? ⇒ Boolean
- .mmap_supported? ⇒ Boolean
- .print_system_info ⇒ Object
- .token_bos ⇒ Object
  Returns the id of the beginning-of-sequence (BOS) token.
- .token_eos ⇒ Object
  Returns the id of the end-of-sequence (EOS) token.
Class Method Details
.generate(context, prompt, n_threads: 1) ⇒ String
Generates text that follows the given prompt; intended as a simple operation check.
# File 'lib/llama_cpp.rb', line 19

def generate(context, prompt, n_threads: 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
  spaced_prompt = " #{prompt}"
  embd_input = context.tokenize(text: spaced_prompt, add_bos: true)

  n_ctx = context.n_ctx
  last_n_tokens = [0] * n_ctx

  embd = []
  n_consumed = 0
  n_keep = 10
  n_past = 0
  n_remain = 128     # generate at most 128 tokens
  repeat_last_n = 64 # window used for the repetition penalty
  output = []

  while n_remain != 0
    unless embd.empty?
      # When the context window would overflow, keep the first n_keep tokens
      # and re-insert half of the remaining history before evaluating.
      if n_past + embd.size > n_ctx
        n_left = n_past - n_keep
        n_past = n_keep
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end

      context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
    end

    n_past += embd.size
    embd.clear

    if embd_input.size <= n_consumed
      # The prompt has been fully consumed: sample the next token, penalizing
      # repeats of the last repeat_last_n tokens.
      start = n_ctx - repeat_last_n
      id = context.sample_top_p_top_k(
        last_n_tokens[start...(start + repeat_last_n)],
        top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1
      )
      last_n_tokens.shift
      last_n_tokens.push(id)
      embd.push(id)
      n_remain -= 1
    else
      # Still feeding the prompt: move its tokens into the evaluation batch.
      while embd_input.size > n_consumed
        embd.push(embd_input[n_consumed])
        last_n_tokens.shift
        last_n_tokens.push(embd_input[n_consumed])
        n_consumed += 1
        break if embd.size >= 512
      end
    end

    embd.each { |token| output << context.token_to_str(token) }

    # Stop early when the model emits the end-of-sequence token.
    break if embd[-1] == LLaMACpp.token_eos
  end

  output.join.delete_prefix(spaced_prompt).strip
end
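As the source shows, the method generates at most 128 tokens (n_remain), stops early when the end-of-sequence token is sampled, and strips the spaced prompt prefix from the returned String. A hedged usage example, with a hypothetical model path and the Context construction from the Overview sketch:

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin',
                                params: LLaMACpp::ContextParams.new)
puts LLaMACpp.generate(context, 'The first man on the moon was', n_threads: 4)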
.mlock_supported? ⇒ Boolean
# File 'ext/llama_cpp/llama_cpp.cpp', line 632

static VALUE rb_llama_mlock_supported(VALUE self) {
  return llama_mlock_supported() ? Qtrue : Qfalse;
}
.mmap_supported? ⇒ Boolean
# File 'ext/llama_cpp/llama_cpp.cpp', line 628

static VALUE rb_llama_mmap_supported(VALUE self) {
  return llama_mmap_supported() ? Qtrue : Qfalse;
}
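Both predicates wrap the corresponding llama.cpp capability checks and take no arguments; a quick sanity check might look like:

puts "mmap supported?:  #{LLaMACpp.mmap_supported?}"
puts "mlock supported?: #{LLaMACpp.mlock_supported?}"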
.print_system_info ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 623

static VALUE rb_llama_print_system_info(VALUE self) {
  const char* result = llama_print_system_info();
  return rb_utf8_str_new_cstr(result);
}
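Despite the name, the method returns the info String rather than printing it; the contents describe the CPU features llama.cpp was built with and vary by build and hardware:

puts LLaMACpp.print_system_info
# e.g. "AVX = 1 | AVX2 = 1 | FMA = 1 | NEON = 0 | ..." (varies by build)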
.token_bos ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 615

static VALUE rb_llama_token_bos(VALUE self) {
  return INT2NUM(llama_token_bos());
}
.token_eos ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 619

static VALUE rb_llama_token_eos(VALUE self) {
  return INT2NUM(llama_token_eos());
}
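Both methods return the integer token id of the sequence marker in the model's vocabulary. As a small check, tokenizing with add_bos: true (as generate does) prepends the BOS id, and generate stops once token_eos is sampled:

tokens = context.tokenize(text: ' Hello', add_bos: true)
tokens.first == LLaMACpp.token_bos # => true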