Module: LLaMACpp
- Defined in:
- lib/llama_cpp.rb,
lib/llama_cpp/version.rb,
ext/llama_cpp/llama_cpp.cpp
Overview
llama_cpp.rb provides Ruby bindings for llama.cpp.
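A minimal usage sketch; the model path is hypothetical, and the Context.new keyword arguments (model_path:, params:) are assumed to match this version of the gem:

require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)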
Constant Summary
- Params = ContextParams
Class alias to match the interface of the whispercpp gem.
- VERSION = '0.0.6'
The installed version of llama_cpp.rb.
- LLAMA_CPP_VERSION = 'master-12b5900'
The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_FTYPE_ALL_F32 = INT2NUM(LLAMA_FTYPE_ALL_F32)
- LLAMA_FTYPE_MOSTLY_F16 = INT2NUM(LLAMA_FTYPE_MOSTLY_F16)
- LLAMA_FTYPE_MOSTLY_Q4_0 = INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0)
- LLAMA_FTYPE_MOSTLY_Q4_1 = INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1)
- LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16)
- LLAMA_FTYPE_MOSTLY_Q4_2 = INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_2)
- LLAMA_FTYPE_MOSTLY_Q4_3 = INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_3)
- LLAMA_FILE_VERSION = rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str())
- LLAMA_FILE_MAGIC = rb_str_new2(ss_magic.str().c_str())
- LLAMA_FILE_MAGIC_UNVERSIONED = rb_str_new2(ss_magic_unversioned.str().c_str())
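The FTYPE constants are plain Integers and the version/file constants plain Strings, so they can be inspected directly or passed as the ftype: argument of .model_quantize; for example:

LLaMACpp::VERSION                  # => '0.0.6'
LLaMACpp::LLAMA_CPP_VERSION        # => 'master-12b5900'
LLaMACpp::LLAMA_FTYPE_MOSTLY_Q4_0  # Integer enum value selecting Q4_0 quantization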
Class Method Summary
- .generate(context, prompt, n_threads: 1) ⇒ String
Generates sentences that follow the given prompt, as an operation check.
- .mlock_supported? ⇒ Boolean
- .mmap_supported? ⇒ Boolean
- .model_quantize(*args) ⇒ Object
Quantizes a model file to the given ftype.
- .print_system_info ⇒ Object
- .token_bos ⇒ Object
- .token_eos ⇒ Object
Class Method Details
.generate(context, prompt, n_threads: 1) ⇒ String
Generates sentences that follow the given prompt, as an operation check.
# File 'lib/llama_cpp.rb', line 19

def generate(context, prompt, n_threads: 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
  spaced_prompt = " #{prompt}"

  embd_input = context.tokenize(text: spaced_prompt, add_bos: true)

  n_ctx = context.n_ctx
  last_n_tokens = [0] * n_ctx

  embd = []
  n_consumed = 0
  n_keep = 10
  n_past = 0
  n_remain = 128
  repeat_last_n = 64
  output = []

  while n_remain != 0
    unless embd.empty?
      if n_past + embd.size > n_ctx
        n_left = n_past - n_keep
        n_past = n_keep
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end

      context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
    end

    n_past += embd.size
    embd.clear

    if embd_input.size <= n_consumed
      start = n_ctx - repeat_last_n
      id = context.sample_top_p_top_k(
        last_n_tokens[start...(start + repeat_last_n)],
        top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1
      )
      last_n_tokens.shift
      last_n_tokens.push(id)
      embd.push(id)
      n_remain -= 1
    else
      while embd_input.size > n_consumed
        embd.push(embd_input[n_consumed])
        last_n_tokens.shift
        last_n_tokens.push(embd_input[n_consumed])
        n_consumed += 1
        break if embd.size >= 512
      end
    end

    embd.each { |token| output << context.token_to_str(token) }

    break if embd[-1] == LLaMACpp.token_eos
  end

  output.join.delete_prefix(spaced_prompt).strip
end
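The same building blocks used by .generate can also be driven directly from a Context. A minimal sketch, assuming context is an already initialized LLaMACpp::Context:

last_n_tokens = Array.new(context.n_ctx, 0)                     # repeat-penalty window, zero-filled
tokens = context.tokenize(text: ' Hello', add_bos: true)        # tokenize the prompt
context.eval(tokens: tokens, n_past: 0, n_threads: 2)           # feed the prompt to the model
tokens.each { |t| last_n_tokens.shift; last_n_tokens.push(t) }  # record the consumed tokens
id = context.sample_top_p_top_k(last_n_tokens.last(64),
                                top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1)
print context.token_to_str(id)                                  # decode the sampled token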
.mlock_supported? ⇒ Boolean
# File 'ext/llama_cpp/llama_cpp.cpp', line 669

static VALUE rb_llama_mlock_supported(VALUE self) {
  return llama_mlock_supported() ? Qtrue : Qfalse;
}
.mmap_supported? ⇒ Boolean
# File 'ext/llama_cpp/llama_cpp.cpp', line 665

static VALUE rb_llama_mmap_supported(VALUE self) {
  return llama_mmap_supported() ? Qtrue : Qfalse;
}
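Both predicates report capabilities of the underlying llama.cpp build. A sketch of how they might be used; the use_mmap/use_mlock setters on ContextParams are an assumption, not confirmed by this page:

params = LLaMACpp::ContextParams.new
params.use_mmap  = LLaMACpp.mmap_supported?   # assumed setter
params.use_mlock = LLaMACpp.mlock_supported?  # assumed setter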
.model_quantize(*args) ⇒ Object
Quantizes a model file to the given ftype.
# File 'ext/llama_cpp/llama_cpp.cpp', line 615

static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
  VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
  rb_get_kwargs(kw_args, kw_table, 3, 1, kw_values);

  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
    rb_raise(rb_eArgError, "input_path must be a string");
    return Qnil;
  }
  if (!RB_TYPE_P(kw_values[1], T_STRING)) {
    rb_raise(rb_eArgError, "output_path must be a string");
    return Qnil;
  }
  if (!RB_INTEGER_TYPE_P(kw_values[2])) {
    rb_raise(rb_eArgError, "ftype must be an integer");
    return Qnil;
  }
  if (kw_values[3] != Qundef && !RB_INTEGER_TYPE_P(kw_values[3])) {
    rb_raise(rb_eArgError, "n_threads must be an integer");
    return Qnil;
  }

  const char* input_path = StringValueCStr(kw_values[0]);
  const char* output_path = StringValueCStr(kw_values[1]);
  const int ftype = NUM2INT(kw_values[2]);
  const int n_threads = kw_values[3] == Qundef ? 1 : NUM2INT(kw_values[3]);

  if (llama_model_quantize(input_path, output_path, (llama_ftype)ftype, n_threads) != 0) {
    rb_raise(rb_eRuntimeError, "Failed to quantize model");
    return Qnil;
  }

  return Qnil;
}
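A usage sketch matching the keyword arguments validated above (the file paths are hypothetical):

LLaMACpp.model_quantize(
  input_path:  '/path/to/ggml-model-f16.bin',
  output_path: '/path/to/ggml-model-q4_0.bin',
  ftype:       LLaMACpp::LLAMA_FTYPE_MOSTLY_Q4_0,
  n_threads:   4  # optional; defaults to 1
)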
.print_system_info ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 660

static VALUE rb_llama_print_system_info(VALUE self) {
  const char* result = llama_print_system_info();
  return rb_utf8_str_new_cstr(result);
}
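Returns a String describing the features enabled in the bundled llama.cpp build (for example, which SIMD instruction sets are available):

puts LLaMACpp.print_system_info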
.token_bos ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 652

static VALUE rb_llama_token_bos(VALUE self) {
  return INT2NUM(llama_token_bos());
}
.token_eos ⇒ Object
# File 'ext/llama_cpp/llama_cpp.cpp', line 656

static VALUE rb_llama_token_eos(VALUE self) {
  return INT2NUM(llama_token_eos());
}
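Both methods return the Integer id of the corresponding special token; .generate (above) stops once the last evaluated token equals .token_eos:

bos = LLaMACpp.token_bos  # beginning-of-sequence token id
eos = LLaMACpp.token_eos  # end-of-sequence token id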