Module: LLaMACpp

Defined in:
lib/llama_cpp.rb,
lib/llama_cpp/version.rb,
ext/llama_cpp/llama_cpp.cpp

Overview

llama_cpp.rb provides Ruby bindings for llama.cpp.

Constant Summary collapse

Params =

Class alias to match the interface of the whispercpp gem.

ContextParams
VERSION =

The installed version of llama_cpp.rb.

'0.0.4'
LLAMA_CPP_VERSION =

The version of llama.cpp bundled with llama_cpp.rb.

'master-c85e03d'
LLAMA_FILE_VERSION =
rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str())
LLAMA_FILE_MAGIC =
rb_str_new2(ss_magic.str().c_str())
LLAMA_FILE_MAGIC_UNVERSIONED =
rb_str_new2(ss_magic_unversioned.str().c_str())

Class Method Summary collapse

Class Method Details

.generate(context, prompt, n_threads: 1) ⇒ String

Generates text following the given prompt, as a simple operation check.

Parameters:

  • context (LLaMACpp::Context)
  • prompt (String)
  • n_threads (Integer) (defaults to: 1)

Returns:

  • (String)


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/llama_cpp.rb', line 19

def generate(context, prompt, n_threads: 1) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
  prompt.insert(0, ' ')

  embd_input = context.tokenize(text: prompt, add_bos: true)

  n_ctx = context.n_ctx
  last_n_tokens = [0] * n_ctx

  embd = []
  n_consumed = 0
  n_keep = 10
  n_past = 0
  n_remain = 128
  repeat_last_n = 64
  output = []

  while n_remain != 0
    unless embd.empty?
      if n_past + embd.size > n_ctx
        n_left = n_past - n_keep
        n_past = n_keep
        embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
      end

      context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
    end

    n_past += embd.size
    embd.clear

    if embd_input.size <= n_consumed
      start = n_ctx - repeat_last_n
      id = context.sample_top_p_top_k(
        last_n_tokens[start...(start + repeat_last_n)], top_k: 40, top_p: 0.95, temp: 0.80, penalty: 1.1
      )
      last_n_tokens.shift
      last_n_tokens.push(id)

      embd.push(id)
      n_remain -= 1
    else
      while embd_input.size > n_consumed
        embd.push(embd_input[n_consumed])
        last_n_tokens.shift
        last_n_tokens.push(embd_input[n_consumed])
        n_consumed += 1
        break if embd.size >= 512
      end
    end

    embd.each { |token| output << context.token_to_str(token) }

    break if embd[-1] == LLaMACpp.token_eos
  end

  output.join.delete_prefix(prompt).strip
end


573
574
575
576
# File 'ext/llama_cpp/llama_cpp.cpp', line 573

// Module function: returns llama_print_system_info()'s human-readable summary
// of compile-time features as a UTF-8 encoded Ruby String.
static VALUE rb_llama_print_system_info(VALUE self) {
  return rb_utf8_str_new_cstr(llama_print_system_info());
}

.token_bosObject

module functions



565
566
567
# File 'ext/llama_cpp/llama_cpp.cpp', line 565

// Module function: returns the beginning-of-sentence (BOS) token id as a Ruby Integer.
static VALUE rb_llama_token_bos(VALUE self) {
  const int bos_id = llama_token_bos();
  return INT2NUM(bos_id);
}

.token_eosObject



569
570
571
# File 'ext/llama_cpp/llama_cpp.cpp', line 569

// Module function: returns the end-of-sentence (EOS) token id as a Ruby Integer.
static VALUE rb_llama_token_eos(VALUE self) {
  const int eos_id = llama_token_eos();
  return INT2NUM(eos_id);
}