Module: Legion::Extensions::Llamacpp::Runners::Completions

Extended by:
Helpers::Client
Includes:
Helpers::Lex
Included in:
Client
Defined in:
lib/legion/extensions/llamacpp/runners/completions.rb

Constant Summary

Constants included from Helpers::Client

Helpers::Client::DEFAULT_HOST

Instance Method Summary collapse

Methods included from Helpers::Client

client, streaming_client

Instance Method Details

#complete(model:, prompt:, temperature: nil, max_tokens: nil, top_p: nil, stream: false, **) ⇒ Object



15
16
17
18
19
20
# File 'lib/legion/extensions/llamacpp/runners/completions.rb', line 15

# Sends a completion request to the `/v1/completions` endpoint and returns the
# response in a summary hash.
#
# Options left as nil are dropped from the request body; `stream` is sent
# whenever it is non-nil, so the default `false` is included.
#
# @param model sent as `model` in the request body
# @param prompt sent as `prompt` in the request body
# @param temperature optional; omitted from the body when nil
# @param max_tokens optional; omitted from the body when nil
# @param top_p optional; omitted from the body when nil
# @param stream sent as `stream` in the request body (defaults to false)
# @param ** any further keyword arguments are forwarded to `client`
# @return [Hash] `{ result:, usage:, status: }` where `result` is the response
#   body, `usage` is whatever `Helpers::Usage.from_response` derives from that
#   body, and `status` is the response status
def complete(model:, prompt:, temperature: nil, max_tokens: nil, top_p: nil, stream: false, **)
  payload = {
    model: model,
    prompt: prompt,
    temperature: temperature,
    max_tokens: max_tokens,
    top_p: top_p,
    stream: stream
  }.reject { |_key, value| value.nil? }

  # The POST runs inside Helpers::Errors.with_retry; its retry policy is defined elsewhere.
  reply = Helpers::Errors.with_retry do
    client(**).post('/v1/completions', payload)
  end
  reply_body = reply.body

  { result: reply_body, usage: Helpers::Usage.from_response(reply_body), status: reply.status }
end

#complete_stream(model:, prompt:, temperature: nil, max_tokens: nil, top_p: nil, **, &block) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/legion/extensions/llamacpp/runners/completions.rb', line 22

# Streams a completion from the `/v1/completions` endpoint, handing text to the
# caller as it arrives.
#
# The request body always carries `stream: true`. Incoming chunks are buffered
# and split into events on a blank line (`"\n\n"`); only events that start with
# `data: ` are handled. The `[DONE]` sentinel is reported to the block as
# `{ type: :done, data: {} }`. Any other payload is parsed as JSON, and when its
# first choice has non-empty `text` that text is appended to the result and
# reported to the block as `{ type: :delta, text: text }`.
#
# @param model sent as `model` in the request body
# @param prompt sent as `prompt` in the request body
# @param temperature optional; omitted from the body when nil
# @param max_tokens optional; omitted from the body when nil
# @param top_p optional; omitted from the body when nil
# @param ** any further keyword arguments are forwarded to `streaming_client`
# @yieldparam event [Hash] a `:delta` or `:done` event as described above
# @return [Hash] `{ result:, usage:, status: 200 }` where `result` is the
#   concatenated delta text and `usage` is built by `Helpers::Usage.from_response`
#   from the last `usage` object seen in the stream (or from nil when none was
#   seen). `status` is a fixed 200; the HTTP response status is not inspected here.
# @note `JSON.parse` failures on a malformed payload are not rescued in this method.
def complete_stream(model:, prompt:, temperature: nil, max_tokens: nil, top_p: nil, **, &block)
  body = { model: model, prompt: prompt, temperature: temperature, max_tokens: max_tokens,
           top_p: top_p, stream: true }.compact
  accumulated = +''
  usage_data = nil
  buffer = +''

  Helpers::Errors.with_retry do
    # Start every attempt from a clean slate. If with_retry re-runs this block after
    # a failed attempt, text and half-received event bytes from that attempt must not
    # leak into the returned result or corrupt the first event of the new stream.
    # Deltas already yielded to the caller's block cannot be taken back.
    accumulated.clear
    buffer.clear
    usage_data = nil

    streaming_client(**).post('/v1/completions', body) do |req|
      req.options.on_data = proc do |chunk, _size|
        buffer << chunk
        # A chunk may hold several events or only part of one, so consume complete
        # events from the front of the buffer and leave the remainder for later.
        while (idx = buffer.index("\n\n"))
          line = buffer.slice!(0, idx + 2).strip
          next if line.empty?
          next unless line.start_with?('data: ')

          payload = line.delete_prefix('data: ')
          if payload == '[DONE]'
            block&.call({ type: :done, data: {} })
            next
          end

          parsed = ::JSON.parse(payload)
          text = parsed.dig('choices', 0, 'text') || ''
          usage_data = parsed['usage'] if parsed.key?('usage')
          unless text.empty?
            accumulated << text
            block&.call({ type: :delta, text: text })
          end
        end
      end
    end
  end

  usage = Helpers::Usage.from_response(usage_data ? { 'usage' => usage_data } : nil)
  { result: accumulated, usage: usage, status: 200 }
end