Class: LlmMetaClient::ServerQuery

Inherits:
Object
Defined in:
lib/llm_meta_client/server_query.rb

Instance Method Summary

Instance Method Details

#call(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {}) ⇒ Object



# File 'lib/llm_meta_client/server_query.rb', line 37

def call(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {})
  debug_log "Context: #{context}"
  context_and_user_content = "Context:#{context}, User Prompt: #{user_content}"
  debug_log "Request to LLM: \n===>\n#{context_and_user_content}\n===>"

  response = request(api_key_uuid, id_token, model_id, context_and_user_content, tool_ids, generation_settings)

  unless response.success?
    raise Exceptions::ServerError, build_error_message(response.code.to_i, response.parsed_response)
  end

  response_body = response.parsed_response

  raise Exceptions::InvalidResponseError, "LLM server returned non-JSON response" unless response_body.is_a?(Hash)

  content = response_body.dig("response", "message") || ""
  tool_calls = response_body.dig("response", "tool_calls")
  content = combine_with_tool_calls(content, tool_calls) if tool_calls.is_a?(Array) && tool_calls.any?

  raise Exceptions::EmptyResponseError, "LLM server returned empty response" if content.blank?

  debug_log "Response from LLM: \n<===\n#{content}\n<==>"

  content
end
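
For orientation, a minimal usage sketch of #call, assuming ServerQuery takes a no-argument constructor and that the Exceptions constants referenced in the source live under LlmMetaClient::Exceptions (inferred from the relative references above); the require path, credentials, and model id below are placeholders, not documented values.

require "llm_meta_client"   # assumed gem entry point

# Placeholder credentials and model id; supplied by the caller in real use.
id_token     = "ID_TOKEN"
api_key_uuid = "API_KEY_UUID"

query = LlmMetaClient::ServerQuery.new   # assumes a no-argument constructor

begin
  content = query.call(
    id_token,
    api_key_uuid,
    "some-model-id",
    "You are a concise assistant.",       # context
    "Summarise the attached ticket.",     # user_content
    tool_ids: [],
    generation_settings: { temperature: 0.2 }
  )
  puts content
rescue LlmMetaClient::Exceptions::ServerError,
       LlmMetaClient::Exceptions::InvalidResponseError,
       LlmMetaClient::Exceptions::EmptyResponseError => e
  warn "LLM request failed: #{e.message}"
end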

#stream(id_token, api_key_uuid, model_id, context, user_content, generation_settings: {}) ⇒ Object

Stream LLM responses incrementally. Yields each content delta event ({ event: "message", data: { "delta" => "…" } }) to the caller's block; unrecognized event types are passed through to the block unchanged. Upstream "done" markers are absorbed (end of stream is signaled by the method returning), and upstream "error" events raise Exceptions::ServerError. Returns the assembled content string. Tool calls are not supported here. A hedged usage sketch follows the source listing below.



# File 'lib/llm_meta_client/server_query.rb', line 12

def stream(id_token, api_key_uuid, model_id, context, user_content, generation_settings: {})
  context_and_user_content = "Context:#{context}, User Prompt: #{user_content}"
  debug_log "Streaming request to LLM: \n===>\n#{context_and_user_content}\n===>"

  body = { prompt: context_and_user_content }
  body[:generation_settings] = generation_settings if generation_settings.present?

  assembled = +""
  request_stream(api_key_uuid, id_token, model_id, body) do |event|
    case event[:event]
    when "message"
      assembled << event[:data]["delta"].to_s
      yield event if block_given?
    when "done"
      # End-of-stream marker from upstream; no-op here.
    when "error"
      raise Exceptions::ServerError, format_stream_error(event[:data])
    else
      yield event if block_given?
    end
  end

  assembled
end
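
Likewise, a minimal streaming sketch under the same assumptions about construction, namespacing, and placeholder values: deltas are printed as they arrive, and the assembled string returned by #stream is kept for later use.

require "llm_meta_client"   # assumed gem entry point

id_token     = "ID_TOKEN"       # placeholder
api_key_uuid = "API_KEY_UUID"   # placeholder

query = LlmMetaClient::ServerQuery.new   # assumes a no-argument constructor

full_text = query.stream(
  id_token,
  api_key_uuid,
  "some-model-id",
  "You are a concise assistant.",   # context
  "Explain server-sent events.",    # user_content
  generation_settings: { temperature: 0.2 }
) do |event|
  # Only "message" events carry a delta; other event types are passed through
  # untouched and can be ignored here.
  print event[:data]["delta"] if event[:event] == "message"
end

puts
puts "Assembled #{full_text.length} characters"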