Class: LlmMetaClient::ServerQuery

Inherits:
Object
  • Object
show all
Defined in:
lib/llm_meta_client/server_query.rb

Instance Method Summary collapse

Instance Method Details

#call(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {}) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/llm_meta_client/server_query.rb', line 62

# Sends one synchronous request to the LLM server and returns the reply text.
#
# The context and user content are merged into a single prompt string and
# handed to `request` together with the credentials, model id, tool ids and
# generation settings.
#
# @param id_token forwarded to `request`
# @param api_key_uuid forwarded to `request`
# @param model_id forwarded to `request`
# @param context interpolated into the prompt after "Context:"
# @param user_content interpolated into the prompt after "User Prompt: "
# @param tool_ids forwarded to `request`
# @param generation_settings forwarded to `request`
# @return the "message" found under "response" in the parsed body, passed
#   through `combine_with_tool_calls` when "tool_calls" is an Array for which
#   `any?` is true
# @raise [Exceptions::ServerError] when the response is not successful
# @raise [Exceptions::InvalidResponseError] when the parsed body is not a Hash
# @raise [Exceptions::EmptyResponseError] when the resulting content is blank
def call(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {})
  debug_log "Context: #{context}"
  prompt = "Context:#{context}, User Prompt: #{user_content}"
  debug_log "Request to LLM: \n===>\n#{prompt}\n===>"

  reply = request(api_key_uuid, id_token, model_id, prompt, tool_ids, generation_settings)

  # Failure is reported with the numeric status and whatever body came back.
  if reply.success?
    payload = reply.parsed_response
  else
    raise Exceptions::ServerError, build_error_message(reply.code.to_i, reply.parsed_response)
  end

  unless payload.is_a?(Hash)
    raise Exceptions::InvalidResponseError, "LLM server returned non-JSON response"
  end

  text = payload.dig("response", "message") || ""
  invoked_tools = payload.dig("response", "tool_calls")
  if invoked_tools.is_a?(Array) && invoked_tools.any?
    text = combine_with_tool_calls(text, invoked_tools)
  end

  if text.blank?
    raise Exceptions::EmptyResponseError, "LLM server returned empty response"
  end

  debug_log "Response from LLM: \n<===\n#{text}\n<==>"
  text
end

#stream(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {}, image_context: nil, image: nil, images: nil) ⇒ Object

Stream LLM responses incrementally. Yields each content delta event ({ event: "message", data: { "delta" => "…" } }) and any tool_calls event ({ event: "tool_calls", data: { "tool_calls" => […] } }) to the caller’s block. “thinking” events and any unrecognized event types are also yielded to the block, but they are not folded into the returned content. Upstream “done” markers are absorbed (end-of-stream is signaled by #stream itself returning); upstream “error” events raise ServerError. Returns the final assistant content. If tool calls fired, the returned string mirrors the synchronous #call format (response + markdown “Tool calls” section appended) so persistence stays consistent.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/llm_meta_client/server_query.rb', line 15

# Streams an LLM reply, forwarding events to the caller's block as they arrive.
#
# The request body is built from either an image conversation (when
# `image_context` is present) or the combined context/user prompt, then
# handed to `request_stream`. Each upstream event is dispatched on its
# `:event` value:
#
# * "message"    - the "delta" is appended to the returned text, then yielded
# * "tool_calls" - the "tool_calls" value (or []) replaces any earlier one,
#                  then yielded
# * "done"       - swallowed, never yielded
# * "error"      - raises with the text produced by `format_stream_error`
# * anything else (including "thinking") - yielded, not added to the text
#
# @param id_token forwarded to `request_stream`
# @param api_key_uuid forwarded to `request_stream`
# @param model_id forwarded to `request_stream`
# @param context used in the prompt only when `image_context` is not present
# @param user_content prompt source; when it is a Hash and `image_context` is
#   present, its :prompt (or "prompt") entry is used
# @param tool_ids added to the body when present
# @param generation_settings added to the body when present
# @param image_context added to the body when present
# @param image legacy single image, sent only when `images` is not present
# @param images sent as-is when present; takes precedence over `image`
# @yield [event] each forwarded event, when a block is given
# @return the accumulated message text, passed through
#   `combine_with_tool_calls` when tool calls were collected
# @raise [Exceptions::ServerError] when an "error" event is received
def stream(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {}, image_context: nil, image: nil, images: nil)
  payload =
    if image_context.present?
      prompt = if user_content.is_a?(Hash)
                 (user_content[:prompt] || user_content["prompt"]).to_s
               else
                 user_content.to_s
               end
      debug_log "Streaming image request to LLM: \n===>\n#{prompt}\n(with #{image_context.size} prior turn(s))\n===>"
      { prompt: prompt, image_context: image_context }
    else
      prompt = "Context:#{context}, User Prompt: #{user_content}"
      debug_log "Streaming request to LLM: \n===>\n#{prompt}\n===>"
      { prompt: prompt }
    end

  payload[:tool_ids] = tool_ids if tool_ids.present?
  payload[:generation_settings] = generation_settings if generation_settings.present?

  # `images` wins over the legacy single `image`; at most one of the two keys
  # is ever sent.
  if images.present?
    payload[:images] = images
  elsif image.present?
    payload[:image] = image
  end

  text = +""
  tool_calls = []

  request_stream(api_key_uuid, id_token, model_id, payload) do |event|
    kind = event[:event]

    # End-of-stream marker: nothing to record, nothing to forward.
    next if kind == "done"
    raise Exceptions::ServerError, format_stream_error(event[:data]) if kind == "error"

    # Only message deltas and tool calls are recorded; every remaining event
    # type (thinking deltas included) is forwarded without being stored.
    if kind == "message"
      text << event[:data]["delta"].to_s
    elsif kind == "tool_calls"
      tool_calls = event[:data]["tool_calls"] || []
    end

    yield event if block_given?
  end

  return text unless tool_calls.any?

  combine_with_tool_calls(text, tool_calls)
end