Module: RubyLLM::RedCandle::Chat

Included in:
Provider
Defined in:
lib/ruby_llm/red_candle/chat.rb

Overview

Chat implementation for Red Candle provider

Instance Method Summary

Instance Method Details

#complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block) ⇒ Object

Overrides the base `complete` method so that requests are executed locally.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/ruby_llm/red_candle/chat.rb', line 8

# Entry point for a chat completion that is executed locally.
#
# Renders the request payload, merges caller-supplied +params+ over it, and then
# either streams (when a block is given) or performs a single-shot completion.
#
# @param messages [Object] conversation messages, passed through to render_payload
# @param tools [Object] tool definitions, passed through to render_payload
# @param temperature [Object] sampling temperature, passed through to render_payload
# @param model [#id] model descriptor; its +id+ is used for the payload and the reply
# @param params [Hash] extra values deep-merged over the rendered payload
# @param headers [Hash] accepted for interface compatibility only
# @param schema [Object, nil] optional schema, passed through to render_payload
# @param tool_prefs [Object, nil] accepted for interface compatibility only
# @param thinking [Object, nil] accepted for interface compatibility only
# @yield chunks produced by perform_streaming_completion! when streaming
# @return [Object] the result of perform_streaming_completion! when a block is
#   given; otherwise a RubyLLM::Message
def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block)
  # Referenced only so the signature stays interface-compatible; none of these
  # influence the request.
  _ = headers, tool_prefs, thinking

  rendered = render_payload(
    messages,
    tools: tools,
    temperature: temperature,
    model: model,
    stream: block_given?,
    schema: schema
  )
  payload = RubyLLM::Utils.deep_merge(rendered, params)

  return perform_streaming_completion!(payload, &block) if block_given?

  outcome = perform_completion!(payload)

  # The tool-calling path already hands back a finished Message.
  return outcome if outcome.is_a?(RubyLLM::Message)

  # Otherwise the outcome is a hash; wrap it, estimating token usage
  # (output side: roughly one token per four characters).
  text = outcome[:content]
  output_estimate = (text.to_s.length / 4.0).round
  input_estimate = estimate_input_tokens(payload[:messages])

  RubyLLM::Message.new(
    role: outcome[:role].to_sym,
    content: text,
    model_id: model.id,
    input_tokens: input_estimate,
    output_tokens: output_estimate
  )
end

#perform_completion!(payload) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/ruby_llm/red_candle/chat.rb', line 63

# Runs a non-streaming completion for an already-rendered payload.
#
# Loads the model named by payload[:model], formats payload[:messages], and then
# follows one of three paths: tool calling (payload[:tools] present and
# non-empty), schema-guided generation (payload[:schema] set), or plain prompt
# generation.
#
# @param payload [Hash] request payload; reads :model, :messages, :tools and :schema
# @return [Object] the result of perform_tool_completion! on the tool path,
#   otherwise the result of format_response
def perform_completion!(payload)
  model_id = payload[:model]
  schema = payload[:schema]
  requested_tools = payload[:tools]

  loaded_model = ensure_model_loaded!(model_id)
  chat_messages = format_messages(payload[:messages])

  # Tool calling takes precedence over every other generation mode.
  return perform_tool_completion!(loaded_model, chat_messages, payload) if requested_tools && !requested_tools.empty?

  if schema
    # Structured generation builds its own prompt: the JSON instructions have
    # to be in place BEFORE the chat template is applied.
    raw = generate_with_schema(loaded_model, chat_messages, schema, payload)
  else
    rendered_prompt = build_prompt(loaded_model, chat_messages)
    validate_context_length!(rendered_prompt, model_id)
    generation_config = build_generation_config(payload)
    raw = generate_with_error_handling(loaded_model, rendered_prompt, generation_config, model_id)
  end

  format_response(raw, schema)
end

#perform_streaming_completion!(payload, &block) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/ruby_llm/red_candle/chat.rb', line 86

# Streams a completion token by token and returns the assembled message.
#
# Each token from stream_with_error_handling is wrapped with
# format_stream_chunk and handed to the block as it arrives. Once the stream
# ends, one more chunk with empty content is emitted to signal completion.
#
# @param payload [Hash] request payload; reads :model and :messages directly and
#   passes the whole hash to build_generation_config
# @yield [chunk] every formatted chunk, followed by a final empty-content chunk
# @return [RubyLLM::Message] assistant message holding the concatenated tokens,
#   with estimated token counts (output side: roughly four characters per token)
def perform_streaming_completion!(payload, &block)
  model = ensure_model_loaded!(payload[:model])
  messages = format_messages(payload[:messages])

  prompt = build_prompt(model, messages)
  validate_context_length!(prompt, payload[:model])
  config = build_generation_config(payload)

  # Unary plus gives a mutable buffer even when string literals are frozen, so
  # tokens are appended in place rather than reallocating the whole string with
  # += on every token.
  full_content = +""

  # Stream tokens with error handling
  stream_with_error_handling(model, prompt, config, payload[:model]) do |token|
    full_content << token
    block.call(format_stream_chunk(token))
  end

  # Send final chunk with empty content (indicates completion)
  block.call(format_stream_chunk(""))

  # Return a Message object with the complete response
  estimated_output_tokens = (full_content.length / 4.0).round
  estimated_input_tokens = estimate_input_tokens(payload[:messages])

  RubyLLM::Message.new(
    role: :assistant,
    content: full_content,
    model_id: payload[:model],
    input_tokens: estimated_input_tokens,
    output_tokens: estimated_output_tokens
  )
end

#render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/ruby_llm/red_candle/chat.rb', line 47

# Assembles the request payload hash for a completion.
#
# The :tools key is only included when +tools+ is present and non-empty.
# +tool_prefs+ and +thinking+ are accepted but not read by this method.
#
# @param messages [Object] stored under :messages as given
# @param tools [Object, nil] stored under :tools when non-empty
# @param temperature [Object] stored under :temperature as given
# @param model [#id] only its +id+ is stored, under :model
# @param stream [Object] stored under :stream as given
# @param schema [Object] stored under :schema as given
# @return [Hash] the payload
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil)
  base = { messages: messages, temperature: temperature, model: model.id, stream: stream, schema: schema }
  return base if !tools || tools.empty?

  base[:tools] = tools
  base
end