Module: RubyLLM::RedCandle::Chat

Included in:: Provider

Defined in:: lib/ruby_llm/red_candle/chat.rb

Overview

Chat implementation for the Red Candle provider.

Instance Method Summary collapse

#complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block) ⇒ Object

Override the base complete method to handle local execution.
#perform_completion!(payload) ⇒ Object
#perform_streaming_completion!(payload, &block) ⇒ Object
#render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil) ⇒ Object

Instance Method Details

#complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block) ⇒ `Object`

Override the base complete method to handle local execution.

# File 'lib/ruby_llm/red_candle/chat.rb', line 8

# Runs a chat completion locally instead of going through the base
# implementation.
#
# The payload is produced by +render_payload+ and then deep-merged with
# +params+. When a block is given the streaming path is taken; otherwise the
# non-streaming result is returned as a RubyLLM::Message.
#
# @param messages [Object] conversation messages, forwarded to +render_payload+
# @param tools [Object, nil] tool definitions, forwarded to +render_payload+
# @param temperature [Object] sampling temperature, forwarded to +render_payload+
# @param model [#id] model descriptor; its +id+ is used as the message's model_id
# @param params [Hash] extra entries deep-merged over the rendered payload
# @param headers [Hash] accepted for interface compatibility; unused
# @param schema [Object, nil] structured-output schema, forwarded to +render_payload+
# @param tool_prefs [Object, nil] accepted for interface compatibility; unused
# @param thinking [Object, nil] accepted for interface compatibility; unused
# @yield forwarded to +perform_streaming_completion!+ when present
# @return [Object] the streaming method's return value when a block is given,
#   otherwise a RubyLLM::Message
def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block)
  # These exist only so the signature matches the provider interface.
  _ = headers
  _ = tool_prefs
  _ = thinking

  streaming = block_given?
  rendered = render_payload(
    messages,
    tools: tools, temperature: temperature, model: model, stream: streaming, schema: schema
  )
  payload = RubyLLM::Utils.deep_merge(rendered, params)

  return perform_streaming_completion!(payload, &block) if streaming

  outcome = perform_completion!(payload)

  # The tool-calling path hands back a finished Message; pass it through.
  return outcome if outcome.is_a?(RubyLLM::Message)

  # Otherwise the outcome is a hash that still has to be wrapped.
  text = outcome[:content]
  # Token counts are estimates: output is character count divided by four.
  output_estimate = (text.to_s.length / 4.0).round
  input_estimate = estimate_input_tokens(payload[:messages])

  RubyLLM::Message.new(
    role: outcome[:role].to_sym,
    content: text,
    model_id: model.id,
    input_tokens: input_estimate,
    output_tokens: output_estimate
  )
end

#perform_completion!(payload) ⇒ `Object`

# File 'lib/ruby_llm/red_candle/chat.rb', line 63

# Executes a non-streaming completion for an already-rendered payload.
#
# Three paths, checked in this order:
# 1. a non-empty +payload[:tools]+ delegates to +perform_tool_completion!+
#    and returns its result unchanged;
# 2. a truthy +payload[:schema]+ generates via +generate_with_schema+;
# 3. otherwise the prompt is built, its context length validated, and plain
#    generation is run.
# Paths 2 and 3 pass their output through +format_response+.
#
# @param payload [Hash] reads +:model+, +:messages+, +:tools+ and +:schema+;
#   the whole hash is also handed to the helpers that need it
# @return [Object] the result of +perform_tool_completion!+ or +format_response+
def perform_completion!(payload)
  model_id = payload[:model]
  loaded_model = ensure_model_loaded!(model_id)
  formatted = format_messages(payload[:messages])

  tools = payload[:tools]
  return perform_tool_completion!(loaded_model, formatted, payload) if tools && !tools.empty?

  schema = payload[:schema]
  if schema
    # Structured output adds its JSON instructions before the chat template
    # is applied, so it builds its own prompt instead of using build_prompt.
    raw = generate_with_schema(loaded_model, formatted, schema, payload)
  else
    prompt = build_prompt(loaded_model, formatted)
    validate_context_length!(prompt, model_id)
    generation_config = build_generation_config(payload)
    raw = generate_with_error_handling(loaded_model, prompt, generation_config, model_id)
  end

  format_response(raw, schema)
end

#perform_streaming_completion!(payload, &block) ⇒ `Object`

# File 'lib/ruby_llm/red_candle/chat.rb', line 86

# Streams a completion token by token and returns the assembled message.
#
# Each token produced by +stream_with_error_handling+ is wrapped with
# +format_stream_chunk+ and passed to the block. After the stream ends, one
# more chunk built from an empty string is passed to mark completion.
#
# @param payload [Hash] reads +:model+ and +:messages+; the whole hash is
#   handed to +build_generation_config+
# @yieldparam chunk [Object] the value returned by +format_stream_chunk+
# @return [RubyLLM::Message] assistant message holding the concatenated
#   tokens, with estimated input and output token counts
def perform_streaming_completion!(payload, &block)
  model = ensure_model_loaded!(payload[:model])
  messages = format_messages(payload[:messages])

  prompt = build_prompt(model, messages)
  validate_context_length!(prompt, payload[:model])
  config = build_generation_config(payload)

  # Unary plus yields an unfrozen buffer even under frozen_string_literal,
  # so tokens can be appended in place rather than re-allocating the whole
  # accumulated string on every token.
  full_content = +""

  # Stream tokens with error handling
  stream_with_error_handling(model, prompt, config, payload[:model]) do |token|
    full_content << token
    block.call(format_stream_chunk(token))
  end

  # Send final chunk with empty content (indicates completion)
  block.call(format_stream_chunk(""))

  # Token counts are estimates: output is character count divided by four.
  estimated_output_tokens = (full_content.length / 4.0).round
  estimated_input_tokens = estimate_input_tokens(payload[:messages])

  RubyLLM::Message.new(
    role: :assistant,
    content: full_content,
    model_id: payload[:model],
    input_tokens: estimated_input_tokens,
    output_tokens: estimated_output_tokens
  )
end

#render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil) ⇒ `Object`

# File 'lib/ruby_llm/red_candle/chat.rb', line 47

# Assembles the payload hash consumed by the completion methods.
#
# The hash always carries +:messages+, +:temperature+, +:model+ (the model's
# id), +:stream+ and +:schema+. A +:tools+ entry is added only when +tools+
# is present and non-empty.
#
# @param messages [Object] stored under +:messages+ as given
# @param tools [Object, nil] stored under +:tools+ unless nil/false or empty
# @param temperature [Object] stored under +:temperature+
# @param model [#id] only its +id+ is stored, under +:model+
# @param stream [Object] stored under +:stream+
# @param schema [Object, nil] stored under +:schema+
# @param tool_prefs [Object, nil] accepted but not used by this method
# @param thinking [Object, nil] accepted but not used by this method
# @return [Hash] the rendered payload
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil)
  base = { messages: messages, temperature: temperature, model: model.id, stream: stream, schema: schema }
  include_tools = tools && !tools.empty?

  include_tools ? base.merge(tools: tools) : base
end

Module: RubyLLM::RedCandle::Chat

Overview

Instance Method Summary collapse

Instance Method Details

#complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block) ⇒ Object

#perform_completion!(payload) ⇒ Object

#perform_streaming_completion!(payload, &block) ⇒ Object

#render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil) ⇒ Object

#complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block) ⇒ `Object`

#perform_completion!(payload) ⇒ `Object`

#perform_streaming_completion!(payload, &block) ⇒ `Object`

#render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil) ⇒ `Object`