Class: Legion::Extensions::Llm::Vllm::Translator

Inherits:

Object

Object
Legion::Extensions::Llm::Vllm::Translator

show all

Includes: Logging::Helper

Defined in: lib/legion/extensions/llm/vllm/translator.rb

Overview

Canonical provider translator for vLLM (OpenAI-compatible wire format).

Implements render_request, parse_response, parse_chunk, and capabilities. Extracted from the existing format_openai_*/parse_* methods in the OpenAICompatible mixin and the vLLM-specific render_payload override in Provider.

vLLM quirks (declared in capabilities):

tool_calls_as_text: true — some model configurations output tool calls as JSON text in the content field rather than structured tool_calls.
forced_tool_choice: true — vLLM’s tool_choice handling is strict; named tool choices must be explicit function references.
thinking_tags: ['think', 'thinking'] — Qwen-style models emit reasoning in <think> or <thinking> tags within content text.

rubocop:disable Metrics/ClassLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity – translator implementation

Constant Summary collapse

VLLM_STOP_REASON_MAP = vLLM-specific stop_reason mapping (per conformance fixture stop_reason_matrix).

{
  'stop' => :end_turn,
  # NOTE(review): OpenAI-compatible servers commonly report 'tool_calls' as the
  # finish_reason for tool invocations, and that value is not listed here.
  # Confirm map_stop_reason handles it (or that the fallback is intended).
  'tool_use' => :tool_use,
  'length' => :max_tokens
}.freeze

FALLBACK_STOP_REASON =

:end_turn

SUPPORTED_PARAMS = G18 parameter mapping: supported canonical params.

# Lists the same nine names that appear as the keys of PARAM_WIRE_KEYS;
# keep the two constants in sync when adding or removing a parameter.
%i[
  max_tokens temperature top_p top_k stop_sequences
  seed frequency_penalty presence_penalty response_format
].freeze

PARAM_WIRE_KEYS = vLLM wire keys for supported params (most are 1:1 with canonical names).

{
  max_tokens: :max_tokens,
  temperature: :temperature,
  top_p: :top_p,
  top_k: :top_k,
  # The only renamed entry: canonical stop_sequences maps to the :stop wire key.
  stop_sequences: :stop,
  seed: :seed,
  frequency_penalty: :frequency_penalty,
  presence_penalty: :presence_penalty,
  response_format: :response_format
}.freeze

Instance Method Summary collapse

#capabilities ⇒ Object

Declared capabilities for the vLLM provider.
#initialize(config: nil) ⇒ Translator constructor

A new instance of Translator.
#parse_chunk(raw) ⇒ Object

Parse a single SSE chunk into a Canonical::Chunk or nil.
#parse_response(wire) ⇒ Object

Parse a vLLM/OpenAI-compatible completion response into a Canonical::Response.
#render_request(request) ⇒ Object

Render a canonical request into an OpenAI-compatible wire payload for vLLM.

Constructor Details

#initialize(config: nil) ⇒ `Translator`

Returns a new instance of Translator.



56
57
58

# File 'lib/legion/extensions/llm/vllm/translator.rb', line 56

# Builds a translator and keeps the supplied configuration for later use.
#
# @param config [Object, nil] optional configuration; stored unchanged in
#   @config and not inspected by the constructor
def initialize(config: nil)
  @config = config
end

Instance Method Details

#capabilities ⇒ `Object`

Declared capabilities for the vLLM provider.

# File 'lib/legion/extensions/llm/vllm/translator.rb', line 211

# Describes what the vLLM provider supports and which quirks it exhibits.
#
# A new hash is assembled on every call and frozen before it is returned.
# The freeze is shallow: nested values such as the thinking_tags array are
# not frozen by this method.
#
# @return [Hash{Symbol => Object}] frozen capability declaration
def capabilities
  quirks = {
    tool_calls_as_text: true,
    forced_tool_choice: true,
    thinking_tags: %w[think thinking]
  }

  { provider: 'vllm', wire_format: 'openai_compatible' }
    .merge(quirks)
    .merge(stop_reason_map: VLLM_STOP_REASON_MAP, streaming_token_usage: true)
    .freeze
end

#parse_chunk(raw) ⇒ `Object`

Parse a single SSE chunk into a Canonical::Chunk or nil.

# File 'lib/legion/extensions/llm/vllm/translator.rb', line 134

# Parse a single SSE chunk into a Canonical::Chunk, or nil when the chunk
# carries nothing to emit.
#
# Branches are tried in this order, and the first match wins:
# 1. nil, blank, or the "[DONE]" terminator           -> nil
# 2. payload that is not a JSON object                -> nil
# 3. canonical-form chunk (has a 'type' key)          -> handle_canonical_chunk
# 4. wire error (has an 'error' key)                  -> error chunk
# 5. no choice but a 'usage' key                      -> build_done_chunk
# 6. finish_reason with an empty delta                -> done chunk
# 7. delta['tool_calls'] present                      -> tool-call delta
# 8. delta['reasoning_content'] / delta['reasoning']  -> thinking delta
# 9. delta['content'] present                         -> text delta
#
# @param raw [String, Hash, nil] raw SSE data payload, or an already-decoded hash
# @return [Object, nil] the chunk produced by the matching branch, or nil
# @raise [StandardError] any failure other than Legion::JSON::ParseError is
#   reported through handle_exception and re-raised
def parse_chunk(raw)
  return nil if raw.nil?

  if raw.is_a?(String)
    trimmed = raw.strip
    # Accept the stream terminator even when it carries trailing whitespace
    # or a newline, which a line-based SSE reader can leave in place.
    return nil if trimmed.empty? || trimmed == '[DONE]'
  end

  data = raw.is_a?(Hash) ? raw : parse_json_safely(raw)
  # Valid JSON that is not an object (array, number, string) has no chunk
  # shape; skip it rather than failing on the string-key lookups below.
  return nil unless data.is_a?(Hash)

  # Handle canonical-form chunks (from conformance fixtures or other translators)
  return handle_canonical_chunk(data) if data['type']

  if data['error']
    return Canonical::Chunk.error_chunk(
      error: data['error'],
      request_id: data['id']
    )
  end

  choice = Array(data['choices']).first
  return build_done_chunk(data) if choice.nil? && data['usage']
  return nil unless choice

  delta = choice['delta'] || {}
  finish_reason = choice['finish_reason']
  request_id = data['request_id'] || data['id']

  if finish_reason && empty_delta?(delta)
    return Canonical::Chunk.done(
      request_id: request_id,
      usage: Canonical::Usage.from_hash(data['usage']),
      stop_reason: map_stop_reason(finish_reason)
    )
  end

  tool_calls = delta['tool_calls']
  unless Array(tool_calls).empty?
    # Only the first entry of the delta's tool_calls list is translated.
    first_call = tool_calls.first
    # `|| {}` also covers a 'function' key that is present but null, which
    # Hash#fetch with a default would pass through as nil.
    function = first_call['function'] || {}

    tc = Canonical::ToolCall.build(
      # Fall back to the function name, then a fixed placeholder, when the
      # delta carries no call id.
      id: (first_call['id'] || function['name'] || 'synthesized').to_s,
      name: function['name'].to_s,
      arguments: parse_tool_arguments(function['arguments']),
      source: :client
    )

    return Canonical::Chunk.tool_call_delta(
      tool_call: tc,
      request_id: request_id,
      block_index: first_call['index']
    )
  end

  # Thinking delta from reasoning_content
  reasoning_content = delta['reasoning_content'] || delta['reasoning']
  unless reasoning_content.to_s.empty?
    return Canonical::Chunk.thinking_delta(
      delta: reasoning_content,
      request_id: request_id,
      block_index: delta.dig('content_block', 'index'),
      item_id: delta['content_block_start']&.dig('id')
    )
  end

  # Text delta — check for embedded think tags
  content = delta['content']
  return parse_text_delta_with_thinking(content, request_id, data) unless content.to_s.empty?

  nil
rescue Legion::JSON::ParseError => e
  # Malformed JSON is treated as a skippable chunk: log at warn and drop it.
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.translator.parse_chunk')
  nil
rescue StandardError => e
  handle_exception(e, level: :error, handled: false, operation: 'vllm.translator.parse_chunk')
  raise
end

#parse_response(wire) ⇒ `Object`

Parse a vLLM/OpenAI-compatible completion response into a Canonical::Response.

# File 'lib/legion/extensions/llm/vllm/translator.rb', line 88

# Translate a vLLM/OpenAI-compatible completion payload into a Canonical::Response.
#
# Non-hash input goes to canonical_error_response. A payload that
# canonical_response? recognises is handed to Canonical::Response.from_hash
# unchanged. Otherwise the first choice's message is unpacked: thinking is
# split from the content by Responses::ThinkingExtractor, structured tool
# calls are parsed, and — only when none were found — tool calls are
# synthesised from the extracted content.
#
# @param wire [Hash, Object] decoded response body
# @return [Object] the canonical response, or the result of
#   canonical_error_response for non-hash input or a Legion::JSON::ParseError
# @raise [StandardError] any other failure is reported through
#   handle_exception and re-raised
def parse_response(wire)
  return canonical_error_response(wire) unless wire.is_a?(Hash)
  # Payloads already in canonical shape (conformance kit) skip translation.
  return Canonical::Response.from_hash(wire) if canonical_response?(wire)

  first_choice = Array(wire['choices']).first || {}
  msg = first_choice['message'] || {}

  meta = extract_thinking_metadata(msg)
  extracted = Responses::ThinkingExtractor.extract(msg['content'] || '', metadata: meta)
  body_text = extracted.content || ''
  reasoning = build_canonical_thinking(extracted)

  calls = parse_tool_calls(msg['tool_calls'])
  if calls.empty?
    # vLLM quirk (tool_calls_as_text): fall back to calls embedded in the content.
    fallback_calls = synthesize_tool_calls_from_content(extracted.content, msg)
    calls.concat(fallback_calls) unless fallback_calls.empty?
  end

  mapped_stop = map_stop_reason(first_choice['finish_reason'])

  Canonical::Response.build(
    text: body_text.to_s,
    thinking: reasoning,
    tool_calls: calls,
    usage: Canonical::Usage.from_hash(wire['usage'] || {}),
    stop_reason: mapped_stop,
    model: wire['model'],
    metadata: wire_metadata(wire, msg, meta)
  )
rescue Legion::JSON::ParseError => e
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.translator.parse_response')
  canonical_error_response(wire)
rescue StandardError => e
  handle_exception(e, level: :error, handled: false, operation: 'vllm.translator.parse_response')
  raise
end

#render_request(request) ⇒ `Object`

Render a canonical request into an OpenAI-compatible wire payload for vLLM.

# File 'lib/legion/extensions/llm/vllm/translator.rb', line 61

# Render a canonical request into an OpenAI-compatible wire payload for vLLM.
#
# The payload always carries :model, :messages and :stream. :tools,
# :tool_choice, mapped params and :response_format are added only when the
# request supplies them. :response_format is assigned after the params are
# merged, so the formatted value replaces anything map_params_to_wire
# produced under that key.
#
# @param request [Object] canonical request; read via #metadata, #tools,
#   #tool_choice, #params and #stream
# @return [Hash{Symbol => Object}] the wire payload
def render_request(request)
  # Model comes from request metadata; 'default' is used when none is set.
  model = request.metadata&.dig(:model) || 'default'
  messages = format_messages(request)
  payload = {
    model: model,
    messages: messages,
    stream: request.stream
  }

  tools = request.tools
  # Ask the collection itself whether it is empty when it can answer. The
  # previous unconditional `to_h` raised TypeError for an Array of tool
  # hashes; it is kept as the fallback for nil and other convertible objects.
  tools_present = tools.respond_to?(:empty?) ? !tools.empty? : !tools.to_h.empty?
  payload[:tools] = format_tools(tools) if tools_present
  payload[:tool_choice] = format_tool_choice(request.tool_choice) if request.tool_choice
  payload.merge!(map_params_to_wire(request.params)) if request.params
  apply_thinking_config(payload, request)
  if formatted_response_format?(request.params)
    payload[:response_format] =
      format_response_format(request.params)
  end

  log.debug do
    "vLLM translator rendered request model=#{model} stream=#{request.stream} " \
      "messages=#{messages.size} tools=#{request.tools&.size || 0} params=#{payload.keys.size}"
  end

  payload
end

Class: Legion::Extensions::Llm::Vllm::Translator

Overview

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config: nil) ⇒ Translator

Instance Method Details

#capabilities ⇒ Object

#parse_chunk(raw) ⇒ Object

#parse_response(wire) ⇒ Object

#render_request(request) ⇒ Object

#initialize(config: nil) ⇒ `Translator`

#capabilities ⇒ `Object`

#parse_chunk(raw) ⇒ `Object`

#parse_response(wire) ⇒ `Object`

#render_request(request) ⇒ `Object`