Class: Legion::Extensions::Llm::Vllm::Provider

Inherits:

Provider

Object
Provider
Legion::Extensions::Llm::Vllm::Provider

show all

Includes: Provider::OpenAICompatible, Logging::Helper

Defined in: lib/legion/extensions/llm/vllm/provider.rb

Overview

vLLM provider implementation for the Legion::Extensions::Llm base provider contract.

Defined Under Namespace

Modules: Capabilities

Instance Method Summary collapse

#api_base ⇒ Object
#discover_offerings(live: false, **filters) ⇒ Object
#fetch_model_detail(model_name) ⇒ Object
#headers ⇒ Object
#health(live: false) ⇒ Object
#health_url ⇒ Object
#list_models(live: false, **filters) ⇒ Object
#readiness(live: false) ⇒ Object
#reset_mm_cache ⇒ Object
#reset_mm_cache_url ⇒ Object
#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
#reset_prefix_cache_url ⇒ Object
#settings ⇒ Object
#sleep(level: 1) ⇒ Object
#sleep_url ⇒ Object
#stream_usage_supported? ⇒ Boolean
#translator ⇒ Object

Canonical translator instance — renders requests, parses responses/chunks.
#version ⇒ Object
#version_url ⇒ Object
#wake_up(tags: nil) ⇒ Object
#wake_up_url ⇒ Object

Class Method Details

.capabilities ⇒ `Object`

23

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 23

def capabilities = Capabilities

.configuration_options ⇒ `Object`

21

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 21

def configuration_options = %i[vllm_api_base vllm_api_key]

.configuration_requirements ⇒ `Object`

22

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 22

def configuration_requirements = []

.default_tier ⇒ `Object`

20

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 20

def default_tier = :direct

.default_transport ⇒ `Object`

19

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 19

def default_transport = :http

.local? ⇒ `Boolean`

Returns:

(Boolean)

18

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 18

def local? = false

.registry_publisher ⇒ `Object`



25
26
27

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 25

# Class-level accessor for the registry publisher.
#
# Pure delegation: returns whatever `Vllm.registry_publisher` returns.
#
# @return [Object] the publisher exposed by the `Vllm` namespace
def registry_publisher = Vllm.registry_publisher

.slug ⇒ `Object`

17

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 17

def slug = 'vllm'

Instance Method Details

#api_base ⇒ `Object`



61
62
63

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 61

# Resolves the base URL used for requests to the vLLM server.
#
# Candidates are tried in order: `config.vllm_api_base`, then
# `settings[:endpoint]`, then the literal fallback `http://localhost:8000`.
# The first non-nil/non-false candidate is handed to `normalize_url`.
#
# @return [Object] the result of `normalize_url` for the chosen base
def api_base
  chosen = config.vllm_api_base || settings[:endpoint] || 'http://localhost:8000'
  normalize_url(chosen)
end

#discover_offerings(live: false, **filters) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 98

# Returns the provider's offerings, either from the in-memory cache or by
# running a live discovery.
#
# * `live` falsy: the cached offerings (an empty list when nothing has been
#   cached yet) are passed through `filter_cached_offerings` with `filters`.
# * `live` truthy: `health(live:)` is queried, its result is forwarded to
#   `discover_live_offerings`, the outcome replaces the cache, is reported
#   via `log_discover_complete`, and is returned.
#
# Any StandardError raised along the way is reported through
# `handle_exception` (warn level, marked handled) and an empty array is
# returned instead; the cache is left as it was.
#
# @param live [Boolean] whether to perform a live discovery
# @param filters [Hash] extra keyword filters forwarded to the helpers
# @return [Object] the helper's result, or `[]` after a handled error
def discover_offerings(live: false, **filters)
  if live
    health_snapshot = health(live:)
    @cached_offerings = discover_live_offerings(filters, health_snapshot, live:)
    log_discover_complete(@cached_offerings)
    @cached_offerings
  else
    filter_cached_offerings(Array(@cached_offerings), filters)
  end
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
  []
end

#fetch_model_detail(model_name) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 139

# Looks up a single model in the models listing and reports its context
# window.
#
# vLLM provides the context length via the models endpoint during
# discovery; this re-fetches the listing when the value is needed outside
# discovery. The entry whose `id` equals `model_name.to_s` is selected and
# its `max_model_len` is returned as `:context_window`.
#
# The request goes through the `connection` accessor, like every other
# request issued by this provider, rather than reading the instance
# variable directly.
#
# @param model_name [#to_s] model identifier to look up
# @return [Hash{Symbol => Object}, nil] `{ context_window: <max_model_len> }`,
#   or nil when the model is not listed, has no `max_model_len`, or the
#   lookup raised (the error is reported through `handle_exception`)
def fetch_model_detail(model_name)
  response = connection.get(models_url)
  # Array() also tolerates an explicit `"data": null` in the payload.
  models = Array(response.body.fetch('data', []))
  wanted_id = model_name.to_s
  entry = models.find { |m| m['id'] == wanted_id }
  return nil unless entry

  ctx = entry['max_model_len']
  ctx ? { context_window: ctx } : nil
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
                      model: model_name)
  nil
end

#headers ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 65

# Builds the HTTP headers sent to the vLLM server.
#
# Starts from `identity_headers` and, when `config.vllm_api_key` is present
# (neither nil nor an empty string), adds an `Authorization: Bearer <key>`
# header.
#
# The Authorization entry is added to a merged copy instead of being
# written into the hash returned by `identity_headers`, so a shared or
# frozen identity hash is never modified and never ends up carrying the
# API key.
#
# @return [Hash] the identity headers, plus `Authorization` when a key is set
def headers
  base = identity_headers
  token = config.vllm_api_key.to_s # nil.to_s is "", so one emptiness check covers both
  return base if token.empty?

  base.merge('Authorization' => "Bearer #{token}")
end

#health(live: false) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 79

# Logs the health-check target at info level, then defers to the inherited
# `health` implementation with the same `live` flag.
#
# @param live [Boolean] forwarded unchanged to the parent implementation
# @return [Object] whatever the parent `health` returns
def health(live: false)
  log.info do
    "checking health live=#{live} at #{api_base}#{health_url}"
  end
  super(live:)
end

#health_url ⇒ `Object`

72

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 72

def health_url = '/health'

#list_models(live: false, **filters) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 91

# Wraps the inherited `list_models` with info-level logging: one line
# naming the models URL before the call, one line with the number of
# models (via `#size` on the result) after it.
#
# @param live [Boolean] forwarded unchanged to the parent implementation
# @param filters [Hash] forwarded unchanged to the parent implementation
# @return [Object] the parent implementation's result, untouched
def list_models(live: false, **filters)
  log.info { "discovering models from #{api_base}#{models_url}" }
  found = super
  log.info { "discovered #{found.size} model(s) from vLLM" }
  found
end

#readiness(live: false) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 84

# Logs the readiness check at info level, obtains the readiness metadata
# from the inherited implementation and, for live checks only, hands that
# metadata to the class-level registry publisher via
# `publish_readiness_async`.
#
# @param live [Boolean] forwarded to the parent; also gates publishing
# @return [Object] the metadata returned by the parent implementation
def readiness(live: false)
  log.info { "checking readiness live=#{live} at #{api_base}" }
  metadata = super
  self.class.registry_publisher.publish_readiness_async(metadata) if live
  metadata
end

#reset_mm_cache ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 123

# Asks the server to reset its multimodal cache (per the debug log line).
#
# Sends a POST with an empty payload to `reset_mm_cache_url` and returns
# the response body. Errors raised by the connection are not rescued here.
#
# @return [Object] body of the POST response
def reset_mm_cache
  log.debug { 'resetting vLLM multimodal cache' }
  response = connection.post(reset_mm_cache_url, {})
  response.body
end

#reset_mm_cache_url ⇒ `Object`

75

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 75

def reset_mm_cache_url = '/reset_mm_cache'

#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 115

# Asks the server to reset its prefix cache (per the debug log line).
#
# Both options are passed to `with_query` together with
# `reset_prefix_cache_url` to build the request target, which is then
# POSTed with an empty payload. Errors raised by the connection are not
# rescued here.
#
# @param reset_running_requests [Object, nil] forwarded as a query option
# @param reset_external [Object, nil] forwarded as a query option
# @return [Object] body of the POST response
def reset_prefix_cache(reset_running_requests: nil, reset_external: nil)
  log.debug do
    "resetting vLLM prefix cache reset_running_requests=#{reset_running_requests.inspect} " \
      "reset_external=#{reset_external.inspect}"
  end
  target = with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:)
  response = connection.post(target, {})
  response.body
end

#reset_prefix_cache_url ⇒ `Object`

74

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 74

def reset_prefix_cache_url = '/reset_prefix_cache'

#settings ⇒ `Object`



52
53
54

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 52

# Provider settings; pure delegation to `Vllm.default_settings`.
#
# @return [Object] whatever `Vllm.default_settings` returns
def settings = Vllm.default_settings

#sleep(level: 1) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 128

# Puts the vLLM worker to sleep (per the debug log line).
#
# `level` is passed to `with_query` together with `sleep_url` to build the
# request target, which is then POSTed with an empty payload. Errors raised
# by the connection are not rescued here.
#
# NOTE: because this instance method is named `sleep`, an unqualified
# `sleep(...)` call made on this object resolves here rather than to
# `Kernel#sleep`; use `Kernel.sleep` when a real pause is intended.
#
# @param level [Object] forwarded as the `level` query option (default 1)
# @return [Object] body of the POST response
def sleep(level: 1)
  log.debug { "putting vLLM worker to sleep level=#{level.inspect}" }
  target = with_query(sleep_url, level:)
  connection.post(target, {}).body
end

#sleep_url ⇒ `Object`

76

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 76

def sleep_url = '/sleep'

#stream_usage_supported? ⇒ `Boolean`

Returns:

(Boolean)

50

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 50

def stream_usage_supported? = true

#translator ⇒ `Object`

Canonical translator instance — renders requests, parses responses/chunks.



57
58
59

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 57

# Canonical translator instance — renders requests, parses responses/chunks.
#
# Built on first use from the provider's `config` and memoized in
# `@translator`, so later calls return the same object.
#
# @return [Translator] the memoized translator
def translator
  return @translator if @translator

  @translator = Translator.new(config:)
end

#version ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 110

# Fetches version information from the server.
#
# Logs the target at info level, issues a GET to `version_url` and returns
# the response body. Errors raised by the connection are not rescued here.
#
# @return [Object] body of the GET response
def version
  log.info { "fetching version from #{api_base}#{version_url}" }
  response = connection.get(version_url)
  response.body
end

#version_url ⇒ `Object`

73

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 73

def version_url = '/version'

#wake_up(tags: nil) ⇒ `Object`

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 133

# Wakes the vLLM worker (per the debug log line).
#
# `tags` is coerced with `Array()`, so nil, a single tag or a list are all
# accepted. Each tag becomes a `['tags', tag]` pair; the list of pairs is
# passed to `with_query` together with `wake_up_url`, and the resulting
# target is POSTed with an empty payload. Errors raised by the connection
# are not rescued here.
#
# @param tags [Object, Array, nil] tag or tags to send; nil sends none
# @return [Object] body of the POST response
def wake_up(tags: nil)
  tag_list = Array(tags)
  log.debug { "waking vLLM worker tags=#{tag_list.inspect}" }
  pairs = tag_list.map { ['tags', _1] }
  target = with_query(wake_up_url, pairs)
  connection.post(target, {}).body
end

#wake_up_url ⇒ `Object`

77

# File 'lib/legion/extensions/llm/vllm/provider.rb', line 77

def wake_up_url = '/wake_up'

Class: Legion::Extensions::Llm::Vllm::Provider

Overview

Defined Under Namespace

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.capabilities ⇒ Object

.configuration_options ⇒ Object

.configuration_requirements ⇒ Object

.default_tier ⇒ Object

.default_transport ⇒ Object

.local? ⇒ Boolean

.registry_publisher ⇒ Object

.slug ⇒ Object

Instance Method Details

#api_base ⇒ Object

#discover_offerings(live: false, **filters) ⇒ Object

#fetch_model_detail(model_name) ⇒ Object

#headers ⇒ Object

#health(live: false) ⇒ Object

#health_url ⇒ Object

#list_models(live: false, **filters) ⇒ Object

#readiness(live: false) ⇒ Object

#reset_mm_cache ⇒ Object

#reset_mm_cache_url ⇒ Object

#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object

#reset_prefix_cache_url ⇒ Object

#settings ⇒ Object

#sleep(level: 1) ⇒ Object

#sleep_url ⇒ Object

#stream_usage_supported? ⇒ Boolean

#translator ⇒ Object

#version ⇒ Object

#version_url ⇒ Object

#wake_up(tags: nil) ⇒ Object

#wake_up_url ⇒ Object