Module: Legion::Extensions::Llm::Vllm

Extended by:
Core, AutoRegistration, Logging::Helper
Defined in:
lib/legion/extensions/llm/vllm.rb,
lib/legion/extensions/llm/vllm/version.rb,
lib/legion/extensions/llm/vllm/provider.rb,
lib/legion/extensions/llm/vllm/actors/fleet_worker.rb,
lib/legion/extensions/llm/vllm/runners/fleet_worker.rb

Overview

Namespace for the vLLM provider extension.

Defined Under Namespace

Modules: Actor, Runners. Classes: Provider.

Constant Summary

PROVIDER_FAMILY =
:vllm
DEFAULT_INSTANCE_TIER =
{ tier: :direct }.freeze
VERSION =
'0.2.9'

Class Method Summary

Class Method Details

.default_settings ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/legion/extensions/llm/vllm.rb', line 20

# Builds the default settings for this provider family.
#
# The defaults describe a single instance reached over HTTP at
# http://localhost:8000 with fleet participation switched off. The assembled
# hash is handed to Legion::Extensions::Llm.provider_settings together with
# PROVIDER_FAMILY, and whatever that call returns is returned unchanged.
#
# @return [Object] the result of Legion::Extensions::Llm.provider_settings
def self.default_settings
  # Fleet participation is opt-in: both switches default to false.
  fleet_defaults = {
    enabled: false,
    respond_to_requests: false,
    capabilities: %i[chat stream_chat embed],
    lanes: [],
    concurrency: 1,
    queue_suffix: nil
  }

  instance_defaults = {
    endpoint: 'http://localhost:8000',
    tier: :direct,
    transport: :http,
    credentials: { api_key: nil },
    enable_thinking: true,
    usage: { inference: true, embedding: true, image: true },
    limits: { concurrency: 1 },
    fleet: fleet_defaults
  }

  ::Legion::Extensions::Llm.provider_settings(family: PROVIDER_FAMILY, instance: instance_defaults)
end

.discover_instances ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/legion/extensions/llm/vllm.rb', line 51

# Collects the vLLM instances this process can use.
#
# Two sources are combined, in this order:
# 1. A health probe of http://localhost:8000/health (0.1 s timeout). When it
#    succeeds, a :local instance is registered.
# 2. The hash stored under the extensions/llm/vllm/instances setting. Each
#    entry is normalized and merged over DEFAULT_INSTANCE_TIER; an entry
#    named "local" therefore replaces the probed one.
#
# @return [Hash{Symbol => Hash}] instance name => instance configuration
def self.discover_instances
  found = {}

  local_up = CredentialSources.http_ok?('http://localhost:8000', path: '/health', timeout: 0.1)
  if local_up
    found[:local] = {
      vllm_api_base: 'http://localhost:8000',
      tier: :local,
      capabilities: [:completion]
    }
  end

  # Anything other than a Hash (nil, a string, ...) is ignored.
  from_settings = CredentialSources.setting(:extensions, :llm, :vllm, :instances)
  entries = from_settings.is_a?(Hash) ? from_settings : {}
  entries.each do |label, raw_config|
    found[label.to_sym] = DEFAULT_INSTANCE_TIER.merge(normalize_instance_config(raw_config))
  end

  log.debug { "discovered #{found.size} vLLM instance(s): #{found.keys.join(', ')}" }
  found
end

.infer_tier_from_endpoint(url) ⇒ Object



91
92
93
94
95
96
97
98
99
100
# File 'lib/legion/extensions/llm/vllm.rb', line 91

# Classifies an endpoint URL as :local (loopback host) or :direct.
#
# The host is compared case-insensitively against localhost, 127.0.0.1 and
# ::1. A nil, empty or unparseable URL, and a URL without a host component,
# are all classified as :direct.
#
# @param url [#to_s, nil] endpoint URL
# @return [Symbol] :local or :direct
def self.infer_tier_from_endpoint(url)
  return :direct if url.nil? || url.to_s.empty?

  require 'uri'
  # URI#hostname (unlike URI#host) strips the square brackets from IPv6
  # literals, so "http://[::1]:8000" yields "::1" and matches the list below.
  host = URI.parse(url.to_s).hostname.to_s.downcase
  %w[localhost 127.0.0.1 ::1].include?(host) ? :local : :direct
rescue URI::InvalidURIError => e
  # Best effort: report the malformed URL at debug level and fall back.
  handle_exception(e, level: :debug, handled: true, operation: 'vllm.infer_tier_from_endpoint')
  :direct
end

.normalize_api_base(url) ⇒ Object



87
88
89
# File 'lib/legion/extensions/llm/vllm.rb', line 87

# Strips a trailing "/v1" or "/v1/" path segment from an API base URL.
#
# Only a suffix at the very end of the string is removed; "/v1" elsewhere in
# the URL is left alone. The argument is converted with #to_s first, so nil
# yields an empty string.
#
# @param url [#to_s] API base URL
# @return [String] the URL without the trailing version segment
def self.normalize_api_base(url)
  version_suffix = %r{/v1/?\z}
  base = url.to_s
  base.sub(version_suffix, '')
end

.normalize_instance_config(config) ⇒ Object



73
74
75
76
77
78
# File 'lib/legion/extensions/llm/vllm.rb', line 73

# Converts one configured instance entry into its canonical hash form.
#
# Keys are symbolized, the API base aliases are folded into :vllm_api_base,
# and :tier is filled in from the endpoint when the entry does not already
# carry a truthy :tier.
#
# @param config [#to_h] raw instance configuration (nil becomes an empty hash)
# @return [Hash{Symbol => Object}] a new, normalized hash
def self.normalize_instance_config(config)
  config.to_h.transform_keys(&:to_sym).tap do |settings|
    resolve_api_base_aliases(settings)
    settings[:tier] ||= infer_tier_from_endpoint(settings[:vllm_api_base])
  end
end

.provider_class ⇒ Object



43
44
45
# File 'lib/legion/extensions/llm/vllm.rb', line 43

# Returns the provider class of this extension.
#
# @return [Class] the Provider constant as resolved from this scope
def self.provider_class
  Provider
end

.registry_publisher ⇒ Object



47
48
49
# File 'lib/legion/extensions/llm/vllm.rb', line 47

# Returns the registry publisher for this provider family.
#
# The publisher is built on first use with provider_family set to
# PROVIDER_FAMILY and cached in @registry_publisher; later calls return the
# cached object.
#
# @return [Legion::Extensions::Llm::RegistryPublisher]
def self.registry_publisher
  return @registry_publisher if @registry_publisher

  @registry_publisher = Legion::Extensions::Llm::RegistryPublisher.new(provider_family: PROVIDER_FAMILY)
end

.resolve_api_base_aliases(normalized) ⇒ Object



80
81
82
83
84
85
# File 'lib/legion/extensions/llm/vllm.rb', line 80

# Folds the API base aliases of an instance hash into :vllm_api_base, in place.
#
# An existing truthy :vllm_api_base wins. Otherwise :base_url, :api_base and
# :endpoint are tried in that order. Each alias is removed from the hash as it
# is consulted, so aliases after the first match stay in place. When nothing
# matches, :vllm_api_base is left in the hash with a nil value. A value that
# was found is passed through normalize_api_base.
#
# @param normalized [Hash{Symbol => Object}] instance hash, mutated in place
# @return [String, nil] the normalized API base, or nil when none was found
def self.resolve_api_base_aliases(normalized)
  %i[base_url api_base endpoint].each do |alias_key|
    # ||= short-circuits once a base is known, leaving later aliases untouched.
    normalized[:vllm_api_base] ||= normalized.delete(alias_key)
  end

  base = normalized[:vllm_api_base]
  normalized[:vllm_api_base] = normalize_api_base(base) if base
end