Module: Legion::Extensions::Llm::Vllm
- Extended by:
- Core, AutoRegistration, Logging::Helper
- Defined in:
- lib/legion/extensions/llm/vllm.rb,
lib/legion/extensions/llm/vllm/version.rb,
lib/legion/extensions/llm/vllm/provider.rb,
lib/legion/extensions/llm/vllm/translator.rb,
lib/legion/extensions/llm/vllm/actors/fleet_worker.rb,
lib/legion/extensions/llm/vllm/runners/fleet_worker.rb,
lib/legion/extensions/llm/vllm/actors/discovery_refresh.rb
Overview
Vllm provider extension namespace.
Defined Under Namespace
Modules: Actor, Runners
Classes: Provider, Translator
Constant Summary
collapse
- PROVIDER_FAMILY =
:vllm
- DEFAULT_INSTANCE_TIER =
{ tier: :direct, capabilities: {}, provider_capabilities: { streaming: true } }.freeze
- VERSION =
'0.3.5'
Class Method Summary
collapse
Class Method Details
.default_settings ⇒ Object
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/legion/extensions/llm/vllm.rb', line 21
# Builds the default settings for the vLLM provider family.
#
# Hands +Legion::Extensions::Llm.provider_settings+ the family identifier
# together with a template instance: a direct-tier HTTP endpoint on
# localhost:8000 with no API key, thinking enabled, a concurrency limit of 1
# and fleet participation switched off.
#
# @return [Object] whatever +provider_settings+ returns for these defaults
def self.default_settings
  fleet_defaults = {
    enabled: false,
    respond_to_requests: false,
    capabilities: %i[chat stream_chat embed]
  }
  instance_defaults = {
    endpoint: 'http://localhost:8000',
    tier: :direct,
    transport: :http,
    credentials: { api_key: nil },
    enable_thinking: true,
    usage: { inference: true, embedding: true, image: true },
    limits: { concurrency: 1 },
    fleet: fleet_defaults
  }

  ::Legion::Extensions::Llm.provider_settings(family: PROVIDER_FAMILY, instance: instance_defaults)
end
|
.discover_instances ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# File 'lib/legion/extensions/llm/vllm.rb', line 49
# Collects the vLLM instances this node can use.
#
# Two sources are consulted, in this order:
# 1. a health probe of http://localhost:8000/health; when it succeeds a
#    +:local+ instance is registered,
# 2. the +extensions.llm.vllm.instances+ setting; when it is a Hash, each
#    entry is normalized and layered over +DEFAULT_INSTANCE_TIER+. A
#    configured entry named +local+ replaces the probed one.
#
# NOTE(review): the probe timeout of 0.1 is presumably in seconds; confirm
# against +CredentialSources.http_ok?+.
#
# @return [Hash{Symbol => Hash}] instance configurations keyed by instance name
def self.discover_instances
  found = {}

  local_healthy = CredentialSources.http_ok?('http://localhost:8000', path: '/health', timeout: 0.1)
  if local_healthy
    found[:local] = { vllm_api_base: 'http://localhost:8000', tier: :local, capabilities: [:completion] }
  end

  from_settings = CredentialSources.setting(:extensions, :llm, :vllm, :instances)
  if from_settings.is_a?(Hash)
    from_settings.each_pair do |instance_name, raw_config|
      found[instance_name.to_sym] = DEFAULT_INSTANCE_TIER.merge(normalize_instance_config(raw_config))
    end
  end

  log.debug { "discovered #{found.size} vLLM instance(s): #{found.keys.join(', ')}" }
  found
end
|
.infer_tier_from_endpoint(url) ⇒ Object
98
99
100
101
102
103
104
105
106
107
108
|
# File 'lib/legion/extensions/llm/vllm.rb', line 98
# Classifies an endpoint URL as a loopback (+:local+) or remote (+:direct+)
# instance based on its host.
#
# @param url [#to_s, nil] endpoint URL, e.g. 'http://localhost:8000'
# @return [Symbol] +:local+ when the host is localhost, 127.0.0.1 or ::1
#   (case-insensitive); +:direct+ for any other host, for nil/empty input,
#   and for URLs that +URI.parse+ rejects (the error is reported through
#   +handle_exception+ at debug level and otherwise swallowed)
def self.infer_tier_from_endpoint(url)
  return :direct if url.nil? || url.to_s.empty?

  require 'uri'
  # NOTE(review): loading the discovery-refresh actor from a URL helper looks
  # incidental; kept unchanged to preserve load behaviour - confirm it is needed.
  require_relative 'vllm/actors/discovery_refresh'
  # URI#hostname (unlike URI#host) strips the brackets around IPv6 literals,
  # so 'http://[::1]:8000' yields '::1' and can match the loopback list.
  host = URI.parse(url.to_s).hostname.to_s.downcase
  %w[localhost 127.0.0.1 ::1].include?(host) ? :local : :direct
rescue URI::InvalidURIError => e
  handle_exception(e, level: :debug, handled: true, operation: 'vllm.infer_tier_from_endpoint')
  :direct
end
|
.normalize_api_base(url) ⇒ Object
94
95
96
|
# File 'lib/legion/extensions/llm/vllm.rb', line 94
# Reduces an API base URL to its canonical root form.
#
# Surrounding whitespace and trailing slashes are removed, then a trailing
# '/v1' path segment is dropped, so 'http://h:8000', 'http://h:8000/',
# 'http://h:8000/v1' and 'http://h:8000/v1/' all normalize to the same value.
#
# @param url [#to_s, nil] base URL as configured
# @return [String] the normalized base URL ('' for nil input); always a new String
def self.normalize_api_base(url)
  trailing_slashes = %r{/+\z}
  trimmed = url.to_s.strip.sub(trailing_slashes, '')
  # Only a complete '/v1' segment is removed; '/v12' or '/api/v1x' are left alone.
  trimmed.delete_suffix('/v1').sub(trailing_slashes, '')
end
|
.normalize_instance_config(config) ⇒ Object
71
72
73
74
75
76
77
|
# File 'lib/legion/extensions/llm/vllm.rb', line 71
# Converts one configured instance entry into the internal representation.
#
# Keys are symbolized into a fresh Hash (the argument is not modified), the
# API base aliases and credentials are resolved in place on that copy, and a
# tier is inferred from the resolved base URL when none was configured.
#
# @param config [#to_h, nil] raw instance settings
# @return [Hash{Symbol => Object}] normalized settings with +:tier+ set
def self.normalize_instance_config(config)
  config.to_h.transform_keys(&:to_sym).tap do |settings|
    resolve_api_base_aliases(settings)
    resolve_credentials(settings)
    settings[:tier] ||= infer_tier_from_endpoint(settings[:vllm_api_base])
  end
end
|
.provider_class ⇒ Object
41
42
43
|
# File 'lib/legion/extensions/llm/vllm.rb', line 41
# Provider implementation exposed by this extension.
#
# @return [Class] the +Provider+ class defined under this namespace
def self.provider_class
  Provider
end
|
.registry_publisher ⇒ Object
45
46
47
|
# File 'lib/legion/extensions/llm/vllm.rb', line 45
# Lazily builds and memoizes the registry publisher for this provider family.
#
# The publisher is created on first use with +PROVIDER_FAMILY+ and the same
# object is returned on every later call.
#
# @return [Legion::Extensions::Llm::RegistryPublisher] the shared publisher
def self.registry_publisher
  return @registry_publisher if @registry_publisher

  @registry_publisher = Legion::Extensions::Llm::RegistryPublisher.new(provider_family: PROVIDER_FAMILY)
end
|
.resolve_api_base_aliases(normalized) ⇒ Object
87
88
89
90
91
92
|
# File 'lib/legion/extensions/llm/vllm.rb', line 87
# Folds the accepted API base aliases into +:vllm_api_base+, in place.
#
# Precedence is +:vllm_api_base+, then +:base_url+, +:api_base+ and
# +:endpoint+; the first truthy value wins and is passed through
# +normalize_api_base+. All three alias keys are always removed, so a
# lower-priority alias can no longer linger in the result with a stale value.
# The +:vllm_api_base+ key is always written and is nil when nothing was set.
#
# @param normalized [Hash{Symbol => Object}] symbol-keyed settings, mutated in place
# @return [String, nil] the resolved base URL, or nil when none was configured
def self.resolve_api_base_aliases(normalized)
  # Delete every alias eagerly: a short-circuiting ||= chain would skip the
  # deletes after the first hit and leave the remaining aliases behind.
  alias_values = %i[base_url api_base endpoint].map { |alias_key| normalized.delete(alias_key) }
  chosen = [normalized[:vllm_api_base], *alias_values].find { |candidate| candidate }
  normalized[:vllm_api_base] = chosen ? normalize_api_base(chosen) : nil
end
|
.resolve_credentials(normalized) ⇒ Object
79
80
81
82
83
84
85
|
# File 'lib/legion/extensions/llm/vllm.rb', line 79
# Moves a nested credentials entry into the flat +:vllm_api_key+ setting.
#
# The +:credentials+ key is always removed. When it held a Hash, its
# +api_key+ (string or symbol key) fills +:vllm_api_key+ unless that setting
# already has a truthy value; any other credentials value is discarded.
#
# @param normalized [Hash{Symbol => Object}] symbol-keyed settings, mutated in place
# @return [Object, nil] the resulting +:vllm_api_key+, or nil when no
#   credentials Hash was supplied
def self.resolve_credentials(normalized)
  supplied = normalized.delete(:credentials)
  return unless supplied.is_a?(Hash)

  api_key = supplied.transform_keys(&:to_sym)[:api_key]
  normalized[:vllm_api_key] ||= api_key
end
|