Module: Legion::Extensions::Llm::Vllm
- Extended by:
- Core, AutoRegistration, Logging::Helper
- Defined in:
- lib/legion/extensions/llm/vllm.rb,
lib/legion/extensions/llm/vllm/version.rb,
lib/legion/extensions/llm/vllm/provider.rb,
lib/legion/extensions/llm/vllm/actors/fleet_worker.rb,
lib/legion/extensions/llm/vllm/runners/fleet_worker.rb
Overview
Vllm provider extension namespace.
Defined Under Namespace
Modules: Actor, Runners
Classes: Provider
Constant Summary
- PROVIDER_FAMILY =
:vllm
- DEFAULT_INSTANCE_TIER =
{ tier: :direct }.freeze
- VERSION =
'0.2.7'
Class Method Summary
- .default_settings ⇒ Object
- .discover_instances ⇒ Object
- .infer_tier_from_endpoint(url) ⇒ Object
- .normalize_api_base(url) ⇒ Object
- .normalize_instance_config(config) ⇒ Object
- .provider_class ⇒ Object
- .registry_publisher ⇒ Object
- .resolve_api_base_aliases(normalized) ⇒ Object
Class Method Details
.default_settings ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/legion/extensions/llm/vllm.rb', line 19
# Builds the default settings for this provider family.
#
# The per-instance defaults (endpoint, tier, transport, credentials,
# thinking flag, usage, limits and fleet options) are passed, together with
# PROVIDER_FAMILY, to ::Legion::Extensions::Llm.provider_settings.
#
# @return [Object] whatever provider_settings returns for these defaults
def self.default_settings
  fleet_defaults = {
    enabled: false,
    respond_to_requests: false,
    capabilities: %i[chat stream_chat embed],
    lanes: [],
    concurrency: 1,
    queue_suffix: nil
  }
  instance_defaults = {
    endpoint: 'http://localhost:8000',
    tier: :direct,
    transport: :http,
    credentials: { api_key: nil },
    enable_thinking: true,
    usage: { inference: true, embedding: true, image: true },
    limits: { concurrency: 1 },
    fleet: fleet_defaults
  }

  ::Legion::Extensions::Llm.provider_settings(family: PROVIDER_FAMILY, instance: instance_defaults)
end
.discover_instances ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/legion/extensions/llm/vllm.rb', line 50
# Collects the known instances, keyed by symbolized instance name.
#
# A :local entry is added when the health probe against
# http://localhost:8000/health succeeds. Entries from the
# extensions.llm.vllm.instances setting are then layered on top (a configured
# entry with the same name replaces the probed one), each normalized through
# normalize_instance_config and merged over DEFAULT_INSTANCE_TIER.
#
# @return [Hash{Symbol => Hash}] instance name => instance configuration
def self.discover_instances
  local_up = CredentialSources.http_ok?('http://localhost:8000', path: '/health', timeout: 0.1)
  discovered =
    if local_up
      { local: { vllm_api_base: 'http://localhost:8000', tier: :local, capabilities: [:completion] } }
    else
      {}
    end

  from_settings = CredentialSources.setting(:extensions, :llm, :vllm, :instances)
  # Anything other than a Hash (including a missing setting) contributes nothing.
  return discovered unless from_settings.is_a?(Hash)

  from_settings.each_with_object(discovered) do |(name, config), acc|
    acc[name.to_sym] = DEFAULT_INSTANCE_TIER.merge(normalize_instance_config(config))
  end
end
.infer_tier_from_endpoint(url) ⇒ Object
89 90 91 92 93 94 95 96 97 |
# File 'lib/legion/extensions/llm/vllm.rb', line 89
# Classifies an endpoint URL as :local (loopback host) or :direct (anything
# else, including blank or unparseable input).
#
# @param url [#to_s, nil] endpoint URL to inspect
# @return [Symbol] :local when the host is localhost, 127.0.0.1 or ::1
#   (case-insensitive), otherwise :direct
def self.infer_tier_from_endpoint(url)
  text = url.to_s
  return :direct if text.empty?

  require 'uri'
  # URI#hostname (unlike URI#host) strips the square brackets from IPv6
  # literals, so "http://[::1]:8000" yields "::1" and matches the list below.
  host = URI.parse(text).hostname.to_s.downcase
  %w[localhost 127.0.0.1 ::1].include?(host) ? :local : :direct
rescue URI::InvalidURIError
  # An endpoint that cannot be parsed is never treated as local.
  :direct
end
.normalize_api_base(url) ⇒ Object
85 86 87 |
# File 'lib/legion/extensions/llm/vllm.rb', line 85
# Reduces an API base URL to a canonical form with no trailing "/v1" segment
# and no trailing slashes, so "http://h:8000", "http://h:8000/",
# "http://h:8000/v1" and "http://h:8000/v1/" all normalize to the same value.
#
# @param url [#to_s, nil] API base URL; nil is treated as an empty string
# @return [String] the URL without a trailing "/v1" and/or trailing slashes
def self.normalize_api_base(url)
  # The negative lookbehind keeps the match from starting right after ":" or
  # "/", so the "://" separator (and a host literally named "v1") is never
  # eaten; only a trailing path suffix is removed.
  url.to_s.sub(%r{(?<![:/])(?:/+v1)?/*\z}, '')
end
.normalize_instance_config(config) ⇒ Object
71 72 73 74 75 76 |
# File 'lib/legion/extensions/llm/vllm.rb', line 71
# Produces a normalized copy of one instance's configuration.
#
# Keys are symbolized, API-base aliases are folded into :vllm_api_base via
# resolve_api_base_aliases, and a missing :tier is filled in from the endpoint
# via infer_tier_from_endpoint.
#
# @param config [#to_h] raw instance configuration
# @return [Hash{Symbol => Object}] the normalized configuration
def self.normalize_instance_config(config)
  config.to_h.transform_keys(&:to_sym).tap do |settings|
    resolve_api_base_aliases(settings)
    # Only infer a tier when the configuration did not supply one.
    settings[:tier] = infer_tier_from_endpoint(settings[:vllm_api_base]) unless settings[:tier]
  end
end
.provider_class ⇒ Object
42 43 44 |
# File 'lib/legion/extensions/llm/vllm.rb', line 42
# Returns the provider class defined under this namespace.
#
# @return [Class] the Provider constant
def self.provider_class
  Provider
end
.registry_publisher ⇒ Object
46 47 48 |
# File 'lib/legion/extensions/llm/vllm.rb', line 46
# Returns the registry publisher for this provider family, creating it on
# first use and reusing the same object afterwards.
#
# @return [Legion::Extensions::Llm::RegistryPublisher] the memoized publisher
def self.registry_publisher
  return @registry_publisher if @registry_publisher

  @registry_publisher = Legion::Extensions::Llm::RegistryPublisher.new(provider_family: PROVIDER_FAMILY)
end
.resolve_api_base_aliases(normalized) ⇒ Object
78 79 80 81 82 83 |
# File 'lib/legion/extensions/llm/vllm.rb', line 78
# Folds the alias keys :base_url, :api_base and :endpoint into :vllm_api_base,
# mutating the given hash in place.
#
# Aliases are consulted in that order and only while :vllm_api_base is still
# unset; an alias that is consulted is removed from the hash. A resulting
# value is then passed through normalize_api_base.
#
# @param normalized [Hash{Symbol => Object}] symbol-keyed config, modified in place
# @return [String, nil] the normalized API base, or nil when none was found
def self.resolve_api_base_aliases(normalized)
  %i[base_url api_base endpoint].each do |alias_key|
    # Once a base is known, the remaining aliases are left untouched.
    next if normalized[:vllm_api_base]

    normalized[:vllm_api_base] = normalized.delete(alias_key)
  end

  api_base = normalized[:vllm_api_base]
  return unless api_base

  normalized[:vllm_api_base] = normalize_api_base(api_base)
end