Class: Legion::Extensions::Llm::Vllm::Provider
- Inherits:
- Provider
- Object
- Provider
- Legion::Extensions::Llm::Vllm::Provider
- Includes:
- Provider::OpenAICompatible, Logging::Helper
- Defined in:
- lib/legion/extensions/llm/vllm/provider.rb
Overview
vLLM provider implementation for the Legion::Extensions::Llm base provider contract.
Defined Under Namespace
Modules: Capabilities
Class Method Summary
- .capabilities ⇒ Object
- .configuration_options ⇒ Object
- .configuration_requirements ⇒ Object
- .default_tier ⇒ Object
- .default_transport ⇒ Object
- .local? ⇒ Boolean
- .registry_publisher ⇒ Object
- .slug ⇒ Object
Instance Method Summary
- #api_base ⇒ Object
- #discover_offerings(live: false, **filters) ⇒ Object
- #fetch_model_detail(model_name) ⇒ Object
- #headers ⇒ Object
- #health(live: false) ⇒ Object
- #health_url ⇒ Object
- #list_models(live: false, **filters) ⇒ Object
- #readiness(live: false) ⇒ Object
- #reset_mm_cache ⇒ Object
- #reset_mm_cache_url ⇒ Object
- #reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
- #reset_prefix_cache_url ⇒ Object
- #settings ⇒ Object
- #sleep(level: 1) ⇒ Object
- #sleep_url ⇒ Object
- #stream_usage_supported? ⇒ Boolean
- #translator ⇒ Object
  Canonical translator instance — renders requests, parses responses/chunks.
- #version ⇒ Object
- #version_url ⇒ Object
- #wake_up(tags: nil) ⇒ Object
- #wake_up_url ⇒ Object
Class Method Details
.capabilities ⇒ Object
23 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 23 def capabilities = Capabilities |
.configuration_options ⇒ Object
21 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 21 def configuration_options = %i[vllm_api_base vllm_api_key] |
.configuration_requirements ⇒ Object
22 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 22 def configuration_requirements = [] |
.default_tier ⇒ Object
20 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 20 def default_tier = :direct |
.default_transport ⇒ Object
19 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 19 def default_transport = :http |
.local? ⇒ Boolean
18 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 18 def local? = false |
.registry_publisher ⇒ Object
25 26 27 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 25 def registry_publisher Vllm.registry_publisher end |
.slug ⇒ Object
17 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 17 def slug = 'vllm' |
Instance Method Details
#api_base ⇒ Object
61 62 63 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 61 def api_base normalize_url(config.vllm_api_base || settings[:endpoint] || 'http://localhost:8000') end |
#discover_offerings(live: false, **filters) ⇒ Object
98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 98 def discover_offerings(live: false, **filters) return filter_cached_offerings(Array(@cached_offerings), filters) unless live provider_health = health(live:) @cached_offerings = discover_live_offerings(filters, provider_health, live:) log_discover_complete(@cached_offerings) @cached_offerings rescue StandardError => e handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings') [] end |
#fetch_model_detail(model_name) ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 139 def fetch_model_detail(model_name) # vLLM provides context_length via /v1/models during discovery. # Re-fetch from the models endpoint if we need it outside discovery. response = @connection.get(models_url) models = response.body.fetch('data', []) entry = models.find { |m| m['id'] == model_name.to_s } return nil unless entry ctx = entry['max_model_len'] ctx ? { context_window: ctx } : nil rescue StandardError => e handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail', model: model_name) nil end |
#headers ⇒ Object
65 66 67 68 69 70 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 65 def headers hdrs = identity_headers token = config.vllm_api_key hdrs['Authorization'] = "Bearer #{token}" unless token.nil? || token.to_s.empty? hdrs end |
#health(live: false) ⇒ Object
79 80 81 82 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 79 def health(live: false) log.info { "checking health live=#{live} at #{api_base}#{health_url}" } super end |
#health_url ⇒ Object
72 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 72 def health_url = '/health' |
#list_models(live: false, **filters) ⇒ Object
91 92 93 94 95 96 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 91 def list_models(live: false, **filters) log.info { "discovering models from #{api_base}#{models_url}" } super.tap do |models| log.info { "discovered #{models.size} model(s) from vLLM" } end end |
#readiness(live: false) ⇒ Object
84 85 86 87 88 89 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 84 def readiness(live: false) log.info { "checking readiness live=#{live} at #{api_base}" } super.tap do |result| self.class.registry_publisher.publish_readiness_async(result) if live end end |
#reset_mm_cache ⇒ Object
123 124 125 126 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 123 def reset_mm_cache log.debug { 'resetting vLLM multimodal cache' } connection.post(reset_mm_cache_url, {}).body end |
#reset_mm_cache_url ⇒ Object
75 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 75 def reset_mm_cache_url = '/reset_mm_cache' |
#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
115 116 117 118 119 120 121 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 115 def reset_prefix_cache(reset_running_requests: nil, reset_external: nil) log.debug do "resetting vLLM prefix cache reset_running_requests=#{reset_running_requests.inspect} " \ "reset_external=#{reset_external.inspect}" end connection.post(with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:), {}).body end |
#reset_prefix_cache_url ⇒ Object
74 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 74 def reset_prefix_cache_url = '/reset_prefix_cache' |
#settings ⇒ Object
52 53 54 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 52 def settings Vllm.default_settings end |
#sleep(level: 1) ⇒ Object
128 129 130 131 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 128 def sleep(level: 1) log.debug { "putting vLLM worker to sleep level=#{level.inspect}" } connection.post(with_query(sleep_url, level:), {}).body end |
#sleep_url ⇒ Object
76 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 76 def sleep_url = '/sleep' |
#stream_usage_supported? ⇒ Boolean
50 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 50 def stream_usage_supported? = true |
#translator ⇒ Object
Canonical translator instance — renders requests, parses responses/chunks.
57 58 59 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 57 def translator @translator ||= Translator.new(config: config) end |
#version ⇒ Object
110 111 112 113 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 110 def version log.info { "fetching version from #{api_base}#{version_url}" } connection.get(version_url).body end |
#version_url ⇒ Object
73 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 73 def version_url = '/version' |
#wake_up(tags: nil) ⇒ Object
133 134 135 136 137 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 133 def wake_up(tags: nil) log.debug { "waking vLLM worker tags=#{Array(tags).inspect}" } query = Array(tags).map { |tag| ['tags', tag] } connection.post(with_query(wake_up_url, query), {}).body end |
#wake_up_url ⇒ Object
77 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 77 def wake_up_url = '/wake_up' |