Class: Legion::Extensions::Llm::Vllm::Provider
- Inherits:
- Provider
  - Object
  - Provider
  - Legion::Extensions::Llm::Vllm::Provider
- Includes:
- Provider::OpenAICompatible, Logging::Helper
- Defined in:
- lib/legion/extensions/llm/vllm/provider.rb
Overview
vLLM provider implementation for the Legion::Extensions::Llm base provider contract.
Defined Under Namespace
Modules: Capabilities
Class Attribute Summary collapse
- .registry_publisher ⇒ Object
Class Method Summary collapse
- .capabilities ⇒ Object
- .configuration_options ⇒ Object
- .configuration_requirements ⇒ Object
- .local? ⇒ Boolean
- .slug ⇒ Object
Instance Method Summary collapse
- #api_base ⇒ Object
- #headers ⇒ Object
- #health ⇒ Object
- #health_url ⇒ Object
- #list_models ⇒ Object
- #readiness(live: false) ⇒ Object
- #reset_mm_cache ⇒ Object
- #reset_mm_cache_url ⇒ Object
- #reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
- #reset_prefix_cache_url ⇒ Object
- #sleep(level: 1) ⇒ Object
- #sleep_url ⇒ Object
- #stream_usage_supported? ⇒ Boolean
- #version ⇒ Object
- #version_url ⇒ Object
- #wake_up(tags: nil) ⇒ Object
- #wake_up_url ⇒ Object
Class Attribute Details
.registry_publisher ⇒ Object
24 25 26 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 24 def registry_publisher @registry_publisher ||= RegistryPublisher.new end |
Class Method Details
.capabilities ⇒ Object
22 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 22 def capabilities = Capabilities |
.configuration_options ⇒ Object
20 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 20 def configuration_options = %i[vllm_api_base vllm_api_key] |
.configuration_requirements ⇒ Object
21 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 21 def configuration_requirements = [] |
.local? ⇒ Boolean
19 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 19 def local? = true |
.slug ⇒ Object
18 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 18 def slug = 'vllm' |
Instance Method Details
#api_base ⇒ Object
51 52 53 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 51 def api_base config.vllm_api_base || 'http://localhost:8000' end |
#headers ⇒ Object
55 56 57 58 59 60 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 55 def headers token = config.vllm_api_key return {} if token.nil? || token.to_s.empty? { 'Authorization' => "Bearer #{token}" } end |
#health ⇒ Object
69 70 71 72 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 69 def health log.info { "checking health at #{api_base}#{health_url}" } connection.get(health_url).body end |
#health_url ⇒ Object
62 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 62 def health_url = '/health' |
#list_models ⇒ Object
81 82 83 84 85 86 87 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 81 def list_models log.info { "discovering models from #{api_base}#{models_url}" } super.tap do |models| log.info { "discovered #{models.size} model(s) from vLLM" } self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false)) end end |
#readiness(live: false) ⇒ Object
74 75 76 77 78 79 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 74 def readiness(live: false) log.info { "checking readiness live=#{live} at #{api_base}" } super.tap do |result| self.class.registry_publisher.publish_readiness_async(result) if live end end |
#reset_mm_cache ⇒ Object
98 99 100 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 98 def reset_mm_cache connection.post(reset_mm_cache_url, {}).body end |
#reset_mm_cache_url ⇒ Object
65 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 65 def reset_mm_cache_url = '/reset_mm_cache' |
#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
94 95 96 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 94 def reset_prefix_cache(reset_running_requests: nil, reset_external: nil) connection.post(with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:), {}).body end |
#reset_prefix_cache_url ⇒ Object
64 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 64 def reset_prefix_cache_url = '/reset_prefix_cache' |
#sleep(level: 1) ⇒ Object
102 103 104 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 102 def sleep(level: 1) connection.post(with_query(sleep_url, level:), {}).body end |
#sleep_url ⇒ Object
66 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 66 def sleep_url = '/sleep' |
#stream_usage_supported? ⇒ Boolean
49 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 49 def stream_usage_supported? = true |
#version ⇒ Object
89 90 91 92 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 89 def version log.info { "fetching version from #{api_base}#{version_url}" } connection.get(version_url).body end |
#version_url ⇒ Object
63 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 63 def version_url = '/version' |
#wake_up(tags: nil) ⇒ Object
106 107 108 109 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 106 def wake_up(tags: nil) query = Array(tags).map { |tag| ['tags', tag] } connection.post(with_query(wake_up_url, query), {}).body end |
#wake_up_url ⇒ Object
67 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 67 def wake_up_url = '/wake_up' |