Class: Legion::Extensions::Llm::Vllm::Provider
- Inherits:
- Provider
- Object
- Provider
- Legion::Extensions::Llm::Vllm::Provider
- Includes:
- Provider::OpenAICompatible, Logging::Helper
- Defined in:
- lib/legion/extensions/llm/vllm/provider.rb
Overview
vLLM provider implementation for the Legion::Extensions::Llm base provider contract.
Defined Under Namespace
Modules: Capabilities
Class Method Summary collapse
- .capabilities ⇒ Object
- .configuration_options ⇒ Object
- .configuration_requirements ⇒ Object
- .local? ⇒ Boolean
- .registry_publisher ⇒ Object
- .slug ⇒ Object
Instance Method Summary collapse
- #api_base ⇒ Object
- #headers ⇒ Object
- #health ⇒ Object
- #health_url ⇒ Object
- #list_models ⇒ Object
- #readiness(live: false) ⇒ Object
- #reset_mm_cache ⇒ Object
- #reset_mm_cache_url ⇒ Object
- #reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
- #reset_prefix_cache_url ⇒ Object
- #sleep(level: 1) ⇒ Object
- #sleep_url ⇒ Object
- #stream_usage_supported? ⇒ Boolean
- #version ⇒ Object
- #version_url ⇒ Object
- #wake_up(tags: nil) ⇒ Object
- #wake_up_url ⇒ Object
Class Method Details
.capabilities ⇒ Object
20 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 20 def capabilities = Capabilities |
.configuration_options ⇒ Object
18 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 18 def configuration_options = %i[vllm_api_base vllm_api_key] |
.configuration_requirements ⇒ Object
19 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 19 def configuration_requirements = [] |
.local? ⇒ Boolean
17 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 17 def local? = false |
.registry_publisher ⇒ Object
22 23 24 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 22 def registry_publisher Vllm.registry_publisher end |
.slug ⇒ Object
16 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 16 def slug = 'vllm' |
Instance Method Details
#api_base ⇒ Object
49 50 51 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 49 def api_base config.vllm_api_base || 'http://localhost:8000' end |
#headers ⇒ Object
53 54 55 56 57 58 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 53 def headers token = config.vllm_api_key return {} if token.nil? || token.to_s.empty? { 'Authorization' => "Bearer #{token}" } end |
#health ⇒ Object
67 68 69 70 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 67 def health log.info { "checking health at #{api_base}#{health_url}" } connection.get(health_url).body end |
#health_url ⇒ Object
60 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 60 def health_url = '/health' |
#list_models ⇒ Object
79 80 81 82 83 84 85 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 79 def list_models log.info { "discovering models from #{api_base}#{models_url}" } super.tap do |models| log.info { "discovered #{models.size} model(s) from vLLM" } self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false)) end end |
#readiness(live: false) ⇒ Object
72 73 74 75 76 77 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 72 def readiness(live: false) log.info { "checking readiness live=#{live} at #{api_base}" } super.tap do |result| self.class.registry_publisher.publish_readiness_async(result) if live end end |
#reset_mm_cache ⇒ Object
96 97 98 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 96 def reset_mm_cache connection.post(reset_mm_cache_url, {}).body end |
#reset_mm_cache_url ⇒ Object
63 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 63 def reset_mm_cache_url = '/reset_mm_cache' |
#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object
92 93 94 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 92 def reset_prefix_cache(reset_running_requests: nil, reset_external: nil) connection.post(with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:), {}).body end |
#reset_prefix_cache_url ⇒ Object
62 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 62 def reset_prefix_cache_url = '/reset_prefix_cache' |
#sleep(level: 1) ⇒ Object
100 101 102 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 100 def sleep(level: 1) connection.post(with_query(sleep_url, level:), {}).body end |
#sleep_url ⇒ Object
64 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 64 def sleep_url = '/sleep' |
#stream_usage_supported? ⇒ Boolean
47 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 47 def stream_usage_supported? = true |
#version ⇒ Object
87 88 89 90 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 87 def version log.info { "fetching version from #{api_base}#{version_url}" } connection.get(version_url).body end |
#version_url ⇒ Object
61 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 61 def version_url = '/version' |
#wake_up(tags: nil) ⇒ Object
104 105 106 107 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 104 def wake_up(tags: nil) query = Array(tags).map { |tag| ['tags', tag] } connection.post(with_query(wake_up_url, query), {}).body end |
#wake_up_url ⇒ Object
65 |
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 65 def wake_up_url = '/wake_up' |