Module: Legion::LLM::Discovery::Vllm
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/discovery/vllm.rb
Class Method Summary
- .healthy? ⇒ Boolean
- .max_context(name) ⇒ Object
- .model_available?(name) ⇒ Boolean
- .model_names ⇒ Object
- .models ⇒ Object
- .refresh! ⇒ Object
- .reset! ⇒ Object
- .stale? ⇒ Boolean
Class Method Details
.healthy? ⇒ Boolean
33 34 35 36 37 38 39 |
# File 'lib/legion/llm/discovery/vllm.rb', line 33
# Issues a GET for '/health' on `health_connection` and reports whether the
# response was successful.
#
# Any StandardError raised along the way is passed to `handle_exception`
# (debug level) and reported as "not healthy" instead of propagating.
#
# @return [Boolean] the response's `success?` value, or false on error
def healthy?
  health_connection.get('/health').success?
rescue StandardError => error
  handle_exception(error, level: :debug, operation: 'llm.discovery.vllm.healthy')
  false
end
.max_context(name) ⇒ Object
28 29 30 31 |
# File 'lib/legion/llm/discovery/vllm.rb', line 28
# Looks up the `:max_model_len` value of the first entry in `models` whose
# `:id` equals +name+.
#
# @param name [Object] model id to look up (compared with `==`)
# @return [Object, nil] the entry's `:max_model_len`, or nil when no entry
#   matches or the matching entry has no such key
def max_context(name)
  models.find { |entry| entry[:id] == name }&.dig(:max_model_len)
end
.model_available?(name) ⇒ Boolean
24 25 26 |
# File 'lib/legion/llm/discovery/vllm.rb', line 24
# Reports whether +name+ is one of the ids returned by `model_names`.
#
# @param name [Object] model id to test (compared with `==`)
# @return [Boolean] true when +name+ is present in `model_names`
def model_available?(name)
  # Array#include? is the built-in membership test; it replaces the
  # hand-rolled `any? { |n| n == name }` scan.
  model_names.include?(name)
end
.model_names ⇒ Object
20 21 22 |
# File 'lib/legion/llm/discovery/vllm.rb', line 20
# Collects the `:id` value of every entry returned by `models`.
#
# @return [Array] one `:id` per model entry, in the same order as `models`
def model_names
  entries = models
  entries.map { |entry| entry[:id] }
end
.models ⇒ Object
15 16 17 18 |
# File 'lib/legion/llm/discovery/vllm.rb', line 15
# Returns the cached model list after calling `ensure_fresh`.
#
# NOTE(review): `ensure_fresh` is defined outside this listing; presumably it
# triggers `refresh!` when the cache is stale — confirm against its definition.
#
# @return [Array] the cached `@models`, or an empty array when none is set
def models
  ensure_fresh
  return @models if @models

  []
end
.refresh! ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/legion/llm/discovery/vllm.rb', line 41
# Re-fetches the model list with a GET for '/v1/models' on `connection`.
#
# On a successful response the body is parsed with `Legion::JSON.load` and
# `@models` is replaced by its `:data` entry (or an empty array when absent).
# On an unsuccessful response, or when any StandardError is raised, the
# previous `@models` is kept (initialised to an empty array if unset).
#
# `@last_refreshed_at` is stamped in the `ensure` clause, so it is updated on
# every path, including failures.
#
# @return [Object] varies by path: the result of the debug log call on
#   success, otherwise the retained `@models` array
def refresh!
  response = connection.get('/v1/models')

  # Failure path first: keep whatever list is already cached.
  unless response.success?
    log.warn "[llm][discovery][vllm] HTTP failure status=#{response.status}"
    return @models ||= []
  end

  payload = Legion::JSON.load(response.body)
  @models = payload[:data] || []
  log.debug "[llm][discovery][vllm] model list refreshed count=#{@models.size}"
rescue StandardError => error
  handle_exception(error, level: :warn, operation: 'llm.discovery.vllm.refresh')
  @models ||= []
ensure
  @last_refreshed_at = Time.now
end
.reset! ⇒ Object
58 59 60 61 |
# File 'lib/legion/llm/discovery/vllm.rb', line 58
# Clears the cached state: both `@models` and `@last_refreshed_at` are set
# to nil, which makes `stale?` report true on its next call.
#
# @return [nil]
def reset!
  @models = @last_refreshed_at = nil
end
.stale? ⇒ Boolean
63 64 65 66 67 68 |
# File 'lib/legion/llm/discovery/vllm.rb', line 63
# Reports whether the cached model list should be considered out of date.
#
# The cache is stale when it has never been refreshed (`@last_refreshed_at`
# is nil) or when more than `discovery_settings[:refresh_seconds]` seconds
# (60 when that setting is nil or false) have passed since the last refresh.
#
# @return [Boolean]
def stale?
  refreshed_at = @last_refreshed_at
  return true if refreshed_at.nil?

  max_age = discovery_settings[:refresh_seconds] || 60
  elapsed = Time.now - refreshed_at
  elapsed > max_age
end