Module: Legion::LLM::Discovery::Vllm

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/discovery/vllm.rb

Class Method Summary

Class Method Details

.healthy? ⇒ Boolean

Returns:

  • (Boolean)


33
34
35
36
37
38
39
# File 'lib/legion/llm/discovery/vllm.rb', line 33

# Probes the server by issuing a GET to '/health' through +health_connection+.
#
# Any StandardError raised along the way is handed to +handle_exception+
# (level :debug) and the probe is reported as failed rather than re-raised.
#
# @return [Boolean] the response's +success?+ value, or false after a rescued error
def healthy?
  health_connection.get('/health').success?
rescue StandardError => error
  handle_exception(error, level: :debug, operation: 'llm.discovery.vllm.healthy')
  false
end

.max_context(name) ⇒ Object



28
29
30
31
# File 'lib/legion/llm/discovery/vllm.rb', line 28

# Looks up the entry in +models+ whose :id equals +name+ and reads its
# :max_model_len value.
#
# @param name [Object] identifier compared against each entry's :id with ==
# @return [Object, nil] the entry's :max_model_len, or nil when no entry
#   matches or the matching entry has no such key
def max_context(name)
  entry = models.find { |candidate| candidate[:id] == name }
  return nil if entry.nil?

  entry.dig(:max_model_len)
end

.model_available?(name) ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
# File 'lib/legion/llm/discovery/vllm.rb', line 24

# Reports whether +name+ appears among the identifiers returned by
# +model_names+.
#
# @param name [Object] identifier to look for
# @return [Boolean] true when some listed identifier == +name+
def model_available?(name)
  model_names.include?(name)
end

.model_names ⇒ Object



20
21
22
# File 'lib/legion/llm/discovery/vllm.rb', line 20

# Collects the :id value of every entry returned by +models+, in order.
#
# @return [Array] one element per model entry (nil where an entry lacks :id)
def model_names
  entries = models
  entries.map { |entry| entry[:id] }
end

.models ⇒ Object



15
16
17
18
# File 'lib/legion/llm/discovery/vllm.rb', line 15

# Returns the cached model list after first calling +ensure_fresh+.
#
# @return [Object] the value of @models, or an empty array when @models
#   is nil/false
def models
  ensure_fresh
  return [] unless @models

  @models
end

.refresh! ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/legion/llm/discovery/vllm.rb', line 41

# Re-fetches the model list with a GET to '/v1/models' on +connection+.
#
# * On a successful response the body is parsed with Legion::JSON.load and
#   @models is replaced by the parsed :data value (or [] when absent).
# * On an unsuccessful response a warning is logged and the existing
#   @models is kept, defaulting to [] if nothing was cached yet.
# * Any StandardError is passed to +handle_exception+ (level :warn) and
#   @models is likewise kept or defaulted to [].
#
# @last_refreshed_at is stamped in the ensure clause, so it is updated on
# every one of those paths, including the failure ones.
def refresh!
  reply = connection.get('/v1/models')

  unless reply.success?
    log.warn "[llm][discovery][vllm] HTTP failure status=#{reply.status}"
    return @models ||= []
  end

  payload = Legion::JSON.load(reply.body)
  @models = payload[:data] || []
  log.debug "[llm][discovery][vllm] model list refreshed count=#{@models.size}"
rescue StandardError => error
  handle_exception(error, level: :warn, operation: 'llm.discovery.vllm.refresh')
  @models ||= []
ensure
  @last_refreshed_at = Time.now
end

.reset! ⇒ Object



58
59
60
61
# File 'lib/legion/llm/discovery/vllm.rb', line 58

# Clears the cached state by setting both @models and @last_refreshed_at
# back to nil.
#
# @return [nil]
def reset!
  @models = @last_refreshed_at = nil
end

.stale? ⇒ Boolean

Returns:

  • (Boolean)


63
64
65
66
67
68
# File 'lib/legion/llm/discovery/vllm.rb', line 63

# Tells whether the cached model list is due for a refresh.
#
# The cache counts as stale when no refresh timestamp has been recorded, or
# when more seconds have elapsed since @last_refreshed_at than the
# :refresh_seconds entry of +discovery_settings+ (60 when that entry is unset).
#
# @return [Boolean]
def stale?
  refreshed_at = @last_refreshed_at
  return true if refreshed_at.nil?

  max_age = discovery_settings[:refresh_seconds] || 60
  elapsed = Time.now - refreshed_at
  elapsed > max_age
end