Class: Legion::Extensions::Llm::Vllm::Provider

Inherits:
Provider
  • Object
show all
Includes:
Provider::OpenAICompatible, Logging::Helper
Defined in:
lib/legion/extensions/llm/vllm/provider.rb

Overview

vLLM provider implementation for the Legion::Extensions::Llm base provider contract.

Defined Under Namespace

Modules: Capabilities

Class Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.registry_publisher ⇒ Object



24
25
26
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 24

# Lazily builds and memoizes the RegistryPublisher held by the receiver.
#
# @return [RegistryPublisher] the same instance on every call once created
def registry_publisher
  return @registry_publisher if @registry_publisher

  @registry_publisher = RegistryPublisher.new
end

Class Method Details

.capabilities ⇒ Object



22
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 22

# Exposes the Capabilities module resolved from this definition's scope.
#
# @return [Module] the Capabilities constant
def capabilities
  Capabilities
end

.configuration_options ⇒ Object



20
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 20

# Names of the configuration keys this provider reads.
#
# @return [Array<Symbol>] :vllm_api_base and :vllm_api_key, in that order
def configuration_options
  [:vllm_api_base, :vllm_api_key]
end

.configuration_requirements ⇒ Object



21
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 21

# Configuration keys that must be present; none are listed for vLLM.
#
# @return [Array] a new empty array on every call
def configuration_requirements
  []
end

.local? ⇒ Boolean

Returns:

  • (Boolean)


19
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 19

# Always answers true.
# NOTE(review): presumably flags vLLM as a locally hosted provider — confirm
# against the base provider contract.
#
# @return [Boolean] true
def local?
  true
end

.slug ⇒ Object



18
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 18

# Short identifier string for this provider.
#
# @return [String] 'vllm'
def slug
  'vllm'
end

Instance Method Details

#api_base ⇒ Object



51
52
53
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 51

# Base URL used for requests to the vLLM server.
#
# Reads `vllm_api_base` from the config and falls back to the default when the
# value is unset (nil/false) or an empty string, mirroring how #headers treats
# an empty `vllm_api_key` as absent.
#
# @return [Object] the configured base, or 'http://localhost:8000' by default
def api_base
  configured = config.vllm_api_base
  # An empty value can never be a usable base URL, so treat it like "unset".
  configured = nil if configured.to_s.empty?
  configured || 'http://localhost:8000'
end

#headers ⇒ Object



55
56
57
58
59
60
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 55

# Request headers derived from the configured API key.
#
# @return [Hash] an empty hash when `vllm_api_key` is nil or stringifies to
#   an empty string; otherwise a single Bearer 'Authorization' entry
def headers
  api_key = config.vllm_api_key
  missing = api_key.nil? || api_key.to_s.empty?

  missing ? {} : { 'Authorization' => "Bearer #{api_key}" }
end

#health ⇒ Object



69
70
71
72
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 69

# Issues a GET to the health endpoint and hands back the response body.
#
# @return [Object] the `body` of the response returned by the connection
def health
  log.info { "checking health at #{api_base}#{health_url}" }
  response = connection.get(health_url)
  response.body
end

#health_url ⇒ Object



62
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 62

# Path of the health endpoint requested by #health.
#
# @return [String] '/health'
def health_url
  '/health'
end

#list_models ⇒ Object



81
82
83
84
85
86
87
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 81

# Lists models via the inherited implementation, then hands the result to the
# class-level registry publisher together with a non-live readiness snapshot.
#
# @return [Object] exactly what the inherited #list_models returned
def list_models
  log.info { "discovering models from #{api_base}#{models_url}" }

  discovered = super
  log.info { "discovered #{discovered.size} model(s) from vLLM" }

  publisher = self.class.registry_publisher
  publisher.publish_models_async(discovered, readiness: readiness(live: false))

  discovered
end

#readiness(live: false) ⇒ Object



74
75
76
77
78
79
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 74

# Reports readiness through the inherited implementation and, for live checks,
# forwards the computed result to the class-level registry publisher.
#
# @param live [Boolean] passed on to the inherited #readiness; the result is
#   published only when this is truthy
# @return [Object] exactly what the inherited #readiness returned
def readiness(live: false)
  log.info { "checking readiness live=#{live} at #{api_base}" }
  super.tap do |status|
    # Pass the tapped readiness value through so the publisher has something
    # to publish.
    # NOTE(review): assumes publish_readiness_async takes the readiness payload
    # as its single positional argument — confirm against RegistryPublisher.
    self.class.registry_publisher.publish_readiness_async(status) if live
  end
end

#reset_mm_cache ⇒ Object



98
99
100
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 98

# POSTs an empty payload to the reset_mm_cache endpoint.
#
# @return [Object] the `body` of the response returned by the connection
def reset_mm_cache
  response = connection.post(reset_mm_cache_url, {})
  response.body
end

#reset_mm_cache_url ⇒ Object



65
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 65

# Path of the endpoint targeted by #reset_mm_cache.
#
# @return [String] '/reset_mm_cache'
def reset_mm_cache_url
  '/reset_mm_cache'
end

#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object



94
95
96
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 94

# POSTs an empty payload to the reset_prefix_cache endpoint, with both keyword
# arguments handed to `with_query` alongside the endpoint path.
#
# @param reset_running_requests [Object, nil] forwarded to `with_query` as-is
# @param reset_external [Object, nil] forwarded to `with_query` as-is
# @return [Object] the `body` of the response returned by the connection
def reset_prefix_cache(reset_running_requests: nil, reset_external: nil)
  response = connection.post(
    with_query(
      reset_prefix_cache_url,
      reset_running_requests: reset_running_requests,
      reset_external: reset_external
    ),
    {}
  )
  response.body
end

#reset_prefix_cache_url ⇒ Object



64
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 64

# Path of the endpoint targeted by #reset_prefix_cache.
#
# @return [String] '/reset_prefix_cache'
def reset_prefix_cache_url
  '/reset_prefix_cache'
end

#sleep(level: 1) ⇒ Object



102
103
104
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 102

# POSTs an empty payload to the sleep endpoint, with `level` handed to
# `with_query` alongside the endpoint path.
#
# NOTE: on objects that define it, this method shadows Kernel#sleep, so a bare
# `sleep(1)` on such an object resolves here rather than pausing the thread.
#
# @param level [Object] forwarded to `with_query` as `level:` (default 1)
# @return [Object] the `body` of the response returned by the connection
def sleep(level: 1)
  response = connection.post(with_query(sleep_url, level: level), {})
  response.body
end

#sleep_url ⇒ Object



66
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 66

# Path of the endpoint targeted by #sleep.
#
# @return [String] '/sleep'
def sleep_url
  '/sleep'
end

#stream_usage_supported? ⇒ Boolean

Returns:

  • (Boolean)


49
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 49

# Always answers true.
# NOTE(review): presumably tells the OpenAI-compatible layer that usage data is
# available on streamed responses — confirm against Provider::OpenAICompatible.
#
# @return [Boolean] true
def stream_usage_supported?
  true
end

#version ⇒ Object



89
90
91
92
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 89

# Issues a GET to the version endpoint and hands back the response body.
#
# @return [Object] the `body` of the response returned by the connection
def version
  log.info { "fetching version from #{api_base}#{version_url}" }
  response = connection.get(version_url)
  response.body
end

#version_url ⇒ Object



63
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 63

# Path of the endpoint requested by #version.
#
# @return [String] '/version'
def version_url
  '/version'
end

#wake_up(tags: nil) ⇒ Object



106
107
108
109
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 106

# POSTs an empty payload to the wake_up endpoint, turning each given tag into
# a ['tags', tag] pair that is handed to `with_query` with the endpoint path.
#
# @param tags [Object, nil] one tag or a list of tags, coerced with Array();
#   nil yields no pairs
# @return [Object] the `body` of the response returned by the connection
def wake_up(tags: nil)
  tag_pairs = Array(tags).map { |name| ['tags', name] }
  response = connection.post(with_query(wake_up_url, tag_pairs), {})
  response.body
end

#wake_up_url ⇒ Object



67
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 67

# Path of the endpoint targeted by #wake_up.
#
# @return [String] '/wake_up'
def wake_up_url
  '/wake_up'
end