Class: Legion::Extensions::Llm::Vllm::Provider

Inherits:
Provider
  • Object
show all
Includes:
Provider::OpenAICompatible, Logging::Helper
Defined in:
lib/legion/extensions/llm/vllm/provider.rb

Overview

vLLM provider implementation for the Legion::Extensions::Llm base provider contract.

Defined Under Namespace

Modules: Capabilities

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.capabilities ⇒ Object



23
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 23

# Capability module exposed by the vLLM provider.
#
# @return [Module] the +Capabilities+ constant
def capabilities
  Capabilities
end

.configuration_options ⇒ Object



21
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 21

# Names of the configuration keys this provider declares.
#
# @return [Array<Symbol>]
def configuration_options
  %i[vllm_api_base vllm_api_key]
end

.configuration_requirements ⇒ Object



22
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 22

# Configuration keys that must be present; this provider declares none.
#
# @return [Array] always empty
def configuration_requirements
  []
end

.default_tier ⇒ Object



20
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 20

# Default tier for this provider.
#
# @return [Symbol] +:direct+
def default_tier
  :direct
end

.default_transport ⇒ Object



19
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 19

# Default transport for this provider.
#
# @return [Symbol] +:http+
def default_transport
  :http
end

.local? ⇒ Boolean

Returns:

  • (Boolean)


18
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 18

# Whether this provider is flagged as local.
#
# @return [Boolean] always false
def local?
  false
end

.registry_publisher ⇒ Object



25
26
27
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 25

# Registry publisher shared through the +Vllm+ namespace.
#
# @return [Object] whatever +Vllm.registry_publisher+ returns
def registry_publisher = Vllm.registry_publisher

.slug ⇒ Object



17
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 17

# Short identifier for this provider.
#
# @return [String] +'vllm'+
def slug
  'vllm'
end

Instance Method Details

#api_base ⇒ Object



56
57
58
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 56

# Base URL used for vLLM requests.
#
# Resolution order: the configured +vllm_api_base+, then the +:endpoint+ entry
# of the provider settings, then +http://localhost:8000+. The chosen value is
# passed through +normalize_url+.
#
# @return [Object] the result of +normalize_url+
def api_base
  configured = config.vllm_api_base
  endpoint = configured || settings[:endpoint] || 'http://localhost:8000'
  normalize_url(endpoint)
end

#discover_offerings(live: false) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 94

# Builds offerings for every allowed model.
#
# With +live: true+ the model list is fetched via +list_models+ and remembered
# in +@cached_models+; otherwise the remembered list (possibly empty) is used.
# Models rejected by +model_allowed?+ are skipped, as are models for which
# +offering_from_model+ yields nil/false.
#
# @param live [Boolean] fetch a fresh model list instead of using the cache
# @return [Array] the built offerings; empty when a StandardError was handled
def discover_offerings(live: false, **)
  models = live ? (@cached_models = list_models) : Array(@cached_models)
  offerings = models.filter_map { |info| offering_from_model(info) if model_allowed?(info.id) }
  log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
  offerings
rescue StandardError => error
  # Discovery is best-effort: report the failure and fall back to no offerings.
  handle_exception(error, level: :warn, handled: true, operation: 'vllm.discover_offerings')
  []
end

#fetch_model_detail(model_name) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 141

# Looks up a single model's context window by re-querying the models endpoint.
#
# vLLM provides the context length via the models listing during discovery;
# this re-fetches that listing when the value is needed outside discovery.
#
# @param model_name [#to_s] identifier compared against each entry's 'id'
# @return [Hash{Symbol=>Object}, nil] +{ context_window: <max_model_len> }+, or
#   nil when the model is not listed, has no 'max_model_len', or the request
#   raised a StandardError (which is reported via +handle_exception+)
def fetch_model_detail(model_name)
  # Use the +connection+ accessor, as every other HTTP helper in this class
  # does, rather than reading the instance variable directly.
  response = connection.get(models_url)
  models = response.body.fetch('data', [])
  entry = models.find { |m| m['id'] == model_name.to_s }
  return nil unless entry

  ctx = entry['max_model_len']
  ctx ? { context_window: ctx } : nil
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
                      model: model_name)
  nil
end

#headers ⇒ Object



60
61
62
63
64
65
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 60

# Request headers for vLLM calls.
#
# Starts from +identity_headers+ and adds a Bearer +Authorization+ header when
# +vllm_api_key+ is configured with a non-empty value.
#
# @return [Object] the object returned by +identity_headers+, possibly extended
def headers
  result = identity_headers
  api_key = config.vllm_api_key
  return result if api_key.nil? || api_key.to_s.empty?

  result['Authorization'] = "Bearer #{api_key}"
  result
end

#health(live: false) ⇒ Object



74
75
76
77
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 74

# Queries the vLLM health endpoint.
#
# @param live [Boolean] only included in the log line here
# @return [Object] the body of the GET response for +health_url+
def health(live: false)
  log.info { "checking health live=#{live} at #{api_base}#{health_url}" }
  response = connection.get(health_url)
  response.body
end

#health_url ⇒ Object



67
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 67

# Path of the health endpoint.
#
# @return [String] +'/health'+
def health_url
  '/health'
end

#list_models ⇒ Object



86
87
88
89
90
91
92
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 86

# Lists models through the parent implementation and publishes them.
#
# After the parent returns, the models are handed to the class-level registry
# publisher together with a non-live readiness snapshot.
#
# @return [Object] the model collection returned by the parent implementation
def list_models
  log.info { "discovering models from #{api_base}#{models_url}" }
  discovered = super
  log.info { "discovered #{discovered.size} model(s) from vLLM" }
  self.class.registry_publisher.publish_models_async(discovered, readiness: readiness(live: false))
  discovered
end

#readiness(live: false) ⇒ Object



79
80
81
82
83
84
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 79

# Reports provider readiness and, for live checks, publishes the result.
#
# @param live [Boolean] when true, the readiness result is also handed to the
#   class-level registry publisher
# @return [Object] whatever the parent implementation returns
def readiness(live: false)
  log.info { "checking readiness live=#{live} at #{api_base}" }
  super.tap do |result|
    # Hand the computed readiness to the publisher so the registry reflects
    # the outcome of this check rather than receiving an empty notification.
    self.class.registry_publisher.publish_readiness_async(result) if live
  end
end

#reset_mm_cache ⇒ Object



125
126
127
128
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 125

# Asks vLLM to reset its multimodal cache.
#
# @return [Object] the body of the POST response for +reset_mm_cache_url+
def reset_mm_cache
  log.debug { 'resetting vLLM multimodal cache' }
  response = connection.post(reset_mm_cache_url, {})
  response.body
end

#reset_mm_cache_url ⇒ Object



70
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 70

# Path of the multimodal-cache reset endpoint.
#
# @return [String] +'/reset_mm_cache'+
def reset_mm_cache_url
  '/reset_mm_cache'
end

#reset_prefix_cache(reset_running_requests: nil, reset_external: nil) ⇒ Object



117
118
119
120
121
122
123
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 117

# Asks vLLM to reset its prefix cache.
#
# Both options are forwarded to +with_query+ as given (including nil).
#
# @param reset_running_requests [Object, nil] forwarded as a query parameter
# @param reset_external [Object, nil] forwarded as a query parameter
# @return [Object] the body of the POST response
def reset_prefix_cache(reset_running_requests: nil, reset_external: nil)
  log.debug do
    running = reset_running_requests.inspect
    external = reset_external.inspect
    "resetting vLLM prefix cache reset_running_requests=#{running} reset_external=#{external}"
  end
  client = connection
  target = with_query(reset_prefix_cache_url,
                      reset_running_requests: reset_running_requests,
                      reset_external: reset_external)
  client.post(target, {}).body
end

#reset_prefix_cache_url ⇒ Object



69
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 69

# Path of the prefix-cache reset endpoint.
#
# @return [String] +'/reset_prefix_cache'+
def reset_prefix_cache_url
  '/reset_prefix_cache'
end

#settings ⇒ Object



52
53
54
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 52

# Provider settings, taken from the +Vllm+ namespace defaults.
#
# @return [Object] whatever +Vllm.default_settings+ returns
def settings = Vllm.default_settings

#sleep(level: 1) ⇒ Object



130
131
132
133
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 130

# Puts the vLLM worker to sleep at the given level.
#
# NOTE(review): this method has the same name as Kernel#sleep, so an
# unqualified +sleep(seconds)+ call made on an instance resolves here and is
# rejected because no positional argument is accepted — use +Kernel.sleep+
# for delays inside this class.
#
# @param level [Object] forwarded to the endpoint as the +level+ query parameter
# @return [Object] the body of the POST response
def sleep(level: 1)
  log.debug { "putting vLLM worker to sleep level=#{level.inspect}" }
  client = connection
  target = with_query(sleep_url, level: level)
  client.post(target, {}).body
end

#sleep_url ⇒ Object



71
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 71

# Path of the sleep endpoint.
#
# @return [String] +'/sleep'+
def sleep_url
  '/sleep'
end

#stream_usage_supported? ⇒ Boolean

Returns:

  • (Boolean)


50
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 50

# Whether usage reporting on streamed responses is supported.
#
# @return [Boolean] always true
def stream_usage_supported?
  true
end

#version ⇒ Object



112
113
114
115
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 112

# Fetches version information from the vLLM server.
#
# @return [Object] the body of the GET response for +version_url+
def version
  log.info { "fetching version from #{api_base}#{version_url}" }
  response = connection.get(version_url)
  response.body
end

#version_url ⇒ Object



68
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 68

# Path of the version endpoint.
#
# @return [String] +'/version'+
def version_url
  '/version'
end

#wake_up(tags: nil) ⇒ Object



135
136
137
138
139
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 135

# Wakes a sleeping vLLM worker, optionally restricted to the given tags.
#
# Each tag becomes its own +['tags', tag]+ pair in the query handed to
# +with_query+; with no tags the query is empty.
#
# @param tags [Object, Array, nil] a single tag, a list of tags, or nil
# @return [Object] the body of the POST response
def wake_up(tags: nil)
  tag_list = Array(tags)
  log.debug { "waking vLLM worker tags=#{tag_list.inspect}" }
  pairs = tag_list.map { |tag| ['tags', tag] }
  client = connection
  client.post(with_query(wake_up_url, pairs), {}).body
end

#wake_up_url ⇒ Object



72
# File 'lib/legion/extensions/llm/vllm/provider.rb', line 72

# Path of the wake-up endpoint.
#
# @return [String] +'/wake_up'+
def wake_up_url
  '/wake_up'
end