Class: Legion::Extensions::Llm::Vertex::Provider

Inherits:
Provider
  • Object
show all
Defined in:
lib/legion/extensions/llm/vertex/provider.rb

Overview

Google Cloud Vertex AI provider implementation for the Legion::Extensions::Llm contract.

Defined Under Namespace

Modules: Capabilities

Constant Summary collapse

STATIC_MODELS =

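Static catalog of the Vertex models known to this provider. Each entry gives the model ID, its short alias, publisher, and model family, plus an optional usage type or API mode. (The source comment attached to this constant is only a `rubocop:disable Metrics/ClassLength` directive.)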

[
  # Google-published Gemini models. They carry no :api key, so API_MODES gives
  # them the :generate_content default. The first entry also serves as the
  # fallback model for completion_url and stream_url, so ordering matters.
  { model: 'gemini-2.5-flash', alias: 'gemini-flash', publisher: 'google', model_family: :gemini },
  { model: 'gemini-2.5-pro', alias: 'gemini-pro', publisher: 'google', model_family: :gemini },
  # Google-published models flagged with usage_type: :embedding.
  { model: 'gemini-embedding-001', alias: 'gemini-embedding', publisher: 'google',
    model_family: :gemini, usage_type: :embedding },
  { model: 'text-embedding-005', alias: 'text-embedding', publisher: 'google',
    model_family: :gemini, usage_type: :embedding },
  # Models from other publishers, all marked api: :raw_predict.
  { model: 'claude-sonnet-4-5', alias: 'claude-sonnet', publisher: 'anthropic',
    model_family: :anthropic, api: :raw_predict },
  { model: 'mistral-medium-3', alias: 'mistral-medium', publisher: 'mistralai',
    model_family: :mistral, api: :raw_predict },
  { model: 'llama-4-maverick', alias: 'llama-4-maverick', publisher: 'meta',
    model_family: :meta, api: :raw_predict }
].freeze
# Lookup tables derived from STATIC_MODELS. Every table is frozen, and every
# required key is read with #fetch so a malformed catalog entry fails at load time.

# alias => canonical model ID
ALIASES = STATIC_MODELS.to_h { |spec| [spec.fetch(:alias), spec.fetch(:model)] }.freeze
# model ID => publisher
PUBLISHERS = STATIC_MODELS.to_h { |spec| [spec.fetch(:model), spec.fetch(:publisher)] }.freeze
# model ID => API mode; entries without an :api key default to :generate_content
API_MODES = STATIC_MODELS.to_h { |spec| [spec.fetch(:model), spec.fetch(:api, :generate_content)] }.freeze
# model ID => model family
MODEL_FAMILIES = STATIC_MODELS.to_h { |spec| [spec.fetch(:model), spec.fetch(:model_family)] }.freeze

Class Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.registry_publisherObject



56
57
58
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 56

# Returns the registry publisher for the :vertex provider family, building it
# on first use and reusing the same instance afterwards.
#
# @return [Legion::Extensions::Llm::RegistryPublisher] the memoized publisher
def registry_publisher
  return @registry_publisher if @registry_publisher

  @registry_publisher = Legion::Extensions::Llm::RegistryPublisher.new(provider_family: :vertex)
end

Class Method Details

.capabilitiesObject



54
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 54

# @return [Module] the Capabilities module defined under this provider
def capabilities
  Capabilities
end

.configuration_optionsObject



41
42
43
44
45
46
47
48
49
50
51
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 41

# Lists the configuration keys recognized by this provider.
#
# @return [Array<Symbol>] a fresh array on every call
def configuration_options
  %i[vertex_project vertex_location vertex_api_base vertex_access_token
     vertex_credentials vertex_model_aliases vertex_discovery_live]
end

.configuration_requirementsObject



53
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 53

# No configuration key is mandatory for this provider.
#
# @return [Array] always empty
def configuration_requirements
  []
end

.default_tierObject



39
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 39

# @return [Symbol] the tier this provider reports by default (:cloud)
def default_tier
  :cloud
end

.default_transportObject



38
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 38

# @return [Symbol] the transport this provider reports by default (:http)
def default_transport
  :http
end

.resolve_model_id(model_id, config: nil) ⇒ Object



60
61
62
63
64
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 60

# Resolves a model alias to its canonical Vertex model ID.
#
# The built-in ALIASES table is consulted together with
# +config.vertex_model_aliases+ (when the config object exposes that reader);
# configured entries take precedence over built-in ones. Names that match no
# alias pass through unchanged.
#
# @param model_id [#to_s] alias or model ID, String or Symbol
# @param config [Object, nil] optional configuration object
# @return [String] the resolved model ID, always a String
def resolve_model_id(model_id, config: nil)
  requested = model_id.to_s
  overrides = config.respond_to?(:vertex_model_aliases) ? config.vertex_model_aliases : nil
  # Stringify override keys so Symbol-keyed configuration matches the String lookup.
  aliases = ALIASES.merge((overrides || {}).transform_keys(&:to_s))
  # Normalize the result as well: a Symbol target coming from configuration would
  # otherwise leak out and miss the String-keyed PUBLISHERS / API_MODES / MODEL_FAMILIES tables.
  aliases.fetch(requested, requested).to_s
end

.slugObject



37
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 37

# @return [String] the provider slug, 'vertex'
def slug
  'vertex'
end

Instance Method Details

#api_baseObject



88
89
90
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 88

# Base URL for Vertex API requests.
#
# @return [String] +config.vertex_api_base+ when set, otherwise the regional
#   aiplatform endpoint built from #location
def api_base
  configured = config.vertex_api_base
  return configured if configured

  "https://#{location}-aiplatform.googleapis.com/v1"
end

#chat(messages:, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {}, **_provider_options) ⇒ Object



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 190

# Sends a non-streaming chat request and returns the parsed response.
#
# The resolved model is remembered in @model, which #completion_url and
# #stream_url read afterwards.
#
# @param messages [#size] conversation messages
# @param model [Object] model alias or ID, resolved through #model_id
# @param temperature [Object, nil] forwarded to #chat_payload
# @param max_tokens [Object, nil] forwarded to #chat_payload
# @param tools [Object] forwarded to #chat_payload
# @param tool_prefs [Object, nil] forwarded to #chat_payload
# @param params [Hash] merged into the generated payload via Utils.deep_merge
# @return [Object] whatever #parse_chat_response returns
def chat(messages:, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {},
         **_provider_options)
  resolved = model_id(model)
  log.info { "chat model=#{resolved} messages=#{messages.size}" }
  @model = resolved

  generated = chat_payload(
    messages,
    model: resolved, temperature: temperature, max_tokens: max_tokens,
    tools: tools, tool_prefs: tool_prefs, stream: false
  )
  request_body = Utils.deep_merge(generated, params)
  reply = connection.post(chat_url(resolved, stream: false), request_body)
  parse_chat_response(reply, model: resolved)
end

#complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil) ⇒ Object

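Unified completion entry point: merges the generation settings into `params[:generationConfig]`, then streams when a block is given and otherwise performs a single chat request. (The source comment attached to this method is only a `rubocop:disable Lint/UnusedMethodArgument` directive.)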



270
271
272
273
274
275
276
277
278
279
280
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 270

# Completion entry point that dispatches to #stream or #chat.
#
# A copy of +params+ gets its :generationConfig entry merged with the result of
# #generation_config; the caller's hash is not reassigned. With a block the
# request goes to #stream (the block is forwarded), otherwise to #chat.
# +headers+ is accepted but not used here.
#
# @param messages [Object] conversation messages
# @param tools [Object] forwarded unchanged
# @param temperature [Object] forwarded, and also passed to #generation_config
# @param model [Object] model alias or ID
# @param params [Hash] extra payload entries
# @param schema [Object, nil] passed to #generation_config
# @param thinking [Object, nil] passed to #generation_config
# @param tool_prefs [Object, nil] forwarded unchanged
# @return [Object] the result of #stream or #chat
def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, # rubocop:disable Lint/UnusedMethodArgument
             tool_prefs: nil, &)
  request_params = params.dup
  existing_config = request_params[:generationConfig] || {}
  request_params[:generationConfig] = Utils.deep_merge(existing_config,
                                                       generation_config(temperature, schema, thinking))

  unless block_given?
    return chat(messages: messages, model: model, temperature: temperature, tools: tools,
                tool_prefs: tool_prefs, params: request_params)
  end

  stream(messages: messages, model: model, temperature: temperature, tools: tools,
         tool_prefs: tool_prefs, params: request_params, &)
end

#completion_urlObject



101
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 101

# generateContent URL for the current model.
#
# Uses @model (set by #chat and #stream) and falls back to the first
# STATIC_MODELS entry when no model has been recorded yet.
#
# @return [Object] the result of #generate_content_url
def completion_url
  target = @model || STATIC_MODELS.first.fetch(:model)
  generate_content_url(model: target)
end

#count_tokens(messages:, model:, params: {}) ⇒ Object



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 227

# Counts input tokens for a message list.
#
# Models for which #generate_content_model? is false are not sent to the API;
# a hash with +supported: false+ and a reason is returned instead.
#
# @param messages [Object] messages, formatted through #format_messages
# @param model [Object] model alias or ID, resolved through #model_id
# @param params [Hash] merged into the request payload via Utils.deep_merge
# @return [Hash] +{ input_tokens:, raw: }+ on success, or the unsupported marker hash
def count_tokens(messages:, model:, params: {})
  resolved = model_id(model)
  log.info { "count_tokens model=#{resolved}" }

  if generate_content_model?(resolved)
    request_body = Utils.deep_merge({ contents: format_messages(messages) }, params)
    reply = connection.post(count_tokens_url(model: resolved), request_body)
    { input_tokens: reply.body['totalTokens'], raw: reply.body }
  else
    {
      supported: false,
      provider: :vertex,
      model: resource_name(resolved),
      reason: 'Vertex countTokens is standardized for generateContent publisher models'
    }
  end
end

#count_tokens_url(model:) ⇒ Object



103
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 103

# @param model [Object] model ID passed to #publisher_model_path
# @return [String] the model path with the +:countTokens+ action appended
def count_tokens_url(model:)
  model_path = publisher_model_path(model)
  "#{model_path}:countTokens"
end

#default_publisherObject



99
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 99

# @return [Object] +settings[:publisher]+ when present, otherwise 'google'
def default_publisher
  configured = settings[:publisher]
  configured || 'google'
end

#discover_offerings(live: false, **filters) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 127

# Returns the offerings available from Vertex.
#
# Without +live+ the call is answered by #static_offerings. With +live+ the
# models endpoint is queried, each returned model is converted with
# #offering_from_live_model, those rejected by #model_allowed? are dropped, and
# the survivors are published to the registry before being returned.
#
# @param live [Boolean] query the API instead of the static catalog
# @param filters [Hash] forwarded to #static_offerings (unused on the live path)
# @return [Object] the static offerings, or an Array of live offerings
def discover_offerings(live: false, **filters)
  log.info { "discovering offerings live=#{live} project=#{project} location=#{location}" }
  return static_offerings(**filters) unless live

  response = connection.get(models_url)
  listed = response.body['publisherModels'] || response.body['models'] || []
  offerings = listed.filter_map do |entry|
    candidate = offering_from_live_model(entry)
    # An offering may be an object with a #model reader or a Hash-like with :model / :id.
    identifier = candidate.respond_to?(:model) ? candidate.model : (candidate[:model] || candidate[:id])
    candidate if model_allowed?(identifier.to_s)
  end
  log.info { "discovered #{offerings.size} live offering(s) from Vertex" }

  infos = offerings.map { |offering| model_info_from_offering(offering) }
  self.class.registry_publisher.publish_models_async(infos, readiness: readiness(live: false))
  offerings
end

#embed(text:, model:, dimensions: nil, task_type: nil, title: nil, params: {}, **_provider_options) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 248

# Requests embeddings for one or more inputs.
#
# @param text [Object] a single input or a list of inputs (wrapped with Array())
# @param model [Object] model alias or ID, resolved through #model_id
# @param dimensions [Object, nil] sent as +outputDimensionality+ when not nil
# @param task_type [Object, nil] passed to #embedding_instance
# @param title [Object, nil] passed to #embedding_instance
# @param params [Hash] merged into the request payload via Utils.deep_merge
# @return [Object] whatever #parse_embedding_response returns
# @raise [NotImplementedError] when Capabilities.embeddings? is false for the model
def embed(text:, model:, dimensions: nil, task_type: nil, title: nil, params: {}, **_provider_options)
  resolved = model_id(model)
  log.info { "embed model=#{resolved} inputs=#{Array(text).size}" }

  supported = Capabilities.embeddings?(resolved)
  raise NotImplementedError, "Vertex embedding payload for #{resolved} is not standardized" unless supported

  generated = {
    instances: Array(text).map { |input| embedding_instance(input, task_type: task_type, title: title) },
    parameters: { outputDimensionality: dimensions }.compact
  }
  request_body = Utils.deep_merge(generated, params)
  reply = connection.post(embedding_url(model: resolved), request_body)
  parse_embedding_response(reply, model: resolved)
end

#embedding_url(model:) ⇒ Object



104
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 104

# @param model [Object] model ID passed to #publisher_model_path
# @return [String] the model path with the +:predict+ action appended
def embedding_url(model:)
  model_path = publisher_model_path(model)
  "#{model_path}:predict"
end

#generate_content_url(model:) ⇒ Object



106
107
108
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 106

# @param model [Object] model ID passed to #publisher_model_path
# @return [String] the model path with the +:generateContent+ action appended
def generate_content_url(model:)
  model_path = publisher_model_path(model)
  "#{model_path}:generateContent"
end

#headersObject



92
93
94
95
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 92

# Request headers: the identity headers plus Authorization and Content-Type.
#
# Authorization is omitted entirely when #bearer_token returns nil.
#
# @return [Hash] merged header hash
def headers
  identity = identity_headers
  transport = {
    'Authorization' => bearer_token,
    'Content-Type' => 'application/json; charset=utf-8'
  }
  identity.merge(transport.compact)
end

#health(live: false) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 163

# Reports provider health as a hash.
#
# Without +live+ only the local configuration snapshot is returned
# (+checked: false+). With +live+ the models endpoint is requested; any
# StandardError is reported through #handle_exception and turned into a
# +ready: false+ result instead of propagating.
#
# @param live [Boolean] perform a real request against #models_url
# @return [Hash] health snapshot; on failure it also carries +:error+ (exception
#   class name) and +:message+
def health(live: false)
  log.info { "checking health live=#{live} project=#{project} location=#{location}" }
  baseline = {
    provider: :vertex,
    project: project,
    location: location,
    configured: configured?,
    ready: configured?,
    live: live,
    credentials: credential_source
  }
  return baseline.merge(checked: false) unless live

  connection.get(models_url)
  baseline.merge(checked: true)
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'vertex.provider.health')
  failure = { ready: false, error: e.class.name, message: e.message }
  # `baseline` is still nil when the exception was raised while the snapshot was
  # being built (e.g. by #project or #credential_source). Merging into nil would
  # raise NoMethodError here and hide the real error, so fall back to a minimal
  # report; no live probe was attempted in that case, hence checked: false.
  return { provider: :vertex, live: live, checked: false }.merge(failure) if baseline.nil?

  baseline.merge(checked: true).merge(failure)
end

#list_modelsObject



119
120
121
122
123
124
125
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 119

# Lists the models from the static catalog and publishes them to the registry.
#
# Any keyword arguments are accepted and ignored.
#
# @return [Array] one #model_info_from_static result per STATIC_MODELS entry
def list_models(**)
  log.info { 'listing available Vertex models from static catalog' }
  catalog = STATIC_MODELS.map { |entry| model_info_from_static(entry) }

  log.info { "discovered #{catalog.size} Vertex model(s); publishing to registry" }
  self.class.registry_publisher.publish_models_async(catalog, readiness: readiness(live: false))
  catalog
end

#locationObject



98
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 98

# Region used for requests.
#
# @return [Object] the first present value of +config.vertex_location+,
#   +settings[:location]+, or the 'us-central1' default
def location
  config.vertex_location || settings[:location] || 'us-central1'
end

#models_urlObject



100
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 100

# @return [Object] the URL used to list models; identical to #publisher_parent
def models_url
  publisher_parent
end

#offering_for(model:, model_family: nil, instance_id: :default, **metadata) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 146

# Builds an offering for a model.
#
# Publisher, model family, usage type and API mode may be supplied through the
# extra keyword arguments; each one that is missing is looked up from the model
# ID. Whatever remains of the extra keywords is passed on as offering metadata.
#
# @param model [Object] model alias or ID, resolved through #model_id
# @param model_family [Object, nil] explicit family; takes precedence over lookups
# @param instance_id [Object] forwarded to #build_offering
# @param metadata [Hash] optional :publisher, :usage_type, :api overrides plus free-form metadata
# @return [Object] whatever #build_offering returns
def offering_for(model:, model_family: nil, instance_id: :default, **metadata)
  resolved = model_id(model)
  # **metadata is a fresh Hash per call, so deleting keys never touches caller data.
  publisher = metadata.delete(:publisher) || publisher_for(resolved)
  family = model_family || metadata.delete(:model_family) || model_family_for(resolved, publisher)

  build_offering(
    model: resource_name(resolved, publisher: publisher),
    alias_name: alias_for(resolved),
    model_family: family,
    instance_id: instance_id,
    publisher: publisher,
    usage_type: metadata.delete(:usage_type) || usage_type_for(resolved),
    api: metadata.delete(:api) || api_for(resolved),
    metadata: metadata
  )
end

#projectObject



97
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 97

# Google Cloud project used for requests.
#
# @return [Object, nil] the first present value of +config.vertex_project+,
#   +settings[:project]+, or the GOOGLE_CLOUD_PROJECT environment variable
def project
  config.vertex_project || settings[:project] || ENV.fetch('GOOGLE_CLOUD_PROJECT', nil)
end

#raw_predict_url(model:, stream: false) ⇒ Object



114
115
116
117
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 114

# @param model [Object] model ID passed to #publisher_model_path
# @param stream [Boolean] choose the +streamRawPredict+ action instead of +rawPredict+
# @return [String] the model path with the chosen action appended
def raw_predict_url(model:, stream: false)
  model_path = publisher_model_path(model)
  return "#{model_path}:streamRawPredict" if stream

  "#{model_path}:rawPredict"
end

#readiness(live: false) ⇒ Object



183
184
185
186
187
188
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 183

# Builds the readiness report: the #health result extended with locality flags,
# the API base URL and the endpoint manifest.
#
# When +live+ is true the finished report is also handed to the registry
# publisher; otherwise nothing is published.
#
# @param live [Boolean] forwarded to #health; also enables publishing
# @return [Hash] the readiness report
def readiness(live: false)
  health(live: live).merge(local: false, remote: true, api_base: api_base,
                           endpoints: endpoint_manifest).tap do |metadata|
    # The report itself is the payload; calling the publisher without it would publish nothing useful.
    self.class.registry_publisher.publish_readiness_async(metadata) if live
  end
end

#settingsObject



84
85
86
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 84

# @return [Object] the extension-wide defaults from Vertex.default_settings
def settings = Vertex.default_settings

#stream(messages:, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {}, **_provider_options) {|chunk| ... } ⇒ Object

Yields:

  • (chunk)


209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 209

# Sends a chat request in streaming mode and returns the parsed response.
#
# Once the response has been received, a single chunk is built from the whole
# response body and yielded — only if a block was given and the chunk has
# content. The resolved model is remembered in @model.
#
# @param messages [#size] conversation messages
# @param model [Object] model alias or ID, resolved through #model_id
# @param temperature [Object, nil] forwarded to #chat_payload
# @param max_tokens [Object, nil] forwarded to #chat_payload
# @param tools [Object] forwarded to #chat_payload
# @param tool_prefs [Object, nil] forwarded to #chat_payload
# @param params [Hash] merged into the generated payload via Utils.deep_merge
# @yield [chunk] the chunk built from the response body
# @return [Object] whatever #parse_chat_response returns
def stream(messages:, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {},
           **_provider_options)
  resolved = model_id(model)
  log.info { "stream model=#{resolved} messages=#{messages.size}" }
  @model = resolved

  generated = chat_payload(
    messages,
    model: resolved, temperature: temperature, max_tokens: max_tokens,
    tools: tools, tool_prefs: tool_prefs, stream: true
  )
  request_body = Utils.deep_merge(generated, params)
  reply = connection.post(chat_url(resolved, stream: true), request_body)

  piece = build_chunk(reply.body, model: resolved)
  yield piece if block_given? && piece.content
  parse_chat_response(reply, model: resolved)
end

#stream_chat(messages:, model:, tools: {}, temperature: nil, max_tokens: nil, params: {}, tool_prefs: nil, **provider_options) ⇒ Object



222
223
224
225
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 222

# Alias-style entry point for #stream: forwards every argument, including any
# extra provider options and the block, unchanged.
#
# @return [Object] the result of #stream
def stream_chat(messages:, model:, tools: {}, temperature: nil, max_tokens: nil, params: {}, tool_prefs: nil,
                **provider_options, &)
  forwarded = {
    messages: messages, model: model, temperature: temperature, max_tokens: max_tokens,
    tools: tools, tool_prefs: tool_prefs, params: params
  }
  stream(**forwarded, **provider_options, &)
end

#stream_generate_content_url(model:) ⇒ Object



110
111
112
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 110

# @param model [Object] model ID passed to #publisher_model_path
# @return [String] the model path with +:streamGenerateContent?alt=sse+ appended
def stream_generate_content_url(model:)
  model_path = publisher_model_path(model)
  "#{model_path}:streamGenerateContent?alt=sse"
end

#stream_urlObject



102
# File 'lib/legion/extensions/llm/vertex/provider.rb', line 102

# streamGenerateContent URL for the current model.
#
# Uses @model (set by #chat and #stream) and falls back to the first
# STATIC_MODELS entry when no model has been recorded yet.
#
# @return [Object] the result of #stream_generate_content_url
def stream_url
  target = @model || STATIC_MODELS.first.fetch(:model)
  stream_generate_content_url(model: target)
end