Module: Legion::LLM::Call::Embeddings
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/call/embeddings.rb
Constant Summary collapse
# Frozen lookup table: embedding model name => { task => prefix string }.
# 'nomic-embed-text' defines prefixes for both :document and :query;
# 'mxbai-embed-large' defines a :query prefix only.
# NOTE(review): presumably the prefix is prepended to the text for the given
# task before embedding — the code that reads this table is not shown here; confirm.
- PREFIX_REGISTRY =
{ 'nomic-embed-text' => { document: 'search_document: ', query: 'search_query: ' }, 'mxbai-embed-large' => { query: 'Represent this sentence for searching relevant passages: ' } }.freeze
Class Method Summary collapse
-
.default_model ⇒ Object
G15: returns the configured embedding model (pinned).
-
.generate(text:, model: nil, **opts) ⇒ Object
G15: Embedding callers go through Router.request_lane(type: :embedding, …).
- .generate_batch(texts:, model: nil, dimensions: nil, task: :document) ⇒ Object
Class Method Details
.default_model ⇒ Object
G15: Returns the configured (pinned) embedding model, or nil if none is configured. Reads :default_model first and falls back to the deprecated :model alias.
170 171 172 |
# File 'lib/legion/llm/call/embeddings.rb', line 170
#
# Public accessor for the embedding model this module is configured to use.
# Delegates entirely to configured_default_model and returns whatever that
# helper returns.
def default_model
  configured_default_model
end
.generate(text:, model: nil, **opts) ⇒ Object
G15: Embedding callers go through Router.request_lane(type: :embedding, …). When a provider, instance, or model is configured, the request is strictly pinned to it — there is no cross-model failover, so vectors stay comparable. If the pinned lane is down, NoLaneAvailable is raised (400) instead of silently switching to a model with different dimensions.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# Generates a single embedding for +text+ on the pinned embedding lane.
#
# Flow, as shown in the listing below:
#   1. Returns not_started_result(model, nil) unless LLM.started?.
#   2. Uses +model+ when given, otherwise configured_default_model; raises
#      Legion::LLM::Errors::ConfigError when that is nil or an empty string.
#   3. Asks Legion::LLM::Router.request_lane for a lane of type :embedding,
#      filtered by the pinned model and by the configured provider/instance
#      when those are set (empty filter lists otherwise); raises
#      Legion::LLM::Errors::NoLaneAvailable when no lane comes back.
#   4. Coerces the text with coerce_text, prepares it into a collection of
#      texts (logged as "chunks"), and sends one text or the whole collection
#      to Dispatch.call with capability: :embed. The call is timed with the
#      monotonic clock and reported in milliseconds.
#   5. With more than one prepared text the result goes through
#      aggregate_vectors, weighted by each text's length; otherwise through
#      normalize_vector. enforce_dimensions runs only when enforce_dimension?
#      is true.
#
# Options read from +opts+: :dimensions (forwarded to Dispatch.call) and
# :task (defaults to :document).
#
# Returns a Hash with :vector, :model, :provider, :dimensions, :tokens and
# :chunks. NoLaneAvailable, ConfigError and Legion::LLM::LLMError are
# re-raised unchanged; any other StandardError is passed to handle_exception
# (level :warn) and turned into a Hash with vector: nil, the pinned model,
# provider: nil and an :error entry.
#
# NOTE(review): this listing appears to have lost identifiers during
# extraction — the method name in `prepared_texts = (text, provider: ...)` and
# the method called on `e` in `error: e.` are missing. Restore both from
# lib/legion/llm/call/embeddings.rb before reusing this code.
# File 'lib/legion/llm/call/embeddings.rb', line 23 def generate(text:, model: nil, **opts) return not_started_result(model, nil) unless LLM.started? pinned_model = model || configured_default_model pinned_provider = configured_provider pinned_instance = configured_instance if pinned_model.nil? || pinned_model.to_s.empty? raise Legion::LLM::Errors::ConfigError, 'no embedding model configured — set :llm, :embedding, :default_model in settings' end lane = Legion::LLM::Router.request_lane( type: :embedding, models: [pinned_model.to_s], providers: pinned_provider ? [pinned_provider.to_sym] : [], instances: pinned_instance ? [pinned_instance.to_sym] : [] ) if lane.nil? raise Legion::LLM::Errors::NoLaneAvailable.new( filters: { type: :embedding, models: [pinned_model], providers: pinned_provider ? [pinned_provider] : [], instances: pinned_instance ? [pinned_instance] : [] } ) end provider = lane[:provider_family] instance = lane[:instance_id] lane_model = lane[:model] text = coerce_text(text) dimensions = opts[:dimensions] task = opts[:task] || :document prepared_texts = (text, provider: provider, model: lane_model, task: task) dispatch_text = prepared_texts.one? ? prepared_texts.first : prepared_texts log.info("[llm][embed] action=generate provider=#{provider} instance=#{instance || 'default'} " \ "model=#{lane_model} task=#{task} text_chars=#{text.length} chunks=#{prepared_texts.size}") started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) response = Dispatch.call( provider: provider, instance: instance, capability: :embed, model: lane_model, text: dispatch_text, dimensions: dimensions ) elapsed = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - started_at) * 1000).round(1) vector = if prepared_texts.size > 1 aggregate_vectors(response[:result], weights: prepared_texts.map(&:length), model: lane_model, provider: provider) else normalize_vector(response[:result]) end vector = enforce_dimensions(vector) if enforce_dimension? 
tokens = extract_tokens(response) log.info("[llm][embed] action=generate.complete provider=#{provider} " \ "instance=#{instance || 'default'} model=#{lane_model} " \ "dimensions=#{vector&.size || 0} tokens=#{tokens} chunks=#{prepared_texts.size} duration_ms=#{elapsed}") { vector: vector, model: lane_model, provider: provider, dimensions: vector&.size || 0, tokens: tokens, chunks: prepared_texts.size } rescue Legion::LLM::Errors::NoLaneAvailable, Legion::LLM::Errors::ConfigError, Legion::LLM::LLMError raise rescue StandardError => e handle_exception(e, level: :warn, operation: 'llm.embeddings.generate') { vector: nil, model: pinned_model, provider: nil, error: e. } end |
.generate_batch(texts:, model: nil, dimensions: nil, task: :document) ⇒ Object
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# Generates embeddings for every entry of +texts+ on the pinned embedding lane.
#
# Flow, as shown in the listing below:
#   1. Unless LLM.started?, returns one { vector: nil, error: 'LLM not started' }
#      Hash per input text instead of raising.
#   2. Uses +model+ when given, otherwise configured_default_model; when that
#      is nil or an empty string, returns one
#      { vector: nil, error: 'no embedding model configured' } Hash per text.
#   3. Asks Legion::LLM::Router.request_lane for a lane of type :embedding with
#      the same model/provider/instance filters that generate uses, and raises
#      Legion::LLM::Errors::NoLaneAvailable when no lane comes back.
#   4. Coerces each text with coerce_text and prepares each one into a
#      collection of chunks. If any text yields more than one chunk, the whole
#      batch is handed to generate_chunked_batch (with the coerced, unprepared
#      texts) and its result is returned.
#   5. Otherwise the first (only) chunk of every text is sent in one
#      Dispatch.call with capability: :embed, timed with the monotonic clock,
#      and the response is shaped by normalize_batch.
#
# +dimensions+ is forwarded to Dispatch.call / generate_chunked_batch; +task+
# defaults to :document. Extra keyword arguments are accepted and ignored (`**`).
#
# Any StandardError is passed to handle_exception (level :warn) and converted
# into one Hash per text with vector: nil, the current model and provider
# locals, and an :error entry.
#
# NOTE(review): the NoLaneAvailable raised in step 3 sits under the blanket
# `rescue StandardError`. If that error class descends from StandardError it is
# swallowed here and reported as per-text error hashes, whereas generate
# re-raises it explicitly — confirm which behavior is intended.
# NOTE(review): this listing appears to have lost identifiers during
# extraction — the method name in `{ |t| (t, provider: ...) }` and the method
# called on `e` in `error: e.` are missing. Restore both from
# lib/legion/llm/call/embeddings.rb before reusing this code.
# File 'lib/legion/llm/call/embeddings.rb', line 102 def generate_batch(texts:, model: nil, dimensions: nil, task: :document, **) return texts.map { { vector: nil, error: 'LLM not started' } } unless LLM.started? pinned_model = model || configured_default_model pinned_provider = configured_provider pinned_instance = configured_instance return texts.map { { vector: nil, error: 'no embedding model configured' } } if pinned_model.nil? || pinned_model.to_s.empty? lane = Legion::LLM::Router.request_lane( type: :embedding, models: [pinned_model.to_s], providers: pinned_provider ? [pinned_provider.to_sym] : [], instances: pinned_instance ? [pinned_instance.to_sym] : [] ) if lane.nil? raise Legion::LLM::Errors::NoLaneAvailable.new( filters: { type: :embedding, models: [pinned_model], providers: pinned_provider ? [pinned_provider] : [], instances: pinned_instance ? [pinned_instance] : [] } ) end provider = lane[:provider_family] instance = lane[:instance_id] model = lane[:model] log.info("[llm][embed] action=generate_batch provider=#{provider} instance=#{instance || 'default'} " \ "model=#{model} count=#{texts.size} task=#{task}") raw_texts = texts.map { |t| coerce_text(t) } prepared_texts = raw_texts.map { |t| (t, provider: provider, model: model, task: task) } if prepared_texts.any? 
{ |chunks| chunks.size > 1 } return generate_chunked_batch( raw_texts, model: model, provider: provider, instance: instance, dimensions: dimensions, task: task ) end texts = prepared_texts.map(&:first) started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) response = Dispatch.call( provider: provider, instance: instance, capability: :embed, model: model, text: texts, dimensions: dimensions ) elapsed = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - started_at) * 1000).round(1) result = normalize_batch(response[:result], model, provider) log.info("[llm][embed] action=generate_batch.complete provider=#{provider} " \ "model=#{model} count=#{result.size} duration_ms=#{elapsed}") result rescue StandardError => e handle_exception(e, level: :warn, operation: 'llm.embeddings.generate_batch') texts.map { { vector: nil, model: model, provider: provider, error: e. } } end |