Module: Legion::LLM::Embeddings

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/embeddings.rb

Constant Summary collapse

# Default embedding model per provider. A nil value means the provider
# exposes no embedding endpoint here (e.g. Anthropic).
PROVIDER_EMBEDDING_MODELS = {
  bedrock:   'amazon.titan-embed-text-v2:0',
  anthropic: nil,
  openai:    'text-embedding-3-small',
  gemini:    'text-embedding-004',
  azure:     'text-embedding-3-small',
  ollama:    'mxbai-embed-large'
}.freeze

# Vector length the module normalizes embeddings toward.
TARGET_DIMENSION = 1024

# Per-model character budgets for Ollama inputs; models absent from this
# map fall back to OLLAMA_DEFAULT_CONTEXT_CHARS.
OLLAMA_CONTEXT_CHARS = {
  'mxbai-embed-large'      => 1400,
  'bge-large'              => 1400,
  'snowflake-arctic-embed' => 1400,
  'nomic-embed-text'       => 24_000
}.freeze

# Fallback character budget for Ollama models not listed above.
OLLAMA_DEFAULT_CONTEXT_CHARS = 1400

# Task-specific text prefixes some embedding models expect; keys are
# model names, values map task (:document / :query) to the prefix string.
PREFIX_REGISTRY = {
  'nomic-embed-text'  => { document: 'search_document: ', query: 'search_query: ' },
  'mxbai-embed-large' => { query: 'Represent this sentence for searching relevant passages: ' }
}.freeze

Class Method Summary collapse

Class Method Details

.default_model ⇒ Object



81
82
83
# File 'lib/legion/llm/embeddings.rb', line 81

# Returns the default embedding model for whichever provider is
# currently resolved (see resolve_provider / resolve_model).
def default_model
  provider = resolve_provider
  resolve_model(provider)
end

.generate(text:, model: nil, provider: nil, dimensions: nil, task: :document) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/legion/llm/embeddings.rb', line 36

# Generates an embedding for a single text.
#
# @param text [Object] input text (coerced via coerce_text_input)
# @param model [String, nil] embedding model; resolved from provider when nil
# @param provider [Symbol, String, nil] provider; resolved when nil
# @param dimensions [Integer, nil] requested vector dimensionality
# @param task [Symbol] :document or :query, selects the model's prefix
# @return [Hash] :vector/:model/:provider plus :dimensions/:tokens on
#   success, or :error on failure
def generate(text:, model: nil, provider: nil, dimensions: nil, task: :document)
  return { vector: nil, model: model, provider: provider, error: 'LLM not started' } unless LLM.started?

  provider ||= resolve_provider
  if provider_disabled?(provider)
    return { vector: nil, model: model, provider: provider, error: "provider #{provider} is disabled" }
  end

  model ||= resolve_model(provider)
  # Reassign text in stages so the rescue below forwards the most-prepared
  # form reached before any failure.
  text = coerce_text_input(text)
  text = apply_prefix(text, model: model, task: task)

  # Ollama and Azure bypass RubyLLM and use dedicated code paths.
  case provider&.to_sym
  when :ollama then return generate_ollama(text: text, model: model)
  when :azure  then return generate_azure(text: text, model: model, dimensions: dimensions)
  end

  response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
  vector   = apply_dimension_enforcement(response.vectors.first, provider)
  # apply_dimension_enforcement signals failure by returning an error String.
  return dimension_error(model, provider, vector) if vector.is_a?(String)

  { vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
rescue StandardError => e
  handle_exception(e, level: :warn)
  handle_embed_failure(e, text: text, failed_provider: provider, failed_model: model)
end

.generate_batch(texts:, model: nil, provider: nil, dimensions: nil, task: :document) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/legion/llm/embeddings.rb', line 59

# Generates embeddings for a list of texts in one call.
#
# @param texts [Array] inputs (each coerced via coerce_text_input)
# @param model [String, nil] embedding model; resolved from provider when nil
# @param provider [Symbol, String, nil] provider; resolved when nil
# @param dimensions [Integer, nil] requested vector dimensionality
# @param task [Symbol] :document or :query, selects the model's prefix
# @return [Array<Hash>] one result hash per input, each with :vector or :error
def generate_batch(texts:, model: nil, provider: nil, dimensions: nil, task: :document)
  return texts.map { { vector: nil, error: 'LLM not started' } } unless LLM.started?

  provider ||= resolve_provider
  disabled = disabled_batch_result(texts, provider, model)
  return disabled if disabled

  model ||= resolve_model(provider)
  texts = texts.map { |entry| apply_prefix(coerce_text_input(entry), model: model, task: task) }

  # Ollama and Azure bypass RubyLLM and use dedicated batch code paths.
  case provider&.to_sym
  when :ollama then return generate_ollama_batch(texts: texts, model: model)
  when :azure  then return generate_azure_batch(texts: texts, model: model, dimensions: dimensions)
  end

  response = RubyLLM.embed(texts, **build_opts(model, provider, dimensions))
  response.vectors.each_with_index.map { |vec, idx| build_batch_entry(vec, model, provider, idx) }
rescue StandardError => e
  handle_exception(e, level: :warn)
  # Mirror the per-item result shape so callers can zip results to inputs.
  texts.map { { vector: nil, model: model, provider: provider, error: e.message } }
end