Class: Ragnar::Embedder

Inherits:

Object

Object
Ragnar::Embedder

show all

Defined in: lib/ragnar/embedder.rb

Instance Attribute Summary collapse

#model ⇒ Object readonly

Returns the value of attribute model.
#model_name ⇒ Object readonly

Returns the value of attribute model_name.

Class Method Summary collapse

.available_models ⇒ Object
.model_info(model_name) ⇒ Object

Instance Method Summary collapse

#embed_batch(texts, show_progress: true) ⇒ Object
#embed_chunks(chunks, show_progress: true) ⇒ Object
#embed_text(text) ⇒ Object
#initialize(model_name: Ragnar::DEFAULT_EMBEDDING_MODEL) ⇒ Embedder constructor

A new instance of Embedder.

Constructor Details

#initialize(model_name: Ragnar::DEFAULT_EMBEDDING_MODEL) ⇒ `Embedder`

Returns a new instance of Embedder.

# File 'lib/ragnar/embedder.rb', line 5

# Creates an embedder bound to a single embedding model.
#
# Stores the requested model name, then stores whatever load_model returns
# for that name as the model.
#
# @param model_name [Object] identifier handed to load_model; defaults to
#   Ragnar::DEFAULT_EMBEDDING_MODEL
def initialize(model_name: Ragnar::DEFAULT_EMBEDDING_MODEL)
  @model_name = model_name
  @model = load_model(@model_name)
end

Instance Attribute Details

#model ⇒ `Object` (readonly)

Returns the value of attribute model.



3
4
5

# File 'lib/ragnar/embedder.rb', line 3

# Read-only accessor for the @model instance variable, which the constructor
# assigns from load_model(model_name).
#
# @return [Object] the current value of @model
def model
  @model
end

#model_name ⇒ `Object` (readonly)

Returns the value of attribute model_name.



3
4
5

# File 'lib/ragnar/embedder.rb', line 3

# Read-only accessor for the @model_name instance variable, which the
# constructor assigns from its model_name keyword argument.
#
# @return [Object] the current value of @model_name
def model_name
  @model_name
end

Class Method Details

.available_models ⇒ `Object`

# File 'lib/ragnar/embedder.rb', line 89

# Names of commonly used embedding models.
#
# The list is static; it could be expanded or made dynamic later.
# A new array is built on every call.
#
# @return [Array<String>] model identifiers in "organisation/model" form
def self.available_models
  %w[
    BAAI/bge-small-en-v1.5
    BAAI/bge-base-en-v1.5
    BAAI/bge-large-en-v1.5
    sentence-transformers/all-MiniLM-L6-v2
    sentence-transformers/all-mpnet-base-v2
    thenlper/gte-small
    thenlper/gte-base
    thenlper/gte-large
  ]
end

.model_info(model_name) ⇒ `Object`

# File 'lib/ragnar/embedder.rb', line 104

# Looks up static metadata for a known embedding model.
#
# Covers every model name advertised by available_models, including the
# thenlper/gte-* family, so callers that iterate that list always receive
# dimensions and max_tokens rather than the fallback.
#
# @param model_name [String] model identifier, e.g. "BAAI/bge-small-en-v1.5"
# @return [Hash] a hash with :dimensions, :max_tokens and :description for a
#   known model; for any other name, a hash containing only :description
def self.model_info(model_name)
  info = {
    "BAAI/bge-small-en-v1.5" => {
      dimensions: 384,
      max_tokens: 512,
      description: "Small, fast, good quality embeddings"
    },
    "BAAI/bge-base-en-v1.5" => {
      dimensions: 768,
      max_tokens: 512,
      description: "Balanced size and quality"
    },
    "BAAI/bge-large-en-v1.5" => {
      dimensions: 1024,
      max_tokens: 512,
      description: "Large, highest quality embeddings"
    },
    "sentence-transformers/all-MiniLM-L6-v2" => {
      dimensions: 384,
      max_tokens: 256,
      description: "Fast, lightweight model"
    },
    "sentence-transformers/all-mpnet-base-v2" => {
      dimensions: 768,
      max_tokens: 384,
      description: "High quality general purpose embeddings"
    },
    # GTE figures follow the published model cards (embedding dimension and
    # maximum sequence length).
    "thenlper/gte-small" => {
      dimensions: 384,
      max_tokens: 512,
      description: "Small general text embeddings (GTE) model"
    },
    "thenlper/gte-base" => {
      dimensions: 768,
      max_tokens: 512,
      description: "Base general text embeddings (GTE) model"
    },
    "thenlper/gte-large" => {
      dimensions: 1024,
      max_tokens: 512,
      description: "Large general text embeddings (GTE) model"
    }
  }

  # Unknown names get a description-only hash so callers can always read
  # :description without a nil check.
  info[model_name] || { description: "Model information not available" }
end

Instance Method Details

#embed_batch(texts, show_progress: true) ⇒ `Object`

# File 'lib/ragnar/embedder.rb', line 34

# Embeds each text in order by calling embed_text on it.
#
# A progress bar is created only when show_progress is true AND $stdout
# responds to :ioctl. When $stdout does not (for example a custom output
# object), no bar exists, so advancing must be nil-safe.
#
# @param texts [Enumerable] the inputs to embed; must also respond to #size
# @param show_progress [Boolean] whether to display a progress bar when possible
# @return [Array] one entry per input, in input order, each being whatever
#   embed_text returned for that input
def embed_batch(texts, show_progress: true)
  progressbar =
    if show_progress && $stdout.respond_to?(:ioctl)
      TTY::ProgressBar.new(
        "Generating embeddings [:bar] :percent :current/:total",
        total: texts.size,
        bar_format: :block,
        width: 30
      )
    end

  texts.map do |text|
    embedding = embed_text(text)
    # progressbar is nil whenever the bar was not created above.
    progressbar&.advance
    embedding
  end
end

#embed_chunks(chunks, show_progress: true) ⇒ `Object`

# File 'lib/ragnar/embedder.rb', line 55

# Extracts the text of every chunk and embeds the texts via embed_batch.
#
# Hash chunks contribute the value under :text, falling back to "text";
# any other chunk contributes its #to_s.
#
# @param chunks [Enumerable] hashes or arbitrary objects
# @param show_progress [Boolean] forwarded unchanged to embed_batch
# @return [Object] whatever embed_batch returns for the extracted texts
def embed_chunks(chunks, show_progress: true)
  text_of = lambda do |chunk|
    next chunk.to_s unless chunk.is_a?(Hash)

    chunk[:text] || chunk["text"]
  end

  embed_batch(chunks.map(&text_of), show_progress: show_progress)
end

#embed_text(text) ⇒ `Object`

# File 'lib/ragnar/embedder.rb', line 10

# Generates an embedding for one piece of text using @model.
#
# Nil, empty, and whitespace-only text yield nil without calling the model.
# Any StandardError raised along the way is reported on standard output and
# converted to a nil result.
#
# @param text [String, nil] the text to embed
# @return [Object, nil] the unwrapped embedding, or nil for blank input or
#   on error
def embed_text(text)
  blank = text.nil? || text.empty? || (text.respond_to?(:strip) && text.strip.empty?)
  return nil if blank

  # Candle's embedding call returns a tensor-like value rather than a plain array.
  raw = @model.embedding(text)
  return raw unless raw.respond_to?(:to_a)

  # The first to_a may give a one-element wrapper ([tensor]); when its first
  # element can itself be converted, that inner conversion is the float array.
  outer = raw.to_a
  wrapped = outer.is_a?(Array) && outer.first.respond_to?(:to_a)
  wrapped ? outer.first.to_a : outer
rescue StandardError => error
  puts "Error generating embedding: #{error.message}"
  nil
end

Class: Ragnar::Embedder

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model_name: Ragnar::DEFAULT_EMBEDDING_MODEL) ⇒ Embedder

Instance Attribute Details

#model ⇒ Object (readonly)

#model_name ⇒ Object (readonly)

Class Method Details

.available_models ⇒ Object

.model_info(model_name) ⇒ Object

Instance Method Details

#embed_batch(texts, show_progress: true) ⇒ Object

#embed_chunks(chunks, show_progress: true) ⇒ Object

#embed_text(text) ⇒ Object

#initialize(model_name: Ragnar::DEFAULT_EMBEDDING_MODEL) ⇒ `Embedder`

#model ⇒ `Object` (readonly)

#model_name ⇒ `Object` (readonly)

.available_models ⇒ `Object`

.model_info(model_name) ⇒ `Object`

#embed_batch(texts, show_progress: true) ⇒ `Object`

#embed_chunks(chunks, show_progress: true) ⇒ `Object`

#embed_text(text) ⇒ `Object`