Class: Leann::Embedding::OpenAI

Inherits:
Base
  • Object
show all
Defined in:
lib/leann/embedding/openai.rb

Overview

OpenAI Embeddings API provider

Examples:

provider = Leann::Embedding::OpenAI.new(model: "text-embedding-3-small")
embeddings = provider.compute(["Hello", "World"])

Constant Summary collapse

BASE_URL =
"https://api.openai.com/v1/embeddings"
MAX_BATCH_SIZE =
2048
MAX_RETRIES =
3
RETRY_DELAY =
1.0
DIMENSIONS =

Model dimensions lookup

{
  "text-embedding-3-small" => 1536,
  "text-embedding-3-large" => 3072,
  "text-embedding-ada-002" => 1536
}.freeze

Instance Attribute Summary

Attributes inherited from Base

#dimensions, #model

Instance Method Summary collapse

Methods inherited from Base

#compute_one

Constructor Details

#initialize(model: "text-embedding-3-small", api_key: nil, base_url: nil) ⇒ OpenAI

Returns a new instance of OpenAI.

Parameters:

  • model (String) (defaults to: "text-embedding-3-small")

    OpenAI embedding model name

  • api_key (String, nil) (defaults to: nil)

    API key (when nil, falls back to Leann.configuration.openai_api_key, then ENV["OPENAI_API_KEY"])

  • base_url (String, nil) (defaults to: nil)

    Custom base URL (when nil, falls back to Leann.configuration.openai_base_url, then BASE_URL)



32
33
34
35
36
37
38
39
40
# File 'lib/leann/embedding/openai.rb', line 32

# Set up the provider for one OpenAI embedding model.
#
# Each credential/endpoint setting is resolved in order: the explicit
# keyword argument, then the Leann configuration, then a final fallback
# (the OPENAI_API_KEY environment variable for the key, BASE_URL for the
# endpoint). Later sources are only consulted when earlier ones are nil/false.
#
# @param model [String] OpenAI embedding model name
# @param api_key [String, nil] API key override
# @param base_url [String, nil] endpoint override
def initialize(model: "text-embedding-3-small", api_key: nil, base_url: nil)
  super(model: model)

  @api_key = api_key
  @api_key ||= Leann.configuration.openai_api_key
  @api_key ||= ENV["OPENAI_API_KEY"]

  @base_url = base_url
  @base_url ||= Leann.configuration.openai_base_url
  @base_url ||= BASE_URL

  # nil when the model is not listed in the DIMENSIONS lookup table.
  @dimensions = DIMENSIONS[model]

  validate_configuration!
end

Instance Method Details

#compute(texts) ⇒ Array<Array<Float>>

Compute embeddings for texts

Parameters:

  • texts (Array<String>)

Returns:

  • (Array<Array<Float>>)


46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/leann/embedding/openai.rb', line 46

# Compute embeddings for texts, processing them in batches of at most
# MAX_BATCH_SIZE via in_batches/compute_batch.
#
# Progress output (one "." per batch followed by a " Done! (N embeddings)"
# line) is written to stdout only when the input holds at least
# MAX_BATCH_SIZE texts. Smaller calls print nothing, so they no longer leave
# a lone "." without a trailing newline on stdout.
#
# @param texts [Array<String>]
# @return [Array<Array<Float>>] the results of compute_batch for each batch,
#   concatenated in batch order; [] when texts is empty
def compute(texts)
  return [] if texts.empty?

  # Decide once so the per-batch dot and the final summary always agree.
  show_progress = texts.size >= MAX_BATCH_SIZE
  all_embeddings = []

  in_batches(texts, MAX_BATCH_SIZE) do |batch|
    all_embeddings.concat(compute_batch(batch))
    print "." if show_progress
  end

  puts " Done! (#{all_embeddings.size} embeddings)" if show_progress

  all_embeddings
end