Module: Kotoshu::Embeddings

Defined in:
lib/kotoshu/embeddings.rb,
lib/kotoshu/embeddings/similarity_search.rb

Defined Under Namespace

Modules: Protocols Classes: SimilaritySearch

Constant Summary collapse

DEFAULT_DIMENSION =

Constants

300
MAX_VOCABULARY_SIZE =
100_000
VERSION =
'2.0.0'
Vocabulary =

Expose classes

::Vocabulary
OnnxRuntimeModel =
::OnnxRuntimeModel
SimilarityEngine =
::SimilarityEngine
Search =
::Search
EmbeddingPipeline =
::EmbeddingPipeline
LruCache =
::LruCache
Registry =
::EmbeddingRegistry

Class Method Summary collapse

Class Method Details

.create_pipeline(vocabulary:, model:, preload: false, pre_normalize: false) ⇒ EmbeddingPipeline

Create a custom embedding pipeline

Parameters:

  • vocabulary (Vocabulary)

    Vocabulary instance

  • model (EmbeddingModel)

    Model instance

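  • preload (Boolean) (defaults to: false)

    Preload embeddings

  • pre_normalize (Boolean) (defaults to: false)

    Forwarded unchanged to EmbeddingPipeline.new

Returns:

  • (EmbeddingPipeline)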


88
89
90
91
92
93
94
95
# File 'lib/kotoshu/embeddings.rb', line 88

# Build an EmbeddingPipeline from an explicitly supplied vocabulary and model.
#
# Every keyword is handed to EmbeddingPipeline.new under the same name.
#
# @param vocabulary [Vocabulary] vocabulary instance
# @param model [EmbeddingModel] model instance
# @param preload [Boolean] preload embeddings
# @param pre_normalize [Boolean] forwarded unchanged to EmbeddingPipeline.new
# @return [EmbeddingPipeline] the newly constructed pipeline
def self.create_pipeline(vocabulary:, model:, preload: false, pre_normalize: false)
  pipeline_options = {
    vocabulary: vocabulary,
    model: model,
    preload: preload,
    pre_normalize: pre_normalize
  }

  EmbeddingPipeline.new(**pipeline_options)
end

.from_cache(language:, preload: false, index: :exact) ⇒ EmbeddingPipeline

Create an EmbeddingPipeline from cache

Parameters:

  • language (String)

    ISO 639-1 language code

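  • preload (Boolean) (defaults to: false)

    Preload embeddings into memory

  • index (Symbol) (defaults to: :exact)

    Forwarded unchanged to EmbeddingPipeline.from_cache

Returns:

  • (EmbeddingPipeline)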


56
57
58
# File 'lib/kotoshu/embeddings.rb', line 56

# Create an EmbeddingPipeline from cache.
#
# Pure delegation: all three keywords are passed straight through to
# EmbeddingPipeline.from_cache, whose result is returned as-is.
#
# @param language [String] ISO 639-1 language code
# @param preload [Boolean] preload embeddings into memory
# @param index [Symbol] forwarded unchanged to EmbeddingPipeline.from_cache
# @return [EmbeddingPipeline] whatever EmbeddingPipeline.from_cache returns
def self.from_cache(language:, preload: false, index: :exact)
  EmbeddingPipeline.from_cache(
    language: language,
    preload: preload,
    index: index
  )
end

.language_supported?(language) ⇒ Boolean

Check if a language is supported

Parameters:

  • language (String)

    ISO 639-1 language code

Returns:

  • (Boolean)


65
66
67
68
69
# File 'lib/kotoshu/embeddings.rb', line 65

# Check if a language is supported.
#
# A language counts as supported when the model cache lists :onnx among the
# models available for it.
#
# @param language [String] ISO 639-1 language code (converted with #to_sym
#   before the cache lookup)
# @return [Boolean] whether :onnx is among the available models
def self.language_supported?(language)
  # Loaded on demand, at call time, rather than when this file is loaded.
  require_relative '../cache/model_cache'

  model_kinds = Cache::ModelCache.new.available_models_for(language.to_sym)
  model_kinds.include?(:onnx)
end

.supported_languages ⇒ Array<String>

List all supported languages

Returns:

  • (Array<String>)


75
76
77
78
79
# File 'lib/kotoshu/embeddings.rb', line 75

# List all supported languages.
#
# Reads the :onnx entry of the model cache's all_available_models and returns
# its keys as strings.
#
# @return [Array<String>] the keys of the :onnx entry, stringified; an empty
#   array when there is no :onnx entry
def self.supported_languages
  # Loaded on demand, at call time, rather than when this file is loaded.
  require_relative '../cache/model_cache'

  onnx_models = Cache::ModelCache.new.all_available_models[:onnx]
  # With no :onnx entry the lookup yields nil; report "no languages" instead
  # of failing with NoMethodError on nil.keys.
  return [] if onnx_models.nil?

  onnx_models.keys.map(&:to_s)
end