Class: Kiribi::MultilingualE5::Small::Model

Inherits:
Object
  • Object
show all
Defined in:
lib/kiribi/multilingual_e5/small.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Model

Returns a new instance of Model.



21
22
23
24
# File 'lib/kiribi/multilingual_e5/small.rb', line 21

# Builds the model wrapper by loading its two backing resources.
#
# TOKENIZER_FILEPATH and MODEL_FILEPATH are constants defined outside this
# listing; the tokenizer is loaded first, then the ONNX model.
def initialize
  # Tokenizer loaded from the file at TOKENIZER_FILEPATH.
  @tokenizer = Tokenizers.from_file(TOKENIZER_FILEPATH)
  # ONNX Runtime model constructed from MODEL_FILEPATH.
  @onnx_model = OnnxRuntime::Model.new(MODEL_FILEPATH)
end

Instance Attribute Details

#onnx_model ⇒ Object (readonly)

Returns the value of attribute onnx_model.



19
20
21
# File 'lib/kiribi/multilingual_e5/small.rb', line 19

# Read-only accessor for the ONNX model instance assigned in #initialize.
#
# @return [Object] the value of @onnx_model
def onnx_model = @onnx_model

#tokenizer ⇒ Object (readonly)

Returns the value of attribute tokenizer.



19
20
21
# File 'lib/kiribi/multilingual_e5/small.rb', line 19

# Read-only accessor for the tokenizer instance assigned in #initialize.
#
# @return [Object] the value of @tokenizer
def tokenizer = @tokenizer

Instance Method Details

#embedding(prefix, input) ⇒ Object

Raises:

  • (ArgumentError) — if prefix (after to_s) is neither "query" nor "passage"


34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/kiribi/multilingual_e5/small.rb', line 34

# Computes an embedding for +input+ by averaging the model's
# "last_hidden_state" rows over the tokens whose attention-mask entry is 1.
#
# @param prefix [#to_s] "query" or "passage", given as a String or Symbol
# @param input [#to_s] text interpolated after the "<prefix>: " marker
# @return [Array] one averaged value per hidden-state column
# @raise [ArgumentError] if prefix is neither "query" nor "passage"
def embedding(prefix, input)
  role = prefix.to_s
  unless %w[query passage].include?(role)
    raise ArgumentError, "prefix must be :query or :passage"
  end

  # The text is sent to the tokenizer as "<prefix>: <input>"; see
  # https://huggingface.co/intfloat/multilingual-e5-small
  tokens = tokenizer.encode("#{role}: #{input}")
  token_ids = tokens.ids
  mask = tokens.attention_mask

  model_inputs = {
    input_ids: [token_ids],
    attention_mask: [mask],
    token_type_ids: [Array.new(token_ids.length, 0)]
  }

  # Only the first (and only) batch entry is used.
  hidden_states = onnx_model.predict(model_inputs)["last_hidden_state"][0]

  # Keep the rows whose mask entry is 1, then average column by column.
  attended_rows = hidden_states
    .each_with_index
    .select { |_row, position| mask[position] == 1 }
    .map(&:first)
  attended_count = mask.sum

  attended_rows.transpose.map { |column| column.sum / attended_count }
end

#embedding_passage(input) ⇒ Object



30
31
32
# File 'lib/kiribi/multilingual_e5/small.rb', line 30

# Shortcut for #embedding with the :passage prefix.
#
# @param input [Object] text forwarded unchanged to #embedding
# @return [Object] whatever #embedding returns
def embedding_passage(input) = embedding(:passage, input)

#embedding_query(input) ⇒ Object



26
27
28
# File 'lib/kiribi/multilingual_e5/small.rb', line 26

# Shortcut for #embedding with the :query prefix.
#
# @param input [Object] text forwarded unchanged to #embedding
# @return [Object] whatever #embedding returns
def embedding_query(input) = embedding(:query, input)