Class: Kiribi::MultilingualE5::Small::Model
- Inherits:
-
Object
- Object
- Kiribi::MultilingualE5::Small::Model
- Defined in:
- lib/kiribi/multilingual_e5/small.rb
Instance Attribute Summary collapse
-
#onnx_model ⇒ Object
readonly
Returns the value of attribute onnx_model.
-
#tokenizer ⇒ Object
readonly
Returns the value of attribute tokenizer.
Instance Method Summary collapse
- #embedding(prefix, input) ⇒ Object
- #embedding_passage(input) ⇒ Object
- #embedding_query(input) ⇒ Object
-
#initialize ⇒ Model
constructor
A new instance of Model.
Constructor Details
#initialize ⇒ Model
Returns a new instance of Model.
21 22 23 24 |
# File 'lib/kiribi/multilingual_e5/small.rb', line 21 def initialize @tokenizer = Tokenizers.from_file(TOKENIZER_FILEPATH) @onnx_model = OnnxRuntime::Model.new(MODEL_FILEPATH) end |
Instance Attribute Details
#onnx_model ⇒ Object (readonly)
Returns the value of attribute onnx_model.
19 20 21 |
# File 'lib/kiribi/multilingual_e5/small.rb', line 19 def onnx_model @onnx_model end |
#tokenizer ⇒ Object (readonly)
Returns the value of attribute tokenizer.
19 20 21 |
# File 'lib/kiribi/multilingual_e5/small.rb', line 19 def tokenizer @tokenizer end |
Instance Method Details
#embedding(prefix, input) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/kiribi/multilingual_e5/small.rb', line 34 def embedding(prefix, input) prefix = prefix.to_s raise ArgumentError, "prefix must be :query or :passage" unless %w[query passage].include?(prefix) # https://huggingface.co/intfloat/multilingual-e5-small encoded = tokenizer.encode("#{prefix}: #{input}") batch = { input_ids: [encoded.ids], attention_mask: [encoded.attention_mask], token_type_ids: [[0] * encoded.ids.length] } outputs = onnx_model.predict(batch) last_hidden = outputs["last_hidden_state"][0] attentions = encoded.attention_mask output_matrix = last_hidden.filter.with_index { |_, i| attentions[i] == 1 } valid_tokens = attentions.sum output_matrix.transpose.map { it.sum / valid_tokens } end |
#embedding_passage(input) ⇒ Object
30 31 32 |
# File 'lib/kiribi/multilingual_e5/small.rb', line 30 def embedding_passage(input) embedding(:passage, input) end |
#embedding_query(input) ⇒ Object
26 27 28 |
# File 'lib/kiribi/multilingual_e5/small.rb', line 26 def embedding_query(input) embedding(:query, input) end |