Class: MiniEmbed

Inherits:
Object
  • Object
show all
Defined in:
lib/mini_embed.rb,
ext/mini_embed/mini_embed.c

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ Object



1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
# File 'ext/mini_embed/mini_embed.c', line 1757

/*
 * Translates the optional :normalize option into a NORM_* constant.
 *
 * Only the symbols :l2 / :L2 and strings equal to "l2" (compared
 * case-insensitively) select NORM_L2. nil and every other value fall back to
 * NORM_NONE.
 */
static int embedder_norm_from_option(VALUE option) {
    if (NIL_P(option)) return NORM_NONE;

    if (SYMBOL_P(option)) {
        const ID id = SYM2ID(option);
        return (id == rb_intern("l2") || id == rb_intern("L2")) ? NORM_L2 : NORM_NONE;
    }

    if (RB_TYPE_P(option, T_STRING)) {
        return strcasecmp(StringValueCStr(option), "l2") == 0 ? NORM_L2 : NORM_NONE;
    }

    return NORM_NONE;
}

/*
 * MiniEmbed#initialize(opts)
 *
 * opts must be a Hash. Recognized keys:
 *   :model     - required; path handed to embed_load_gguf().
 *   :normalize - optional; see embedder_norm_from_option().
 *
 * Raises TypeError (via Check_Type) when opts is not a Hash, ArgumentError
 * when :model is missing, and RuntimeError when the model cannot be loaded.
 * Returns self.
 */
static VALUE rb_embedder_initialize(VALUE self, VALUE opts) {
    ruby_embedder *embedder;
    TypedData_Get_Struct(self, ruby_embedder, &ruby_embedder_type, embedder);

    Check_Type(opts, T_HASH);

    VALUE model_path = rb_hash_aref(opts, ID2SYM(rb_intern("model")));
    if (NIL_P(model_path)) {
        rb_raise(rb_eArgError, "missing required key: model");
    }
    const char *model_cpath = StringValueCStr(model_path);

    /* Resolve the normalization mode before loading, so bad option values
     * surface before any model work is done. */
    const int norm_type =
        embedder_norm_from_option(rb_hash_aref(opts, ID2SYM(rb_intern("normalize"))));

    embedder->model = embed_load_gguf(model_cpath);
    if (!embedder->model) {
        rb_raise(rb_eRuntimeError, "failed to load GGUF model: %s", model_cpath);
    }

    embedder->model->normalize = norm_type;
    return self;
}

Instance Method Details

#embed(opts) ⇒ Object



1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
# File 'ext/mini_embed/mini_embed.c', line 1789

/*
 * MiniEmbed#embed(opts)
 *
 * opts must be a Hash containing the required key :text (a String without
 * embedded NUL bytes). Returns a Ruby String whose byte length is
 * model->dim * sizeof(float); the buffer is handed to embed_text() as a
 * float array to be filled in (presumably with one float per embedding
 * dimension, in host byte order -- confirm against embed_text()).
 *
 * Raises TypeError (via Check_Type) when opts is not a Hash, ArgumentError
 * when :text is missing, and RuntimeError when no model is loaded.
 */
static VALUE rb_embed(VALUE self, VALUE opts) {
    ruby_embedder *e;
    TypedData_Get_Struct(self, ruby_embedder, &ruby_embedder_type, e);

    /* The model pointer is NULL when loading failed in #initialize (the
     * pointer is assigned before the error is raised). Fail with a Ruby
     * exception instead of dereferencing NULL and crashing the process. */
    if (!e->model) rb_raise(rb_eRuntimeError, "model is not loaded");

    Check_Type(opts, T_HASH);
    VALUE text = rb_hash_aref(opts, ID2SYM(rb_intern("text")));
    if (NIL_P(text)) rb_raise(rb_eArgError, "missing required key: text");
    const char *ctext = StringValueCStr(text);

    /* Allocate the result buffer up front and let embed_text() write into it. */
    VALUE out = rb_str_new(NULL, e->model->dim * sizeof(float));
    embed_text(e->model, ctext, (float*)RSTRING_PTR(out));
    return out;
}

#embeddings(text:, type: :vector) ⇒ String, <Float>

Returns a binary string when type is :binary, or an array of floats when type is :vector.

Parameters:

  • text (String)
    • text to extract embeddings from

  • type (Symbol, nil) (defaults to: :vector)
    • :binary or :vector - type of data you want to receive

Returns:

  • (String, <Float>)
    • a binary string when type == :binary, an array of floats when type == :vector

Raises:

  • (ArgumentError)


9
10
11
12
13
14
15
16
# File 'lib/mini_embed.rb', line 9

# Computes the embedding for +text+ and returns it in the requested format.
#
# @param text [String] text to extract embeddings from
# @param type [Symbol, nil] :binary for the raw packed string returned by the
#   C extension, :vector for an array of floats
# @return [String, Array<Float>] binary string for :binary, array of floats
#   for :vector
# @raise [ArgumentError] if +type+ is neither :binary nor :vector
def embeddings(text:, type: :vector)
  # Reject an unsupported type before running the model, so a bad argument
  # does not cost a full embedding computation.
  raise ArgumentError, "Unsupported data type: #{type}" unless %i[binary vector].include?(type)

  binary_data = embed(text: text) # implemented in the C extension
  return binary_data if type == :binary

  # The C extension writes host-order floats into the buffer, so decode with
  # the native single-precision directive rather than forcing little-endian.
  binary_data.unpack('f*')
end