Class: MiniEmbed

Inherits:
Object
  • Object
show all
Defined in:
lib/mini_embed.rb,
ext/mini_embed/mini_embed.c

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ Object



1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
# File 'ext/mini_embed/mini_embed.c', line 1849

/*
 * MiniEmbed#initialize(opts)
 *
 * Loads a GGUF model via embed_load_gguf and stores it on the receiver.
 *
 * opts must be a Hash with:
 *   :model     - required; String (or to_str-convertible) path handed to
 *                embed_load_gguf.
 *   :normalize - optional; the Symbol :l2 / :L2, or a String equal to "l2"
 *                ignoring case, selects NORM_L2. Any other value, or an
 *                absent key, leaves NORM_NONE.
 *
 * Raises TypeError when opts is not a Hash, ArgumentError when :model is
 * missing, and RuntimeError when embed_load_gguf returns NULL.
 * Returns self.
 */
static VALUE rb_embedder_initialize(VALUE self, VALUE opts) {
    ruby_embedder *e;
    TypedData_Get_Struct(self, ruby_embedder, &ruby_embedder_type, e);

    Check_Type(opts, T_HASH);
    VALUE path = rb_hash_aref(opts, ID2SYM(rb_intern("model")));
    if (NIL_P(path)) rb_raise(rb_eArgError, "missing required key: model");
    /* cpath borrows path's buffer; path must stay reachable while it is used. */
    const char *cpath = StringValueCStr(path);

    /* nil is neither a Symbol nor a String, so a missing key falls through. */
    VALUE normalize = rb_hash_aref(opts, ID2SYM(rb_intern("normalize")));
    int norm_type = NORM_NONE;
    if (SYMBOL_P(normalize)) {
        ID sym_id = SYM2ID(normalize);
        if (sym_id == rb_intern("l2") || sym_id == rb_intern("L2")) {
            norm_type = NORM_L2;
        }
    } else if (RB_TYPE_P(normalize, T_STRING)) {
        if (strcasecmp(StringValueCStr(normalize), "l2") == 0) {
            norm_type = NORM_L2;
        }
    }

    e->model = embed_load_gguf(cpath);
    if (!e->model) {
        /* Format from the VALUE itself: building the message allocates, and
         * the borrowed cpath pointer must not be the only thing in use then. */
        rb_raise(rb_eRuntimeError, "failed to load GGUF model: %" PRIsVALUE, path);
    }

    e->model->normalize = norm_type;

    /* Keep path alive across every use of cpath above. */
    RB_GC_GUARD(path);
    return self;
}

Instance Method Details

#embed(opts) ⇒ Object



1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
# File 'ext/mini_embed/mini_embed.c', line 1881

/*
 * MiniEmbed#embed(opts)
 *
 * Runs embed_text on opts[:text] and returns the result as a binary String
 * of e->model->dim native `float` values.
 *
 * opts must be a Hash with a required :text key holding a String (or
 * to_str-convertible) value.
 *
 * Raises TypeError when opts is not a Hash, ArgumentError when :text is
 * missing, and RuntimeError when no model is attached to the receiver.
 */
static VALUE rb_embed(VALUE self, VALUE opts) {
    ruby_embedder *e;
    TypedData_Get_Struct(self, ruby_embedder, &ruby_embedder_type, e);

    /* An instance whose initialize never completed has no model; raise
     * instead of dereferencing NULL below.
     * NOTE(review): assumes the allocator zero-fills the struct - confirm. */
    if (!e->model) rb_raise(rb_eRuntimeError, "model is not loaded");

    Check_Type(opts, T_HASH);
    VALUE text = rb_hash_aref(opts, ID2SYM(rb_intern("text")));
    if (NIL_P(text)) rb_raise(rb_eArgError, "missing required key: text");
    /* ctext borrows text's buffer; text must stay reachable while it is used. */
    const char *ctext = StringValueCStr(text);

    /* Output buffer sized for dim floats; embed_text writes into it. */
    VALUE out = rb_str_new(NULL, e->model->dim * sizeof(float));
    embed_text(e->model, ctext, (float*)RSTRING_PTR(out));

    /* rb_str_new above allocates, so keep text alive until ctext is done. */
    RB_GC_GUARD(text);
    return out;
}

#embeddings(text:, type: :vector) ⇒ String, <Float>

Returns a binary string when type is :binary, or an array of floats when type is :vector.

Parameters:

  • text (String)
    • text to extract embeddings from

  • type (Symbol, nil) (defaults to: :vector)
    • :binary or :vector - type of data you want to receive

Returns:

  • (String, <Float>)
    • a binary string when type is :binary, or an array of floats when type is :vector

Raises:

  • (ArgumentError)


9
10
11
12
13
14
15
16
# File 'lib/mini_embed.rb', line 9

# Computes the embedding of +text+ and returns it in the requested form.
#
# @param text [String] text to extract embeddings from
# @param type [Symbol] :binary for the raw packed string, :vector for an
#   array of floats
# @return [String, Array<Float>] the packed string when type is :binary,
#   the unpacked floats when type is :vector
# @raise [ArgumentError] if type is neither :binary nor :vector
def embeddings(text:, type: :vector)
  # Reject an unsupported type first so no embedding is computed for a call
  # that is going to fail anyway.
  raise ArgumentError, "Unsupported data type: #{type}" unless %i[binary vector].include?(type)

  binary_data = embed(text: text) # implemented in the C extension
  return binary_data if type == :binary

  # The C side writes native-endian floats, so unpack with the native
  # directive rather than forcing little-endian.
  binary_data.unpack('f*')
end