Class: Kreuzberg::ChunkSizingTokenizer

Inherits:
Data
  • Object
show all
Extended by:
T::Sig
Includes:
ChunkSizing
Defined in:
lib/kreuzberg/native.rb

Overview

Size measured in tokens from a HuggingFace tokenizer.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#cache_dir ⇒ Object (readonly)

rubocop:disable Lint/UselessMethodDefinition



159
160
161
# File 'lib/kreuzberg/native.rb', line 159

# Reader for the +@cache_dir+ instance variable.
#
# @return [Object] whatever is currently stored in +@cache_dir+
#   (+nil+ when the variable has never been assigned)
def cache_dir = @cache_dir

#model ⇒ Object (readonly)

rubocop:disable Lint/UselessMethodDefinition



159
160
161
# File 'lib/kreuzberg/native.rb', line 159

# Reader for the +@model+ instance variable.
#
# @return [Object] whatever is currently stored in +@model+
#   (+nil+ when the variable has never been assigned)
def model = @model

Class Method Details

.from_hash(hash) ⇒ Object



178
179
180
# File 'lib/kreuzberg/native.rb', line 178

# Builds an instance from a hash, accepting either symbol or string keys.
#
# For each attribute the symbol key is consulted first; the string key is
# used only when the symbol lookup yields +nil+ or +false+.
#
# @param hash [Hash] source of the +model+ and +cache_dir+ values
# @return [Object] the result of +new(model:, cache_dir:)+
def self.from_hash(hash)
  model_value = hash[:model] || hash["model"]
  cache_dir_value = hash[:cache_dir] || hash["cache_dir"]

  new(model: model_value, cache_dir: cache_dir_value)
end

Instance Method Details

#characters? ⇒ Boolean

Returns:

  • (Boolean)


172
# File 'lib/kreuzberg/native.rb', line 172

# Predicate that is unconditionally false for this class.
#
# @return [Boolean] always +false+
def characters?
  false
end

#tokenizer? ⇒ Boolean

Returns:

  • (Boolean)


174
175
176
# File 'lib/kreuzberg/native.rb', line 174

# Predicate that is unconditionally true for this class.
#
# @return [Boolean] always +true+
def tokenizer?
  true
end
# @param hash [Hash] deserialized from the native extension
# @return [self]