Class: Kreuzberg::ChunkSizingTokenizer

Inherits:
Data
  • Object
show all
Extended by:
T::Sig
Includes:
ChunkSizing
Defined in:
lib/kreuzberg/native.rb

Overview

Size measured in tokens from a HuggingFace tokenizer.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#cache_dir ⇒ Object (readonly)

rubocop:disable Lint/UselessMethodDefinition



60
61
62
# File 'lib/kreuzberg/native.rb', line 60

# Reader for the +@cache_dir+ instance variable.
#
# @return [Object, nil] whatever is stored in +@cache_dir+ (nil when unset)
def cache_dir = @cache_dir

#model ⇒ Object (readonly)

rubocop:disable Lint/UselessMethodDefinition



60
61
62
# File 'lib/kreuzberg/native.rb', line 60

# Reader for the +@model+ instance variable.
#
# @return [Object, nil] whatever is stored in +@model+ (nil when unset)
def model = @model

Class Method Details

.from_hash(hash) ⇒ Object



83
84
85
# File 'lib/kreuzberg/native.rb', line 83

# Builds an instance from a hash keyed by symbols or strings.
#
# For each attribute the symbol key is tried first; the string key is used
# only when the symbol lookup yields nil or false.
#
# @param hash [Hash] source hash holding :model/"model" and
#   :cache_dir/"cache_dir" entries
# @return [Object] the result of calling +new+ with the extracted values
def self.from_hash(hash)
  model_value = hash[:model] || hash["model"]
  cache_dir_value = hash[:cache_dir] || hash["cache_dir"]

  new(model: model_value, cache_dir: cache_dir_value)
end

Instance Method Details

#characters? ⇒ Boolean

Returns:

  • (Boolean)


75
# File 'lib/kreuzberg/native.rb', line 75

# Predicate that is unconditionally false for this class.
#
# @return [Boolean] always +false+
def characters?
  false
end

#tokenizer? ⇒ Boolean

Returns:

  • (Boolean)


78
# File 'lib/kreuzberg/native.rb', line 78

# Predicate that is unconditionally true for this class.
#
# @return [Boolean] always +true+
def tokenizer?
  true
end