Class: Kreuzberg::ChunkSizingTokenizer

Inherits:
Data
  • Object
show all
Extended by:
T::Sig
Includes:
ChunkSizing
Defined in:
lib/kreuzberg/native.rb

Overview

Size measured in tokens from a HuggingFace tokenizer.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#cache_dir ⇒ Object (readonly)

rubocop:disable Lint/UselessMethodDefinition



375
376
377
# File 'lib/kreuzberg/native.rb', line 375

# Reader for the +@cache_dir+ instance variable.
#
# @return [Object] the current value of +@cache_dir+
def cache_dir = @cache_dir

#model ⇒ Object (readonly)

rubocop:disable Lint/UselessMethodDefinition



375
376
377
# File 'lib/kreuzberg/native.rb', line 375

# Reader for the +@model+ instance variable.
#
# @return [Object] the current value of +@model+
def model = @model

Class Method Details

.from_hash(hash) ⇒ Object



394
395
396
# File 'lib/kreuzberg/native.rb', line 394

# Builds an instance from a hash whose keys may be symbols or strings.
#
# For each field the symbol key is consulted first; the string key is used
# only when the symbol lookup yields +nil+ or +false+.
#
# @param hash [Hash] carries +model+ and +cache_dir+ under symbol or string keys
# @return [Object] whatever +new+ returns for the extracted keyword arguments
def self.from_hash(hash)
  model_value = hash[:model] || hash["model"]
  cache_dir_value = hash[:cache_dir] || hash["cache_dir"]

  new(model: model_value, cache_dir: cache_dir_value)
end

Instance Method Details

#characters? ⇒ Boolean

Returns:

  • (Boolean)


388
# File 'lib/kreuzberg/native.rb', line 388

# Predicate that is always +false+ for this class.
#
# @return [Boolean] always +false+
def characters?
  false
end

#tokenizer? ⇒ Boolean

Returns:

  • (Boolean)


390
391
392
# File 'lib/kreuzberg/native.rb', line 390

# Predicate that is always +true+ for this class.
#
# @return [Boolean] always +true+
def tokenizer?
  true
end
# @param hash [Hash] deserialized from the native extension
# @return [self]