Class: RubyLLM::Tokenizer::Backend::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_llm/tokenizer/backend.rb

Direct Known Subclasses

HuggingFace, Tiktoken

Instance Method Summary collapse

Instance Method Details

#analyze(text) ⇒ Object



22
23
24
25
# File 'lib/ruby_llm/tokenizer/backend.rb', line 22

# Tokenizes +text+ and packages the result as an Analysis.
#
# The text is passed to #encode; every resulting id is then passed through
# #decode_single to obtain its per-token value. The ids, the per-token
# values, and this backend's #identifier are handed to Analysis.new.
#
# @param text [Object] value forwarded unchanged to #encode
# @return [Analysis] built with +tokens:+, +ids:+ and +model:+ keywords
def analyze(text)
  token_ids = encode(text)
  pieces = token_ids.map { |token_id| decode_single(token_id) }

  Analysis.new(tokens: pieces, ids: token_ids, model: identifier)
end

#count(text) ⇒ Object



18
19
20
# File 'lib/ruby_llm/tokenizer/backend.rb', line 18

# Number of tokens +text+ encodes to.
#
# @param text [Object] value forwarded unchanged to #encode
# @return [Object] whatever +size+ returns on the result of #encode
#   (presumably an Integer token count — confirm against concrete backends)
def count(text)
  token_ids = encode(text)
  token_ids.size
end

#decode(_ids) ⇒ Object

Raises:

  • (NotImplementedError)


14
15
16
# File 'lib/ruby_llm/tokenizer/backend.rb', line 14

# Abstract hook: this base implementation always raises.
#
# The error message names the receiver's class, so a subclass that has not
# overridden this method is identified in the failure.
#
# NOTE: NotImplementedError descends from ScriptError, not StandardError, so a
# bare +rescue+ will not catch it.
#
# @param _ids [Object] ignored here
# @raise [NotImplementedError] always
def decode(_ids)
  message = "#{self.class}#decode must be implemented"
  raise NotImplementedError, message
end

#encode(_text) ⇒ Object

Raises:

  • (NotImplementedError)


10
11
12
# File 'lib/ruby_llm/tokenizer/backend.rb', line 10

# Abstract hook: this base implementation always raises.
#
# The error message names the receiver's class, so a subclass that has not
# overridden this method is identified in the failure. #count and #analyze
# both call this method.
#
# NOTE: NotImplementedError descends from ScriptError, not StandardError, so a
# bare +rescue+ will not catch it.
#
# @param _text [Object] ignored here
# @raise [NotImplementedError] always
def encode(_text)
  message = "#{self.class}#encode must be implemented"
  raise NotImplementedError, message
end

#identifier ⇒ Object



39
40
41
# File 'lib/ruby_llm/tokenizer/backend.rb', line 39

# Short, lowercase name for this backend, derived from its class name.
#
# The last "::"-separated segment of the class name is downcased, so an
# instance of a class named +Foo::Bar+ yields "bar".
#
# Anonymous classes (e.g. +Class.new(SomeBackend)+) have a nil +name+; for
# those the nearest named superclass is used instead of failing with
# NoMethodError on nil.
#
# @return [String]
def identifier
  klass = self.class
  # Walk up past anonymous classes; Object is always named, so this terminates.
  klass = klass.superclass until klass.name

  klass.name.split("::").last.downcase
end

#truncate(text, max_tokens:, overflow: :truncate_right) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/ruby_llm/tokenizer/backend.rb', line 27

# Truncates +text+ to at most +max_tokens+ tokens.
#
# Both keyword arguments are checked first via #validate_max_tokens! and
# #validate_overflow! (presumably raising on invalid values — confirm in those
# helpers). A +max_tokens+ of zero short-circuits to an empty string.
#
# Dispatch is by duck type: an input that does not respond to +to_str+ but
# does respond to +each+ goes to #truncate_stream as-is; anything else is
# converted with +to_s+ and goes to #truncate_string.
#
# @param text [Object] string-like value, or an +each+-able source
# @param max_tokens [Object] token budget; must respond to +zero?+
# @param overflow [Symbol] strategy forwarded to the helpers
#   (default +:truncate_right+)
# @return [Object] "" when +max_tokens+ is zero, otherwise the helper's result
def truncate(text, max_tokens:, overflow: :truncate_right)
  validate_max_tokens!(max_tokens)
  validate_overflow!(overflow)
  return "" if max_tokens.zero?

  # String-likes win even if they are also enumerable.
  streamlike = !text.respond_to?(:to_str) && text.respond_to?(:each)
  return truncate_stream(text, max_tokens: max_tokens, overflow: overflow) if streamlike

  truncate_string(text.to_s, max_tokens: max_tokens, overflow: overflow)
end