Class: RubyLLM::Tokenizer::Backend::Tiktoken

Inherits:
Base
  • Object
show all
Defined in:
lib/ruby_llm/tokenizer/backend/tiktoken.rb

Direct Known Subclasses

Approximate

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#analyze, #count, #truncate

Constructor Details

#initialize(encoding:) ⇒ Tiktoken

Returns a new instance of Tiktoken.



12
13
14
15
16
17
18
19
# File 'lib/ruby_llm/tokenizer/backend/tiktoken.rb', line 12

# Builds a backend around the tiktoken encoding with the given name.
#
# @param encoding [#to_s] name of the tiktoken encoding to load; kept as a
#   String in @encoding_name
# @raise [BackendError] "Unknown tiktoken encoding: ..." when the lookup
#   returns nil, or "Failed to load tiktoken encoding ...: <cause>" when
#   loading raises any other StandardError
def initialize(encoding:)
  super()
  @encoding_name = encoding.to_s
  @encoding = ::Tiktoken.get_encoding(@encoding_name)
  raise BackendError, "Unknown tiktoken encoding: #{encoding.inspect}" if @encoding.nil?
rescue BackendError
  # Already the error type callers expect (e.g. the "Unknown ..." case just
  # above). Re-raise untouched so the generic handler below does not re-wrap
  # it as "Failed to load ...: Unknown ...".
  raise
rescue StandardError => e
  raise BackendError, "Failed to load tiktoken encoding #{encoding.inspect}: #{e.message}"
end

Instance Attribute Details

#encoding_name ⇒ Object (readonly)

Returns the value of attribute encoding_name.



10
11
12
# File 'lib/ruby_llm/tokenizer/backend/tiktoken.rb', line 10

# Reader for the encoding name held in @encoding_name (the constructor
# stores `encoding.to_s` there).
#
# @return [String] the stored encoding name
def encoding_name
  @encoding_name
end

Instance Method Details

#decode(ids) ⇒ Object



25
26
27
# File 'lib/ruby_llm/tokenizer/backend/tiktoken.rb', line 25

# Hands token ids to the underlying encoding's #decode.
#
# @param ids [Array, Object, nil] one id or a list of ids; coerced with
#   Kernel#Array, so nil becomes an empty list and a scalar a one-element list
# @return [Object] whatever the underlying encoding's #decode returns
#   (presumably the decoded text — confirm against the tiktoken binding)
def decode(ids)
  token_ids = Array(ids)
  @encoding.decode(token_ids)
end

#encode(text) ⇒ Object



21
22
23
# File 'lib/ruby_llm/tokenizer/backend/tiktoken.rb', line 21

# Hands text to the underlying encoding's #encode.
#
# @param text [#to_s] input to tokenize; converted with #to_s first, so nil
#   is encoded as the empty string
# @return [Object] whatever the underlying encoding's #encode returns
#   (presumably an Array of token ids — confirm against the tiktoken binding)
def encode(text)
  input = text.to_s
  @encoding.encode(input)
end

#identifier ⇒ Object



29
30
31
# File 'lib/ruby_llm/tokenizer/backend/tiktoken.rb', line 29

# Label for this backend: the literal prefix "tiktoken:" followed by the
# value of #encoding_name.
#
# @return [String] e.g. "tiktoken:cl100k_base"
def identifier
  name = encoding_name
  "tiktoken:#{name}"
end