Module: RubyLLM::Tokenizer

Defined in:
lib/ruby_llm/tokenizer.rb,
lib/ruby_llm/tokenizer/errors.rb,
lib/ruby_llm/tokenizer/backend.rb,
lib/ruby_llm/tokenizer/version.rb,
lib/ruby_llm/tokenizer/analysis.rb,
lib/ruby_llm/tokenizer/registry.rb,
lib/ruby_llm/tokenizer/configuration.rb,
lib/ruby_llm/tokenizer/backend/tiktoken.rb,
lib/ruby_llm/tokenizer/backend/approximate.rb,
lib/ruby_llm/tokenizer/backend/hugging_face.rb

Defined Under Namespace

Modules: Backend

Classes: Analysis, BackendError, CacheError, Configuration, ContextExceededError, Error, Registry, UnknownModelError

Constant Summary collapse

VERSION =
"0.1.0"

Class Method Summary collapse

Class Method Details

.analyze(text, model:) ⇒ Object



16
17
18
# File 'lib/ruby_llm/tokenizer.rb', line 16

# Analyzes +text+ with the tokenizer backend selected for +model+.
#
# The backend is looked up through +backend_for+ (not shown here) and the
# call is delegated to its +analyze+ method.
#
# @param text [Object] the text to analyze; presumably a String (confirm against the backends)
# @param model [Object] model identifier handed to +backend_for+
# @return [Object] whatever the backend's +analyze+ returns
def analyze(text, model:)
  backend = backend_for(model)
  backend.analyze(text)
end

.configuration ⇒ Object



38
39
40
# File 'lib/ruby_llm/tokenizer.rb', line 38

# Returns the module-level configuration, building it on first access.
#
# The instance is cached in +@configuration+, so repeated calls return the
# same object until that variable is cleared (see +reset!+).
#
# @return [Configuration] the memoized configuration object
def configuration
  return @configuration if @configuration

  @configuration = Configuration.new
end

.configure {|configuration| ... } ⇒ Object

Yields:

(configuration) — the object returned by .configuration



34
35
36
# File 'lib/ruby_llm/tokenizer.rb', line 34

# Yields the module-level configuration so callers can adjust settings.
#
# When called without a block, the configuration is returned instead of
# raising LocalJumpError from a bare +yield+.
#
# @yieldparam configuration [Configuration] the object returned by +configuration+
# @return [Object] the block's return value, or the configuration itself
#   when no block is given
def configure
  # Calling without a block is treated as a plain accessor, not an error.
  return configuration unless block_given?

  yield configuration
end

.count(text, model:) ⇒ Object



12
13
14
# File 'lib/ruby_llm/tokenizer.rb', line 12

# Counts the tokens in +text+ with the tokenizer backend selected for +model+.
#
# The backend is looked up through +backend_for+ (not shown here) and the
# call is delegated to its +count+ method.
#
# @param text [Object] the text to count; presumably a String (confirm against the backends)
# @param model [Object] model identifier handed to +backend_for+
# @return [Object] whatever the backend's +count+ returns (presumably an Integer — confirm)
def count(text, model:)
  backend = backend_for(model)
  backend.count(text)
end

.enable_claude_approximation!(encoding: "o200k_base") ⇒ Object

Opt-in: route any “claude*” model to an approximation backend. Counts are not exact. See Backend::Approximate for caveats.



30
31
32
# File 'lib/ruby_llm/tokenizer.rb', line 30

# Opt-in: registers a rule that routes model names beginning with "claude"
# (case-insensitive) to the +:approximate+ backend.
#
# @param encoding [String] encoding name forwarded to the registry entry
# @return [Object] whatever +registry.register+ returns
def enable_claude_approximation!(encoding: "o200k_base")
  # \A anchors at the start of the whole string. The previous ^ anchored at
  # the start of any line, so a name such as "gpt-4o\nclaude" also matched.
  registry.register(match: /\Aclaude/i, backend: :approximate, encoding: encoding)
end

.register(match:, backend:, **) ⇒ Object



24
25
26
# File 'lib/ruby_llm/tokenizer.rb', line 24

# Registers a model-matching rule on the module-level registry.
#
# The keyword splat is named so that generated documentation shows it and
# the forwarding does not depend on the anonymous `**` syntax of Ruby 3.2+.
#
# @param match [Object] matcher for model names (a Regexp in
#   +enable_claude_approximation!+)
# @param backend [Object] backend identifier (a Symbol such as +:approximate+
#   in +enable_claude_approximation!+)
# @param options [Hash] any further keyword arguments, forwarded unchanged
#   (for example +encoding:+)
# @return [Object] whatever +registry.register+ returns
def register(match:, backend:, **options)
  registry.register(match: match, backend: backend, **options)
end

.registry ⇒ Object



42
43
44
# File 'lib/ruby_llm/tokenizer.rb', line 42

# Returns the module-level registry, loading the default one on first access.
#
# The result of +Registry.load_default+ is cached in +@registry+, so repeated
# calls return the same object until that variable is cleared (see +reset!+).
#
# @return [Object] the memoized value of +Registry.load_default+
def registry
  return @registry if @registry

  @registry = Registry.load_default
end

.reset! ⇒ Object



46
47
48
49
# File 'lib/ruby_llm/tokenizer.rb', line 46

# Clears the memoized configuration and registry.
#
# Both are rebuilt lazily by +configuration+ and +registry+ on next access.
#
# @return [nil]
def reset!
  @registry = @configuration = nil
end

.truncate(text, max_tokens:, model:, overflow: :truncate_right) ⇒ Object



20
21
22
# File 'lib/ruby_llm/tokenizer.rb', line 20

# Truncates +text+ with the tokenizer backend selected for +model+.
#
# The backend is looked up through +backend_for+ (not shown here); +max_tokens+
# and +overflow+ are passed through to its +truncate+ method unchanged.
#
# @param text [Object] the text to truncate; presumably a String (confirm against the backends)
# @param max_tokens [Object] token budget forwarded to the backend (presumably an Integer — confirm)
# @param model [Object] model identifier handed to +backend_for+
# @param overflow [Symbol] overflow strategy forwarded to the backend;
#   defaults to +:truncate_right+
# @return [Object] whatever the backend's +truncate+ returns
def truncate(text, max_tokens:, model:, overflow: :truncate_right)
  backend = backend_for(model)
  backend.truncate(text, max_tokens: max_tokens, overflow: overflow)
end