Module: RubyLLM::Tokenizer
- Defined in:
- lib/ruby_llm/tokenizer.rb,
lib/ruby_llm/tokenizer/errors.rb,
lib/ruby_llm/tokenizer/backend.rb,
lib/ruby_llm/tokenizer/version.rb,
lib/ruby_llm/tokenizer/analysis.rb,
lib/ruby_llm/tokenizer/registry.rb,
lib/ruby_llm/tokenizer/configuration.rb,
lib/ruby_llm/tokenizer/backend/tiktoken.rb,
lib/ruby_llm/tokenizer/backend/approximate.rb,
lib/ruby_llm/tokenizer/backend/hugging_face.rb
Defined Under Namespace
Modules: Backend
Classes: Analysis, BackendError, CacheError, Configuration, ContextExceededError, Error, Registry, UnknownModelError
Constant Summary
collapse
- VERSION =
"0.1.0"
Class Method Summary
collapse
Class Method Details
.analyze(text, model:) ⇒ Object
16
17
18
|
# File 'lib/ruby_llm/tokenizer.rb', line 16
# Analyzes +text+ with the backend selected for +model+.
#
# @param text [Object] input handed unchanged to the backend's +analyze+
# @param model [Object] identifier passed to +backend_for+ to pick the backend
# @return [Object] whatever the backend's +analyze+ returns
def analyze(text, model:)
  backend = backend_for(model)
  backend.analyze(text)
end
|
.configuration ⇒ Object
38
39
40
|
# File 'lib/ruby_llm/tokenizer.rb', line 38
# Returns the memoized Configuration, building it on first access.
#
# @return [Configuration] the same instance on every call until
#   +@configuration+ is cleared
def configuration
  return @configuration if @configuration

  @configuration = Configuration.new
end
|
.configure { |configuration| ... } ⇒ Object
34
35
36
|
# File 'lib/ruby_llm/tokenizer.rb', line 34
# Yields the current configuration object to the given block.
#
# @yieldparam settings [Configuration] the object returned by +configuration+
# @return [Object] the value of the block
# @raise [LocalJumpError] when called without a block
def configure
  settings = configuration
  yield settings
end
|
.count(text, model:) ⇒ Object
12
13
14
|
# File 'lib/ruby_llm/tokenizer.rb', line 12
# Counts +text+ using the backend selected for +model+.
#
# @param text [Object] input handed unchanged to the backend's +count+
# @param model [Object] identifier passed to +backend_for+ to pick the backend
# @return [Object] whatever the backend's +count+ returns
def count(text, model:)
  tokenizer = backend_for(model)
  tokenizer.count(text)
end
|
.enable_claude_approximation!(encoding: "o200k_base") ⇒ Object
Opt-in: route any “claude*” model to an approximation backend. Counts are not exact. See Backend::Approximate for caveats.
30
31
32
|
# File 'lib/ruby_llm/tokenizer.rb', line 30
# Opt-in: route any "claude*" model to an approximation backend. Counts are
# not exact. See Backend::Approximate for caveats.
#
# @param encoding [String] encoding name forwarded to the registry entry
# @return [Object] whatever +registry.register+ returns
def enable_claude_approximation!(encoding: "o200k_base")
  # \A anchors at the very start of the string. The line anchor ^ would also
  # match "claude" after any embedded newline in the matched value.
  registry.register(match: /\Aclaude/i, backend: :approximate, encoding: encoding)
end
|
.register(match:, backend:) ⇒ Object
24
25
26
|
# File 'lib/ruby_llm/tokenizer.rb', line 24
# Adds a routing rule to the registry.
#
# @param match [Object] matcher forwarded as +match:+ to +registry.register+
# @param backend [Object] backend identifier forwarded as +backend:+
# @param options [Hash] any extra keyword arguments, forwarded untouched
# @return [Object] whatever +registry.register+ returns
def register(match:, backend:, **options)
  registry.register(match:, backend:, **options)
end
|
.registry ⇒ Object
42
43
44
|
# File 'lib/ruby_llm/tokenizer.rb', line 42
# Returns the memoized registry, loading the default one on first access.
#
# @return [Object] the result of +Registry.load_default+, reused on later
#   calls until +@registry+ is cleared
def registry
  return @registry if @registry

  @registry = Registry.load_default
end
|
.reset! ⇒ Object
46
47
48
49
|
# File 'lib/ruby_llm/tokenizer.rb', line 46
# Clears the memoized configuration and registry so the next call to
# +configuration+ / +registry+ builds them again.
#
# @return [nil]
def reset!
  @configuration = @registry = nil
end
|
.truncate(text, max_tokens:, model:, overflow: :truncate_right) ⇒ Object
20
21
22
|
# File 'lib/ruby_llm/tokenizer.rb', line 20
# Truncates +text+ using the backend selected for +model+.
#
# @param text [Object] input handed unchanged to the backend's +truncate+
# @param max_tokens [Object] forwarded as +max_tokens:+ to the backend
# @param model [Object] identifier passed to +backend_for+ to pick the backend
# @param overflow [Symbol] forwarded as +overflow:+; defaults to
#   +:truncate_right+
# @return [Object] whatever the backend's +truncate+ returns
def truncate(text, max_tokens:, model:, overflow: :truncate_right)
  limits = { max_tokens: max_tokens, overflow: overflow }
  backend_for(model).truncate(text, **limits)
end
|