Class: RubyLLM::Tokenizer::Backend::Base
- Inherits: Object
- Inheritance hierarchy:
  - Object
    - RubyLLM::Tokenizer::Backend::Base
- Defined in:
- lib/ruby_llm/tokenizer/backend.rb
Direct Known Subclasses
Instance Method Summary
- #analyze(text) ⇒ Object
- #count(text) ⇒ Object
- #decode(_ids) ⇒ Object
- #encode(_text) ⇒ Object
- #identifier ⇒ Object
- #truncate(text, max_tokens:, overflow: :truncate_right) ⇒ Object
Instance Method Details
#analyze(text) ⇒ Object
22 23 24 25 |
# File 'lib/ruby_llm/tokenizer/backend.rb', line 22

# Encodes +text+ and bundles the result into an Analysis.
#
# Every id returned by #encode is passed individually through
# #decode_single to obtain the matching token; the ids, the tokens and this
# backend's #identifier are then handed to Analysis.new.
#
# @param text [Object] input forwarded unchanged to #encode
# @return [Analysis] built with +tokens:+, +ids:+ and +model:+ keywords
def analyze(text)
  token_ids = encode(text)
  token_strings = token_ids.map { |token_id| decode_single(token_id) }

  Analysis.new(tokens: token_strings, ids: token_ids, model: identifier)
end
#count(text) ⇒ Object
18 19 20 |
# File 'lib/ruby_llm/tokenizer/backend.rb', line 18

# Reports how many ids #encode produces for +text+.
#
# @param text [Object] input forwarded unchanged to #encode
# @return [Object] whatever +#size+ yields on the #encode result
#   (presumably an Integer token count -- confirm against concrete backends)
def count(text)
  token_ids = encode(text)
  token_ids.size
end
#decode(_ids) ⇒ Object
14 15 16 |
# File 'lib/ruby_llm/tokenizer/backend.rb', line 14

# Abstract hook: this implementation only raises, naming the receiver's
# class in the message so the offending backend is easy to spot.
#
# @param _ids [Object] ignored here
# @raise [NotImplementedError] always
def decode(_ids)
  message = "#{self.class}#decode must be implemented"
  raise NotImplementedError, message
end
#encode(_text) ⇒ Object
10 11 12 |
# File 'lib/ruby_llm/tokenizer/backend.rb', line 10

# Abstract hook: this implementation only raises, naming the receiver's
# class in the message so the offending backend is easy to spot.
#
# @param _text [Object] ignored here
# @raise [NotImplementedError] always
def encode(_text)
  message = "#{self.class}#encode must be implemented"
  raise NotImplementedError, message
end
#identifier ⇒ Object
39 40 41 |
# File 'lib/ruby_llm/tokenizer/backend.rb', line 39

# Short, lowercase name for this backend: the last "::"-separated segment
# of the class name, downcased (e.g. +Foo::BarBaz+ gives "barbaz").
#
# Anonymous classes (such as +Class.new(Base)+) have a nil +#name+, which
# previously made this method raise NoMethodError. They now fall back to
# the nearest named ancestor class. Named classes are unaffected.
#
# @return [String] downcased, un-namespaced class name
def identifier
  # Restrict to Class so prepended/included modules in the ancestor chain
  # are never picked; Object is always named, so the search cannot fail.
  named_class = self.class.ancestors.find { |mod| mod.is_a?(Class) && mod.name }
  named_class.name.split("::").last.downcase
end
#truncate(text, max_tokens:, overflow: :truncate_right) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/ruby_llm/tokenizer/backend.rb', line 27

# Truncates +text+ to a token budget, dispatching on the kind of input.
#
# Both arguments are first passed to #validate_max_tokens! and
# #validate_overflow! (presumably raising on invalid values -- confirm in
# those helpers). A budget of zero short-circuits to an empty string.
# Otherwise input that responds to +each+ but not +to_str+ goes to
# #truncate_stream, and everything else is converted with +to_s+ and sent
# to #truncate_string.
#
# @param text [Object] string-like value or an +each+-able source
# @param max_tokens [Object] budget; must respond to +zero?+
# @param overflow [Symbol] strategy forwarded to the helper
# @return [Object] "" for a zero budget, otherwise the chosen helper's result
def truncate(text, max_tokens:, overflow: :truncate_right)
  validate_max_tokens!(max_tokens)
  validate_overflow!(overflow)
  return "" if max_tokens.zero?

  # String-likes win even if they also respond to #each.
  streamable = !text.respond_to?(:to_str) && text.respond_to?(:each)
  return truncate_stream(text, max_tokens: max_tokens, overflow: overflow) if streamable

  truncate_string(text.to_s, max_tokens: max_tokens, overflow: overflow)
end