Class: RubyLLM::Tokenizer::Backend::HuggingFace

Inherits:
Base
  • Object
show all
Defined in:
lib/ruby_llm/tokenizer/backend/hugging_face.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#count, #truncate

Constructor Details

#initialize(repo:, revision: nil) ⇒ HuggingFace

Returns a new instance of HuggingFace.



13
14
15
16
17
18
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 13

# Creates a backend bound to one repository and, optionally, one revision.
#
# @param repo [Object] repository identifier; stored unchanged and exposed via #repo
# @param revision [Object, nil] optional revision; stored unchanged and exposed
#   via #revision (defaults to nil)
def initialize(repo:, revision: nil)
  super()
  @revision = revision
  @repo = repo
  # load_tokenizer is defined outside this excerpt; it is invoked only after
  # both attributes are set (presumably because it reads them -- confirm).
  @tokenizer = load_tokenizer
end

Instance Attribute Details

#repo ⇒ Object (readonly)

Returns the value of attribute repo.



11
12
13
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 11

# Reader for the repository identifier that #initialize stored from its
# +repo:+ keyword argument.
#
# @return [Object] the value held in @repo
def repo
  @repo
end

#revision ⇒ Object (readonly)

Returns the value of attribute revision.



11
12
13
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 11

# Reader for the revision that #initialize stored from its +revision:+
# keyword argument (nil when none was given).
#
# @return [Object, nil] the value held in @revision
def revision
  @revision
end

Instance Method Details

#analyze(text) ⇒ Object



28
29
30
31
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 28

# Tokenizes +text+ and wraps the result in an Analysis.
#
# @param text [#to_s] input to tokenize; coerced with #to_s, so nil becomes ""
# @return [Analysis] built from the encoding's tokens and ids, tagged with
#   this backend's #identifier as the model
def analyze(text)
  encoded = @tokenizer.encode(text.to_s)

  Analysis.new(
    tokens: encoded.tokens,
    ids: encoded.ids,
    model: identifier
  )
end

#decode(ids) ⇒ Object



24
25
26
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 24

# Turns token ids back into text via the underlying tokenizer.
#
# @param ids [Array, Object, nil] one id or a list of ids; normalized with
#   Kernel#Array, so nil becomes [] and a single id becomes a one-element list
# @return [Object] whatever the tokenizer's #decode returns (presumably a
#   String -- confirm against the tokenizer library)
def decode(ids)
  id_list = Array(ids)
  # Always request that special tokens be skipped in the decoded output.
  @tokenizer.decode(id_list, skip_special_tokens: true)
end

#encode(text) ⇒ Object



20
21
22
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 20

# Tokenizes +text+ and returns only the token ids.
#
# @param text [#to_s] input to tokenize; coerced with #to_s, so nil becomes ""
# @return [Object] the +ids+ of the encoding produced by the tokenizer
def encode(text)
  input = text.to_s
  encoded = @tokenizer.encode(input)
  encoded.ids
end

#identifier ⇒ Object



33
34
35
# File 'lib/ruby_llm/tokenizer/backend/hugging_face.rb', line 33

# Builds the label for this backend: "hugging_face:<repo>", followed by
# "@<revision>" when a revision is set (any value other than nil/false).
#
# @return [String] e.g. "hugging_face:org/model" or "hugging_face:org/model@main"
def identifier
  revision_suffix = revision ? "@#{revision}" : ""
  "hugging_face:#{repo}#{revision_suffix}"
end