Class: UTF8Decoder

Inherits:
Object
  • Object
show all
Defined in:
lib/utf8decoder.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ UTF8Decoder

Returns a new instance of UTF8Decoder.



5
6
7
# File 'lib/utf8decoder.rb', line 5

# Sets up the decoder with an empty, mutable, binary-encoded (ASCII-8BIT)
# byte buffer stored in @buffer.
def initialize
  @buffer = String.new(encoding: Encoding::BINARY)
end

Instance Attribute Details

#buffer ⇒ Object (readonly)

Returns the value of attribute buffer.



3
4
5
# File 'lib/utf8decoder.rb', line 3

# Read-only accessor: returns the object currently held in @buffer
# (the same object, not a copy).
def buffer = @buffer

Instance Method Details

#<<(str) ⇒ Object



9
# File 'lib/utf8decoder.rb', line 9

# Appends the raw bytes of +str+ to @buffer. The argument is converted with
# String#b first, so the caller's string is left untouched and the bytes are
# appended as binary data regardless of the argument's own encoding.
# Returns @buffer (the result of the append).
def <<(str)
  raw_bytes = str.b
  @buffer.concat(raw_bytes)
end

#each(&block) ⇒ Object

Yields each complete character as a String (the original contract).



12
13
14
# File 'lib/utf8decoder.rb', line 12

# Passes every character of each chunk yielded by #decode to the given block,
# one String per character (via String#each_char).
def each(&block)
  decode do |chunk|
    chunk.each_char(&block)
  end
end

#each_codepoint(&block) ⇒ Object

Yields each complete character as an Integer codepoint. The hot path (Term#feed) wants codepoints, not 1-char Strings: on a valid chunk String#each_codepoint avoids allocating a String per character and the per-character valid_encoding?/ord that #feed used to do. Validity is checked once per chunk; only a chunk that actually contains bad bytes falls back to the slower per-character path (rendering them as U+FFFD).



22
23
24
25
26
27
28
29
30
# File 'lib/utf8decoder.rb', line 22

# Passes Integer codepoints to the given block for each chunk yielded by
# #decode. Validity is tested once per chunk: a chunk with valid encoding is
# handed straight to String#each_codepoint. Otherwise the chunk is walked
# character by character, and any character that is not validly encoded is
# reported as U+FFFD (the replacement character) instead of its ordinal.
def each_codepoint(&block)
  decode do |chunk|
    # Fast path: the whole chunk is well-formed.
    next chunk.each_codepoint(&block) if chunk.valid_encoding?

    # Slow path: substitute U+FFFD for each badly encoded character.
    chunk.each_char do |char|
      codepoint = char.valid_encoding? ? char.ord : 0xFFFD
      block.call(codepoint)
    end
  end
end