Module: Ensure::Encoding
- Defined in:
- lib/ensure/encoding.rb
Defined Under Namespace
Modules: String
Constant Summary collapse
- BYTE_ORDER_MARKS =
{ ::Encoding::UTF_16BE => [0xfe, 0xff], ::Encoding::UTF_16LE => [0xff, 0xfe], ::Encoding::UTF_8 => [0xef, 0xbb, 0xbf] }
Class Method Summary collapse
-
.force_encoding(string, target_encoding, options = {}) ⇒ Object
Forces the encoding of
string
to target_encoding
using a number of smart tricks. -
.force_encoding!(string, target_encoding, options = {}) ⇒ Object
Performs just like
force_encoding
, only it changes the string in place instead of returning it. -
.guess_encoding(string, guesses) ⇒ Object
Checks the encodings in
guesses
from front to back and returns the first encoding in which the character data is a valid sequence. -
.sniff_encoding(string) ⇒ Object
Tries to guess the encoding of the string and returns the most likely encoding.
Class Method Details
.force_encoding(string, target_encoding, options = {}) ⇒ Object
Forces the encoding of string
to target_encoding
using a number of smart tricks. See String#ensure_encoding for more details.
43 44 45 46 47 |
# File 'lib/ensure/encoding.rb', line 43

# Returns a copy of +string+ forced to +target_encoding+, leaving the
# argument itself untouched.
#
# The work is done by force_encoding! on a duplicate of +string+; +options+
# is handed through unchanged.
#
# @param string [String] the string to duplicate and re-encode
# @param target_encoding [Encoding, String] encoding the result should have
# @param options [Hash] forwarded as-is to force_encoding!
# @return [String] the re-encoded duplicate
def self.force_encoding(string, target_encoding, options = {})
  target_string = string.dup
  force_encoding!(target_string, target_encoding, options)
  target_string
end
.force_encoding!(string, target_encoding, options = {}) ⇒ Object
Performs just like force_encoding
, only it changes the string in place instead of returning it.
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/ensure/encoding.rb', line 51

# In-place variant of force_encoding: mutates +string+ instead of returning a copy.
#
# Recognised +options+ keys:
# * +:external_encoding+ - the encoding the bytes are currently in. Pass
#   +:sniff+ to detect it with sniff_encoding, a single encoding, or a list
#   of candidates (anything responding to +each+) to be tried in order with
#   guess_encoding. Defaults to <tt>[target_encoding, string.encoding]</tt>.
# * +:invalid_characters+ - +:raise+ re-tags the string as +target_encoding+
#   without transcoding and raises if the bytes are not valid in it; +:drop+
#   transcodes and removes invalid/undefined characters; any other value
#   transcodes with no replacement filters.
#
# @param string [String] the string to change in place
# @param target_encoding [Encoding, String] encoding the string should end up in
# @param options [Hash] see the keys above
# @return [String, nil] +string+ after transcoding; +nil+ in +:raise+ mode
# @raise [Encoding::InvalidByteSequenceError] in +:raise+ mode when the bytes
#   are not a valid +target_encoding+ sequence
def self.force_encoding!(string, target_encoding, options = {})
  external_encoding =
    if options[:external_encoding] == :sniff
      sniff_encoding(string)
    else
      options[:external_encoding] || [target_encoding, string.encoding]
    end

  # A list of candidates is narrowed down to the first encoding the bytes are
  # valid in; when none fits, fall back to the target encoding.
  if external_encoding.respond_to?(:each)
    external_encoding = guess_encoding(string, external_encoding) || target_encoding
  end

  if options[:invalid_characters] == :raise
    # Only the encoding tag changes here; the bytes themselves are not converted.
    string.force_encoding(target_encoding)
    raise ::Encoding::InvalidByteSequenceError, "String is not encoded as `#{target_encoding}'" unless string.valid_encoding?
  else
    filters = options[:invalid_characters] == :drop ? { :replace => '', :undef => :replace, :invalid => :replace } : {}
    # Double splat so the filters arrive as keyword arguments:
    # https://piechowski.io/post/last-arg-keyword-deprecated-ruby-2-7/
    string.encode!(target_encoding, external_encoding, **filters)
  end
end
.guess_encoding(string, guesses) ⇒ Object
Checks the encodings in guesses
from front to back and returns the first encoding in which the character data is a valid sequence.
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/ensure/encoding.rb', line 25

# Tries each encoding in +guesses+, front to back, and returns the first one
# in which the bytes of +string+ form a valid sequence.
#
# The string is temporarily re-tagged with every candidate; its original
# encoding is put back before returning, and also when a candidate makes
# String#force_encoding raise (e.g. an unknown encoding name).
#
# @param string [String] the string whose bytes are examined
# @param guesses [#each] candidate encodings, most preferred first
# @return [Encoding, nil] the first fitting encoding, or +nil+ when none fits
def self.guess_encoding(string, guesses)
  original_encoding = string.encoding
  begin
    guesses.each do |guess|
      string.force_encoding(guess)
      # The return value is computed before the ensure clause restores the tag.
      return string.encoding if string.valid_encoding?
    end
    nil
  ensure
    string.force_encoding(original_encoding)
  end
end
.sniff_encoding(string) ⇒ Object
Tries to guess the encoding of the string and returns the most likely encoding.
13 14 15 16 17 18 19 20 21 |
# File 'lib/ensure/encoding.rb', line 13

# Detects the encoding of +string+ from a leading byte order mark.
#
# The first three bytes are compared against every entry of
# BYTE_ORDER_MARKS; the encoding of the first matching mark wins. Strings
# without a recognised mark are reported as UTF-8.
#
# @param string [String] the string to inspect
# @return [Encoding] the encoding of the matching mark, or Encoding::UTF_8
def self.sniff_encoding(string)
  # 'C3' yields nil for missing bytes, so strings shorter than three bytes
  # simply fail to match the longer marks.
  first_bytes = string.unpack('C3')
  match = BYTE_ORDER_MARKS.find { |_encoding, bytes| first_bytes.first(bytes.length) == bytes }
  match ? match.first : ::Encoding::UTF_8
end