Module: RubyXlsxToMd::Encoding

Defined in:
lib/ruby_xlsx_to_md/encoding.rb

Constant Summary collapse

# Byte-order marks recognised by .detect. Keys are the raw marker bytes as
# binary (ASCII-8BIT) strings; values are the names of the encodings they
# announce. Entries are checked in insertion order.
BOMS = [
  ["\xEF\xBB\xBF", "UTF-8"],
  ["\xFF\xFE",     "UTF-16LE"],
  ["\xFE\xFF",     "UTF-16BE"],
].map { |marker, name| [marker.b, name] }.to_h.freeze

Class Method Summary collapse

Class Method Details

.detect(bytes) ⇒ Object



11
12
13
14
15
16
17
18
19
20
# File 'lib/ruby_xlsx_to_md/encoding.rb', line 11

# Guesses the character encoding of a string from its raw bytes.
#
# A leading byte-order mark listed in BOMS wins. Otherwise the content is
# reported as UTF-8 when its bytes form valid UTF-8, and as Windows-1252 when
# they do not.
#
# @param bytes [String] content to inspect; its encoding tag is ignored and
#   the argument itself is never modified
# @return [String] "UTF-8", "UTF-16LE", "UTF-16BE" or "Windows-1252"
def self.detect(bytes)
  # Compare on a binary copy. The BOM keys are ASCII-8BIT, and start_with?
  # raises Encoding::CompatibilityError when a receiver holding non-ASCII bytes
  # carries a different encoding tag (e.g. a UTF-8 string read in text mode).
  raw = bytes.b

  BOMS.each do |bom, encoding|
    return encoding if raw.start_with?(bom)
  end

  # raw is already a private copy, so it can be retagged in place.
  return "UTF-8" if raw.force_encoding("UTF-8").valid_encoding?

  "Windows-1252"
end

.ensure_utf8(value) ⇒ Object



31
32
33
34
35
# File 'lib/ruby_xlsx_to_md/encoding.rb', line 31

# Returns +value+ as UTF-8 text.
#
# A string that is already tagged UTF-8 and holds valid UTF-8 is returned
# as-is (the same object). Anything else is reduced to its raw bytes, which
# discards the declared encoding, and handed to .to_utf8.
#
# @param value [String] the string to normalise
# @return [String] +value+ itself, or whatever .to_utf8 returns for its bytes
def self.ensure_utf8(value)
  already_utf8 = value.encoding == ::Encoding::UTF_8 && value.valid_encoding?

  if already_utf8
    value
  else
    to_utf8(value.b)
  end
end

.to_utf8(bytes) ⇒ Object



22
23
24
25
26
27
28
29
# File 'lib/ruby_xlsx_to_md/encoding.rb', line 22

# Converts a string of unknown encoding into UTF-8 text.
#
# The source encoding is chosen by .detect. Byte sequences that are invalid in
# that encoding, or that have no UTF-8 equivalent, are replaced instead of
# raising, and a leading byte-order mark (U+FEFF) is removed from the result.
#
# @param bytes [String] raw content; its encoding tag is ignored and the
#   argument itself is never modified
# @return [String] a new UTF-8 string with no leading U+FEFF
def self.to_utf8(bytes)
  # Normalise to a binary copy first. Detection compares against binary BOM
  # markers, which raises Encoding::CompatibilityError for non-ASCII input that
  # still carries a text encoding tag (e.g. a UTF-8 string read in text mode).
  raw = bytes.b
  encoding = detect(raw)

  # raw is a private copy, so it can be retagged in place before transcoding.
  text = raw.force_encoding(encoding).encode("UTF-8", invalid: :replace, undef: :replace)

  # delete_prefix! returns nil when there was no BOM, so return text explicitly.
  text.delete_prefix!("\uFEFF")
  text
end