Class: ChupaText::UTF8Converter
- Inherits: Object
  - Object
    - ChupaText::UTF8Converter
 
- Defined in:
- lib/chupa-text/utf8-converter.rb
Instance Method Summary
- #convert ⇒ Object
- #initialize(string, max_size: nil) ⇒ UTF8Converter (constructor)
  A new instance of UTF8Converter.
Constructor Details
#initialize(string, max_size: nil) ⇒ UTF8Converter
Returns a new instance of UTF8Converter.
# File 'lib/chupa-text/utf8-converter.rb', line 19

# Stores the input and the optional size limit for a later #convert call.
#
# @param string [String] the text to convert; kept by reference, not copied.
# @param max_size [Integer, nil] optional size limit; +nil+ (the default)
#   means no limit is recorded.
def initialize(string, max_size: nil)
  @string = string
  @max_size = max_size
end
Instance Method Details
#convert ⇒ Object
# File 'lib/chupa-text/utf8-converter.rb', line 24

# Produces a UTF-8 rendition of the wrapped string.
#
# The strategy depends on the string's declared encoding:
#
# * UTF-8: only a leading BOM (as reported by +detect_bom+) is removed.
# * ASCII-8BIT: returned as is when it contains ASCII bytes only;
#   otherwise the source encoding is taken from the BOM, then from
#   +guess_encoding+, and as a last resort the bytes are relabelled as
#   UTF-8 with invalid sequences and control characters removed.
# * Anything else: transcoded to UTF-8.
#
# Every transcoding step drops characters that are invalid or have no
# UTF-8 mapping instead of raising. All results except the last-resort
# one are passed through +truncate+ (defined elsewhere; presumably it
# enforces +@max_size+ -- confirm against its definition).
#
# @return [Object] the converted string.
def convert
  # Shared transcoding options: silently drop whatever cannot be converted.
  lossy = {invalid: :replace, undef: :replace, replace: ""}

  case @string.encoding
  when Encoding::UTF_8
    # Only the BOM size matters here; the reported encoding is discarded.
    bom_size, = detect_bom
    body = bom_size ? @string.byteslice(bom_size, @string.bytesize - bom_size) : @string
    return truncate(body)
  when Encoding::ASCII_8BIT
    return truncate(@string) if @string.ascii_only?
    # Non-ASCII binary data falls through to the detection steps below.
  else
    return truncate(@string.encode(Encoding::UTF_8, **lossy))
  end

  # A BOM, when present, names the real source encoding.
  bom_size, bom_encoding = detect_bom
  if bom_encoding
    payload = @string.byteslice(bom_size, @string.bytesize - bom_size)
    return truncate(payload.encode(Encoding::UTF_8, bom_encoding, **lossy))
  end

  guessed_encoding = guess_encoding
  if guessed_encoding
    return truncate(@string.encode(Encoding::UTF_8, guessed_encoding, **lossy))
  end

  # Last resort: work on a copy (byteslice/dup) so @string is not mutated,
  # treat the bytes as UTF-8 and strip what is not valid, printable text.
  fallback = @max_size ? @string.byteslice(0, @max_size) : @string.dup
  fallback.force_encoding(Encoding::UTF_8)
  fallback.scrub!("")
  fallback.gsub!(/\p{Control}+/, "")
  fallback
end