Module: Sisimai::String
- Defined in:
- lib/sisimai/string.rb
Overview
Sisimai::String provides utilities for dealing with strings.
Constant Summary collapse
- Match =
{ html: %r|<html[ >].+?</html>|im, body: %r|<head>.+</head>.*<body[ >].+</body>|im, }
Class Method Summary collapse
-
.aligned(argv1, argv2) ⇒ Boolean
Check if each element of the 2nd argument is aligned in the 1st argument or not.
-
.is_8bit(argvs) ⇒ Boolean
Check whether the argument is 8-bit text or not.
-
.to_plain(argv1 = '', loose = false) ⇒ String
Convert given HTML text to plain text.
-
.to_utf8(argv1 = '', argv2 = nil) ⇒ String
Convert given string to UTF-8.
Class Method Details
.aligned(argv1, argv2) ⇒ Boolean
Check if each element of the 2nd argument is aligned in the 1st argument or not
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/sisimai/string.rb', line 25
# Check whether every element of the 2nd argument appears in the 1st argument,
# in the given order and without overlapping the previous match.
#
# @param argv1 [String, #to_s] Text to be searched; it is converted with #to_s
# @param argv2 [Array<String>] Substrings that must appear in this order
# @return [Boolean] true when all the elements were found in order; false when
#   the text is empty, argv2 is not an Array, or argv2 has fewer than 2 elements
def aligned(argv1, argv2)
  # Normalize once so that the guard and the search look at the same String
  text = argv1.to_s
  return false if text.empty? || !argv2.is_a?(Array) || argv2.size < 2

  offset = 0 # Position in the text where the next search starts
  argv2.all? do |e|
    found = text.index(e, offset)
    next false unless found # The element does not appear after the previous one

    offset = found + e.length # Continue right after the matched element
    true
  end
end
.is_8bit(argvs) ⇒ Boolean
Check whether the argument is 8-bit text or not
13 14 15 16 17 18 |
# File 'lib/sisimai/string.rb', line 13
# Check whether the argument is 8-bit text or not.
#
# @param argvs [String, #to_s] Any value; it is converted with #to_s
# @return [Boolean] true when the text holds anything other than 7-bit ASCII
#   characters (including invalid byte sequences), false when it is empty or
#   consists of ASCII characters only
def is_8bit(argvs)
  # String#ascii_only? never raises, whereas matching a regular expression
  # against a string with an invalid byte sequence raises ArgumentError.
  !argvs.to_s.ascii_only?
end
.to_plain(argv1 = '', loose = false) ⇒ String
Convert given HTML text to plain text
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/sisimai/string.rb', line 46
# Convert given HTML text to plain text.
#
# The text is converted only when `loose` is true or when it matches
# Match[:html] or Match[:body]; otherwise the argument is returned as is.
# The argument itself is never modified.
#
# @param argv1 [String] HTML text
# @param loose [Boolean] Convert even if the text does not look like HTML
# @return [String] Plain text, or "" when the argument is nil or empty
def to_plain(argv1 = '', loose = false)
  return '' if argv1.nil? || argv1.empty?

  # Work on a scrubbed copy: regular expressions raise ArgumentError on invalid
  # byte sequences, and the caller's string must stay untouched.
  source = argv1.scrub('?')
  return argv1 unless loose || source =~ Match[:html] || source =~ Match[:body]

  # 1. Remove <head>...</head>
  # 2. Remove <style>...</style>
  # 3. <a href = 'http://...'>...</a> to "[...](http://...)"
  # 4. <a href = 'mailto:...'>...</a> to "[...](mailto:...)"
  plain = source.gsub(%r|<head>.+</head>|im, '')
  plain = plain.gsub(%r|<style.+?>.+</style>|im, '')
  plain = plain.gsub(%r|<a\s+href\s*=\s*['"](https?://.+?)['"].*?>(.*?)</a>|i, '[\2](\1)')
  plain = plain.gsub(%r|<a\s+href\s*=\s*["']mailto:([^\s]+?)["']>(.*?)</a>|i, '[\2](mailto:\1)')

  plain = plain.gsub(/<[^<@>]+?>\s*/, ' ')              # Delete HTML tags except <neko@example.jp>
  plain = plain.gsub('&lt;', '<').gsub('&gt;', '>')     # Convert to angle brackets
  plain = plain.gsub('&amp;', '&').gsub('&nbsp;', ' ')  # Convert to "&" and a space
  plain = plain.gsub('&quot;', '"').gsub('&apos;', "'") # Convert to " and '

  # Tidy up only when something has actually been removed from the text
  plain = "#{plain.squeeze(' ')}\n" if source.size > plain.size
  plain
end
.to_utf8(argv1 = '', argv2 = nil) ⇒ String
Convert given string to UTF-8
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/sisimai/string.rb', line 74
# Convert given string to UTF-8.
#
# When a source encoding name is given the text is transcoded from it;
# otherwise, or when the transcoding fails, the bytes are only re-labelled as
# UTF-8. Invalid byte sequences in the result are replaced with "?".
# The argument itself is never modified.
#
# @param argv1 [String] String to be converted
# @param argv2 [String, nil] Name of the encoding the string is written in
# @return [String] UTF-8 string, or "" when the argument is nil or empty
def to_utf8(argv1 = '', argv2 = nil)
  return '' if argv1.nil? || argv1.empty?

  converted =
    begin
      if argv2
        # String#encode('UTF-8', <FROM>) returns a new string
        argv1.encode('UTF-8', argv2)
      else
        # Re-label a copy: force_encoding changes its receiver in place
        argv1.dup.force_encoding('UTF-8')
      end
    rescue StandardError
      # Unknown encoding name or failed to encode
      argv1.dup.force_encoding('UTF-8')
    end
  converted.scrub('?')
end