Module: Sisimai::String

Defined in:
lib/sisimai/string.rb

Overview

Sisimai::String provides utilities for dealing with strings.

Constant Summary collapse

# Patterns that to_plain() uses to decide whether a text looks like an HTML document.
# Both are case-insensitive (i) and let "." match newlines (m).
Match =
{
  # "<html" followed by a space or ">", up to the first closing "</html>"
  html: %r|<html[ >].+?</html>|im,
  # A "<head>...</head>" section followed later by "<body" (space or ">") ... "</body>"
  body: %r|<head>.+</head>.*<body[ >].+</body>|im,
}

Class Method Summary collapse

Class Method Details

.aligned(argv1, argv2) ⇒ Boolean

Check whether every element of the 2nd argument appears in the 1st argument, in the given order

Parameters:

  • argv1 (String)

    String to be checked

  • argv2 (Array)

    List including the ordered strings

Returns:

  • (Boolean)

Since:

  • v5.0.0



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/sisimai/string.rb', line 25

# Check whether every string listed in argv2 occurs in argv1, one after another
# and without overlapping, in the order given by argv2.
# @param    [String] argv1  String to be checked
# @param    [Array]  argv2  List including the ordered strings (two or more elements)
# @return   [Boolean]       true when all the elements were found in order
def aligned(argv1, argv2)
  return false if argv1.to_s.empty?
  return false unless argv2.is_a?(Array)
  return false if argv2.size < 2

  cursor = 0  # Offset in argv1 from which the next element is searched
  argv2.all? do |token|
    # Look for the element at or after the end of the previous match
    found = argv1.index(token, cursor)
    next false if found.nil?  # Not found: stop scanning, the list is not aligned

    cursor = found + token.length
    true
  end
end

.is_8bit(argvs) ⇒ Boolean

Check whether the argument contains any 8-bit (non-ASCII) character

Parameters:

  • argvs (String)

    Any string to be checked

Returns:

  • (Boolean)

    false: ASCII Characters only, true: Including 8-bit character



13
14
15
16
17
18
# File 'lib/sisimai/string.rb', line 13

# Check whether the argument contains any 8-bit (non-ASCII) character.
# @param    [String] argvs  Any string to be checked
# @return   [Boolean]       false: ASCII characters only (or empty), true: including
#                           at least one 8-bit character
def is_8bit(argvs)
  text = argvs.to_s
  return false if text.empty?

  # String#ascii_only? inspects the bytes without running a regular expression, so
  # a text holding an invalid byte sequence (for example a Latin-1 byte in a string
  # tagged as UTF-8) is reported as 8-bit instead of raising ArgumentError.
  !text.ascii_only?
end

.to_plain(argv1 = '', loose = false) ⇒ String

Convert given HTML text to plain text

Parameters:

  • argv1 (String) (defaults to: '')

    HTML text

  • loose (Boolean) (defaults to: false)

    Loose check flag

Returns:

  • (String)

    Plain text



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/sisimai/string.rb', line 46

# Convert given HTML text to plain text.
# The given string is never modified: all the work is done on a scrubbed copy.
# @param    [String]  argv1   HTML text
# @param    [Boolean] loose   Loose check flag: when true, the text is converted even
#                             if it does not match Match[:html] or Match[:body]
# @return   [String]          Plain text; the scrubbed text as is when it was not
#                             treated as HTML, "" for nil or an empty string
def to_plain(argv1 = '', loose = false)
  return "" if argv1.nil? || argv1.empty?

  # Replace invalid byte sequences with "?" BEFORE any regular expression runs:
  # matching against a broken string raises ArgumentError. String#scrub returns a
  # new string, so the caller's object (which may be frozen) is left untouched.
  plain = argv1.scrub('?')
  return plain unless loose || plain.match?(Match[:html]) || plain.match?(Match[:body])

  scrubbed_size = plain.size

  # 1. Remove <head>...</head>
  # 2. Remove <style ...>...</style>
  # 3. <a href = 'http://...'>Text</a> to "[Text](http://...)"
  # 4. <a href = 'mailto:...'>Text</a> to "[Text](mailto:...)"
  plain = plain.gsub(%r|<head>.+</head>|im, '')
  plain = plain.gsub(%r|<style.+?>.+</style>|im, '')
  plain = plain.gsub(%r|<a\s+href\s*=\s*['"](https?://.+?)['"].*?>(.*?)</a>|i, '[\2](\1)')
  plain = plain.gsub(%r|<a\s+href\s*=\s*["']mailto:([^\s]+?)["']>(.*?)</a>|i, '[\2](mailto:\1)')
  plain = plain.gsub(/<[^<@>]+?>\s*/, ' ')              # Delete HTML tags except <neko@example.jp>
  plain = plain.gsub(/&lt;/, '<').gsub(/&gt;/, '>')     # Convert to angle brackets
  plain = plain.gsub(/&amp;/, '&').gsub(/&nbsp;/, ' ')  # Convert to "&" and a space
  plain = plain.gsub(/&quot;/, '"').gsub(/&apos;/, "'") # Convert to " and '

  # Something was stripped: collapse runs of spaces and terminate with a newline
  plain = "#{plain.squeeze(' ')}\n" if scrubbed_size > plain.size
  plain
end

.to_utf8(argv1 = '', argv2 = nil) ⇒ String

Convert given string to UTF-8

Parameters:

  • argv1 (String) (defaults to: '')

    String to be converted

  • argv2 (String) (defaults to: nil)

    Encoding name before converting

Returns:

  • (String)

    UTF-8 Encoded string



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/sisimai/string.rb', line 74

# Convert given string to UTF-8.
# The given string is never modified: re-tagging is done on a copy, so frozen
# strings are accepted and the caller's object keeps its original encoding.
# @param    [String] argv1  String to be converted
# @param    [String] argv2  Encoding name before converting; when omitted the bytes
#                           are simply re-interpreted as UTF-8
# @return   [String]        UTF-8 encoded string in which each invalid byte sequence
#                           is replaced with "?", "" for nil or an empty string
def to_utf8(argv1 = '', argv2 = nil)
  return "" if argv1.nil? || argv1.empty?

  converted = begin
    if argv2
      # Transcode from the declared source encoding: String#encode('UTF-8', <FROM>)
      argv1.encode('UTF-8', argv2)
    else
      # No source encoding is known: tag a copy of the bytes as UTF-8
      argv1.dup.force_encoding('UTF-8')
    end
  rescue StandardError
    # Unknown encoding name or failed to encode: fall back to re-tagging a copy
    argv1.dup.force_encoding('UTF-8')
  end

  converted.scrub('?')
end