Module: FatCore::String

Included in:
String
Defined in:
lib/fat_core/string.rb

Defined Under Namespace

Modules: ClassMethods

Constant Summary collapse

UPPERS =
('A'..'Z').to_a
REGEXP_META_CHARACTERS =
"\\$()*+.<>?[]^{|}".chars.freeze

Transforming collapse

Matching collapse

Numbers collapse

Generating collapse

Class Method Details

.included(base) ⇒ Object



409
410
411
# File 'lib/fat_core/string.rb', line 409

# Hook invoked with the class or module that includes this module; it
# extends that receiver with ClassMethods so the methods defined there
# become singleton (class-level) methods of the includer.
#
# @param base [Module] the class or module doing the including
# @return [Object] whatever base.extend returns
def self.included(base)
  class_level_mixin = ClassMethods
  base.extend(class_level_mixin)
end

Instance Method Details

#as_regexpRegexp

Convert a string of the form '/.../Iixm' to a regular expression. However, make the regular expression case-insensitive by default and extend the modifier syntax to allow '/I' to indicate case-sensitive. Without the surrounding '/', quote any Regexp metacharacters in the string and return a Regexp that matches the string literally, but still make the Regexp case insensitive.

Examples:

'/Hello/'.as_regexp #=> /Hello/i
'/Hello/I'.as_regexp #=> /Hello/
'Hello'.as_regexp #=> /Hello/i
'Hello\b'.as_regexp #=> /Hello\\b/i

Returns:

  • (Regexp)


327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# File 'lib/fat_core/string.rb', line 327

# Convert self to a Regexp.
#
# When the whole string (ignoring surrounding white space) has the form
# '/body/opts', where opts is any combination of the letters I, i, x and m,
# build a Regexp from body. The result is case-insensitive by default; 'I'
# turns case-insensitivity off, 'i' turns it (back) on, 'x' adds
# Regexp::EXTENDED and 'm' adds Regexp::MULTILINE. The body may not contain
# a '/' character.
#
# Any other string is matched literally: its Regexp metacharacters are
# quoted and the result is case-insensitive.
#
# The '/.../' form is recognized with \A and \z so that the *entire* string
# must have that shape. Anchoring with ^ and $ would let a multi-line string
# whose last line merely looks like '/x/' be treated as regexp syntax,
# silently discarding the other lines.
#
# @return [Regexp]
def as_regexp
  md = %r{\A\s*/([^/]*)/([Iixm]*)\s*\z}.match(self)
  # Not in /.../ form: match the string literally, ignoring case.
  return Regexp.new(Regexp.quote(self), Regexp::IGNORECASE) unless md

  body = md[1]
  opts = md[2]
  # Case-insensitive unless 'I' asks for case sensitivity.
  flags = opts.include?('I') ? 0 : Regexp::IGNORECASE
  flags |= Regexp::IGNORECASE if opts.include?('i')
  flags |= Regexp::EXTENDED if opts.include?('x')
  flags |= Regexp::MULTILINE if opts.include?('m')
  Regexp.new(body, flags)
end

#as_strObject



46
47
48
# File 'lib/fat_core/string.rb', line 46

# Return the receiver itself, unchanged (no copy is made).
#
# @return [String] self
def as_str
  itself
end

#as_symSymbol

Convert to a lower-case symbol with all hyphens and white space converted to a single '_' and all non-alphanumerics deleted, such that the string will work as an unquoted Symbol.

Examples:

"Hello World" -> :hello_world
"Hello*+World" -> :helloworld
"jack-in-the-box" -> :jack_in_the_box

Returns:

  • (Symbol)

    self converted to a Symbol



38
39
40
41
42
43
44
# File 'lib/fat_core/string.rb', line 38

# Convert self to a lower-case Symbol usable without quoting.
#
# Starting from the #clean-ed string, each run of white space becomes one
# '_', each hyphen becomes a '_', every remaining character that is not an
# ASCII letter, digit or '_' is dropped, and the result is downcased.
#
# @return [Symbol] self converted to a Symbol
def as_sym
  underscored = clean.gsub(/\s+/, '_').tr('-', '_')
  identifier = underscored.gsub(/[^_A-Za-z0-9]/, '')
  identifier.downcase.to_sym
end

#cleanString

Remove leading and trailing white space and compress internal runs of white space to a single space.

Examples:

"  hello   world\n  ".clean #=> 'hello world'

Returns:



24
25
26
# File 'lib/fat_core/string.rb', line 24

# Remove leading and trailing white space and compress every internal run
# of white space (spaces, tabs, newlines, etc.) to a single space.
#
# A plain squeeze(' ') would only collapse runs of the space character and
# leave tabs and newlines in place, so the runs are matched with \s+.
#
# @return [String] a new, normalized copy of self
def clean
  strip.gsub(/\s+/, ' ')
end

#commas(places = nil) ⇒ String

If the string is a valid number, return a string that adds grouping commas to the whole number part; otherwise, return self. Round the number to the given number of places after the decimal point if places is positive; round to the left of the decimal point if places is negative. Pad with zeroes on the right for positive places, on the left for negative places.

Examples:

'hello'.commas             #=> 'hello'
'+4654656.33e66'.commas    #=> '+4,654,656.33e66'
'6789345612.14'.commas(-5) #=> '6,789,350,000'
'6543.14'.commas(5)        #=> '6,543.14000'

Returns:

  • (String)

    self if not a valid number

  • (String)

    commified number as a String



379
380
381
382
383
384
# File 'lib/fat_core/string.rb', line 379

# If self looks like a number, convert it to a Float and return the result
# of Float#commas(places); otherwise return self unchanged.
#
# A string counts as a number when, after #clean, it consists of an
# optional sign, a mantissa of digits and underscores with an optional
# decimal point, and an optional exponent. The mantissa must start with a
# digit or with '.' followed by a digit: without that requirement strings
# such as '', '+', '.', 'e5' or '_1' would be accepted and formatted as
# zero instead of being returned as-is.
#
# @param places [Integer, nil] passed through to Float#commas
# @return [String] self if not a valid number
# @return [String] the value returned by Float#commas otherwise
def commas(places = nil)
  numeric_re = /\A[-+]?(?=\.?\d)[\d_]*(?:\.[\d_]*)?(?:[eE][+-]?[\d_]+)?\z/
  return self unless clean.match?(numeric_re)

  to_f.commas(places)
end

#distance(other) ⇒ Integer

Return the Damerau-Levenshtein distance between self and another string using a transposition block size of 1 and quitting if a max distance of 10 is reached.

Parameters:

  • other (#to_s)

    string to compute self's distance from

Returns:

  • (Integer)

    the distance between self and other



224
225
226
# File 'lib/fat_core/string.rb', line 224

# Return the distance between self and other as computed by
# DamerauLevenshtein.distance, passing 1 and 10 as its third and fourth
# arguments (described in this method's documentation as the transposition
# block size and the maximum distance at which to stop).
#
# @param other [#to_s] string to compute self's distance from
# @return [Integer] the distance between self and other
def distance(other)
  block_size = 1
  max_distance = 10
  DamerauLevenshtein.distance(self, other.to_s, block_size, max_distance)
end

#entitleString

Return self capitalized according to the conventions for capitalizing titles of books or articles. Tries to follow the rules of the University of Chicago's A Manual of Style, Section 7.123, except to the extent that doing so requires knowing the parts of speech of words in the title. Also tries to use sensible capitalization for things such as postal address abbreviations, like P.O. Box, Ave., Cir., etc. Considers all-consonant words of 3 or more characters as acronyms to be kept all uppercase, e.g., ddt => DDT, and words that are all uppercase in the input are kept that way, e.g. IBM stays IBM. Thus, if the source string is all uppercase, you should lowercase the whole string before using #entitle, otherwise it will not have the intended effect.

Examples:

'now is the time for all good men'.entitle #=> 'Now Is the Time for All Good Men'
'how in the world does IBM do it?'.entitle #=> "How in the World Does IBM Do It?"
'how in the world does ibm do it?'.entitle #=> "How in the World Does Ibm Do It?"
'ne by nw'.entitle #=> 'NE by NW'
'my life: a narcissistic tale'.entitle #=> 'My Life: A Narcissistic Tale'

Returns:



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/fat_core/string.rb', line 141

# Return self capitalized as a title.
#
# The string is split on white space and each word is classified by the
# first rule below that applies:
#
# 1. a word containing 'c/o' becomes 'c/o';
# 2. 'po', 'p.o', 'p.o.' and the like become 'P.O.';
# 3. ordinals such as '3RD' are downcased;
# 4. cr, dr, st, rd, ave, pk and cir are capitalized;
# 5. us, ne, se and rr are upcased;
# 6. any other word starting with a digit is upcased;
# 7. compass directions (n, s, e, w, ne, nw, se, sw) are upcased;
# 8. "little words" (articles, conjunctions, short prepositions) are
#    downcased unless they are the first word, the last word, or follow a
#    word ending in ':', in which case they are capitalized;
# 9. words of 3 or more characters with no vowel (a, e, i, o, u, y) are
#    upcased as probable acronyms;
# 10. words of at most 5 characters made up only of upper-case ASCII
#     letters and digits are kept as typed, unless all_upper? reports that
#     the whole string is upper-case;
# 11. 'word-word' has both halves capitalized;
# 12. anything else is capitalized.
#
# Leading white space is ignored when splitting, so it cannot produce an
# empty first word that would steal the "first word" status.
#
# @return [String] the title-cased words joined by single spaces
def entitle
  little_words = %w[a an the and but or nor at for in on of as by to]
  preserve_acronyms = !all_upper?
  newwords = []
  # True when the previously emitted word ended with ':'.
  capitalize_next = false
  words = split
  last_k = words.size - 1
  words.each_with_index do |w, k|
    # Positions at which even a little word is capitalized.
    prominent = k.zero? || k == last_k || capitalize_next
    newwords <<
      if %r{c/o}i.match?(w)
        # Care of
        # NOTE(review): unanchored, so any word merely containing 'c/o'
        # is replaced wholesale -- confirm this is intended.
        'c/o'
      elsif /^p\.?o\.?$/i.match?(w)
        # Post office
        'P.O.'
      elsif /^[0-9]+(st|nd|rd|th)$/i.match?(w)
        # Ordinals
        w.downcase
      elsif /^(cr|dr|st|rd|ave|pk|cir)$/i.match?(w)
        # Common abbrs to capitalize
        w.capitalize
      elsif /^(us|ne|se|rr)$/i.match?(w)
        # Common 2-letter abbrs to upcase
        w.upcase
      elsif /^[0-9].*$/i.match?(w)
        # Other runs starting with numbers, like 3-A
        w.upcase
      elsif /^(N|S|E|W|NE|NW|SE|SW)$/i.match?(w)
        # Compass directions all caps
        w.upcase
      elsif little_words.include?(w.downcase)
        # Only capitalize at beginning or end, or right after a ':'
        prominent ? w.capitalize : w.downcase
      elsif w.size > 2 && /^[^aeiouy]*$/i.match?(w)
        # All consonants and at least 3 chars, probably abbr
        w.upcase
      elsif preserve_acronyms && w.size <= 5 && /^[A-Z0-9]+\z/.match?(w)
        # Short run of uppercase letters and/or digits, keep as is
        w
      elsif (halves = /^(\w+)-(\w+)$/i.match(w))
        # Hyphenated double word
        "#{halves[1].capitalize}-#{halves[2].capitalize}"
      else
        # All else
        w.capitalize
      end
    # Remember whether the word just emitted ends with ':' so that a
    # little word directly after it gets capitalized.
    capitalize_next = newwords.last.match?(/:\s*\z/)
  end
  newwords.join(' ')
end

#fuzzy_match(matcher) ⇒ String?

Return the matched portion of self, minus punctuation characters, if self matches the string matcher using the following notion of matching:

  1. Remove leading and trailing whitespace in the subject and the matcher and collapse its internal whitespace to a single space,
  2. In the subject string replace periods and commas with a space (so they still act as word separators) but remove apostrophes, and asterisks so the user need not remember whether they were used when forming the matcher.
  3. In the matcher, make any period, comma, asterisk, or apostrophe optional for the same reason.
  4. Treat internal ':stuff' or ' :stuff' in the matcher as the equivalent of /\bstuff.*/ in a regular expression, that is, match any word starting with stuff in self,
  5. Treat internal 'stuff: ' in the matcher as the equivalent of /.*stuff\b/ in a regular expression, that is, match any word ending with stuff in self,
  6. A colon with no spaces around it is treated as belonging to the following word, requiring it to start with it, so 'some:stuff' requires 'some' anywhere followed by a word beginning with 'stuff', i.e., /some.*\bstuff/i,
  7. Treat leading ':' in the matcher as anchoring the match to the beginning of the target string,
  8. Treat ending ':' in the matcher as anchoring the match to the end of the target string,
  9. Require each component to match some part of self, and
  10. Ignore case in the match

Examples:

"St. Luke's Hospital".fuzzy_match('st lukes') #=> 'St Lukes'
"St. Luke's Hospital".fuzzy_match('luk:hosp') #=> 'Lukes Hosp'
"St. Luke's Hospital".fuzzy_match('st:spital') #=> nil
"St. Luke's Hospital".fuzzy_match('st spital') #=> 'St Lukes Hospital'
"St. Luke's Hospital".fuzzy_match('st:laks') #=> nil
"St. Luke's Hospital".fuzzy_match(':lukes') #=> nil
"St. Luke's Hospital".fuzzy_match('lukes:hospital:') #=> 'Lukes Hospital'

Parameters:

  • matcher (String)

    pattern to test against where ':' is wildcard

Returns:

  • (String)

    the unpunctuated part of self that matched

  • (nil)

    if self did not match matcher



268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/fat_core/string.rb', line 268

# Return the part of self matched by matcher, or nil if there is no match.
#
# The target is self with periods and commas replaced by spaces (so they
# still separate words, e.g. 'WWW.WOLFRAM' -> 'WWW WOLFRAM'), asterisks and
# apostrophes removed, and the result passed through #clean. The match is
# case-insensitive and the returned text is taken from this normalized
# target, so it carries none of that punctuation.
#
# The matcher is first passed through #clean and is then turned into a
# regular expression in one of two ways:
#
# * If it contains a ':' or a space, each '*', '.', ',' and "'" gets a '?'
#   appended, a leading ':' becomes \A, a trailing ':' becomes \z, ':'
#   followed by white space becomes '\b.*', any other ':' becomes '.*\b',
#   and remaining white space becomes '.*'. Other characters are inserted
#   into the pattern as-is.
#   NOTE(review): regexp metacharacters in this branch are not quoted (a
#   '.' acts as "any character"); left unchanged to preserve behavior.
# * Otherwise the matcher is a single word that is matched literally, with
#   each '*', '.', ',' and "'" optional. The optional marker is added after
#   quoting each character; quoting the already-marked string would turn
#   the '?' into a literal question mark that can never match.
#
# @param matcher [String] pattern to test against where ':' is wildcard
# @return [String] the unpunctuated part of self that matched
# @return [nil] if self did not match matcher
def fuzzy_match(matcher)
  optional_punct = /[\*.,']/
  pattern = matcher.clean
  # Replace periods and commas with a space and remove asterisks and
  # apostrophes before matching.
  target = gsub(/[.,]/, ' ').gsub(/[\*']/, '').clean
  regexp_string =
    if pattern.match?(/[: ]/)
      pattern.gsub(optional_punct, '\0?')
        .sub(/\A:/, "\\A").sub(/:\z/, "\\z")
        .gsub(/:\s+/, "\\b.*").gsub(':', ".*\\b")
        .gsub(/\s+/, ".*")
    else
      pattern.each_char.map { |ch|
        quoted = Regexp.escape(ch)
        ch.match?(optional_punct) ? "#{quoted}?" : quoted
      }.join
    end
  # String#[] with a Regexp returns the matched text, or nil on no match.
  target[/#{regexp_string}/i]
end

#gut(max_size, ellipsis: '~', squeeze: nil) ⇒ Object

Rather than truncate a String to make it fit a given length, this method removes characters from the middle of the string to make it fit the given size. This is often preferable to truncating at the end or beginning of a String because the most important information is often at the start or end of a String. By default the missing middle is indicated by a single '~' character, but you can set it to any string, even the empty string with the ellipsis: parameter.



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/fat_core/string.rb', line 106

# Shorten self to at most max_size characters by removing characters from
# the middle rather than from either end, marking the gap with ellipsis.
#
# If self already fits it is returned unchanged. Otherwise, when squeeze is
# given, every character in that set (String#tr notation) is first deleted;
# if that alone makes the string fit, the squeezed string is returned with
# no ellipsis. Failing that, the leading and trailing portions are kept
# around the ellipsis, the leading portion getting the extra character when
# the number of kept characters is odd.
#
# When max_size leaves no room for any original characters, only the
# ellipsis, truncated to max_size, is returned.
#
# @param max_size [Integer] maximum length of the result
# @param ellipsis [String] text marking the removed middle
# @param squeeze [String, nil] characters to delete before cutting
# @return [String]
def gut(max_size, ellipsis: '~', squeeze: nil)
  return self if size <= max_size

  s = squeeze ? tr(squeeze, '') : self
  # Deleting the squeeze characters may be all that is needed.
  return s if s.size <= max_size

  budget = [max_size, 0].max
  chars_to_keep = budget - ellipsis.size
  return ellipsis[0, budget] if chars_to_keep <= 0

  start_chars = (chars_to_keep + 1) / 2
  end_chars = chars_to_keep - start_chars
  # Use (start, length) slices: a range such as s[-end_chars..-1] would
  # return the whole string when end_chars is 0.
  s[0, start_chars] + ellipsis + s[s.size - end_chars, end_chars]
end

#matches_with(matcher) ⇒ nil, String

Test whether self matches the matcher treating matcher as a case-insensitive regular expression if it is of the form '/.../' or as a string to #fuzzy_match against otherwise.

Parameters:

  • matcher (String)

    regexp if looks like /.../; #fuzzy_match pattern otherwise

Returns:

  • (nil)

    if no match

  • (String)

    the matched portion of self, with punctuation stripped in case of #fuzzy_match

See Also:



301
302
303
304
305
306
307
308
309
310
311
# File 'lib/fat_core/string.rb', line 301

# Match self against matcher and return the matched text.
#
# A matcher with a line that begins (after optional white space) with '/'
# is converted with #as_regexp and matched as a regular expression; any
# other matcher is handed to #fuzzy_match.
#
# @param matcher [String, nil] regexp if it looks like /.../; #fuzzy_match
#   pattern otherwise
# @return [nil] if matcher is nil or nothing matched
# @return [String] the matched portion of self
def matches_with(matcher)
  return nil if matcher.nil?
  return fuzzy_match(matcher) unless matcher.match?(%r{^\s*/})

  found = match(matcher.as_regexp)
  found && found[0]
end

#number?Boolean

Return whether self is convertible into a valid number.

Examples:

'6465321'.number?        #=> true
'6465321.271828'.number? #=> true
'76 trombones'           #=> false
'2.77e7'                 #=> true
'+12_534'                #=> true

Returns:

  • (Boolean)

    does self represent a valid number



358
359
360
361
362
363
# File 'lib/fat_core/string.rb', line 358

# Return whether Kernel#Float accepts self, i.e. whether self can be
# converted into a number.
#
# @return [Boolean] true when Float(self) succeeds, false when it raises
#   ArgumentError
def number?
  begin
    Float(self)
  rescue ArgumentError
    return false
  end
  true
end

#tex_quoteString

Return self with special TeX characters replaced with control-sequences that output the literal value of the special characters instead. It handles _, $, &, %, #, {, }, \, ^, ~, |, <, and >.

Examples:

'$100 & 20#'.tex_quote #=> '\\$100 \\& 20\\#'

Returns:



84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/fat_core/string.rb', line 84

# Return a copy of self in which characters special to TeX are replaced by
# control sequences that typeset the literal character.
#
# '\', '^', '~', '|', '<' and '>' become the corresponding \text...{}
# macros; '{', '}', '_', '$', '&', '%' and '#' are prefixed with a
# backslash.
#
# All replacements happen in a single pass over the original text, so the
# braces and backslashes that the replacements themselves introduce are
# never re-quoted, and no placeholder text is needed that could collide
# with the input.
#
# @return [String] the TeX-safe version of self
def tex_quote
  named_macros = {
    '\\' => '\textbackslash{}',
    '^' => '\textasciicircum{}',
    '~' => '\textasciitilde{}',
    '|' => '\textbar{}',
    '<' => '\textless{}',
    '>' => '\textgreater{}'
  }
  gsub(/[{}\\^~|<>_$&%#]/) do |ch|
    # Characters without a named macro are simply backslash-escaped.
    named_macros.fetch(ch) { "\\#{ch}" }
  end
end

#wrap(width = 70, hang = 0) ⇒ String

Return a string wrapped to width characters with lines following the first indented by hang characters.

Returns:



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/fat_core/string.rb', line 54

# Re-flow self into lines, indenting every line after the first by hang
# spaces.
#
# Words (split on white space) are appended one at a time, separated by a
# single space. A running count is kept for the current line, increased by
# one plus the length of each appended piece (including its leading space
# or indent); once that count reaches width, a newline is emitted after the
# word just added and the count restarts. Because the check happens after
# the word is appended, a line can be longer than width.
#
# @param width [Integer] running count at which the line is broken
# @param hang [Integer] number of spaces to indent lines after the first
# @return [String] the wrapped text with surrounding white space stripped
def wrap(width = 70, hang = 0)
  wrapped = ::String.new
  on_first_line = true
  at_line_start = true
  used = 0
  split(' ').each do |word|
    piece =
      if !at_line_start
        ::String.new(' ') + word
      elsif on_first_line
        word
      else
        (::String.new(' ') * hang) + word
      end
    wrapped << piece
    at_line_start = false
    used += 1 + piece.length
    if used >= width
      # Line is full: break it and start the next one.
      wrapped << "\n"
      used = 0
      on_first_line = false
      at_line_start = true
    end
  end
  wrapped.strip
end